處理多只股票
- 創建空的DataFrame
#Build a DataFrame in pandas
import pandas as pd
def test_run():
    """Build an empty DataFrame indexed by a short range of calendar dates.

    Returns:
        A DataFrame with no columns whose index is every calendar day from
        2010-01-22 through 2010-01-26 inclusive.
    """
    start_date = '2010-01-22'
    end_date = '2010-01-26'
    # date_range includes both endpoints and every calendar day in between.
    dates = pd.date_range(start_date, end_date)
    df1 = pd.DataFrame(index=dates)
    # Return the frame so the demo has an observable result instead of
    # discarding what it just built.
    return df1


if __name__ == "__main__":
    test_run()
- 連結SPY數據
#SPY(標普500指數) 可以用來參考是不是交易日
import pandas as pd
def test_run():
    """Join SPY adjusted-close prices onto a date-indexed frame.

    Reads the 'Date' and 'Adj Close' columns from "data/SPY.csv", renames
    the price column to 'SPY', left-joins it onto an empty frame indexed by
    2010-01-22 .. 2010-01-26, and drops every row that contains NaN.

    Returns:
        The joined DataFrame with a single 'SPY' column.
    """
    # Define date range
    start_date = '2010-01-22'
    end_date = '2010-01-26'
    dates = pd.date_range(start_date, end_date)

    # Create an empty dataframe indexed by those dates
    df1 = pd.DataFrame(index=dates)

    # Read SPY data into temporary DataFrame
    dfSPY = pd.read_csv("data/SPY.csv",
                        index_col="Date",
                        parse_dates=True,
                        usecols=['Date', 'Adj Close'],
                        na_values=['nan'])

    # Rename 'Adj Close' column to 'SPY' to prevent clash
    dfSPY = dfSPY.rename(columns={'Adj Close': 'SPY'})

    # Join the two DataFrames (default how='left'), then drop NaN rows,
    # i.e. the dates for which the CSV supplied no SPY value.
    df1 = df1.join(dfSPY)
    df1 = df1.dropna()
    # An alternative is df1.join(dfSPY, how='inner'), which keeps only the
    # dates present in both frames.
    return df1
- 讀取更多股票數據
# Read in more stocks and join each one onto df1.
# NOTE(review): df1 is not defined in this snippet; presumably it is the
# SPY-joined frame built in the previous step -- confirm.
symbols = ['GOOG', 'IBM', 'GLD']
for symbol in symbols:
    df_temp = pd.read_csv("data/{}.csv".format(symbol),
                          index_col='Date',
                          parse_dates=True,
                          usecols=['Date', 'Adj Close'],
                          na_values=['nan'])
    # Rename to prevent clash: every CSV calls its price column 'Adj Close'.
    df_temp = df_temp.rename(columns={'Adj Close': symbol})
    # Use the default how='left', which keeps every row already in df1.
    df1 = df1.join(df_temp)
- 讀取數據的實用函數
import os
import pandas as pd
def symbol_to_path(symbol, base_dir="data"):
    """Return the CSV file path for a ticker symbol.

    Args:
        symbol: Ticker symbol; it is converted with str() before formatting.
        base_dir: Directory that holds the CSV files. Defaults to "data".

    Returns:
        The path "<base_dir>/<symbol>.csv", joined with os.path.join.
    """
    return os.path.join(base_dir, "{}.csv".format(str(symbol)))
def get_data(symbols, dates):
    """Read adjusted-close prices for the given symbols from CSV files.

    Each symbol's file is located with symbol_to_path(); only its 'Date'
    and 'Adj Close' columns are read, and the price column is renamed to
    the symbol before being left-joined onto a frame indexed by `dates`.

    Args:
        symbols: Ticker symbols to load. 'SPY' is prepended when absent;
            the caller's list is left unmodified.
        dates: Index for the resulting frame (e.g. a pandas date range).

    Returns:
        A DataFrame indexed by `dates` with one column per symbol, with the
        rows where 'SPY' is NaN removed.
    """
    df = pd.DataFrame(index=dates)
    if 'SPY' not in symbols:
        # Add SPY for reference if absent. Build a new list rather than
        # insert() so the caller's list is not mutated as a side effect.
        symbols = ['SPY'] + list(symbols)
    for symbol in symbols:
        df_temp = pd.read_csv(symbol_to_path(symbol),
                              index_col='Date',
                              parse_dates=True,
                              usecols=['Date', 'Adj Close'],
                              na_values=['nan'])
        # Rename so the per-symbol price columns do not clash on join.
        df_temp = df_temp.rename(columns={'Adj Close': symbol})
        df = df.join(df_temp)
        if symbol == 'SPY':
            # Drop dates SPY did not trade.
            df = df.dropna(subset=["SPY"])
    return df
#More slicing
def test_run():
    """Load 2010 prices for a few symbols and print the rows for January."""
    # Define a date range
    dates = pd.date_range('2010-01-01', '2010-12-31')

    # Choose stock symbols to read; SPY will be added in get_data()
    symbols = ['GOOG', 'IBM', 'GLD']

    # Get stock data
    df = get_data(symbols, dates)

    # Slice by row range (dates) using the label-based .loc selector; both
    # endpoints are included, so this prints the month of January.
    # (.ix was removed in pandas 1.0, and print is called as a function so
    # the line is valid on Python 3.)
    print(df.loc['2010-01-01':'2010-01-31'])
- 繪制多只股票的圖形
def plot_data(df, title='Stock prices'):
    """Plot stock prices with axis labels and show the figure.

    Args:
        df: DataFrame to plot via DataFrame.plot().
        title: Title passed to the plot. Defaults to 'Stock prices'.
    """
    # NOTE(review): `plt` is not defined in this block; presumably
    # matplotlib.pyplot is imported as plt elsewhere in the file -- confirm.
    # DataFrame.plot returns the axes object, which is used to set the labels.
    ax = df.plot(title=title)
    ax.set_xlabel("Date")
    ax.set_ylabel("Price")
    # The font size can be changed by passing fontsize to df.plot, e.g.
    # df.plot(title=title, fontsize=2).
    plt.show()
movement(變動):股票的相對漲跌
標準化 Normalizing
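The best way to normalize price data so that all prices start at 1.0 is to divide every row by the first row:
df1 = df1 / df1.ix[0]
or, equivalently: df1 = df1 / df1.ix[0, :]
(`.ix` was removed in pandas 1.0; on current versions use df1.iloc[0] instead.)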
def normalize_data(df):
    """Normalize prices so that every column starts at 1.0.

    Args:
        df: DataFrame of prices, one column per symbol.

    Returns:
        A new DataFrame in which each column has been divided by its value
        in the first row. An empty frame is returned as an unchanged copy.
    """
    if df.empty:
        # There is no first row to divide by.
        return df.copy()
    # iloc[0] selects the first row by position, whatever the index type.
    return df / df.iloc[0]
- 切片和繪制兩只股票的圖形
def plot_selected(df, columns, start_index, end_index):
    """Plot the chosen columns over a range of index labels.

    Args:
        df: DataFrame to slice.
        columns: Column labels to keep.
        start_index: First index label of the row range (inclusive).
        end_index: Last index label of the row range (inclusive).
    """
    # .loc slices rows by label and includes both endpoints; it replaces
    # the .ix indexer, which was removed in pandas 1.0.
    plot_data(df.loc[start_index:end_index, columns],
              title="Selected data")
pandas可處理帶有大量統計函數的ndarray