基本
# -*- coding:utf-8 -*-
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
1、創建數據框
# Six consecutive daily timestamps starting 2013-01-01, used as the row index.
dates = pd.date_range(start='20130101', periods=6)
# 6x4 frame of standard-normal draws with columns A..D.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=dates,
    columns=['A', 'B', 'C', 'D'],
)
# Two small frames that share a repeated 'key' value; the merge examples use them.
left = pd.DataFrame({'key': ['foo'] * 2, 'lval': [1, 2]})
right = pd.DataFrame({'key': ['foo'] * 2, 'rval': [4, 5]})
2、查看數據
# Quick inspection of the frame. Every call returns a new object; df itself is not modified.
df.head(n=5)  # first five rows (5 is the default)
df.tail(n=3)  # last three rows
df.describe()  # per-column summary statistics
df.transpose()  # swap rows and columns
df.sort_index(axis='columns', ascending=False)  # order the column labels, descending
df.sort_values(by=['B'])  # order the rows by the values in column B
3、選擇
# df.loc[...]   label-based selection
# df.iloc[...]  integer-position-based selection
4、缺失值
# Add a column with two missing entries, then show three ways of dealing with them.
# None of the calls below modifies df; each returns a new frame.
df['E'] = [1, 2, np.nan] * 2
df.dropna(axis=0, how='any')  # drop every row that has at least one NaN
df.fillna(5)  # replace each NaN with 5
df.isnull()  # boolean mask, True where a value is missing
5、運用
# Apply np.cumsum to each column (axis=0 is the default direction for apply).
df.apply(np.cumsum, axis=0)
6、交并
# Rebuild df as a 10x4 random frame, cut it into three row slices and glue them back together.
df = pd.DataFrame(np.random.randn(10, 4))
pieces = [df.iloc[:3], df.iloc[3:7], df.iloc[7:]]
pd.concat(objs=pieces, axis='index')  # stack the slices row-wise
pd.concat(objs=[df, df], axis='columns')  # place two copies side by side
# Join left and right on their shared 'key' column, first via the module-level
# function and then via the equivalent DataFrame method.
pd.merge(left, right, on='key')
left.merge(right, on='key')
7、groupby
# Two label columns (A, B) and two random numeric columns (C, D) to aggregate.
df = pd.DataFrame(dict(
    A='foo bar foo bar foo bar foo foo'.split(),
    B='one one two three two two one three'.split(),
    C=np.random.randn(8),
    D=np.random.randn(8),
))
df.groupby(by='A').sum()  # totals per value of A
df.groupby(by=['A', 'B']).sum()  # totals per (A, B) pair
8、重組數據框
# Every (first, second) label pair, in the order bar/baz/foo/qux x one/two.
tuples = [
    (outer, inner)
    for outer in ('bar', 'baz', 'foo', 'qux')
    for inner in ('one', 'two')
]
index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])
df = pd.DataFrame(np.random.randn(8, 2), index=index, columns=['A', 'B'])
df2 = df.iloc[:4]  # keep only the first four rows
# stack() moves the column labels into a new innermost index level.
stacked = df2.stack()
# unstack() pivots one index level back out into columns.
stacked.unstack(level=-1)  # innermost level (the default)
stacked.unstack(level=1)  # the 'second' level
stacked.unstack(level=0)  # the 'first' level
Pivot Tables
9、時間序列
# Fixed-frequency DatetimeIndex examples.
# Lower-case 's' and 'h' are used below because the upper-case spellings are
# deprecated in pandas 2.2+.
rng = pd.date_range('1/1/2012', periods=100, freq='s')  # one stamp per second
rng2 = pd.date_range('3/6/2012 00:00', periods=5, freq='D')  # one stamp per day
# Month-end stamps. An offset object is passed instead of a string alias because the
# alias for month-end was renamed from 'M' to 'ME' in pandas 2.2.
rng3 = pd.date_range('1/1/2012', periods=5, freq=pd.offsets.MonthEnd())
ts = pd.Series(np.random.randn(len(rng3)), index=rng3)
ps = ts.to_period()  # month-end timestamps -> monthly periods
ps.to_timestamp()  # monthly periods -> timestamps at the start of each month
# Quarterly periods for a fiscal year that ends in November.
prng = pd.period_range('1990Q1', '2000Q4', freq='Q-NOV')
ts = pd.Series(np.random.randn(len(prng)), prng)
# Re-index each quarter to 09:00 on the first day of the month after the quarter ends:
# last month of the quarter -> +1 month -> first hour of that month -> +9 hours.
ts.index = (prng.asfreq('M', 'e') + 1).asfreq('h', 's') + 9
10、Categoricals 分類的使用
# Raw letter grades, converted to a categorical column and then given readable names.
df = pd.DataFrame({"id": [1, 2, 3, 4, 5, 6], "raw_grade": ['a', 'b', 'b', 'a', 'a', 'e']})
df["grade"] = df["raw_grade"].astype("category")
# Rename the categories a/b/e in place order. rename_categories returns a new Series;
# assigning to `.cat.categories` directly is rejected by pandas 2.x.
df["grade"] = df["grade"].cat.rename_categories(["very good", "good", "very bad"])
# Reorder the categories and add the ones that do not occur in the data.
df["grade"] = df["grade"].cat.set_categories(["very bad", "bad", "medium", "good", "very good"])
# Sorting follows the category order set above, not alphabetical order.
df.sort_values(by="grade")
# observed=False keeps the categories that have no rows, so they show up with size 0.
df.groupby("grade", observed=False).size()
11、畫圖
# A random walk over 1000 consecutive days starting 2000-01-01.
ts = pd.Series(np.random.randn(1000), index=pd.date_range('1/1/2000', periods=1000))
ts = ts.cumsum()
ts.plot()
# Four random walks on the same date index; DataFrame.plot draws one line per column.
df = pd.DataFrame(np.random.randn(1000, 4), index=ts.index, columns=['A', 'B', 'C', 'D'])
df = df.cumsum()
df.plot()
plt.legend(loc='best')