pandas庫安裝:
pip3 install pandas
Collecting pandas
Downloading https://files.pythonhosted.org/packages/78/78/50ef81a903eccc4e90e278a143c9a0530f05199f6221d2e1b21025852982/pandas-0.23.4-cp36-cp36m-macosx_10_6_intel.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl (14.6MB)
100% |████████████████████████████████| 14.7MB 56kB/s
Requirement already satisfied: numpy>=1.9.0 in /Users/.virtualenvs/py3env/lib/python3.6/site-packages (from pandas) (1.15.4)
Collecting pytz>=2011k (from pandas)
Retrying (Retry(total=4, connect=None, read=None, redirect=None, status=None)) after connection broken by 'ReadTimeoutError("HTTPSConnectionPool(host='pypi.org', port=443): Read timed out. (read timeout=15)",)': /simple/pytz/
Downloading https://files.pythonhosted.org/packages/61/28/1d3920e4d1d50b19bc5d24398a7cd85cc7b9a75a490570d5a30c57622d34/pytz-2018.9-py2.py3-none-any.whl (510kB)
100% |████████████████████████████████| 512kB 43kB/s
Collecting python-dateutil>=2.5.0 (from pandas)
Downloading https://files.pythonhosted.org/packages/74/68/d87d9b36af36f44254a8d512cbfc48369103a3b9e474be9bdfe536abfc45/python_dateutil-2.7.5-py2.py3-none-any.whl (225kB)
100% |████████████████████████████████| 235kB 26kB/s
Requirement already satisfied: six>=1.5 in /Users/.virtualenvs/py3env/lib/python3.6/site-packages (from python-dateutil>=2.5.0->pandas) (1.11.0)
Installing collected packages: pytz, python-dateutil, pandas
Successfully installed pandas-0.23.4 python-dateutil-2.7.5 pytz-2018.9
pandas的Series一維數組應用方法
from pandas import Series, DataFrame
import pandas as pd
obj = Series([4, 5, 6, -7])#pandas一維數組定義
print(obj)
#輸出結果如下,是一組帶索引的數據
0 4
1 5
2 6
3 -7
dtype: int64
print( obj.index)
#輸出結果RangeIndex(start=0, stop=4, step=1)
print ( obj.values)
#輸出結果[ 4 5 6 -7]
字典中的key經哈希運算定位,必須唯一且不可變;如果出現相同的key,后寫入的value會覆蓋先前的value;列表(['a'])和集合({'b'})不能作為字典key,因為它們的內容可變(不可哈希)。
obj2 = Series([4, 7, -5, 3], index=['d', 'b', 'c', 'a'])# 定義帶索引字典
print(obj2)
#輸出結果如下
d 4
b 7
c -5
a 3
dtype: int64
obj2['c'] = 6# 可以直接給對應(yīng)索引給值
print(obj2)
# 輸出結果如下
d 4
b 7
c 6
a 3
dtype: int64
print ('f' in obj2)#可查找是否存在此索引
#輸出結果False
sdata = {
'beijing': 35000,
'shanghai': 71000,
'guangzhou': 16000,
'shenzhen': 5000}
obj3 = Series(sdata)#把字典轉換為一維數組
print( obj3)
#輸出結果如下
beijing 35000
shanghai 71000
guangzhou 16000
shenzhen 5000
dtype: int64
obj3.index = ['bj', 'gz', 'sh', 'sz']# 修改索引(按位置逐一替換,不依名稱對應;此處'gz'實際標到原shanghai的值,'sh'標到原guangzhou的值)
print( obj3)
# 輸出結果如下
bj 35000
gz 71000
sh 16000
sz 5000
dtype: int64
pandas的DataFrame多維數組應用方法
from pandas import Series, DataFrame
#字典中添加列表方式定義多維數據表格
data = {'city': ['shanghai', 'shanghai', 'shanghai', 'beijing', 'beijing'],
'year': [2016, 2017, 2018, 2017, 2018],
'pop': [1.5, 1.7, 3.6, 2.4, 2.9]}
frame = DataFrame(data)
print(frame)
#輸出結果如下
city year pop
0 shanghai 2016 1.5
1 shanghai 2017 1.7
2 shanghai 2018 3.6
3 beijing 2017 2.4
4 beijing 2018 2.9
frame2 = DataFrame(data, columns=['year', 'city', 'pop'])#自定義key值排列順序
print(frame2)
#輸出結果如下
year city pop
0 2016 shanghai 1.5
1 2017 shanghai 1.7
2 2018 shanghai 3.6
3 2017 beijing 2.4
4 2018 beijing 2.9
print(frame2['city'])#提取列值
#輸出結果如下
0 shanghai
1 shanghai
2 shanghai
3 beijing
4 beijing
Name: city, dtype: object
print(frame2.year)#提取列值另一種方法
#輸出結果如下
0 2016
1 2017
2 2018
3 2017
4 2018
Name: year, dtype: int64
frame2['new'] = 100#新增列
print(frame2)
#輸出結果如下
year city pop new
0 2016 shanghai 1.5 100
1 2017 shanghai 1.7 100
2 2018 shanghai 3.6 100
3 2017 beijing 2.4 100
4 2018 beijing 2.9 100
frame2['cap'] = frame2.city == 'beijing'#帶判斷新增列
print( frame2)
#輸出結果如下
year city pop new cap
0 2016 shanghai 1.5 100 False
1 2017 shanghai 1.7 100 False
2 2018 shanghai 3.6 100 False
3 2017 beijing 2.4 100 True
4 2018 beijing 2.9 100 True
#另一種字典中嵌套方式定義多維數據表格
pop = {'beijing': {2008: 1.5, 2009: 2.0},
'shanghai': {2008: 2.0, 2009: 3.6}
}
frame3 = DataFrame(pop)
print(frame3)
#輸出結果如下
beijing shanghai
2008 1.5 2.0
2009 2.0 3.6
print(frame3.T)#列行互換
#輸出結果如下
2008 2009
beijing 1.5 2.0
shanghai 2.0 3.6
obj4 = Series([4.5, 7.2, -5.3, 3.6], index=['b', 'd', 'c', 'a'])
obj5 = obj4.reindex(['a', 'b', 'c', 'd', 'e'], fill_value=0)#調整索引順序,并給新增的索引項填入默認值0
print(obj5)
#輸出結果如下
a 3.6
b 4.5
c -5.3
d 7.2
e 0.0
dtype: float64
obj6 = Series(['blue', 'purple', 'yellow'], index=[0, 2, 4])
print( obj6.reindex(range(6),method='bfill'))#給空值添加默認值,‘ffill’取前一個有效值向后補充,‘bfill’取后一個有效值向前補充
#輸出結果如下
0 blue
1 purple
2 purple
3 yellow
4 yellow
5 NaN
dtype: object
from numpy import nan as NA #應用空值
data = Series([1, NA, 2])#給空值
print(data.dropna())#刪除空值
#輸出結果如下
0 1.0
2 2.0
dtype: float64
data2 = DataFrame([[1., 6.5, 3], [1., NA, NA], [NA, NA, NA]
])
data2[4] = NA#給第4列給空值
print(data2)
#輸出結果如下
0 1 2 4
0 1.0 6.5 3.0 NaN
1 1.0 NaN NaN NaN
2 NaN NaN NaN NaN
print(data2.dropna(how='all'))#刪除整行為空的行
#輸出結果如下
0 1 2 4
0 1.0 6.5 3.0 NaN
1 1.0 NaN NaN NaN
print(data2.dropna(axis=1, how='all'))#刪除整列為空的列
#輸出結果如下
0 1 2
0 1.0 6.5 3.0
1 1.0 NaN NaN
2 NaN NaN NaN
data2.fillna(0)
print(data2.fillna(0, inplace=True))#填充缺失值為0,inplace=True表示將結果直接更新到data2
#輸出結果None
print(data2)#更新結(jié)果后輸出被修改
#輸出結果如下
0 1 2 4
0 1.0 6.5 3.0 0.0
1 1.0 0.0 0.0 0.0
2 0.0 0.0 0.0 0.0
層次化索引
import numpy as np
#建立兩層索引
data3 = Series(np.random.randn(10),
index=[['a', 'a', 'a', 'b', 'b', 'b', 'c', 'c', 'd', 'd'],
[1, 2, 3, 1, 2, 3, 1, 2, 2, 3]])
print (data3)
#輸出結果如下(np.random.randn產生隨機數,每次執行的數值都不同,故后續各段輸出的數值與此處不一致)
a 1 -0.606962
2 -0.793390
3 0.515835
b 1 -0.269941
2 -0.613685
3 -0.078791
c 1 1.622026
2 -0.342152
d 2 -0.331359
3 0.719142
dtype: float64
print ( data3['b':'c'])#取索引對應值
#輸出結果如下
b 1 0.024265
2 0.140279
3 1.465150
c 1 -1.049863
2 1.673730
dtype: float64
print(data3.unstack())#一維層次化索引轉換為二維dataframe數組
#輸出結果如下
1 2 3
a 0.052463 -0.868392 0.387425
b 0.041187 0.116177 -0.395136
c 0.585591 -0.465362 NaN
d NaN 0.586438 -0.140192
print(data3.unstack().stack())#還原一維層次化索引
#輸出結果如下
a 1 0.052463
2 -0.868392
3 0.387425
b 1 0.041187
2 0.116177
3 -0.395136
c 1 0.585591
2 -0.465362
d 2 0.586438
3 -0.140192
dtype: float64