賦值為NaN
# Build a 3x4 integer DataFrame holding the values 0..11.
t = pd.DataFrame(np.arange(12).reshape(3, 4))
# Columns 2 and 3 are about to receive NaN, which int64 cannot represent.
# Cast them to float explicitly first: relying on the implicit upcast during
# .loc assignment emits a FutureWarning since pandas 2.1 and is slated to
# become an error.
t = t.astype({2: "float64", 3: "float64"})
# Assign NaN to rows 0-1 (label slices are inclusive) of columns 2 onward.
t.loc[:1, 2:] = np.nan
print(t)
>>>
   0  1     2     3
0  0  1   NaN   NaN
1  4  5   NaN   NaN
2  8  9  10.0  11.0
判斷是不是NaN
# Find which entries are NaN, approach 1: pd.isnull gives a boolean frame
# in which True marks a NaN entry.
nan_mask = pd.isnull(t)
print(nan_mask)
>>>
       0      1      2      3
0  False  False   True   True
1  False  False   True   True
2  False  False  False  False
# Find which entries are NaN, approach 2: pd.notnull gives the inverse
# boolean frame, so here False marks a NaN entry.
present_mask = pd.notnull(t)
print(present_mask)
>>>
      0     1      2      3
0  True  True  False  False
1  True  True  False  False
2  True  True   True   True
# Boolean mask over the column labelled 2: True where the value is not NaN.
column_two = t[2]
print(pd.notnull(column_two))
>>>
0    False
1    False
2     True
Name: 2, dtype: bool
# The per-column mask returned by pd.notnull is itself a Series.
row_mask = pd.notnull(t[2])
print(type(row_mask))
>>>
<class 'pandas.core.series.Series'>
# Use the boolean Series as a row selector: only rows whose value in the
# column labelled 2 is not NaN are kept.
keep_rows = pd.notnull(t[2])
print(t[keep_rows])
>>>
   0  1     2     3
2  8  9  10.0  11.0
刪除NaN
# axis=0 means rows are dropped: every row that contains a NaN is removed.
# This is equivalent to t.dropna(axis=0, how="any") - a single NaN in a row
# is enough for that row to be dropped.
rows_without_nan = t.dropna(axis=0)
print(rows_without_nan)
>>>
   0  1     2     3
2  8  9  10.0  11.0
# Drop the rows whose value in the column labelled 2 is NaN; this gives the
# same result as print(t[pd.notnull(t[2])]).
complete_in_column_two = t.dropna(axis=0, subset=[2])
print(complete_in_column_two)
# how="all" drops a row only when every value in it is NaN.
# Pass inplace=True as well to modify t itself instead of getting a new frame.
not_entirely_nan = t.dropna(axis=0, how="all")
print(not_entirely_nan)
>>>
   0  1     2     3
0  0  1   NaN   NaN
1  4  5   NaN   NaN
2  8  9  10.0  11.0
填充NaN
# Replace every NaN with the number 100.
filled_with_constant = t.fillna(100)
print(filled_with_constant)
>>>
   0  1      2      3
0  0  1  100.0  100.0
1  4  5  100.0  100.0
2  8  9   10.0   11.0
# Fill each NaN with the mean of the column it sits in. NaN entries are not
# counted when the mean is computed.
column_means = t.mean()
print(t.fillna(column_means))
>>>
   0  1     2     3
0  0  1  10.0  11.0
1  4  5  10.0  11.0
2  8  9  10.0  11.0
# Fill the NaN values of the column labelled 2 with 200.
column_two = t[2]
tmp = column_two.fillna(200)  # the result is a Series
print(tmp)
>>>
0    200.0
1    200.0
2     10.0
Name: 2, dtype: float64
# Write the filled Series back into t, replacing the column labelled 2.
t[2] = tmp
# Only column 2 was filled; column 3 still contains its NaN values.
print(t)
>>>
   0  1      2     3
0  0  1  200.0   NaN
1  4  5  200.0   NaN
2  8  9   10.0  11.0