文章作者:Tyan
博客:noahsnail.com | CSDN | 簡書
# pandas是一個用來進行數據分析的基于numpy的庫
import pandas as pd
import numpy as np
# A Series is a one-dimensional data structure.
# Build a Series from a plain list. No index is given, so the elements are
# labelled 0..4; mixing ints, a str and a float gives dtype object.
series1 = pd.Series([3, 5, 'test', -5, 0.3])
# The parenthesised single-argument form prints the same on Python 2 and 3,
# whereas the bare `print x` statement is a SyntaxError on Python 3.
print(series1)
0 3
1 5
2 test
3 -5
4 0.3
dtype: object
# Build a Series from a list plus an explicit index, so each value is
# addressed by its own label ('A'..'E') instead of a position.
series2 = pd.Series([3, 5, 'test', -5, 0.3], index=['A', 'B', 'C', 'D', 'E'])
# Parenthesised print: valid on both Python 2 and Python 3.
print(series2)
A 3
B 5
C test
D -5
E 0.3
dtype: object
# Build a Series from a dict: the keys become the index labels and the
# values become the data.
# NOTE: the 'Tecent' spelling is kept as-is because later lookups use this key.
companies = {'Baidu': 400, 'Alibaba': 500, 'Tecent': 600, 'Jingdong': 300}
series3 = pd.Series(companies)
# Parenthesised print: valid on both Python 2 and Python 3.
print(series3)
Alibaba 500
Baidu 400
Jingdong 300
Tecent 600
dtype: int64
# Selecting data from a Series.
# A single index label returns the stored value itself.
print(series3['Baidu'])
# A list of labels returns a new Series holding just those entries.
print(series3[['Baidu', 'Tecent']])
400
Baidu 400
Tecent 600
dtype: int64
# Select by condition: keep only the entries whose value is below 500.
print(series3[series3 < 500])
Baidu 400
Jingdong 300
dtype: int64
# How conditional selection works: the comparison produces a boolean Series
# with the same index, and indexing with that mask keeps the True entries.
temp = series3 < 500
# Print the mask itself, then the entries it selects.
print(temp)
print(series3[temp])
Alibaba False
Baidu True
Jingdong True
Tecent False
dtype: bool
Baidu 400
Jingdong 300
dtype: int64
# Assign a new value to a single element through its index label.
# The Python 2 statement `print a, b` writes str(a), one space, then str(b).
# Formatting both parts into one string gives the same text from a
# single-argument print, which works on both Python 2 and Python 3.
print('%s %s' % ('old value: ', series3['Baidu']))
series3['Baidu'] = 450
print('%s %s' % ('new value: ', series3['Baidu']))
old value: 400
new value: 450
# Assign by condition: every entry currently below 500 is raised to 500.
# The Series is printed before and after so the change is visible.
print('old series: ')
print(series3)
# The boolean mask selects the entries to overwrite, in place.
series3[series3 < 500] = 500
print('new series: ')
print(series3)
old series:
Alibaba 500
Baidu 400
Jingdong 300
Tecent 600
dtype: int64
new series:
Alibaba 500
Baidu 500
Jingdong 500
Tecent 600
dtype: int64
# Arithmetic on a Series is applied element by element and keeps the index.
print('Division: ')
print(series3 / 2)
print('Square: ')
# The ** operator and numpy's square ufunc give the same squared Series.
print(series3 ** 2)
print(np.square(series3))
Division:
Alibaba 250.0
Baidu 250.0
Jingdong 250.0
Tecent 300.0
dtype: float64
Square:
Alibaba 250000
Baidu 250000
Jingdong 250000
Tecent 360000
dtype: int64
Alibaba 250000
Baidu 250000
Jingdong 250000
Tecent 360000
dtype: int64
# Define a second Series: headcount per company. It has one label
# ('Netease') that series3 does not have.
people = {'Baidu': 50000, 'Alibaba': 45000, 'Tecent': 60000, 'Jingdong': 80000, 'Netease': 30000}
series4 = pd.Series(people)
# Parenthesised print: valid on both Python 2 and Python 3.
print(series4)
Alibaba 45000
Baidu 50000
Jingdong 80000
Netease 30000
Tecent 60000
dtype: int64
# Adding two Series matches values by index label. series3 has no 'Netease'
# entry, so the result for that label is NaN.
print(series3 + series4)
Alibaba 45500.0
Baidu 50500.0
Jingdong 80500.0
Netease NaN
Tecent 60600.0
dtype: float64
# Check whether an entry is missing: `in` on a Series tests the index labels,
# so a label that was never added gives False.
print('Netease' in series3)
print('Baidu' in series3)
False
True
# Find the elements that are null (NaN) and the ones that are not.
result = series3 + series4
# Boolean masks: True where a value is present / True where it is missing.
print(result.notnull())
print(result.isnull())
# Keep only the missing entries.
print(result[result.isnull()])
# Keep only the present entries; notnull() gives the inverted mask directly,
# with no need to compare against True.
print(result[result.notnull()])
Alibaba True
Baidu True
Jingdong True
Netease False
Tecent True
dtype: bool
Alibaba False
Baidu False
Jingdong False
Netease True
Tecent False
dtype: bool
Netease NaN
dtype: float64
Alibaba 45500.0
Baidu 50500.0
Jingdong 80500.0
Tecent 60600.0
dtype: float64