import pandas as pd
# Small two-column frame of integers used by the averaging examples.
df = pd.DataFrame(data=dict(a=[10, 20, 30], b=[20, 30, 40]))
print(df)
'''
a b
0 10 20
1 20 30
2 30 40
'''
# The arithmetic inside avg_2 uses only `+` and `/`, so it works unchanged on
# scalars and on array-like operands that support those operators.
def avg_2(x, y):
    """Return the mean of ``x`` and ``y``.

    Args:
        x: First operand (any type supporting ``+`` and ``/``).
        y: Second operand.

    Returns:
        ``(x + y) / 2``; the result type follows the operands.
    """
    return (x + y) / 2
# Whole columns can be passed directly; the result is one average per row.
row_averages = avg_2(df['a'], df['b'])
print(row_averages)
'''
0 15.0
1 25.0
2 35.0
dtype: float64
'''
import numpy as np
# Not vectorized: the `if` needs a single True/False, so only scalar
# arguments work here.
def avg_2_mod(x, y):
    """Return the mean of ``x`` and ``y``, or NaN when ``x`` equals 20.

    Args:
        x: First scalar operand; the value 20 is treated as missing.
        y: Second scalar operand.

    Returns:
        ``np.nan`` if ``x == 20``, otherwise ``(x + y) / 2``.
    """
    if x == 20:
        # `np.NaN` was removed in NumPy 2.0; `np.nan` works on every version.
        return np.nan
    return (x + y) / 2
# Passing whole Series fails: `x == 20` yields a boolean Series, and `if`
# cannot reduce that to one truth value. The error is caught and printed so
# the rest of the script still runs.
try:
    print(avg_2_mod(df['a'], df['b']))
except ValueError as err:
    # ValueError: The truth value of a Series is ambiguous. Use a.empty,
    # a.bool(), a.item(), a.any() or a.all().
    print('ValueError:', err)
# Scalar inputs work as expected.
print(avg_2_mod(10, 20))
# 15.0
np.vectorize 函數
對于非向量化的函數，使用np.vectorize創建新函數實現向量化(適用于沒有該函數源代碼的情況)
# Wrap the scalar-only avg_2_mod so it is applied element by element.
avg_2_mod_vec = np.vectorize(pyfunc=avg_2_mod)
vectorized_result = avg_2_mod_vec(df['a'], df['b'])
print(vectorized_result)
# [15. nan 35.]
使用Python裝飾器把函數向量化，無需創建新函數(適用于函數是自定義的情況)
@np.vectorize
def avg_2_mod(x, y):
    """Return the mean of ``x`` and ``y``, or NaN when ``x`` equals 20.

    The ``np.vectorize`` decorator calls this scalar logic once per element,
    so array-like arguments are accepted and an ndarray is returned.

    Args:
        x: First operand; elements equal to 20 are treated as missing.
        y: Second operand.

    Returns:
        ``np.nan`` where ``x == 20``, otherwise ``(x + y) / 2``.
    """
    if x == 20:
        # `np.NaN` was removed in NumPy 2.0; `np.nan` works on every version.
        return np.nan
    return (x + y) / 2
# The decorated function now accepts whole columns directly.
decorated_result = avg_2_mod(df['a'], df['b'])
print(decorated_result)
# [15. nan 35.]
lambda 函數
當函數相當簡單時，可以直接寫在apply方法中
# Write a pattern that extracts the letters (the name) from each data row and assign the result to a new name column.
import regex
# Matches two whitespace-separated words anchored at the start of the string.
# A raw string keeps `\w` and `\s` as regex escapes instead of (invalid)
# Python string escapes.
p = regex.compile(r'\w+\s+\w+')


def get_name(s):
    """Return the two leading words found at the start of ``s``.

    Args:
        s: Text to search, e.g. ``'Tom Baker (1974-81)'``.

    Returns:
        The substring matched by the module-level pattern ``p``.

    Raises:
        AttributeError: If ``s`` does not begin with two words, because
            ``p.match`` then returns ``None``.
    """
    return p.match(s).group()
# Load the header-less file; columns are therefore labelled by position.
docs = pd.read_csv(filepath_or_buffer='data/doctors.csv', header=None)
# Run get_name on each entry of column 0 and store the results in a new column.
docs['name_func'] = docs[0].apply(func=get_name)
print(docs)
'''
0 name_func
0 William Hartnell (1963-66) William Hartnell
1 Patrick Troughton (1966-69) Patrick Troughton
2 Jon Pertwee (1970 74) Jon Pertwee
3 Tom Baker (1974-81) Tom Baker
4 Peter Davison (1982-84) Peter Davison
5 Colin Baker (1984-86) Colin Baker
6 Sylvester McCoy (1987-89) Sylvester McCoy
7 Paul McGann (1996) Paul McGann
8 Christopher Eccleston (2005) Christopher Eccleston
9 David Tennant (2005-10) David Tennant
10 Matt Smith (2010-13) Matt Smith
11 Peter Capaldi (2014-2017) Peter Capaldi
12 Jodie Whittaker (2017) Jodie Whittaker
'''
# For logic this small, an inline lambda passed to apply is enough;
# each value of column 0 is handed to the lambda as its argument.
docs['name_lamb'] = docs[0].apply(func=lambda entry: p.match(entry).group())
print(docs)
'''
0 name_func name_lamb
0 William Hartnell (1963-66) William Hartnell William Hartnell
1 Patrick Troughton (1966-69) Patrick Troughton Patrick Troughton
2 Jon Pertwee (1970 74) Jon Pertwee Jon Pertwee
3 Tom Baker (1974-81) Tom Baker Tom Baker
4 Peter Davison (1982-84) Peter Davison Peter Davison
5 Colin Baker (1984-86) Colin Baker Colin Baker
6 Sylvester McCoy (1987-89) Sylvester McCoy Sylvester McCoy
7 Paul McGann (1996) Paul McGann Paul McGann
8 Christopher Eccleston (2005) Christopher Eccleston Christopher Eccleston
9 David Tennant (2005-10) David Tennant David Tennant
10 Matt Smith (2010-13) Matt Smith Matt Smith
11 Peter Capaldi (2014-2017) Peter Capaldi Peter Capaldi
12 Jodie Whittaker (2017) Jodie Whittaker Jodie Whittaker
'''