Python CookBook總結(jié)
保留最后N 個(gè)元素
使用deque(maxlen=N) 構(gòu)造函數(shù)會(huì)新建一個(gè)固定大小的隊(duì)列。當(dāng)新的元素加入并且這個(gè)隊(duì)列已滿的時(shí)候葫笼,最老的元素會(huì)自動(dòng)被移除掉。
from collections import deque
def search(lines, pattern, history=5):
previous_lines = deque(maxlen=history)
for li in lines:
if pattern in li:
yield li, previous_lines
previous_lines.append(li)
怎樣從一個(gè)集合中獲得最大或者最小的N 個(gè)元素列表奕谭?
import heapq
nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
print(heapq.nlargest(3, nums)) # Prints [42, 37, 23]
print(heapq.nsmallest(3, nums)) # Prints [-4, 1, 2]
portfolio = [
{'name': 'IBM', 'shares': 100, 'price': 91.1},
{'name': 'AAPL', 'shares': 50, 'price': 543.22},
{'name': 'FB', 'shares': 200, 'price': 21.09},
{'name': 'HPQ', 'shares': 35, 'price': 31.75},
{'name': 'YHOO', 'shares': 45, 'price': 16.35},
{'name': 'ACME', 'shares': 75, 'price': 115.65}
]
cheap = heapq.nsmallest(3, portfolio, key=lambda s: s['price'])
expensive = heapq.nlargest(3, portfolio, key=lambda s: s['price'])
怎樣實(shí)現(xiàn)一個(gè)鍵對(duì)應(yīng)多個(gè)值的字典(也叫multidict )撵幽?
from collections import defaultdict
d = defaultdict(list)
d['a'].append(1)
d['a'].append(2)
d['b'].append(4)
d = defaultdict(set)
d['a'].add(1)
d['a'].add(2)
d['b'].add(4)
d = defaultdict(list)
for key, value in pairs:
d[key].append(value)
字典排序斯稳,你想創(chuàng)建一個(gè)字典,并且在迭代或序列化這個(gè)字典的時(shí)候能夠控制元素的順序庄萎。
from collections import OrderedDict
def ordered_dict():
d = OrderedDict()
d['foo'] = 1
d['bar'] = 2
d['spam'] = 3
d['grok'] = 4
# Outputs "foo 1", "bar 2", "spam 3", "grok 4"
for key in d:
print(key, d[key])
字典運(yùn)算踪少,怎樣在數(shù)據(jù)字典中執(zhí)行一些計(jì)算操作(比如求最小值、最大值糠涛、排序等等)援奢?
prices = {
'ACME': 45.23,
'AAPL': 612.78,
'IBM': 205.55,
'HPQ': 37.20,
'FB': 10.75
}
#為了對(duì)字典值執(zhí)行計(jì)算操作,通常需要使用zip() 函數(shù)先將鍵和值
#反轉(zhuǎn)過來忍捡。比如集漾,下面是查找最小和最大股票價(jià)格和股票值的代碼:
min_price = min(zip(prices.values(), prices.keys()))
# min_price is (10.75, 'FB')
max_price = max(zip(prices.values(), prices.keys()))
# max_price is (612.78, 'AAPL')
prices_sorted = sorted(zip(prices.values(), prices.keys()))
# prices_sorted is [(10.75, 'FB'), (37.2, 'HPQ'),
# (45.23, 'ACME'), (205.55, 'IBM'),
# (612.78, 'AAPL')]
#注意:zip() 函數(shù)創(chuàng)建的是一個(gè)只能訪問一次的迭代器
查找兩字典的相同點(diǎn)
#python2.7需將列表加上set()轉(zhuǎn)為集合
a = {'x' : 1,'y' : 2,'z' : 3}
b = {'w' : 10,'x' : 11,'y' : 2}
# Find keys in common
a.keys() & b.keys() # { 'x', 'y' }
# Find keys in a that are not in b
a.keys() - b.keys() # { 'z' }
# Find (key,value) pairs in common
a.items() & b.items() # { ('y', 2) }
#這些操作也可以用于修改或者過濾字典元素切黔。比如,假如你
#想以現(xiàn)有字典構(gòu)造一個(gè)排除幾個(gè)指定鍵的新字典帆竹。下面利用字典推導(dǎo)來實(shí)現(xiàn)這樣的需求:
# Make a new dictionary with certain keys removed
c = {key:a[key] for key in a.keys() - {'z', 'w'}}
# c is {'x': 1, 'y': 2}
命名切片
#內(nèi)置的slice() 函數(shù)創(chuàng)建了一個(gè)切片對(duì)象
>>> a = slice(5, 50, 2)
>>> a.start
5
>>> a.stop
50
>>> a.step
2
record = '....................100 .......513.25 ..........'
SHARES = slice(20, 23)
PRICE = slice(31, 37)
cost = int(record[SHARES]) * float(record[PRICE])
#你還能通過調(diào)用切片的indices(size) 方法將它映射到一個(gè)確定大小的序
#列上绕娘,這個(gè)方法返回一個(gè)三元組(start, stop, step) ,所有值都會(huì)
#被合適的縮小以滿足邊界限制栽连,從而使用的時(shí)候避免出現(xiàn)IndexError 異常
>>> s = 'HelloWorld'
>>> a.indices(len(s))
(5, 10, 2)
>>> for i in range(*a.indices(len(s))):
... print(s[i])
序列中出現(xiàn)次數(shù)最多的元素
words = [
'look', 'into', 'my', 'eyes', 'look', 'into', 'my', 'eyes',
'the', 'eyes', 'the', 'eyes', 'the', 'eyes', 'not', 'around', 'the',
'eyes', "don't", 'look', 'around', 'the', 'eyes', 'look', 'into',
'my', 'eyes', "you're", 'under'
]
from collections import Counter
word_counts = Counter(words)
# 出現(xiàn)頻率最高的3 個(gè)單詞
top_three = word_counts.most_common(3)
print(top_three)
# Outputs [('eyes', 8), ('the', 5), ('look', 4)]
#Counter 實(shí)例一個(gè)鮮為人知的特性是它們可以很容易的跟數(shù)學(xué)運(yùn)算操作相結(jié)合险领。比如:
>>> a = Counter(words)
>>> b = Counter(morewords)
>>> a
Counter({'eyes': 8, 'the': 5, 'look': 4, 'into': 3, 'my': 3, 'around': 2,
"you're": 1, "don't": 1, 'under': 1, 'not': 1})
>>> b
Counter({'eyes': 1, 'looking': 1, 'are': 1, 'in': 1, 'not': 1, 'you': 1,
'my': 1, 'why': 1})
>>> # Combine counts
>>> c = a + b
>>> c
Counter({'eyes': 9, 'the': 5, 'look': 4, 'my': 4, 'into': 3, 'not': 2,
'around': 2, "you're": 1, "don't": 1, 'in': 1, 'why': 1,
'looking': 1, 'are': 1, 'under': 1, 'you': 1})
>>> # Subtract counts
>>> d = a - b
>>> d
Counter({'eyes': 7, 'the': 5, 'look': 4, 'into': 3, 'my': 2, 'around': 2,
"you're": 1, "don't": 1, 'under': 1})
>>>
通過某個(gè)關(guān)鍵字排序一個(gè)字典列表
rows = [
{'fname': 'Brian', 'lname': 'Jones', 'uid': 1003},
{'fname': 'David', 'lname': 'Beazley', 'uid': 1002},
{'fname': 'John', 'lname': 'Cleese', 'uid': 1001},
{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}
]
from operator import itemgetter
rows_by_fname = sorted(rows, key=itemgetter('fname'))
rows_by_uid = sorted(rows, key=itemgetter('uid'))
print(rows_by_fname)
print(rows_by_uid)
#itemgetter() 有時(shí)候也可以用lambda 表達(dá)式代替
rows_by_fname = sorted(rows, key=lambda r: r['fname'])
rows_by_lfname = sorted(rows, key=lambda r: (r['lname'],r['fname']))
>>> min(rows, key=itemgetter('uid'))
{'fname': 'John', 'lname': 'Cleese', 'uid': 1001}
>>> max(rows, key=itemgetter('uid'))
{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}
>>>
通過某個(gè)字段將記錄分組
你有一個(gè)字典或者實(shí)例的序列,然后你想根據(jù)某個(gè)特定的字段比如date 來分組迭代訪問秒紧。
#itertools.groupby() 函數(shù)對(duì)于這樣的數(shù)據(jù)分組操作非常實(shí)用
rows = [
{'address': '5412 N CLARK', 'date': '07/01/2012'},
{'address': '5148 N CLARK', 'date': '07/04/2012'},
{'address': '5800 E 58TH', 'date': '07/02/2012'},
{'address': '2122 N CLARK', 'date': '07/03/2012'},
{'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'},
{'address': '1060 W ADDISON', 'date': '07/02/2012'},
{'address': '4801 N BROADWAY', 'date': '07/01/2012'},
{'address': '1039 W GRANVILLE', 'date': '07/04/2012'},
]
from operator import itemgetter
from itertools import groupby
# Sort by the desired field first
rows.sort(key=itemgetter('date'))
# Iterate in groups
for date, items in groupby(rows, key=itemgetter('date')):
print(date)
for i in items:
print(' ', i)
運(yùn)行結(jié)果:
07/01/2012
{'date': '07/01/2012', 'address': '5412 N CLARK'}
{'date': '07/01/2012', 'address': '4801 N BROADWAY'}
07/02/2012
{'date': '07/02/2012', 'address': '5800 E 58TH'}
{'date': '07/02/2012', 'address': '5645 N RAVENSWOOD'}
{'date': '07/02/2012', 'address': '1060 W ADDISON'}
07/03/2012
{'date': '07/03/2012', 'address': '2122 N CLARK'}
07/04/2012
{'date': '07/04/2012', 'address': '5148 N CLARK'}
{'date': '07/04/2012', 'address': '1039 W GRANVILLE'}
過濾序列元素
#最簡(jiǎn)單的過濾序列元素的方法就是使用列表推導(dǎo)绢陌。比如:
>>> mylist = [1, 4, -5, 10, -7, 2, 3, -1]
>>> [n for n in mylist if n > 0]
[1, 4, 10, 2, 3]
>>> [n for n in mylist if n < 0]
[-5, -7, -1]
>>>
#使用列表推導(dǎo)的一個(gè)潛在缺陷就是如果輸入非常大的時(shí)候會(huì)產(chǎn)生一個(gè)非常大的結(jié)果集,占用大量?jī)?nèi)存熔恢。如果你對(duì)內(nèi)存比較敏感脐湾,那么你可以使生成器表達(dá)式迭代產(chǎn)生過濾的元素。比如:
>>> pos = (n for n in mylist if n > 0)
>>> pos
<generator object <genexpr> at 0x1006a0eb0>
>>> for x in pos:
... print(x)
...
14
10
23
>>>
#filter函數(shù)
values = ['1', '2', '-3', '-', '4', 'N/A', '5']
def is_int(val):
try:
x = int(val)
return True
except ValueError:
return False
ivals = list(filter(is_int, values))
print(ivals)
# Outputs ['1', '2', '-3', '4', '5']
從字典中提取子集
prices = {
'ACME': 45.23,
'AAPL': 612.78,
'IBM': 205.55,
'HPQ': 37.20,
'FB': 10.75
}#
Make a dictionary of all prices over 200
p1 = {key: value for key, value in prices.items() if value > 200}
# Make a dictionary of tech stocks
tech_names = {'AAPL', 'IBM', 'HPQ', 'MSFT'}
p2 = {key: value for key, value in prices.items() if key in tech_names}
映射名稱到序列元素
>>> from collections import namedtuple
>>> Subscriber = namedtuple('Subscriber', ['addr', 'joined'])
>>> sub = Subscriber('jonesy@example.com', '2012-10-19')
>>> sub
Subscriber(addr='jonesy@example.com', joined='2012-10-19')
>>> sub.addr
'jonesy@example.com'
>>> sub.joined
'2012-10-19'
>>>
轉(zhuǎn)換并同時(shí)計(jì)算數(shù)據(jù),一個(gè)非常優(yōu)雅的方式去結(jié)合數(shù)據(jù)計(jì)算與轉(zhuǎn)換就是使用一個(gè)生成器表達(dá)式參數(shù)
nums = [1, 2, 3, 4, 5]
s = sum(x * x for x in nums)
# Determine if any .py files exist in a directory
import os
files = os.listdir('dirname')
if any(name.endswith('.py') for name in files):
print('There be python!')
else:
print('Sorry, no python.')
# Output a tuple as CSV
s = ('ACME', 50, 123.45)
print(','.join(str(x) for x in s))
# Data reduction across fields of a data structure
portfolio = [
{'name':'GOOG', 'shares': 50},
{'name':'YHOO', 'shares': 75},
{'name':'AOL', 'shares': 20},
{'name':'SCOX', 'shares': 65}
]
min_shares = min(s['shares'] for s in portfolio)
合并多個(gè)字典或映射
a = {'x': 1, 'z': 3 }
b = {'y': 2, 'z': 4 }
from collections import ChainMap
c = ChainMap(a,b)
print(c['x']) # Outputs 1 (from a)
print(c['y']) # Outputs 2 (from b)
print(c['z']) # Outputs 3 (from a)