利用Python進行分析-Chapter 3
Python內置數據類型結構:Tuple、List、dict、set
-
Tuple(元組)
-
元組的初始化
tup = 4, 5, 6 nested_tup = (4, 5, 6), (7, 8)
-
List、string轉化為Tuple
tuple([4, 0, 2]) tup = tuple('string')
-
支持索引訪問
tup[0]
-
一旦被定義后就無法修改其中元素
tup = tuple(['foo', [1, 2], True]) tup[2] = False (會報錯)
-
如果其中某個元素是一個列表,那么其中那個列表的元素可以修改
tup[1].append(3) -> ('foo', [1, 2, 3], True)
-
Tuple支持連接
(4, None, 'foo') + (6, 0) + ('bar',) -> (4, None, 'foo', 6, 0, 'bar')
-
Tuple支持乘法操作,其實際效果是重復相加
('foo', 'bar') * 4 -> ('foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'bar')
-
Tuple支持解壓操作
tup = (4, 5, 6) a, b, c = tup #a, b, c就對應Tuple的三個元素 b, a = a, b #所以Tuple下值的交換是很方便的,如此a,b即完成了值的交換 seq = [(1, 2, 3), (4, 5, 6), (7, 8, 9)] for a, b, c in seq: print('a={0}, b={1}, c={2}'.format(a, b, c)) #a,b,c就對應列表中某一元素的三個值
-
Tuple支持高級解壓縮
values = 1, 2, 3, 4, 5 a, b, *rest = values # rest=[3,4,5] 意味著可以支持取出任意長度的后續元素
-
Tuple不支持修改,因而支持眾多的查詢操作
a = (1, 2, 2, 2, 3, 4, 2) a.count(2) = 4
-
-
List(列表)
-
List定義
a_list = [2, 3, 7, None]
-
tuple轉化為list
tup = ('foo', 'bar', 'baz') b_list = list(tup)
-
list支持修改元素
b_list[1] = 'peekaboo'
-
list可以取出迭代器元素
gen = range(10) list(gen) = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
-
增刪元素
b_list.append('dwarf') #在末尾添加 b_list.insert(1, 'red') #在指定位置添加(插隊),被頂的那個元素往后排一下 insert比較耗時,如果你需要在頭尾都插入元素,則要考慮collections.deque(即double-ended queue,雙端隊列) b_list.pop(2) #刪除索引為2的元素并返回它 b_list.remove('foo') #刪除第一個值為“foo”的元素
-
判斷元素是否在list中
'dwarf' in b_list / 'dwarf' not in b_list
-
List相連接
[4, None, 'foo'] + [7, 8, (2, 3)] x = [4, None, 'foo'] x.extend([7, 8, (2, 3)]) # extend函數也能起到連接的作用,extend函數要花費更少的時間 everything = [] for chunk in list_of_lists: everything.extend(chunk)
-
List內部排序
a = [7, 2, 5, 1, 3] a.sort() #a被排序,sort中有一個參數,如sort(key=len),即按照字符串的長度排序 import bisect bisect.bisect(a,2) #對**已排序的列表**插入一個元素,返回的是插入的位置,并不真正插入 bisect.insort(a,2) #真正插入一個值
-
List切片
seq = [7, 2, 3, 7, 5, 6, 0, 1] seq[1:5] #[2, 3, 7, 5],前閉后開的取值 seq[3:4] = [6, 3] #通過切片集中修改某些位置的值 seq[:5] seq[3:] #列表首尾皆可省略 seq[-6:-2] #可以用負號反向索引切片,還是左閉右開 seq[::2] #還可以指定步長。每次前進兩步從頭選到尾 seq[::-1] #如此便可以反轉整個列表,是非常聰明的方式
-
-
內置的sequence方案
-
enumerate(枚舉)
for i,value in enumerate(a): print(i,value)
enumerate中mapping的妙用
some_list = ['foo', 'bar', 'baz'] mapping = {} for i, v in enumerate(some_list): mapping[v] = i
-
sorted(排序,支持排序參數,返回的是一個新的列表)
a= [2,6,5,4,7] sorted(a) #a不變,輸出是排序后的 sorted('horse race') #對字符的ASCII碼進行排序
-
zip(打包功能,看代碼)
seq1 = ['foo', 'bar', 'baz'] seq2 = ['one', 'two', 'three'] zipped = zip(seq1, seq2) list(zipped) #[('foo', 'one'), ('bar', 'two'), ('baz', 'three')] seq3 = [False, True] list(zip(seq1, seq2, seq3)) #[('foo', 'one', False), ('bar', 'two', True)] #結果取決于最短的那個seq #### 與enumerate結合 for i, (a, b) in enumerate(zip(seq1, seq2)): print('{0}: {1}, {2}'.format(i, a, b)) #### 巧用zip實現unzip pitchers = [('Nolan', 'Ryan'), ('Roger', 'Clemens')] first_names, last_names = zip(*pitchers) #first_names =('Nolan', 'Roger')
-
反轉sequence
list(reversed(range(10))) #[9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
-
-
dict(字典)
-
初始化
empty_dict = {} d1 = {'a' : 'some value', 'b' : [1, 2, 3, 4]} d1[7] = 'an integer'
-
判斷是否在dict
'b' in d1
-
刪除dict元素
del d1[5] #帶一個key d1.pop('dummy') #返回的是該鍵對應的值,d1中刪除了這個鍵值對
-
更新dict內的鍵值,主要是值
list(d1.keys()) list(d1.values()) #key與value的順序一致的 d1.update({'b' : 'foo', 'c' : 12}) #
-
可以使用二元tuple來指定一個dict
mapping = dict(zip(range(5), reversed(range(5))))
-
設置默認值
value = some_dict.get(key, default_value) for word in words: letter = word[0] by_letter.setdefault(letter, []).append(word) #{'a': ['apple', 'atom'], 'b': ['bat', 'bar', 'book']} from collections import defaultdict by_letter = defaultdict(list) for word in words: by_letter[word[0]].append(word)
-
散列性
hash('string') hash((1, 2, (2, 3))) hash((1, 2, [2, 3])) # wrong d[tuple([1, 2, 3])] = 5 # {(1, 2, 3): 5} 可hash
-
-
set(沒有值的字典)
set([2, 2, 2, 1, 3, 3]) #{1, 2, 3} a = {1, 2, 3, 4, 5} b = {3, 4, 5, 6, 7, 8} a.union(b) #或者a | b a.intersection(b) #或者a & b c = a.copy() ###set內元素順序不一致,并不影響set的相等
-
高階用法
-
優雅化
# string strings = ['a', 'as', 'bat', 'car', 'dove', 'python'] [x.upper() for x in strings if len(x) > 2] #dict loc_mapping = {val : index for index, val in enumerate(strings)} #set unique_lengths = {len(x) for x in strings} set(map(len, strings))
-
嵌套優雅
#list names_of_interest = [] for names in all_data: enough_es = [name for name in names if name.count('e') >= 2] names_of_interest.extend(enough_es) result = [name for names in all_data for name in names if name.count('e') >= 2] #tuple some_tuples = [(1, 2, 3), (4, 5, 6), (7, 8, 9)] flattened = [x for tup in some_tuples for x in tup]#[1, 2, 3, 4, 5, 6, 7, 8, 9] [[x for x in tup] for tup in some_tuples] #[[1, 2, 3], [4, 5, 6], [7, 8, 9]] #注意區別
-
-
函數
-
基本函數定義
def my_function(x, y, z=1.5):
    """Combine the sum of ``x`` and ``y`` with the factor ``z``.

    Args:
        x: First addend.
        y: Second addend.
        z: Factor; defaults to 1.5 when the caller omits it.

    Returns:
        ``z * (x + y)`` when ``z > 1``, otherwise ``z / (x + y)``.

    Raises:
        ZeroDivisionError: If ``z <= 1`` and ``x + y`` is zero.
    """
    if z > 1:
        return z * (x + y)
    return z / (x + y)


# The same function can be called in several ways: positionally, with
# keywords, relying on the default, or with keywords in any order.
my_function(5, 6, z=0.7)
my_function(3.14, 7, 3.5)
my_function(10, 20)
my_function(x=5, y=6, z=7)
my_function(y=6, x=5, z=7)
-
函數的作用域:全局與本地
def bind_a_variable():
    """Bind the module-level name ``a`` to a new empty list.

    The ``global`` statement makes the assignment below target the module
    namespace instead of creating a local variable. Relying on globals
    this way is generally discouraged.
    """
    global a
    a = []


bind_a_variable()
-
python函數可以返回多個值
def f():
    """Return three values at once, packed into a single tuple.

    Returns:
        The tuple ``(5, 6, 7)``.
    """
    a = 5
    b = 6
    c = 7
    # Returning a dict such as {'a': a, 'b': b, 'c': c} is an alternative
    # that gives each value a name.
    return a, b, c


# Unpack the returned tuple into separate names ...
a, b, c = f()
# ... or keep the whole 3-tuple as one object.
return_value = f()
-
做一個object
import re

states = [' Alabama ', 'Georgia!', 'Georgia', 'georgia', 'FlOrIda',
          'south carolina##', 'West virginia?']

# Compiled once so the pattern is not looked up again for every string.
_PUNCTUATION = re.compile(r'[!#?]')


def remove_punctuation(value):
    """Return ``value`` with every ``!``, ``#`` and ``?`` removed."""
    return _PUNCTUATION.sub('', value)


# Functions are ordinary objects, so a cleaning pipeline is just a list of
# callables applied in order: strip whitespace, drop punctuation, title-case.
clean_ops = [str.strip, remove_punctuation, str.title]


def clean_strings(strings, ops=None):
    """Apply a sequence of string transformations to every item.

    Args:
        strings: Iterable of strings to clean.
        ops: Sequence of one-argument callables applied left to right to
            each string. When omitted, ``clean_ops`` is used.

    Returns:
        A new list with the transformed strings, in input order.
    """
    if ops is None:
        ops = clean_ops
    result = []
    for value in strings:
        for function in ops:
            value = function(value)
        result.append(value)
    return result


# Example: clean_strings(states, clean_ops)

# Functions can also be passed to map().
for x in map(remove_punctuation, states):
    print(x)
-
lambda函數
from functools import partial


def short_function(x):
    """Return ``x`` doubled."""
    return x * 2


# The anonymous equivalent of short_function is: lambda x: x * 2


def apply_to_list(some_list, f):
    """Return a new list holding ``f(x)`` for every ``x`` in ``some_list``."""
    return [f(x) for x in some_list]


ints = [4, 0, 1, 5, 6]
apply_to_list(ints, lambda x: x * 2)

strings = ['foo', 'card', 'bar', 'aaaa', 'abab']
# Sort in place by the number of distinct characters in each string.
strings.sort(key=lambda x: len(set(x)))  # ['aaaa', 'foo', 'abab', 'bar', 'card']


def add_numbers(x, y):
    """Return the sum of ``x`` and ``y``."""
    return x + y


# Two ways to fix the first argument at 5: a lambda wrapper ...
add_five = lambda y: add_numbers(5, y)
# ... or functools.partial, which rebinds the same name here.
add_five = partial(add_numbers, 5)
-
生成器
import itertools


def squares(n=10):
    """Yield the squares of the integers 1 through ``n``.

    This is a generator function, so the banner is printed when iteration
    starts, not when ``squares()`` is called.

    Args:
        n: Largest integer to square; defaults to 10.

    Yields:
        ``i ** 2`` for each ``i`` from 1 to ``n`` inclusive.
    """
    print('Generating squares from 1 to {0}'.format(n ** 2))
    for i in range(1, n + 1):
        yield i ** 2


# Generator expressions are the concise form and can be passed straight
# into functions that accept an iterable:
#     gen = (x ** 2 for x in range(100))
#     sum(x ** 2 for x in range(100))
#     dict((i, i ** 2) for i in range(5))
gen = squares()
for x in gen:
    print(x, end=' ')

first_letter = lambda x: x[0]
names = ['Alan', 'Adam', 'Wes', 'Will', 'Albert', 'Steven']
# groupby only merges consecutive items with equal keys, which is why 'A'
# shows up twice below. The loop variable is deliberately not called
# ``names`` so the input list is not rebound.
for letter, group in itertools.groupby(names, first_letter):
    print(letter, list(group))  # each group is a lazy iterator
# Output:
#   A ['Alan', 'Adam']
#   W ['Wes', 'Will']
#   A ['Albert']
#   S ['Steven']
[圖片上傳失敗...(image-e82e5-1579059046663)]
-
報錯
def attempt_float(x):
    """Convert ``x`` to ``float``, returning ``x`` unchanged on failure.

    Only ``TypeError`` and ``ValueError`` are handled; any other exception
    propagates to the caller.

    Args:
        x: Value to convert.

    Returns:
        ``float(x)`` when the conversion succeeds, otherwise ``x`` itself.
    """
    try:
        return float(x)
    except (TypeError, ValueError):
        return x


# NOTE(review): ``path`` and ``write_to_file`` are not defined in this
# snippet; they are assumed to be supplied by the surrounding notes.

# try/finally: the cleanup runs whether or not the body raised.
f = open(path, 'w')
try:
    write_to_file(f)
finally:
    f.close()

# Full form: except handles a failure, else runs only on success, and
# finally always runs.
f = open(path, 'w')
try:
    write_to_file(f)
except Exception:
    print('Failed')
else:
    print('Succeeded')
finally:
    f.close()
-
-
文件操作
path = 'examples/segismundo.txt' f = open(path) #默認為“r”,read-only打開 #w,創建新文件,如當前路徑已有則覆蓋;x,創建但不覆蓋,如有就報錯 lines = [x.rstrip() for x in open(path)] # rstrip去掉字符串后面的符號(默認為空白字符) f.close() # 一個簡單的方式 with open(path) as f: lines = [x.rstrip() for x in f] #這個會自動關閉f #讀取 f = open(path) f.read(10) f.tell() #告知當前讀的位置 import sys sys.getdefaultencoding() # 'utf-8' f.seek(3) # 3 f.read(1) #定位到位置3,再讀1個字符 # 寫入到文件,按行寫 with open('tmp.txt', 'w') as handle: handle.writelines(x for x in open(path) if len(x) > 1) with open('tmp.txt') as f: lines = f.readlines() # 對文件解碼 data = b'Sue\xc3\xb1a el ' data.decode('utf8') # data[:4].decode('utf8') 不完整則不行,會報錯
[圖片上傳失敗...(image-d399cd-1579059046663)]