前段時間做leetcode題直接用到了python collections庫里的Counter類(計(jì)數(shù)字典)蛇受,自己騷操作了半天搞了一堆字典數(shù)組,還是沒有優(yōu)雅的達(dá)成目的宏蛉,最終屈服調(diào)用Counter類阵具,結(jié)果不需要我任何其他手段,內(nèi)置方法就搞定了噪叙,也勾起了我的好奇心,來看看Counter類的源碼霉翔。
兩百多行代碼不一下全貼睁蕾,有礙觀瞻,找些重要的研究一下
4
5 class Counter(dict):
6 '''Dict subclass for counting hashable items. Sometimes called a bag
7 or multiset. Elements are stored as dictionary keys and their counts
8 are stored as dictionary values.
9
可見Counter父類為dict
51 def __init__(self, iterable=None, **kwds):
52 '''Create a new, empty Counter object. And if given, count elements
53 from an input iterable. Or, initialize the count from another mapping
54 of elements to their counts.
55
56 >>> c = Counter() # a new, empty counter
57 >>> c = Counter('gallahad') # a new counter from an iterable
58 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
59 >>> c = Counter(a=4, b=2) # a new counter from keyword args
60
61 '''
62 super(Counter, self).__init__()
63 self.update(iterable, **kwds)
64
看到super,去研究了一番子眶,發(fā)現(xiàn)是調(diào)用父類方法瀑凝,比如這里的super就是調(diào)用了父類dict的init方法,即生成了一個字典臭杰,隨后用到了自己類的update方法粤咪,下面來看一下
116 def update(self, iterable=None, **kwds):
117 """ 更新計(jì)數(shù)器,其實(shí)就是增加渴杆;如果原來沒有寥枝,則新建,如果有則加一 """
118 '''Like dict.update() but add counts instead of replacing them.
119
120 Source can be an iterable, a dictionary, or another Counter instance.
121
122 >>> c = Counter('which')
123 >>> c.update('witch') # add elements from another iterable
124 >>> d = Counter('watch')
125 >>> c.update(d) # add elements from another counter
126 >>> c['h'] # four 'h' in which, witch, and watch
127
128 '''
129 # The regular dict.update() operation makes no sense here because the
130 # replace behavior results in the some of original untouched counts
131 # being mixed-in with all of the other counts for a mismash that
132 # doesn't have a straight-forward interpretation in most counting
133 # contexts. Instead, we implement straight-addition. Both the inputs
134 # and outputs are allowed to contain zero and negative counts.
135
136 if iterable is not None:
137 if isinstance(iterable, Mapping):
138 if self:
139 self_get = self.get
140 for elem, count in iterable.iteritems():
141 self[elem] = self_get(elem, 0) + count
142 else:
143 super(Counter, self).update(iterable) # fast path when counter is empty
144 else:
145 self_get = self.get
146 for elem in iterable:
147 self[elem] = self_get(elem, 0) + 1
148 if kwds:
149 self.update(kwds)
update算是該類的核心方法之一了将塑,判斷iterable變量是否傳入脉顿,用isinstance函數(shù)判斷是否是Mapping類(如dict)蝌麸,如果是這類數(shù)據(jù){:}点寥,就直接用mapping的elem值加上本身Counter[elem]值來更新,否則就是加一
71 def most_common(self, n=None):
72
73 '''List the n most common elements and their counts from the most
74 common to the least. If n is None, then list all element counts.
75
76 >>> Counter('abcdeabcdabcaba').most_common(3)
77 [('a', 5), ('b', 4), ('c', 3)]
78
79 '''
80 # Emulate Bag.sortedByCount from Smalltalk
81 if n is None:
82 return sorted(self.iteritems(), key=_itemgetter(1), reverse=True)
83 return _heapq.nlargest(n, self.iteritems(), key=_itemgetter(1))
84
又一個counter類的重要方法来吩,意義很明確敢辩,注意到這里利用iteritems()返回一個迭代器,指定key=_itemgetter(1)弟疆,即使用迭代器第二個域戚长,即counter的value來排序
不指定n時使用sorted函數(shù),指定n時則使用 _heapq.nlargest函數(shù)怠苔,這個之前沒有見過同廉,應(yīng)該是堆排序,度一下它的具體實(shí)現(xiàn)
heap數(shù)據(jù)類型源碼:https://github.com/python/cpython/tree/2.7/Lib/heapq.py
python3.5的heapq模塊代碼比2.7的要清晰很多柑司,本質(zhì)沒什么差別迫肖,主要方法都是建堆(最小堆與最大堆兩種方法),push攒驰,pop蟆湖,pushpop操作,在這些過程中調(diào)用sftup與siftdown方法玻粪,維護(hù)堆的性質(zhì)隅津,另外就是merge(合并兩個堆)以及常用到ksmallest與klargest方法,他們的操作時間都是O(klgk)劲室,加上建堆時間為O(nlgn)伦仍,總體時間復(fù)雜度為O(nlgn),看上去并沒有什么優(yōu)越性,然而在實(shí)際應(yīng)用中是快一些的很洋,回頭具體研究下這個問題充蓝。
由上可見,Counter類除了字典以外沒有使用什么復(fù)雜的數(shù)據(jù)結(jié)構(gòu)蹲缠,其most_common方法棺克,實(shí)際上是調(diào)用heap模塊進(jìn)行了O(nlgn)的操作悠垛,設(shè)計(jì)還是較為簡單清晰的,那么如果我直接在建字典的時候就對其維持堆的性質(zhì)娜谊,會不會效率比Counter更高呢确买?
剩余的Counter類代碼列在下面
85 def elements(self):
86 """ 計(jì)數(shù)器中的所有元素,注:此處非所有元素集合纱皆,而是包含所有元素集合的迭代器 """
87 '''Iterator over elements repeating each as many times as its count.
88
89 >>> c = Counter('ABCABC')
90 >>> sorted(c.elements())
91 ['A', 'A', 'B', 'B', 'C', 'C']
92
93 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
94 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
95 >>> product = 1
96 >>> for factor in prime_factors.elements(): # loop over factors
97 ... product *= factor # and multiply them
98 >>> product
99
100 Note, if an element's count has been set to zero or is a negative
101 number, elements() will ignore it.
102
103 '''
104 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
105 return _chain.from_iterable(_starmap(_repeat, self.iteritems()))
106
107 # Override dict methods where necessary
108
109 @classmethod
110 def fromkeys(cls, iterable, v=None):
111 # There is no equivalent method for counters because setting v=1
112 # means that no element can have a count greater than one.
113 raise NotImplementedError(
114 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
115
150
151 def subtract(self, iterable=None, **kwds):
152 """ 相減湾趾,原來的計(jì)數(shù)器中的每一個元素的數(shù)量減去后添加的元素的數(shù)量 """
153 '''Like dict.update() but subtracts counts instead of replacing them.
154 Counts can be reduced below zero. Both the inputs and outputs are
155 allowed to contain zero and negative counts.
156
157 Source can be an iterable, a dictionary, or another Counter instance.
158
159 >>> c = Counter('which')
160 >>> c.subtract('witch') # subtract elements from another iterable
161 >>> c.subtract(Counter('watch')) # subtract elements from another counter
162 >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch
163 >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch
164 -1
165
166 '''
167 if iterable is not None:
168 self_get = self.get
169 if isinstance(iterable, Mapping):
170 for elem, count in iterable.items():
171 self[elem] = self_get(elem, 0) - count
172 else:
173 for elem in iterable:
174 self[elem] = self_get(elem, 0) - 1
175 if kwds:
176 self.subtract(kwds)
177
178 def copy(self):
179 """ 拷貝 """
180 'Return a shallow copy.'
181 return self.__class__(self)
182
183 def __reduce__(self):
184 """ 返回一個元組(類型,元組) """
185 return self.__class__, (dict(self),)
186
187 def __delitem__(self, elem):
188 """ 刪除元素 """
189 'Like dict.__delitem__() but does not raise KeyError for missing values.'
190 if elem in self:
191 super(Counter, self).__delitem__(elem)
192
193 def __repr__(self):
194 if not self:
195 return '%s()' % self.__class__.__name__
196 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
197 return '%s({%s})' % (self.__class__.__name__, items)
198
199 # Multiset-style mathematical operations discussed in:
200 # Knuth TAOCP Volume II section 4.6.3 exercise 19
201 # and at http://en.wikipedia.org/wiki/Multiset
202 #
203 # Outputs guaranteed to only include positive counts.
204 #
205 # To strip negative and zero counts, add-in an empty counter:
206 # c += Counter()
207
208 def __add__(self, other):
209 '''Add counts from two counters.
210
211 >>> Counter('abbb') + Counter('bcc')
212 Counter({'b': 4, 'c': 2, 'a': 1})
213
214 '''
215 if not isinstance(other, Counter):
216 return NotImplemented
217 result = Counter()
218 for elem, count in self.items():
219 newcount = count + other[elem]
220 if newcount > 0:
221 result[elem] = newcount
222 for elem, count in other.items():
223 if elem not in self and count > 0:
224 result[elem] = count
225 return result
226
227 def __sub__(self, other):
228 ''' Subtract count, but keep only results with positive counts.
229
230 >>> Counter('abbbc') - Counter('bccd')
231 Counter({'b': 2, 'a': 1})
232
233 '''
234 if not isinstance(other, Counter):
235 return NotImplemented
236 result = Counter()
237 for elem, count in self.items():
238 newcount = count - other[elem]
239 if newcount > 0:
240 result[elem] = newcount
241 for elem, count in other.items():
242 if elem not in self and count < 0:
243 result[elem] = 0 - count
244 return result
245
246 def __or__(self, other):
247 '''Union is the maximum of value in either of the input counters.
248
249 >>> Counter('abbb') | Counter('bcc')
250 Counter({'b': 3, 'c': 2, 'a': 1})
251
252 '''
253 if not isinstance(other, Counter):
254 return NotImplemented
255 result = Counter()
256 for elem, count in self.items():
257 other_count = other[elem]
258 newcount = other_count if count < other_count else count
259 if newcount > 0:
260 result[elem] = newcount
261 for elem, count in other.items():
262 if elem not in self and count > 0:
263 result[elem] = count
264 return result
265
266 def __and__(self, other):
267 ''' Intersection is the minimum of corresponding counts.
268
269 >>> Counter('abbb') & Counter('bcc')
270 Counter({'b': 1})
271
272 '''
273 if not isinstance(other, Counter):
274 return NotImplemented
275 result = Counter()
276 for elem, count in self.items():
277 other_count = other[elem]
278 newcount = count if count < other_count else other_count
279 if newcount > 0:
280 result[elem] = newcount
281 return result
282
283 Counter