python實(shí)現(xiàn)的Huffman coding,給26個英文字母編碼请垛,inspired by Dave. 他只給出了Huffman tree的構(gòu)建步清,并將walk_tree留給了提問者自己完成辆它。我將walk_tree實(shí)現(xiàn)了一下并輸出結(jié)果,做個記錄堡赔,也順便分享給有需要的同學(xué)。
import queue
class HuffmanNode(object):
def __init__(self, left=None, right=None, root=None):
self.left = left
self.right = right
def children(self):
return((self.left, self.right))
freq = [
(8.167, 'a'), (1.492, 'b'), (2.782, 'c'), (4.253, 'd'),
(12.702, 'e'),(2.228, 'f'), (2.015, 'g'), (6.094, 'h'),
(6.966, 'i'), (0.153, 'j'), (0.747, 'k'), (4.025, 'l'),
(2.406, 'm'), (6.749, 'n'), (7.507, 'o'), (1.929, 'p'),
(0.095, 'q'), (5.987, 'r'), (6.327, 's'), (9.056, 't'),
(2.758, 'u'), (1.037, 'v'), (2.365, 'w'), (0.150, 'x'),
(1.974, 'y'), (0.074, 'z') ]
def create_tree(frequencies):
p = queue.PriorityQueue()
for value in frequencies: # 1. Create a leaf node for each symbol
p.put(value) # and add it to the priority queue
while p.qsize() > 1: # 2. While there is more than one node
l, r = p.get(), p.get() # 2a. remove two highest nodes
node = HuffmanNode(l, r) # 2b. create internal node with children
p.put((l[0]+r[0], node)) # 2c. add new node to queue
return p.get() # 3. tree is complete - return root node
node = create_tree(freq)
#=================== 以上是Dave提供的思路 ==================
# 樹里的每一個節(jié)點(diǎn)是一個tuple设联,node[0]是頻率善已,node[1]是HuffmanNode 或者 character
# 下面是我的實(shí)現(xiàn)
def walk_tree(node, prefix="", code={}):
"""
node 是一個tuple(freq, HuffmanNode|character)
"""
if isinstance(node[1], HuffmanNode): # node[1]是一個HuffmanNode
code1 = walk_tree(node[1].left, '0', code.copy()) # 這里如果直接傳入code會出錯
code2 = walk_tree(node[1].right, '1', code.copy())
if len(code1) > 0:
for k, v in code1.items():
code[k] = prefix + v
if len(code2) > 0:
for k, v in code2.items():
code[k] = prefix + v
else: # node[1]是一個字符
code[node[1]] = prefix
return(code)
code = walk_tree(node)
# 輸出每個字母的編碼
for i in sorted(freq, reverse=True):
try:
print(i[1], '{:6.2f}'.format(i[0]), code[i[1]])
except Exception as e:
print(e)
continue
結(jié)果如下:
e 12.70 100
t 9.06 000
a 8.17 1110
o 7.51 1101
i 6.97 1011
n 6.75 1010
s 6.33 0111
h 6.09 0110
r 5.99 0101
d 4.25 11111
l 4.03 11110
c 2.78 01001
u 2.76 01000
m 2.41 00111
w 2.37 00110
f 2.23 00100
g 2.02 110011
y 1.97 110010
p 1.93 110001
b 1.49 110000
v 1.04 001010
k 0.75 0010111
j 0.15 001011011
x 0.15 001011010
q 0.10 001011001
z 0.07 001011000