-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhuffman_decoding.py
118 lines (102 loc) · 2.99 KB
/
huffman_decoding.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# Python implementation of Huffman decoding.
# Time complexity of the Huffman coding algorithm
# is O(n log n) where n is the number of characters
# in the input string. The auxiliary space complexity
# is O(n) where n is the number of characters in the
# input string. The time complexity is dominated by
# the creation of the Huffman tree using priority
# queue which takes O(n log n) time. The space
# complexity is dominated by the maps used to store
# the frequency and codes of characters which take
# O(n) space. The recursive functions used to
# print codes and store codes also contribute to
# space complexity.
import heapq
from collections import defaultdict
# to map each character its huffman value
codes = {}
# To store the frequency of character of the input data
freq = defaultdict(int)
# A Huffman tree node
class MinHeapNode:
def __init__(self, data, freq):
self.left = None
self.right = None
self.data = data
self.freq = freq
def __lt__(self, other):
return self.freq < other.freq
# utility function to print characters along with
# there huffman value
def printCodes(root, str):
if root is None:
return
if root.data != '$':
print(root.data, ":", str)
printCodes(root.left, str + "0")
printCodes(root.right, str + "1")
# utility function to store characters along with
# there huffman value in a hash table
def storeCodes(root, str):
if root is None:
return
if root.data != '$':
codes[root.data] = str
storeCodes(root.left, str + "0")
storeCodes(root.right, str + "1")
# function to build the Huffman tree and store it
# in minHeap
def HuffmanCodes(size):
global minHeap
for key in freq:
minHeap.append(MinHeapNode(key, freq[key]))
heapq.heapify(minHeap)
while len(minHeap) != 1:
left = heapq.heappop(minHeap)
right = heapq.heappop(minHeap)
top = MinHeapNode('$', left.freq + right.freq)
top.left = left
top.right = right
heapq.heappush(minHeap, top)
storeCodes(minHeap[0], "")
# utility function to store map each character with its
# frequency in input string
def calcFreq(str, n):
for i in range(n):
freq[str[i]] += 1
# function iterates through the encoded string s
# if s[i]=='1' then move to node->right
# if s[i]=='0' then move to node->left
# if leaf node append the node->data to our output string
def decode_file(root, s):
ans = ""
curr = root
n = len(s)
for i in range(n):
if s[i] == '0':
curr = curr.left
else:
curr = curr.right
# reached leaf node
if curr.left is None and curr.right is None:
ans += curr.data
curr = root
return ans + '\0'
# Driver code
if __name__ == "__main__":
minHeap = []
str = "Chinesephilosophy"
encodedString, decodedString = "", ""
calcFreq(str, len(str))
HuffmanCodes(len(str))
print("Characters With there frequencies:")
for key in sorted(codes):
print(key, codes[key])
for i in str:
encodedString += codes[i]
print("\nEncoded Huffman data:")
print(encodedString)
# Function call
decodedString = decode_file(minHeap[0], encodedString)
print("\nDecoded Huffman Data:")
print(decodedString)