-
Notifications
You must be signed in to change notification settings - Fork 0
/
Huffman.py
123 lines (95 loc) · 4 KB
/
Huffman.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import heapq
import math
import sys
from collections import Counter
# Module-level placeholder for the per-symbol probabilities; the script
# section further down rebinds it once the input text has been read.
# (A ``global`` statement has no effect at module scope, so none is needed.)
probabilities = []
class HuffmanCode:
    """Binary Huffman code for a list of symbol probabilities.

    ``probability`` holds one non-negative weight per symbol.  The code word
    returned at index ``i`` by :meth:`compute_code` belongs to
    ``probability[i]``.
    """

    def __init__(self, probability):
        """Store the per-symbol probabilities used by the other methods."""
        self.probability = probability

    def position(self, value, index):
        """Return the first index whose probability does not exceed *value*.

        If every stored probability is greater than *value*, ``index - 1`` is
        returned instead.  Kept for backward compatibility; ``compute_code``
        does not call it.
        """
        for j, p in enumerate(self.probability):
            if value >= p:
                return j
        return index - 1

    def characteristics_huffman_code(self, code):
        """Print the average code-word length and the efficiency of *code*.

        *code* lists one code word per symbol, in the same order as
        ``self.probability``.  Efficiency is the entropy of the
        probabilities (in bits) divided by the average code-word length.
        """
        # Zero-probability symbols add nothing to the entropy and would
        # otherwise cause a division by zero in 1 / p.
        entropy_of_code = sum(
            p * math.log2(1 / p) for p in self.probability if p > 0
        )
        mean_length = sum(
            len(word) * p for word, p in zip(code, self.probability)
        )
        print("Average length of the code: %f" % mean_length)
        # An all-empty code has length 0; report 0 rather than dividing by it.
        efficiency = entropy_of_code / mean_length if mean_length else 0.0
        print("Efficiency of the code: %f" % efficiency)

    def compute_code(self):
        """Return one Huffman code word (a ``'0'``/``'1'`` string) per symbol.

        The result is aligned with ``self.probability``.  When the
        probabilities are sorted in descending order the code-word lengths
        are non-decreasing.  An empty probability list yields ``[]`` and a
        single symbol is given the one-bit code ``'0'``.
        """
        num = len(self.probability)
        if num == 0:
            return []
        if num == 1:
            # A lone symbol still needs one bit so that it can be emitted.
            return ['0']

        # Heap entries are (weight, tie_breaker, symbol indices in subtree).
        # Tie-breakers are unique, so the index lists are never compared.
        # Leaves use -i (later symbols are merged first among equal weights)
        # and merged nodes use 1, 2, ... (after every leaf of equal weight,
        # in creation order), which keeps the result deterministic.
        heap = [(p, -i, [i]) for i, p in enumerate(self.probability)]
        heapq.heapify(heap)

        codes = [''] * num
        merged_id = 1
        while len(heap) > 1:
            low_weight, _, low_members = heapq.heappop(heap)
            high_weight, _, high_members = heapq.heappop(heap)
            # Prepend the branch bit to every symbol below each child.
            for i in high_members:
                codes[i] = '0' + codes[i]
            for i in low_members:
                codes[i] = '1' + codes[i]
            heapq.heappush(
                heap,
                (low_weight + high_weight, merged_id, high_members + low_members),
            )
            merged_id += 1
        return codes
# ---------------------------------------------------------------------------
# Driver: read a string, build its Huffman code, print the code table, the
# code statistics and the encoded text.
# ---------------------------------------------------------------------------
string = input("Enter the string to compute Huffman Code: ")
if not string:
    # Without symbols there is no code to build.
    raise SystemExit("Nothing to encode: the input string is empty.")

# (symbol, count) pairs, most frequent first; equal counts keep the order in
# which the symbols first appear in the input.
freq = Counter(string).most_common()
length = len(string)

# Exact relative frequencies in the same order as ``freq``.  They are kept
# unrounded so that rare symbols never collapse to a probability of 0.
probabilities = [count / length for _, count in freq]

huffmanClassObject = HuffmanCode(probabilities)
# A symbol must never be left with an empty code word, otherwise it cannot
# be encoded; fall back to '0' in that case.
huffman_code = [code or '0' for code in huffmanClassObject.compute_code()]

rmap = {}        # code word -> symbol
string_map = {}  # symbol -> code word

print(' Char | Huffman code ')
print('----------------------')
for index, (symbol, _) in enumerate(freq):
    code = huffman_code[index]
    print(' %-4r |%12s' % (symbol, code))
    rmap[code] = symbol
    string_map[symbol] = code

huffmanClassObject.characteristics_huffman_code(huffman_code)

real_s = "".join(string_map[letter] for letter in string)
print("The Encoded Text is:", real_s)