-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathHuffman Encoding.cpp
121 lines (102 loc) · 3.54 KB
/
Huffman Encoding.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
//
// main.cpp
// practice
//
// Created by Mahmud on 01/31/19.
// Copyright © 2018 Mahmud. All rights reserved.
//
#include <iostream>
#include <cstdio>
#include <cstdlib>
#include <cmath>
#include <string>
#include <algorithm>
#include <vector>
#include <queue>
#include <map>
using namespace std;
void read(string& text) {
freopen("input.txt", "r", stdin);
text = "";
string s;
while (cin >> s) {
if (cin.eof())
break;
for (char &ch: s)
ch = tolower(ch);
text += s;
}
}
int main(int argc, const char * argv[]) {
string text = "this is an example for huffman encoding";
read(text);
map<char, int> counters;
for_each(begin(text), end(text), [&](char ch) {
counters[ch] ++;
});
for (auto i: counters) {
cout << "weight of " << i.first << " is " << i.second << endl;
}
//priority_queue<data, vector< data >, greater<data> > Q;
priority_queue<pair<int, vector<char> >, vector<pair<int, vector<char> >>,
greater<pair<int, vector<char> >>> Q;
// I know it is ugly ^.
for (auto i: counters) {
Q.push(make_pair(i.second, vector<char>(1, i.first)));
}
map<char, string> identifiers;
while (int(Q.size()) > 1) {
auto itemSet1 = Q.top();
Q.pop();
auto itemSet2 = Q.top();
Q.pop();
for_each(begin(itemSet1.second), end(itemSet1.second), [&](char ch) {
identifiers[ch] = '0' + identifiers[ch];
});
for_each(begin(itemSet2.second), end(itemSet2.second), [&](char ch) {
identifiers[ch] = '1' + identifiers[ch];
});
int weight = itemSet1.first + itemSet2.first;
vector<char> merged = itemSet1.second;
for (auto ch: itemSet2.second)
merged.push_back(ch);
Q.push(make_pair(weight, merged));
}
cout << endl;
vector<pair<char, string>> result;
for (auto item: identifiers)
result.push_back(make_pair(item.first, item.second));
sort(result.begin(), result.end(), [](pair<char, string> a, pair<char, string> b) {
string s1 = a.second;
string s2 = b.second;
if (s1.length() != s2.length())
return s1.length() < s2.length();
for (int i = 0; i < s1.length(); i ++) {
if (s1[i] != s2[i])
return s1[i] < s2[i];
}
return false;
});
for_each(begin(result), end(result), [&](pair<char, string> item) {
cout << item.second << " " << item.first << endl;
});
cout << endl;
cout << endl;
int huffmanTotal = 0;
for (auto item: counters)
huffmanTotal += item.second * identifiers[item.first].length();
double v = text.length() * log2(int(counters.size()));
cout << "length of the initial text in bits: " << text.length() * log2(int(counters.size())) << endl;
cout << "length via Huffman encoding in bits: " << huffmanTotal << endl;
cout << "the number of the saved bits: " << text.length() * 8 - huffmanTotal << endl;
cout << "the gain in the size in percentages is: " << 100. * (v - huffmanTotal) / v << endl;
cout << endl;
double entropySource = 0.;
for (auto item: counters) {
entropySource += -1. * item.second / text.length() * log2(1. * item.second / text.length());
}
cout << "entropy of the source is: " << entropySource * int(text.length()) << endl;
cout << "average length for huffman encoding is: " << 1. * huffmanTotal / text.length() << endl;
cout << endl;
return 0;
}