C++数据结构之文件压缩(哈夫曼树)实例详解
概要:
项目简介:利用哈夫曼编码的方式对文件进行压缩,并且对压缩文件可以解压
开发环境:windows vs2013
项目概述:
1.压缩
a.读取文件,将每个字符,该字符出现的次数和权值构成哈夫曼树
b.哈夫曼树是利用小堆构成,字符出现次数少的节点指针存在堆顶,出现次数多的在堆底
c.每次取堆顶的两个数,再将两个数相加进堆,直到堆被取完,这时哈夫曼树也建成
d.从哈夫曼树中获取哈夫曼编码,然后再根据整个字符数组来获取出现了得字符的编码
e.获取编码后每次凑满8位就将编码串写入到压缩文件(value处理编码1与它即可,0只移动位)
f.写好配置文件,统计每个字符及其出现次数,并以“字符+','+次数”的形式保存到配置文件中
2.解压
a.读取配置文件,统计所有字符的个数
b.构建哈夫曼树,读解压缩文件,将所读到的编码字符的这个节点所所含的字符写入到解压缩文件中,知道将压缩文件读完
c.压缩解压缩完全完成,进行小文件大文件的测试
实例代码:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
|
#pragma once #include<vector> template < class T> struct Less { bool operator()( const T& l, const T& r) const { return l < r; } }; template < class T> struct Greater { bool operator()( const T& l, const T& r) const { return l > r; } }; template < class T, class Compare> class Heap { public : Heap() {} Heap(T* array, size_t n) //建堆 { _array.reserve(n); for ( size_t i = 0; i < n; i++) { _array.push_back(array[i]); } for ( int i = (_array.size() - 2) >> 1; i >= 0; --i) { _AdjustDown(i); } } const T& Top() const { return _array[0]; } void Push( const T& x) { _array.push_back(x); _AdjustUp(_array.size() - 1); } size_t Size() { return _array.size(); } void Pop() { assert (_array.size() > 0); swap(_array[0], _array[_array.size() - 1]); _array.pop_back(); _AdjustDown(0); } bool Empty() { return _array.size() == 0; } void Print() { for ( size_t i = 0; i < _array.size(); ++i) { cout << _array[i] << " " ; } cout << endl; } protected : void _AdjustUp( int child) //上调 { Compare ComFunc; int parent = (child - 1) >> 1; while (child) { if (ComFunc(_array[child], _array[parent])) { swap(_array[child], _array[parent]); child = parent; parent = (child - 1) >> 1; } else { break ; } } } void _AdjustDown( int root) //下调 { Compare ComFunc; int parent = root; int child = root * 2 + 1; while (child < _array.size()) { if (child + 1 < _array.size() && ComFunc(_array[child + 1], _array[child])) { ++child; } if (ComFunc(_array[child], _array[parent])) { swap(_array[child], _array[parent]); parent = child; child = parent * 2 + 1; } else { break ; } } } protected : vector<T> _array; }; void TestHeap() { int a[] = { 10, 11, 13, 12, 16, 18, 15, 17, 14, 19 }; //Heap<int> heap(a, sizeof(a) / sizeof(a[0])); //Heap<int, Less<int>> heap(a, sizeof(a) / sizeof(a[0])); Heap< int , Greater< int >> heap(a, sizeof (a) / sizeof (a[0])); heap.Print(); heap.Push(25); heap.Print(); heap.Pop(); heap.Print(); } |
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
|
#pragma once #include"Heap.h" template < class T> struct HuffmanTreeNode { HuffmanTreeNode<T>* _left; HuffmanTreeNode<T>* _right; HuffmanTreeNode<T>* _parent; T _w; //权值 HuffmanTreeNode( const T& x) :_w(x) , _left(NULL) , _right(NULL) , _parent(NULL) {} }; template < class T> class HuffmanTree { typedef HuffmanTreeNode<T> Node; public : HuffmanTree() :_root(NULL) {} HuffmanTree(T* a, size_t n, const T& invalid = T()) //构建哈夫曼树 { struct Compare { bool operator()(Node* l, Node* r) const { return l->_w < r->_w; } }; Heap<Node*, Compare> minHeap; for ( size_t i = 0; i < n; ++i) { if (a[i] != invalid) { minHeap.Push( new Node(a[i])); } } while (minHeap.Size() > 1) { Node* left = minHeap.Top(); minHeap.Pop(); Node* right = minHeap.Top(); minHeap.Pop(); Node* parent = new Node(left->_w + right->_w); parent->_left = left; parent->_right = right; left->_parent = parent; right->_parent = parent; minHeap.Push(parent); } _root = minHeap.Top(); } Node* GetRoot() { return _root; } ~HuffmanTree() { _Destory(_root); } protected : void _Destory(Node* root) { if (root == NULL) { return ; } _Destory(root->_left); _Destory(root->_right); delete root; } HuffmanTree( const HuffmanTree<T>& tree); HuffmanTree& operator=( const HuffmanTree<T>& tree); protected : Node* _root; }; void TestHuffmanTree() {<pre code_snippet_id= "2340790" snippet_file_name= "blog_20170418_2_3778260" name= "code" class = "cpp" >#pragma once #include<iostream> using namespace std; #include<assert.h> #include"HuffmanTree.h" typedef long long LongType; struct CharInfo { unsigned char _ch; //字符 LongType _count; //字符出现的次数 string _code; //huffman编码 CharInfo(LongType count = 0) :_count(count) , _ch(0) , _code( "" ) {} bool operator<( const CharInfo& info) const { return _count < info._count; } CharInfo operator+( const CharInfo& info) { return CharInfo(_count + info._count); } bool operator!=( const CharInfo& info) const { return _count != info._count; } }; struct CountInfo { unsigned char _ch; //字符 LongType _count; //字符出现的次数 }; class FileCompress { public : FileCompress() { for ( size_t i = 0; i < 256; ++i) { _info[i]._ch = i; } } void Compress( const char * filename) { assert (filename); //统计字符出现的次数,均已二进制方式读写 FILE * fout = fopen (filename, "rb" ); assert (fout); int ch = fgetc (fout); string CompressFile = filename; CompressFile += ".huffman" ; FILE * fin = fopen (CompressFile.c_str(), "wb" ); assert (fin); CountInfo info; while (ch != EOF) { _info[(unsigned char )ch]._count++; ch = fgetc (fout); } //构建huffman tree CharInfo invalid; invalid._count = 0; HuffmanTree<CharInfo> tree(_info, 256, invalid); //生成huffman code GetHuffmanCode(tree.GetRoot()); //压缩 //写配置文件 for ( size_t i = 0; i < 256; ++i) { if (_info[i]._count) { info._ch = _info[i]._ch; info._count = _info[i]._count; fwrite (&info, sizeof (info), 1, fin); } } info._count = -1; fwrite (&info, sizeof (info), 1, fin); fseek (fout, 0, SEEK_SET); //返回到文件开始 ch = fgetc (fout); char value = 0; //二进制 int pos = 0; //左移位数 while (ch != EOF) { string& code = _info[(unsigned char )ch]._code; size_t i = 0; for (i = 0; i < code.size(); ++i) { value <<= 1; ++pos; if (code[i] == '1' ) { value |= 1; } if (pos == 8) //满8位写进文件中 { fputc (value, fin); value = 0; pos = 0; } } ch = fgetc (fout); } if (pos) { value <<= (8 - pos); fputc (value, fin); } fclose (fin); fclose (fout); } void GetHuffmanCode(HuffmanTreeNode<CharInfo>* root) //huffman编码 { if (root == NULL) { return ; } if (root->_left == NULL && root->_right == NULL) { HuffmanTreeNode<CharInfo> *parent, *cur; cur = root; parent = cur->_parent; string& code = _info[(unsigned char )root->_w._ch]._code; while (parent) { if (cur == parent->_left) { code += '0' ; } else { code += '1' ; } cur = parent; parent = cur->_parent; } reverse(code.begin(), code.end()); } GetHuffmanCode(root->_left); GetHuffmanCode(root->_right); } //解压 void UnCompress( const char * filename) { assert (filename); string UnCompressFile = filename; size_t index = UnCompressFile.rfind( '.' ); assert (index != string::npos); UnCompressFile = UnCompressFile.substr(0, index); UnCompressFile += ".unhuffman" ; FILE * fout = fopen (filename, "rb" ); assert (fout); FILE * fin = fopen (UnCompressFile.c_str(), "wb" ); assert (fin); CountInfo info; fread (&info, sizeof (CountInfo), 1, fout); //读配置信息 while (1) { fread (&info, sizeof (CountInfo), 1, fout); if (info._count == -1) { break ; } _info[(unsigned char )info._ch]._ch = info._ch; _info[(unsigned char )info._ch]._count = info._count; } //重建huffman树 CharInfo invalid; invalid._count = 0; HuffmanTree<CharInfo> tree(_info, 256, invalid); HuffmanTreeNode<CharInfo>* root = tree.GetRoot(); HuffmanTreeNode<CharInfo>* cur = root; LongType count = root->_w._count; int value = fgetc (fout); if (value == EOF) //只有一种字符 { if (info._ch != 0) { while (cur->_w._count--) { fputc (cur->_w._ch, fin); } } } else { while (value != EOF) { int pos = 7; char test = 1; while (pos >= 0) { if (value & (test << pos)) { cur = cur->_right; } else { cur = cur->_left; } if (cur->_left == NULL && cur->_right == NULL) { fputc (cur->_w._ch, fin); cur = root; if (--count == 0) { break ; } } --pos; } value = fgetc (fout); } } fclose (fout); fclose (fin); } protected : CharInfo _info[256]; //所有字符信息 }; void TestFileCompress() { FileCompress fc; //fc.Compress("实验.doc"); //fc.UnCompress("实验.doc.huffman"); //fc.Compress("qq.JPG"); //fc.UnCompress("qq.JPG.huffman"); //fc.Compress("www.MP3"); //fc.UnCompress("www.MP3.huffman"); fc.Compress( "yppppp.txt" ); fc.UnCompress( "yppppp.txt.huffman" ); }</pre><br> int array[10] = { 2, 4, 6, 9, 7, 8, 5, 0, 1, 3 };HuffmanTree< int > t(array, 10);} <pre></pre> <p></p> <pre></pre> <p></p> <p></p> <p></p> <pre code_snippet_id= "2340790" snippet_file_name= "blog_20170418_3_1128302" name= "code" class = "cpp" >#include <iostream> #include <assert.h> #include <windows.h> using namespace std; #include"Heap.h" #include"HuffmanTree.h" #include"FileCompress.h" int main() { //TestHeap(); TestHuffmanTree(); TestFileCompress(); system ( "pause" ); return 0; }</pre><br> <br> <p></p> <p><br> </p> <p><br> </p> <link rel= "stylesheet" href= "http://static.blog.csdn.net/public/res-min/markdown_views.css?v=1.0" rel= "external nofollow" > |
以上就是哈夫曼树的实例详解,如有疑问请留言或者到本站社区交流,感谢阅读,希望能帮助到大家,谢谢大家对本站的支持!