在以前写LZW压缩算法的时候,遇到很多难受的问题,基本上都在哈夫曼编码中解决了,虽然写这代码很费神,但还是把代码完整的码出来了,毕竟哈夫曼这个思想确实很牛逼。哈夫曼树很巧妙的解决了当时我在LZW序列化的时候想解决的问题,就是压缩后文本的分割。比如用lzw编码abc,就是1,2,3。但这个在存为文件的时候必须用分割符把1,2,3分割开,非常浪费空间,否则会和12 23 123 产生二义性。而哈夫曼树,将所有char分布在叶节点上,在还原的时候,比如1101110,假设110是叶节点,那么走到110的时候就可以确定,已经走到尽头,回到根节点继续走,这样就避免了字符的分割,全部用1010101010101这样的路径表示字符,可以将8位压缩为1个char进行存储。在构造树的时候,将出现率高的char放在上面,这样路径就很短,自然就节省了存储空间。虽然哈夫曼压缩效率不是最高的,但还算比较乐观的。
哈夫曼除了压缩以外还可以用于加密,在将文本用哈夫曼编码时,需持久化生成的char计数链表结构,这样才能还原出树结构,而解码时路径正是依赖于树结构的。也就是说,这种编码是属于约定形式的编码,在编码时用原文本产生树结构,而存储的是树路径,解码的时候缺少树或树结构与原先不相符都是无法完成解码的,就好比,我用10代表a,你存的是10,你将10解释为 b或c等等都是不正确的。由于转换为了char存储,所以还需持久化最后填充的数目、文本长度,才能还原出原先的01表示的文本格式
这个代码有一定缺陷,由于当时考虑的是对文本进行处理,当文件中有char='\0' 时会出现错误,这个代码打的很费神,就不继续修复了,如有需要,可自行更改,解决的办法应该挺多的
先来个运行图:
源代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
|
#include<iostream> #include<sstream> #include<fstream> void WriteFile( char * path, const char * content, int length, bool append= false ); using namespace std; struct Node{ char data; Node* left; Node* right; }; struct L_Node{ int count; Node* node; L_Node* next; }; Node* AddNode( int count, char data,L_Node*& first){ L_Node* lnode= new L_Node(); lnode->count=count; Node* node= new Node(); node->data=data; node->left=0; node->right=0; lnode->node=node; if (first==0){ first=lnode; } else { if (lnode->count<first->count){ lnode->next=first; first=lnode; } else { L_Node* iter=first; while (iter->next!=0&&iter->next->count<lnode->count){ iter=iter->next; } if (iter->next==0){ iter->next=lnode; lnode->next=0; } else { lnode->next=iter->next; iter->next=lnode; } } } return node; } void SaveLNodes(L_Node* first){ stringstream ss; while (first!=0){ ss<<( int )(unsigned char )first->node->data<< ':' <<first->count<< ' ' ; first=first->next; } WriteFile( "nodes.txt" ,ss.str().c_str(),ss.str().length()); } void GetLNodes(L_Node*& first){ char temp[32]; ifstream in; in.open( "nodes.txt" ,ios::in|ios::binary); while (!in.eof()){ temp[0]=0; in>>temp; if ( strlen (temp)>0){ char * data= strtok (temp, ":" ); char * count= strtok (0, ":" ); AddNode( atoi (count), atoi (data),first); } } } void BuildSortedList( char * content,L_Node*& first, int length){ int array[256]={ 0 }; for ( int i=0;i<length;i++){ array[(unsigned char )content[i]]++; } for ( int i=0;i<256;i++){ if (array[i]>0){ AddNode(array[i],( char )i,first); } } SaveLNodes(first); } void BuildTree(L_Node*& first,Node*& root){ //get l1->node,l2->node,remove l1,l2,then put l3 into list,then set l3->left and l3->right if (first->next==0){ Node* node= new Node(); root=node; root->right=0; node= new Node(); node->data=first->node->data; node->left=0; node->right=0; root->left=node; delete first; return ; } while (first->next!=0){ int count=first->count+first->next->count; Node* node1=first->node; L_Node* temp=first; first=first->next; delete temp; Node* node2=first->node; temp=first; delete temp; first=first->next; root=AddNode(count,0,first); root->left=node1; root->right=node2; //cout<<(int)node1->data<<':'<<(int)node2->data<<endl; } delete first; } void PreOrderTraversal(Node* node, char * track, int branch, char ** table){ if (node!=0){ char * track2=0; if (branch==0){ track2= new char [ strlen (track)+2]; sprintf (track2, "%s0\0" ,track); } else if (branch==1){ track2= new char [ strlen (track)+2]; sprintf (track2, "%s1\0" ,track); } else { track2= new char [ strlen (track)+1]; sprintf (track2, "%s\0" ,track); } if (node->data!=0){ table[(unsigned char )node->data]=track2; } PreOrderTraversal(node->left,track2,0,table); PreOrderTraversal(node->right,track2,1,table); if (node->data==0){ delete track2; } } } void PreOrderTraversal(Node* node){ if (node!=0){ cout<<( int )(unsigned char )node->data<<endl; PreOrderTraversal(node->left); PreOrderTraversal(node->right); } } char * Encode( const char * content, char ** table, int length){ stringstream ss; for ( int i=0;i<length;i++){ if ((unsigned char )content[i]==0){ } else { ss<<table[(unsigned char )content[i]]; } } char * encoded_content= new char [ss.str().length()+1]; memcpy (encoded_content,ss.str().c_str(),ss.str().length()); encoded_content[ss.str().length()]=0; return encoded_content; } int BinToDec( char * bin_content){ int number=0; int cur=1; for ( int i=7;i>=0;i--){ number+=(bin_content[i]- '0' )*cur; cur*=2; } return number; } char * BinToCharText( const char * bin_content, int & fill_count, int & save_length){ int length= strlen (bin_content); fill_count=8-length%8; if (fill_count>0){ char * temp= new char [length+fill_count+1]; char temp1[fill_count]; for ( int i=0;i<fill_count;i++){ temp1[i]= '0' ; } sprintf (temp, "%s%s" ,bin_content,temp1); temp[length+fill_count]=0; bin_content=temp; } length+=fill_count; save_length=length/8; char * text= new char [length/8+1]; for ( int i=0;i<length;i+=8){ char temp[8]; memcpy (temp,bin_content+i,8); text[i/8]=( char )BinToDec(temp); } text[length/8]=0; if (fill_count>0){ delete bin_content; } return text; } char * DecToBin( int num){ char * bin= new char [8]; if (num<0){ num=256+num; } for ( int i=7;i>=0;i--){ bin[i]=num%2+ '0' ; num/=2; } return bin; } char * CharTextToBin( char * text, int fill_count, int save_length){ int length=save_length; char * content= new char [8*length+1]; int pos=0; for ( int i=0;i<length;i++){ int number=text[i]; char * bin=DecToBin(number); memcpy (content+pos,bin,8); pos+=8; delete bin; } content[8*length-fill_count]=0; return content; } char * Decode( const char * encode_content,Node* tree){ stringstream ss; Node* node=tree; for ( int i=0;i< strlen (encode_content);i++){ if (encode_content[i]== '0' ){ node=node->left; } else if (encode_content[i]== '1' ){ node=node->right; } if (node->data!=0){ ss<<node->data; node=tree; } } char * decode_content= new char [ss.str().length()+1]; memcpy (decode_content,ss.str().c_str(),ss.str().length()); decode_content[ss.str().length()]=0; return decode_content; } void ReleaseTable( char ** table){ for ( int i=0;i<256;i++){ if (table[i]!=0){ delete table[i]; } } } void PostOrderTraversal(Node* node){ if (node!=0){ PostOrderTraversal(node->left); PostOrderTraversal(node->right); delete node; } } char * ReadFile( char * path, long & length){ char * content=0; ifstream in; in.open(path,ios::in|ios::binary); in.seekg(0,ios::end); length=in.tellg(); content= new char [length+1]; in.seekg(0,ios::beg); int i=0; while (!in.eof()){ content[i++]=in.get(); } content[length]=0; in.close(); return content; } char * ReadFile( char * path, int & fill_count, int & save_length){ char * content=0; ifstream in; in.open(path,ios::in|ios::binary); in>>fill_count>>save_length; long cur=in.tellg()+( long )1; in.seekg(0,ios::end); long length=in.tellg()-cur; content= new char [length+1]; in.seekg(cur,ios::beg); int i=0; while (!in.eof()){ content[i++]=in.get(); } content[length]=0; in.close(); return content; } void WriteFile( char * path, const char * content, int length, bool append){ ofstream out; if (append){ out.open(path,ios::out|ios::binary|ios::app); } else { out.open(path,ios::out|ios::binary); } out.write(content,length); out.close(); } int main(){ long length; char * content=ReadFile( "content.txt" ,length); L_Node* first=0; BuildSortedList(content,first,length); //create nodes list and save to nodes file //GetLNodes(first);//get and recreate nodes from file Node* root=0; //used for buildtable and decode BuildTree(first,root); //build tree by nodes list and release sorted list char * table[256]={ //build table,used for encode 0 }; PreOrderTraversal(root, "" ,-1,table); //create table char * encode_content=Encode(content,table,length); //convert content to encoded bin text cout<<encode_content<<endl; delete content; ReleaseTable(table); //release table when encode finished int fill_count; int save_length; char * save_text=BinToCharText(encode_content,fill_count,save_length); //convert encoded bin text to char text and save these text to file delete encode_content; char head_info[32]; sprintf (head_info, "%d %d " ,fill_count,save_length); WriteFile( "encoded_content.txt" ,head_info, strlen (head_info)); WriteFile( "encoded_content.txt" ,save_text,save_length, true ); delete save_text; save_text=ReadFile( "encoded_content.txt" ,fill_count,save_length); //read fill_count、save_length、encoded char text from file char * bin_text= CharTextToBin(save_text,fill_count,save_length); //convert char text to bin text delete save_text; char * decode_content=Decode(bin_text,root); //decode by bin_text and tree cout<<decode_content<<endl; delete bin_text; delete decode_content; PostOrderTraversal(root); //release tree return 0; } |
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持服务器之家。
原文链接:http://blog.csdn.net/nightwizard2030/article/details/70227365