簡介:利用哈夫曼樹實現一個文本文檔的壓縮,以及對壓縮文件的解壓
思路:在壓縮文件時,首先要統計字符出現的次數,構建哈夫曼樹,生成哈夫曼編碼,壓縮到文件。
在解壓文件時,讀取壓縮文件,將編碼與字符相對應,最後將字符寫到文件中。
在解壓文件中,如何將編碼與字符相對應?
我們都知道,在解壓文件時,我們只有一個壓縮文件,其餘一概不知。所以在解壓時,需要重建哈夫曼樹。要想重建哈夫曼樹,就需要知道字符以及字符出現的次數。在壓縮文件時,已經統計出字符出現的次數。所以,在壓縮文件時,應該同時寫一個配置文件,配置文件中存放字符以及字符出現的次數。在解壓時,讀取壓縮文件和配置文件,重建哈夫曼樹,將編碼與字符相對應。
// Binary heap (Heap.h)
#pragma once
#include <cassert>
#include <iostream>
#include <utility>
#include <vector>
// Kept at file scope because the other files of this project use unqualified
// standard names (string, cout, ...) after including this header.
using namespace std;

// Ordering predicate: true when l is strictly smaller than r.
// Using it as the heap comparator yields a min-heap.
template <class T>
struct Less
{
    bool operator()(const T& l, const T& r) const
    {
        return l < r;
    }
};

// Ordering predicate: true when l is strictly greater than r.
// Using it as the heap comparator yields a max-heap.
template <class T>
struct Greater
{
    bool operator()(const T& l, const T& r) const
    {
        return l > r;
    }
};

// Array-backed binary heap.
//
// T      - element type; must be copyable and swappable.
// Comper - default-constructible predicate; com(a, b) == true means that a
//          must sit closer to the top than b. Defaults to Greater<T>, i.e. a
//          max-heap.
template <class T, class Comper = Greater<T> >
class Heap
{
public:
    // Creates an empty heap.
    Heap()
    {}

    // Builds a heap from the first `size` elements of `a`.
    // `a` may be null only when `size` is 0.
    Heap(const T* a, size_t size)
        : _a(a, a + size)
    {
        // Sift down every internal node, starting from the last parent
        // (index size/2 - 1). Counting with "i > 0" on an unsigned index
        // avoids the (size - 2) underflow for 0 or 1 elements.
        for (size_t i = _a.size() / 2; i > 0; --i)
        {
            _ApDown(i - 1);
        }
    }

    // Inserts x: append at the end, then restore the heap order upwards.
    void Push(const T& x)
    {
        _a.push_back(x);
        _ApHeapUp(_a.size() - 1);
    }

    // Removes the top element. Calling this on an empty heap is a
    // programming error: it asserts in debug builds and is a no-op otherwise.
    void Pop()
    {
        assert(!_a.empty());
        if (_a.empty())
        {
            return;
        }
        // Move the top to the back, drop it, then repair from the root.
        swap(_a[0], _a[_a.size() - 1]);
        _a.pop_back();
        _ApDown(0);
    }

    // Number of stored elements.
    size_t Size() const
    {
        return _a.size();
    }

    // True when the heap holds no elements.
    bool Empty() const
    {
        return _a.empty();
    }

    // Returns a copy of the top element. The heap must not be empty.
    T Top() const
    {
        assert(!_a.empty());
        return _a[0];
    }

public:
    // Sifts the element at index `parent` down until neither child has to be
    // above it.
    void _ApDown(size_t parent)
    {
        Comper com;
        size_t child = parent * 2 + 1;
        while (child < _a.size())
        {
            // Pick whichever child should be closer to the top.
            if ((child + 1) < _a.size() && com(_a[child + 1], _a[child]))
            {
                ++child;
            }
            if (!com(_a[child], _a[parent]))
            {
                break;
            }
            swap(_a[child], _a[parent]);
            parent = child;
            child = parent * 2 + 1;
        }
    }

    // Sifts the element at index `child` up until its parent no longer has to
    // be below it.
    void _ApHeapUp(size_t child)
    {
        Comper com;
        while (child > 0)
        {
            // Computed inside the loop so (child - 1) never underflows.
            size_t parent = (child - 1) / 2;
            if (!com(_a[child], _a[parent]))
            {
                break;
            }
            swap(_a[child], _a[parent]);
            child = parent;
        }
    }

protected:
    vector<T> _a; // heap storage in level order; _a[0] is the top
};
//建哈夫曼樹 Huffman.h #include "Heap.h" template <class T> struct HuffmanTreeNode { HuffmanTreeNode(const T& x) :_left(NULL) ,_right(NULL) ,_weight(x) {} HuffmanTreeNode<T>* _left; HuffmanTreeNode<T>* _right; T _weight; }; template <class T> class HuffmanTree { typedef HuffmanTreeNode<T> Node; public: HuffmanTree(const T* a,size_t n,const T& invalue) { struct IsLess { bool operator()(const Node* left,const Node* right) { return left->_weight < right->_weight; } }; Heap<Node*,IsLess> minHeap; for(size_t i=0;i<n;++i) { if(a[i] != invalue) { minHeap.Push(new Node(a[i])); //建小堆 } } while(minHeap.Size() > 1) { Node* left = minHeap.Top(); minHeap.Pop(); Node* right = minHeap.Top(); minHeap.Pop(); Node* parent = new Node(left->_weight+right->_weight); parent->_left = left; parent->_right = right; minHeap.Push(parent); } _root = minHeap.Top(); } Node* GetRoot() { return _root; } protected: Node* _root; }; void HuffmanTreeTest() { int a[] = {1,2,3,4,5,6,7,8,9}; HuffmanTree<int> ht(a,sizeof(a)/sizeof(a[0]),'#'); }
//實現壓縮,解壓 FileCompare.h #define _CRT_SECURE_NO_WARNINGS #include "HuffmanTree.h" #include <assert.h> #include <string> #include <stdlib.h> typedef unsigned long LongType; struct CharInfo { unsigned char _ch; //字符 LongType _count; //字符出現的次數 string _code; //字符對應的Huffman編碼 CharInfo() :_ch(0) ,_count(0) {} CharInfo(LongType count) :_ch(0) ,_count(count) {} bool operator!=(const CharInfo& info) const { return _count != info._count; } CharInfo operator+(const CharInfo& info) const { return CharInfo(_count + info._count); } bool operator<(const CharInfo& info) const { return _count < info._count; } }; class FileCompress { public: FileCompress() { for(size_t i=0;i<256;++i) { _info[i]._ch = i; _info[i]._count = 0; } } void GetHuffmanCode(HuffmanTreeNode<CharInfo>* root,string code)//獲取哈夫曼編碼 { if(root == NULL) return; if(root->_left == NULL && root->_right == NULL) { _info[root->_weight._ch]._code = code; } GetHuffmanCode(root->_left,code + '0');//左爲0 GetHuffmanCode(root->_right,code + '1');//右爲1 } bool ReadLine(FILE* fout,string& line) { char ch = fgetc(fout); if(feof(fout)) //若結束返回非零值 return false; while(!feof(fout) && ch != '\n') { line += ch; ch = fgetc(fout); } return true; } void Compress(const char* filename) { //統計字符的次數 FILE* fout = fopen(filename,"rb"); assert(fout); char ch = fgetc(fout); while(!feof(fout)) //讀到文件尾的標誌位 若採用ch != EOF 11111111 跳出讀取文件 { _info[(unsigned char)ch]._count++; ch = fgetc(fout); } //構建Huffman樹 CharInfo invalue; //非法值 HuffmanTree<CharInfo> tree(_info,256,invalue); //生成Huffman編碼 string code; GetHuffmanCode(tree.GetRoot(),code); //壓縮 string comFilename = filename; comFilename += ".compress"; FILE* fin = fopen(comFilename.c_str(),"wb"); assert(fin); fseek(fout,0,SEEK_SET); //設置文件指針的位置 ch = fgetc(fout); int size = 0; int value = 0; while(!feof(fout)) //feof 來判斷文件是否執行結束,若結束,則返回非零值。 { string code = _info[(unsigned char)ch]._code; for(size_t i=0;i<code.size();++i) { if(code[i] == '1') { value |= 1; } ++size; if(size == 8) { fputc(value,fin); size = 
0; value = 0; } value <<= 1; } ch = fgetc(fout); } if(size > 0) { value <<= (7-size); fputc(value,fin); } //配置文件 string configfile = filename; configfile += ".config"; FILE* fconfig = fopen(configfile.c_str(),"wb");//以二進制的形式打開 assert(fconfig); char buffer[256]; string line; for(size_t i=0;i<256;++i) { if(_info[i]._count > 0) { line += _info[i]._ch; line += ','; line += itoa(_info[i]._count,buffer,10); line += '\n'; fputs(line.c_str(),fconfig); } line.clear(); } fclose(fout); fclose(fin); fclose(fconfig); } void Uncompress(const char* filename) { //讀配置文件 string configfile = filename; configfile += ".config"; FILE* fconfig = fopen(configfile.c_str(),"rb");//以二進制的形式讀取 assert(fconfig); string str; while(ReadLine(fconfig,str)) { if(str.empty()) //處理空行 { str += '\n'; } else { _info[(unsigned char)str[0]]._count = atoi(str.substr(2).c_str());//第二個位置即第三個字符爲字符的次數 str.clear(); } } //構建Huffman樹 CharInfo invalue; HuffmanTree<CharInfo> tree(_info,256,invalue); //讀取壓縮文件,進行還原 string comFilename = filename; comFilename += ".compress"; FILE* fout = fopen(comFilename.c_str(),"rb"); assert(fout); HuffmanTreeNode<CharInfo>* root = tree.GetRoot(); HuffmanTreeNode<CharInfo>* cur = root; string uncomFilename = filename; uncomFilename += ".uncompress"; FILE* fin = fopen(uncomFilename.c_str(),"wb"); assert(fin); LongType SumCount = tree.GetRoot()->_weight._count; //總數 char ch = fgetc(fout); int pos = 7; while(1) { if(ch & (1<<pos)) { cur = cur->_right; } else { cur = cur->_left; } if(cur->_left == NULL && cur->_right == NULL) { fputc(cur->_weight._ch,fin); if(--SumCount == 0) { break; } cur = root; } if(pos-- == 0) { ch = fgetc(fout); pos = 7; } } fclose(fout); fclose(fin); } protected: CharInfo _info[256]; }; void PressHuffmanTest() { FileCompress fh; fh.Compress("input"); //fh.Compress("project.txt"); } void UnPressHuffmanTest() { FileCompress fh; fh.Uncompress("input"); //fh.Uncompress("project.txt"); }
//測試 #include "FileCompree.h" #include <windows.h> int main() { //HuffmanTreeTest(); //驗證哈弗曼樹 int begin1 = GetTickCount(); PressHuffmanTest(); int end1 = GetTickCount(); cout<<"壓縮時間爲:"<<end1-begin1<<endl; int begin2 = GetTickCount(); UnPressHuffmanTest(); int end2 = GetTickCount(); cout<<"解壓時間爲:"<<end2-begin2<<endl; return 0; }
測試結果:
比較結果: