一、
Huffman於1952年提出一種編碼方法,該方法完全依據字符出現概率來構造異字頭的平均長度最短的碼字,有時稱之爲最佳編碼,一般稱爲哈夫曼編碼(有時也稱爲霍夫曼編碼)。
二、背景:
1951年,霍夫曼和他在MIT信息論課程上的同學需要選擇是完成學期報告還是參加期末考試。導師Robert M. Fano給他們的學期報告的題目是:查找最有效的二進制編碼。由於無法證明哪個已有編碼是最有效的,霍夫曼放棄對已有編碼的研究,轉向新的探索,最終發現了基於有序頻率二叉樹編碼的想法,並很快證明了這個方法是最有效的。
由於這個算法,學生終於青出於藍,超過了他那曾經和信息論創立者克勞德·香農共同研究過類似編碼的導師。霍夫曼使用自底向上的方法構建二叉樹,避免了次優算法香農-範諾編碼的最大弊端──自頂向下構建樹。
三、
哈夫曼編碼是一種統計編碼,屬於無損壓縮編碼。它也是一種變長編碼,也就是說,對於出現頻率高的信息,對應的編碼長度較短,反之較長。通過這樣的編碼處理,表示全部信息所用的總碼長不會超過(通常明顯小於)採用等長編碼表示同一信息所需的總長度。
四、
哈夫曼樹,又稱最優二叉樹,是指帶權路徑長度最小的二叉樹。樹的帶權路徑長度,就是樹中所有的葉結點的權值乘上其到根結點的路徑長度的積之和。
五、
哈夫曼樹中的權值可以理解爲字符的出現頻率。
六、如何構造哈夫曼樹:
a、給定n個權值爲{w1,w2,w3,...,wn},先構造n棵只有根結點(帶有相應的權值)的二叉樹(其左右子樹爲空);
b、在森林中選取兩棵根結點的權值最小的二叉樹,將它們作爲左右子樹並構造一棵新的二叉樹。然後置這棵新的二叉樹的根結點的權值爲其左右子樹的根結點的權值之和;
c、在該森林中刪除這兩棵二叉樹,將該新的二叉樹加入該森林中;
d、重複b、c,直到最終森林中只有一棵二叉樹爲止,這棵二叉樹就是哈夫曼樹。
七、實現和應用:(一個小型的哈夫曼編碼/譯碼系統)
(1)實現一個含有如下功能項的主菜單:
I-----Initialization(初始化、建立哈夫曼樹)
T-----Tree printing(打印哈夫曼樹)
C-----Huffman code printing(打印哈夫曼編碼)
E-----Encoding(編碼)
P-----Print(打印編碼文件)
D-----Decoding(譯碼)
Q-----Quit(退出)
(2)實現所有功能項對應的具體功能:
a、初始化、建立哈夫曼樹:從終端讀入字符集大小n、n個字符、n個權值,建立起哈夫曼樹,並將其存於文件hfmTree.txt中;
b、打印哈夫曼樹:將內存中的哈夫曼樹以直觀的形式(這裏使用橫向打印的樹結構)顯示在終端上,同時將該形式的哈夫曼樹寫入文件TreePrint.txt中;
c、打印哈夫曼編碼:將字符集對應的哈夫曼編碼顯示在終端;
d、編碼(有兩種方式):
*利用已經建立好的哈夫曼樹(如果不在內存中,則從文件hfmTree.txt中讀入)對文件ToBeTran.txt中的文本進行編碼。然後將結果存入文件CodeFile.txt中;
*利用已經建立好的哈夫曼樹(如果不在內存中,則從文件hfmTree.txt中讀入)對實時輸入的文本進行編碼,然後將結果顯示在終端上。
e、打印編碼文件:將文件CodeFile.txt以緊湊格式顯示在終端上,每行50個代碼,同時將該字符形式的編碼文件寫入文件CodePrint.txt中;
f、譯碼(也有兩種方式):
*利用已經建立好的哈夫曼樹(如果不在內存中,則從文件hfmTree.txt中讀入)對文件CodeFile.txt中的文本進行譯碼,然後將結果存入文件TextFile.txt中;
*利用已經建立好的哈夫曼樹(如果不在內存中,則從文件hfmTree.txt中讀入)對實時輸入的文本進行譯碼,然後將結果顯示在終端上。
g、退出;
(3)代碼實現:
#include<iostream>
#include<fstream>
#include<string>
#include<Windows.h>
using namespace std;
// Node of the Huffman tree. Nodes live in a flat array and refer to each other by index.
// The value 0 means "no child" / "no parent" in every link field, so all links start at 0;
// leaving the child links uninitialised would make a fresh node look like it has children.
struct hfmNode
{
	int leftChild, rightChild, parent;
	double weight;
	hfmNode() :leftChild(0), rightChild(0), parent(0), weight(0.0) {}
};
int hfmSize = 0; // size of the Huffman tree: number of characters in the set (the tree has 2*hfmSize-1 nodes)
char *characters = nullptr; // character set
double *weight = nullptr; // weight of each character
string *huffmanCodeStrings = nullptr; // Huffman code of each character in the set
hfmNode* hfmTree = nullptr; // Huffman tree stored as an array of nodes
// Prints a horizontal separator line on the terminal.
void printLine()
{
	static const char separator[] = "-----------------------------------------------";
	cout << separator;
	cout << endl;
}
//通過讀取已經存在於文件hfmTree.txt中哈夫曼樹的相關數據進行初始化
void initByFile()
{
ifstream in("D:\\hfmTree.txt", ios::in | ios::binary);
if (!in.is_open())
{
cerr << "文件打開失敗!" << endl;
return;
}
in.read((char*)&hfmSize, sizeof(int));//一定要先從文件中讀取哈夫曼樹的大小!
//0號單元不用,起始下標從1開始
int m = 2 * hfmSize - 1;
hfmTree = new hfmNode[m + 1];
characters = new char[hfmSize + 1];
weight = new double[hfmSize + 1];
huffmanCodeStrings = new string[hfmSize + 1];
in.read(characters, sizeof(char)*(hfmSize + 1));
in.read((char*)weight, sizeof(double)*(hfmSize + 1));
in.read((char*)huffmanCodeStrings, sizeof(string)*(hfmSize + 1));
in.read((char*)hfmTree, sizeof(hfmTree[0])*(m + 1));
in.close();
}
// Among nodes 1..n that have no parent yet, finds the one with the smallest weight and
// returns its index; returns 0 when every node in that range already has a parent.
// The chosen node's parent is set to 1 as a temporary "already taken" mark, so that a
// second call returns the next-smallest node.
// Weights are compared as doubles against the best candidate found so far; keeping the
// running minimum in an int would truncate fractional weights (e.g. 0.3 -> 0) and then
// no lighter node could ever be selected.
int minInhuffmanTree(hfmNode hfmTree[], int n)
{
	int minIndex = 0; // 0 = no candidate found yet
	for (int i = 1; i <= n; i++)
	{
		if (hfmTree[i].parent != 0) continue;
		// Strict '<' keeps the lowest index when several nodes share the minimum weight.
		if (minIndex == 0 || hfmTree[i].weight < hfmTree[minIndex].weight) minIndex = i;
	}
	if (minIndex != 0) hfmTree[minIndex].parent = 1;
	return minIndex;
}
// Takes the two lightest parentless nodes among 1..n (marking both as used) and hands
// their indices back through min1 and min2, arranged so that min1 holds the smaller index.
void selectTwoMinsFromhfmTree(hfmNode hfmTree[], int n, int& min1, int& min2)
{
	const int firstPick = minInhuffmanTree(hfmTree, n);
	const int secondPick = minInhuffmanTree(hfmTree, n);
	if (firstPick > secondPick)
	{
		min1 = secondPick;
		min2 = firstPick;
	}
	else
	{
		min1 = firstPick;
		min2 = secondPick;
	}
}
// Builds the Huffman tree for n weighted symbols and derives the code of each symbol.
//   hfmTree            - node array with at least 2*n slots; slot 0 is unused, leaves are 1..n,
//                        internal nodes are n+1..2*n-1 and the root ends up at 2*n-1
//   huffmanCodeStrings - receives the code of symbol i at index i (1..n)
//   weight             - weight of symbol i at index i (1..n)
//   n                  - number of symbols; nothing is done when n < 1
// With a single symbol the only leaf is also the root, so its code is the empty string;
// the leaf is still initialised so that the tree can be printed.
void huffmanCoding(hfmNode hfmTree[], string huffmanCodeStrings[], double weight[], int n)
{
	if (n < 1) return;
	const int m = 2 * n - 1;

	// Leaves: carry the symbol weights and start without links.
	for (int i = 1; i <= n; i++)
	{
		hfmTree[i].weight = weight[i];
		hfmTree[i].parent = hfmTree[i].leftChild = hfmTree[i].rightChild = 0;
	}
	// Internal nodes must be parentless before merging starts.
	for (int i = n + 1; i <= m; i++) hfmTree[i].parent = 0;

	// Repeatedly merge the two lightest parentless nodes into a new internal node i.
	for (int i = n + 1; i <= m; i++)
	{
		int min1, min2;
		selectTwoMinsFromhfmTree(hfmTree, i - 1, min1, min2);
		hfmTree[min1].parent = hfmTree[min2].parent = i;
		hfmTree[i].leftChild = min1;
		hfmTree[i].rightChild = min2;
		hfmTree[i].weight = hfmTree[min1].weight + hfmTree[min2].weight;
	}

	// Walk from every leaf up to the root: coming from a left child contributes '0',
	// from a right child '1'. Bits are appended leaf-to-root and reversed once at the end,
	// which avoids rebuilding the string on every step.
	for (int i = 1; i <= n; i++)
	{
		string reversedBits;
		int current = i;
		int parent = hfmTree[current].parent;
		while (parent != 0)
		{
			reversedBits += (hfmTree[parent].leftChild == current) ? '0' : '1';
			current = parent;
			parent = hfmTree[current].parent;
		}
		huffmanCodeStrings[i].assign(reversedBits.rbegin(), reversedBits.rend());
	}
}
// I-----Initialization: reads the character set and weights from the terminal, builds the
// Huffman tree and codes in memory, and stores everything in the binary file D:\hfmTree.txt.
// File layout, in order:
//   int hfmSize | char[hfmSize+1] | double[hfmSize+1] | std::string object bytes[hfmSize+1] | hfmNode[2*hfmSize]
// An invalid symbol count is rejected before anything is allocated; arrays from an earlier
// initialisation are released before being replaced.
void init()
{
	int sum = 0;
	cout << "請輸入您所要編碼的字符種類總數:";
	if (!(cin >> sum) || sum < 1)
	{
		cerr << "字符種類總數必須是正整數!" << endl;
		// Reset the stream and drop the rest of the line so later prompts still work.
		cin.clear();
		string discarded;
		getline(cin, discarded);
		return;
	}

	// Release whatever a previous initialisation (or load) left behind.
	delete[] characters;
	delete[] weight;
	delete[] huffmanCodeStrings;
	delete[] hfmTree;

	hfmSize = sum;
	// Slot 0 is unused; valid indices start at 1.
	characters = new char[sum + 1];
	weight = new double[sum + 1];
	huffmanCodeStrings = new string[sum + 1];
	const int m = 2 * sum - 1;
	hfmTree = new hfmNode[m + 1];
	// Give the unused slots a defined value, because the arrays are written to disk whole.
	characters[0] = '\0';
	weight[0] = 0.0;

	cout << endl << "請您按順序輸入每種字符以及其對應的權值:" << endl;
	printLine();
	cin.get(); // consume the newline left after the count
	for (int i = 1; i <= sum; i++)
	{
		cout << "請您輸入第 " << i << " 個字符:";
		// Read through cin (not getchar) so all input goes through one stream; any character,
		// including a space, is accepted.
		characters[i] = cin.get();
		cin.get(); // consume the newline
		cout << "請您輸入該字符所所應的權值:";
		cin >> weight[i];
		cin.get(); // consume the newline
		printLine();
	}

	cout << "字符集爲:" << endl;
	for (int i = 1; i <= sum; i++)
	{
		cout << characters[i] << ":" << weight[i] << endl;
	}
	cout << endl;

	huffmanCoding(hfmTree, huffmanCodeStrings, weight, sum);
	printLine();

	// Store the tables and the tree in hfmTree.txt.
	cout << "下面將各種字符的哈夫曼編碼寫入文件hfmTree.txt中......" << endl;
	ofstream out("D:\\hfmTree.txt", ios::out | ios::binary);
	if (!out.is_open())
	{
		cerr << "文件打開失敗!" << endl;
		return;
	}
	out.write(reinterpret_cast<const char*>(&hfmSize), sizeof(int));
	out.write(characters, sizeof(char) * (hfmSize + 1));
	out.write(reinterpret_cast<const char*>(weight), sizeof(double) * (hfmSize + 1));
	// NOTE(review): this block holds the in-memory bytes of the std::string objects, not the
	// code text. It is written only to keep the file layout unchanged; a reader must never copy
	// these bytes back over live std::string objects - the codes can be rebuilt from the tree.
	out.write(reinterpret_cast<const char*>(huffmanCodeStrings), sizeof(string) * (hfmSize + 1));
	out.write(reinterpret_cast<const char*>(hfmTree), sizeof(hfmTree[0]) * (m + 1));
	out.close();
	// close() flushes; only now is it known whether every byte reached the file.
	if (!out)
	{
		cerr << "寫入文件hfmTree.txt失敗!" << endl;
		return;
	}
	cout << "寫入文件hfmTree.txt成功!" << endl;
}
// T-----Tree printing (print the Huffman tree)
// Branch glyph printed after each node, indexed by (hasLeftChild << 1) | hasRightChild:
// [0] ' ' for a leaf, [1] '/' for right child only, [2] a single backslash (one byte) for
// left child only, [3] '<' for both children.
char branches[] = { " /\\<" };
void printHfmTree(int root, int height, ostream& out)
{
if (root != 0)
{
//先打印當前結點的右子樹,並且深度+1
printHfmTree(hfmTree[root].rightChild, height + 1, out);
//通過跳格符來表現當前節點的深度,深度越大的結點會越往右
for (int i = 0; i < height; i++) out << "\t";
//輸出當前結點的權值
out << hfmTree[root].weight;
//如果當前結點是葉結點,則再打印出相應的字符
if (hfmTree[root].leftChild == 0 && hfmTree[root].rightChild == 0) out << "(" << characters[root] << ")";
//打印樹枝
out << branches[((hfmTree[root].leftChild != 0) << 1) | (hfmTree[root].rightChild != 0)];
//換行,打印當前結點的左子樹
out << endl;
printHfmTree(hfmTree[root].leftChild, height + 1, out);
}
}
// Shows the in-memory Huffman tree on the terminal (printed sideways) and writes the same
// drawing to D:\TreePrint.txt. Terminates the program with status 1 if that file cannot be opened.
void PrintHfmTree()
{
	// The root is the last node of the array.
	const int root = 2 * hfmSize - 1;

	cout << "該哈夫曼樹打印如下(橫向打印):" << endl << endl;
	printHfmTree(root, 0, cout);

	ofstream out("D:\\TreePrint.txt", ios::out);
	if (out.is_open())
	{
		printHfmTree(root, 0, out);
		cout << "寫入文件TreePrint.txt成功!" << endl;
		out.close();
		return;
	}

	cerr << "文件打開失敗!" << endl;
	exit(1);
}
// C-----Huffman code printing: lists every character of the set on the terminal together
// with its weight and its Huffman code.
void printHfmCodeStrings()
{
	cout << "該字符集的編碼如下:" << endl << endl;
	int index = 1;
	while (index <= hfmSize)
	{
		cout << "字符 " << characters[index];
		cout << "(權值爲" << weight[index] << ")";
		cout << " : " << huffmanCodeStrings[index] << endl;
		++index;
	}
}
// Returns the concatenated Huffman codes of the characters of `text`.
// Characters that are not part of the character set are skipped; if the set contains the
// same character more than once, the first entry is used.
static string encodeWithHfmTable(const string& text)
{
	string encoded;
	for (string::size_type i = 0; i < text.length(); i++)
	{
		for (int j = 1; j <= hfmSize; j++)
		{
			if (characters[j] == text[i])
			{
				encoded += huffmanCodeStrings[j];
				break;
			}
		}
	}
	return encoded;
}

// E-----Encoding: encodes a text with the current Huffman codes (text may contain spaces).
// Mode 1 reads D:\ToBeTran.txt and writes the result to D:\CodeFile.txt;
// mode 2 reads one line from the terminal and prints the result.
// Any other choice does nothing. The program exits with status 1 if a file cannot be opened.
void encodeText()
{
	char inputType = ' ', ch = ' ';
	string textToBeEncoded = "";
	cout << "您有如下兩種方式提供待編碼文本:" << endl << endl;
	cout << "1-----讀取文件ToBeTran.txt中的待編碼文本;" << endl;
	cout << "2-----讀取實時輸入的待編碼文本;" << endl;
	printLine();
	cout << "您選擇方式: " << endl;
	cin >> inputType;
	if (inputType == '1')
	{
		ifstream in("D:\\ToBeTran.txt", ios::in);
		if (!in.is_open())
		{
			cerr << "文件打開失敗!" << endl;
			exit(1);
		}
		ofstream out("D:\\CodeFile.txt", ios::out);
		if (!out.is_open())
		{
			cerr << "文件打開失敗!" << endl;
			exit(1);
		}
		cin.get(); // consume the newline after the choice
		// get(char&) reports end of file through the stream state. Storing the int result of
		// get() in a char and comparing it with EOF would stop early on byte 0xFF, or never
		// stop where char is unsigned.
		while (in.get(ch))
		{
			textToBeEncoded += ch;
		}
		out << encodeWithHfmTable(textToBeEncoded);
		cout << "該段文本被編碼後寫入文件CodeFile.txt成功!" << endl;
		in.close();
		out.close();
	}
	else if (inputType == '2')
	{
		cout << "請您輸入待編碼文本:" << endl;
		cin.get(); // consume the newline after the choice
		while (cin.get(ch) && ch != '\n')
		{
			textToBeEncoded += ch;
		}
		cout << "該段文本被編碼爲如下:" << endl;
		cout << encodeWithHfmTable(textToBeEncoded) << endl;
	}
}
// P-----Print: shows the contents of D:\CodeFile.txt on the terminal in compact form,
// 50 code characters per line, and writes the same lines to D:\CodePrint.txt.
// Only the first whitespace-delimited token of the file is read.
// Terminates the program with status 1 if either file cannot be opened.
void printEncodeFile()
{
	ifstream in("D:\\CodeFile.txt", ios::in);
	if (!in.is_open())
	{
		cerr << "文件打開失敗!" << endl;
		exit(1);
	}
	ofstream out("D:\\CodePrint.txt", ios::out);
	if (!out.is_open())
	{
		cerr << "文件打開失敗!" << endl;
		exit(1);
	}

	string codes;
	in >> codes;

	int printedOnLine = 0;
	for (char code : codes)
	{
		cout << code;
		out << code;
		// Break the line after every 50th code character, on both outputs.
		if (++printedOnLine == 50)
		{
			cout << endl;
			out << endl;
			printedOnLine = 0;
		}
	}

	cout << endl;
	cout << "寫入文件CodePrint.txt成功!" << endl;
	in.close();
	out.close();
}
// Decodes a string of '0'/'1' characters by walking the Huffman tree from the root:
// '0' follows the left child, '1' the right child; reaching a leaf emits its character
// and restarts at the root. Any other character is ignored, and trailing bits that do not
// reach a leaf are dropped.
// Returns an empty string when the root has no children (a one-character set has an
// empty code, so there is nothing to walk).
static string decodeWithHfmTree(const string& bits)
{
	string decoded;
	const int root = 2 * hfmSize - 1;
	if (root < 1 || hfmTree[root].leftChild == 0) return decoded;

	int current = root;
	for (string::size_type i = 0; i < bits.length(); i++)
	{
		if (bits[i] == '0') current = hfmTree[current].leftChild;
		else if (bits[i] == '1') current = hfmTree[current].rightChild;
		else continue;

		// A node without a left child is a leaf.
		if (hfmTree[current].leftChild == 0)
		{
			decoded += characters[current];
			current = root;
		}
	}
	return decoded;
}

// D-----Decoding: decodes a code text with the current Huffman tree (result may contain spaces).
// Mode 1 reads D:\CodeFile.txt and writes the result to D:\TextFile.txt;
// mode 2 reads one line from the terminal and prints the result.
// Any other choice does nothing. The program exits with status 1 if a file cannot be opened.
void decodeText()
{
	char inputType = ' ', ch = ' ';
	string textToBeDecoded;
	cout << "您有如下兩種方式提供待譯碼文本:" << endl << endl;
	cout << "1-----讀取文件CodeFile.txt中的待譯碼文本;" << endl;
	cout << "2-----讀取實時輸入的待譯碼文本;" << endl;
	printLine();
	cout << "您選擇方式: " << endl;
	cin >> inputType;
	if (inputType == '1')
	{
		ifstream in("D:\\CodeFile.txt", ios::in);
		if (!in.is_open())
		{
			cerr << "文件打開失敗!" << endl;
			exit(1);
		}
		ofstream out("D:\\TextFile.txt", ios::out);
		if (!out.is_open())
		{
			cerr << "文件打開失敗!" << endl;
			exit(1);
		}
		cin.get(); // consume the newline after the choice
		// get(char&) reports end of file through the stream state. Storing the int result of
		// get() in a char and comparing it with EOF would stop early on byte 0xFF, or never
		// stop where char is unsigned.
		while (in.get(ch))
		{
			textToBeDecoded += ch;
		}
		out << decodeWithHfmTree(textToBeDecoded);
		cout << "該段文本被譯碼後寫入文件TextFile.txt成功!" << endl;
		in.close();
		out.close();
	}
	else if (inputType == '2')
	{
		cout << "請您輸入待譯碼文本:" << endl;
		cin.get(); // consume the newline after the choice
		while (cin.get(ch) && ch != '\n')
		{
			textToBeDecoded += ch;
		}
		cout << "該段文本被譯碼爲如下:" << endl;
		cout << decodeWithHfmTree(textToBeDecoded) << endl;
	}
}
int main(void)
{
bool back = true;
char handle, choice;
while (back)
{
system("cls");
cout << "********Welcome to use the Huffman Encoding System!!!********" << endl << endl;
cout << "\t" << "I-----Initialization(初始化、建立哈夫曼樹)" << endl << endl;
cout << "\t" << "T-----Tree printing(打印哈夫曼樹)" << endl << endl;
cout << "\t" << "C-----Huffman code printing(打印哈夫曼編碼)" << endl << endl;
cout << "\t" << "E-----Encoding(編碼)" << endl << endl;
cout << "\t" << "P-----Print(打印編碼文件)" << endl << endl;
cout << "\t" << "D-----Decoding(譯碼)" << endl << endl;
cout << "\t" << "Q-----Quit(退出)" << endl << endl;
cout << endl;
cout << "請輸入您想進行的操作: ";
cin >> handle;
switch (handle)
{
case 'I':
{
system("cls");
init();
cout << endl;
cout << "是否返回主菜單? Y/N" << endl;
cin >> choice;
if (choice == 'Y') back = true;
else exit(1);
break;
}
case 'T':
{
system("cls");
if (hfmTree == nullptr) initByFile();
PrintHfmTree();
cout << endl;
cout << "是否返回主菜單? Y/N" << endl;
cin >> choice;
if (choice == 'Y') back = true;
else exit(1);
break;
}
case 'C':
{
system("cls");
if (hfmTree == nullptr) initByFile();
printHfmCodeStrings();
cout << endl;
cout << "是否返回主菜單? Y/N" << endl;
cin >> choice;
if (choice == 'Y') back = true;
else exit(1);
break;
}
case 'E':
{
system("cls");
if (hfmTree == nullptr) initByFile();
encodeText();
cout << endl;
cout << "是否返回主菜單? Y/N" << endl;
cin >> choice;
if (choice == 'Y') back = true;
else exit(1);
break;
}
case 'P':
{
system("cls");
printEncodeFile();
cout << endl;
cout << "是否返回主菜單? Y/N" << endl;
cin >> choice;
if (choice == 'Y') back = true;
else exit(1);
break;
}
case 'D':
{
system("cls");
if (hfmTree == nullptr) initByFile();
decodeText();
cout << endl;
cout << "是否返回主菜單? Y/N" << endl;
cin >> choice;
if (choice == 'Y') back = true;
else exit(1);
break;
}
case 'Q':
{
system("cls");
exit(1);
break;
}
}
}
return 0;
}
(經測試,以上代碼可以正常運行)