目錄
問題
實驗內容:
實現文件中數據的加解密與壓縮:將硬盤上的一個文本文件進行加密,比較加密文件和原始文件的大小差別;對加密文件進行解密,比較原始文件和解碼文件的內容是否一致。
實驗說明:
1.輸入和輸出:
(1)輸入:硬盤上給定的原始文件及文件路徑。
(2)輸出:
-
- 硬盤上的加密文件及文件路徑;
- 硬盤上的解碼文件及文件路徑;
- 原始文件和解碼文件的比對結果。
2.實驗要求:
- 提取原始文件中的數據(包括中文、英文或其他字符),根據數據出現的頻率爲權重,構建Huffman編碼表;
- 根據Huffman編碼表對原始文件進行加密,得到加密文件並保存到硬盤上;
- 將加密文件進行解密,得到解碼文件並保存到硬盤上;
- 比對原始文件和解碼文件的一致性,得出是否一致的結論。
正文
說實話,課上本來教的就不多,實驗還弄得這麼難,還要把中文編碼也加上,我也是服了,真是心力交瘁。以下這篇代碼出自一位大佬(佩服佩服,實在牛逼),是我借鑑來的,並且加上了我自己對代碼的理解所寫的註釋,希望能有所幫助。指針真的指得我頭疼。
這裏需要先提一下中文編碼:在UTF-8編碼中,一個中文字符所佔的字節數並不是固定不變的,一般是3個字節,少數是4個字節;所以本程序使用GBK編碼,在GBK編碼中一個中文字符只需要兩個字節,一個高位,一個低位。
運行代碼時,需要一個txt文件,你需要把文件設置成系統默認的ANSI編碼模式,也就是GBK編碼模式,
系統默認編碼,
這是你自己使用的txt文件的編碼模式
文本的路徑
代碼運行界面
然後會在你存放你txt文件的目錄下出現這兩個txt文本一個是加密文本,一個是解碼文本
程序繼續執行,如下圖
原文件
加密文件
解碼文件
原文件和解碼文件對比
代碼以及註釋
詳情見註釋
#include <stdio.h>
#include<string.h>
#include<stdlib.h>
/* LIST and TREE are both pointers to struct node: the same node type serves
 * as an element of the frequency list and as a node of the Huffman tree. */
typedef struct node *LIST;
typedef struct node *TREE;
/* One symbol record. A symbol is either a single byte or a two-byte
 * (lead byte > 128 followed by a trail byte) character. */
struct node
{
int high;// first (or only) byte of the symbol; -1 on merged internal tree nodes; on the frequency-list head it counts the single-byte symbols read
int low;// second byte of a two-byte symbol, -1 for a single-byte symbol; on the frequency-list head it counts the two-byte symbols read
int weight;// number of occurrences (sum of both children on merged nodes); on the tree-list head it counts the nodes still waiting to be merged
int code_index;// number of code characters written into code[]
char code[100];// Huffman code of this node as a string of '0'/'1' characters
TREE Left;// left child
TREE Right;// right child
LIST NEXT;// next node in the singly linked list
};
/* ---- Frequency list ---- */
void insert(LIST head, LIST tmp);               /* insert tmp keeping the list ordered by weight */
LIST find_and_insert(LIST head, LIST tmp);      /* look up tmp's symbol; returns the node to hand to insert() */
void output(LIST head);                         /* print the per-symbol statistics of the input file */
LIST init_LIST(int high, int low, int weight);  /* allocate a frequency-list node */

/* ---- Huffman tree ---- */
TREE tree_node_init(int high, int low, int weight); /* allocate a tree node */
TREE build_tree(LIST head);                     /* copy the frequency list into a list of tree nodes */
void print_huffman_pre(TREE Tree, int flag);    /* pre-order walk; flag != 0 also prints each symbol */
void update_tree(TREE Tree);                    /* assign codes to every node, then write the coded file */
void save_file(TREE *a, int right, TREE Tree);  /* write the coded file */
void to_free_tree(TREE Tree);                   /* release a tree */
void to_free_list(LIST head);                   /* release a list */
/* Full prototypes: an empty parameter list would not describe the call
 * coding(Tree) and is rejected once "()" means "(void)" (C23). */
void coding(TREE Tree);                         /* merge the node list into one Huffman tree */
void decoding(void);                            /* decode the coded file */

/* Symbol table filled by print_huffman_pre() and searched by decoding().
 * 32768 = 256 single-byte symbols + 127 lead bytes (129..255) * 256 trail
 * bytes, i.e. every distinct symbol the reader can produce. */
TREE queue[32768];
int queue_index = 0;
int sum_bit_coding = 0;                         /* bits written to the coded file */
int sum_bit_decoding = 0;                       /* bits written to the decoded file */
char file_in[100] = "D:\\file.txt";             /* path of the input file */
char file_out[100];                             /* directory prefix for the output files */
void init_main(void);
/*
 * Derive the output directory from the input path.
 *
 * Copies everything up to and including the last path separator of file_in
 * into the global file_out, so that output file names can be appended to it.
 * If file_in contains no separator, file_out becomes the empty string.
 *
 * file_in: NUL-terminated path of the input file.
 */
void update_file_out(char file_in[])
{
    size_t dir_len = 0;

    /* Scan backwards for the last separator; accept both '\\' and '/'. */
    for (size_t i = strlen(file_in); i > 0; i--) {
        if (file_in[i - 1] == '\\' || file_in[i - 1] == '/') {
            dir_len = i;
            break;
        }
    }

    /* Never write past the destination buffer. */
    if (dir_len >= sizeof file_out)
        dir_len = sizeof file_out - 1;

    memcpy(file_out, file_in, dir_len);
    /* Terminate explicitly so a second call with a shorter path is correct. */
    file_out[dir_len] = '\0';
}
/* Program entry point: all the work happens in init_main(). */
int main(void)
{
    init_main();

    return 0;
}
/*
 * Interactive driver.
 *
 * 1. Asks for the input path until the file can be opened.
 * 2. Counts every symbol of the file into a frequency list.
 * 3. Builds the Huffman tree, writes the coded file and the decoded file.
 * 4. Runs a small menu that prints statistics until the user picks 5 or
 *    standard input ends, then releases the tree and the list.
 */
void init_main()
{
    LIST head = init_LIST(0, 0, 0);
    FILE *P = NULL;

    while (P == NULL)
    {
        printf("爲了結果的準確性,請輸入GBK格式的txt文件\n");
        printf("請輸入需要執行的文件及路徑\n例如\nD:\\file.txt\n");
        /* fgets instead of gets: bounded by the size of file_in. */
        if (fgets(file_in, sizeof file_in, stdin) == NULL)
        {
            /* No more input: nothing can be processed. */
            to_free_list(head);
            return;
        }
        size_t len = strcspn(file_in, "\r\n");
        if (file_in[len] == '\0')
        {
            /* The line did not fit; drop the remainder of it. */
            int c;
            while ((c = getchar()) != '\n' && c != EOF)
                ;
        }
        file_in[len] = '\0';

        P = fopen(file_in, "r");
        if (P == NULL)
            printf("文件打開失敗\n請重新輸入\n");
        else
            printf("打開成功\n");
    }
    update_file_out(file_in);

    /* Count symbols: a byte > 128 is the lead byte of a two-byte character. */
    int ch;
    while ((ch = fgetc(P)) != EOF)
    {
        LIST tmp = init_LIST(ch, -1, 1);
        if (ch > 128)
            tmp->low = fgetc(P);
        insert(head, find_and_insert(head, tmp));
    }
    fclose(P);

    TREE Tree = build_tree(head);
    coding(Tree);
    update_tree(Tree);                  /* assigns the codes and writes the coded file */
    queue_index = 0;
    print_huffman_pre(Tree->NEXT, 0);   /* fills queue[] with the coded symbols */
    decoding();

    /* Output paths; the buffers hold file_out plus the longest suffix. */
    char coding_file_name[sizeof file_out + 16];
    char encoding_file_name[sizeof file_out + 16];
    snprintf(coding_file_name, sizeof coding_file_name, "%scoding.txt", file_out);
    snprintf(encoding_file_name, sizeof encoding_file_name, "%sencoding.txt", file_out);

    /* Tree->high / Tree->low carry the single-byte / two-byte symbol counts. */
    int total_bytes = Tree->high + Tree->low * 2;

    int running = 1;
    while (running)
    {
        int flag = 0;
        printf("請選擇操作命令-> \n \t1:統計信息;\n\t2:詞頻統計;\n\t3:編碼詳情;\n\t4:文件輸出信息;\n\t5:return\n");
        int got = scanf("%d", &flag);
        if (got == EOF)
            break;
        if (got != 1)
        {
            /* Not a number: discard the line, otherwise scanf fails forever. */
            int c;
            while ((c = getchar()) != '\n' && c != EOF)
                ;
            continue;
        }
        switch (flag)
        {
        case 1:
            printf("-----------------------------------\n");
            printf("文件已經保存,共寫入%d比特\n", sum_bit_coding);
            printf("從文件讀取%d比特\n", 8 * total_bytes);
            /* Avoid dividing by zero for an empty input file. */
            printf("壓縮率是%.3f\n", total_bytes > 0 ? 1.0 * sum_bit_coding / 8 / total_bytes : 0.0);
            printf("共寫入解碼文件%d比特\n", sum_bit_decoding);
            printf("-----------------------------------\n");
            break;
        case 2:
            output(head);
            break;
        case 3:
            print_huffman_pre(Tree->NEXT, 1);
            break;
        case 4:
            printf("輸入文件的路徑爲%s\n", file_in);
            printf("加密文件的路徑爲%s\n", coding_file_name);
            printf("解碼文件的路徑爲%s\n", encoding_file_name);
            break;
        case 5:
            /* Leave the loop instead of returning so the cleanup below runs. */
            running = 0;
            break;
        default:
            break;
        }
    }

    /* Tree is a list head whose NEXT is the Huffman root: free the root's
     * subtree first, then the head itself. */
    to_free_tree(Tree->NEXT);
    Tree->NEXT = NULL;
    to_free_tree(Tree);
    to_free_list(head);
}
void decoding()
{
sum_bit_decoding = 0;
//路徑coding
char coding_file_name[100];
strcpy(coding_file_name, file_out);
strcat(coding_file_name, "coding.txt");
//路徑encoding
char encoding_file_name[100];
strcpy(encoding_file_name, file_out);
strcat(encoding_file_name, "encoding.txt");
FILE *in = fopen(coding_file_name, "r");
FILE *out = fopen(encoding_file_name, "wb");
int ch;
int str_index = 0, left;
char str[100];
while((ch = fgetc(in)) != EOF)
{
str[str_index++] = ch;
str[str_index] = '\0';
for(left = 0; left < queue_index; left++)
{
if(strcmp(queue[left]->code, str) == 0)
{
//統計bits
if(queue[left]->high > 128) sum_bit_decoding += 16;
else sum_bit_decoding += 8;
if((char)queue[left]->high == '\n')//對換行符進行特判
{
fprintf(out, "\r\n");
}
else
{
fprintf(out, "%c", queue[left]->high);
if(queue[left]->high > 128) fprintf(out, "%c", queue[left]->low);//輸出是中文
}
str_index = 0;
break;
}
}
}
fclose(in);
fclose(out);
}
/*
 * Release a whole list: every node after head first, in list order, and
 * finally head itself. head must not be NULL.
 */
void to_free_list(LIST head)
{
    LIST cur = head->NEXT;

    while (cur != NULL)
    {
        LIST following = cur->NEXT;
        free(cur);
        cur = following;
    }
    free(head);
}
/*
 * Release a binary tree in post-order: left subtree, right subtree, then the
 * node itself. A NULL tree is accepted and ignored.
 */
void to_free_tree(TREE Tree)
{
    if (Tree != NULL)
    {
        TREE left_child = Tree->Left;
        TREE right_child = Tree->Right;

        to_free_tree(left_child);
        to_free_tree(right_child);
        free(Tree);
    }
}
/*
 * Encode file_in into "<file_out>coding.txt".
 *
 * Re-reads the input file symbol by symbol (a byte > 128 is paired with the
 * byte that follows it), looks the symbol up in a[0..right) and writes its
 * code as '0'/'1' characters. sum_bit_coding receives the number of code
 * characters written.
 *
 * a:     nodes carrying the codes to use.
 * right: number of entries in a.
 * Tree:  unused; kept so the signature stays compatible with callers.
 */
void save_file(TREE *a, int right, TREE Tree)
{
    (void)Tree;

    /* Buffer holds file_out plus the suffix, so no truncation. */
    char coding_file_name[sizeof file_out + 16];
    snprintf(coding_file_name, sizeof coding_file_name, "%scoding.txt", file_out);

    sum_bit_coding = 0;

    FILE *P = fopen(file_in, "r");
    if (P == NULL)
    {
        printf("文件打開失敗\n");
        return;
    }
    FILE *out = fopen(coding_file_name, "wb");
    if (out == NULL)
    {
        printf("文件打開失敗\n");
        fclose(P);
        return;
    }

    int ch;
    while ((ch = fgetc(P)) != EOF)
    {
        int high = ch;
        int low = -1;               /* -1 marks a single-byte symbol */
        if (high > 128)
            low = fgetc(P);

        for (int i = 0; i < right; i++)
        {
            /* Merged nodes have high == -1 and therefore never match. */
            if (a[i]->high == high && a[i]->low == low)
            {
                fputs(a[i]->code, out);
                sum_bit_coding += (int)strlen(a[i]->code);
                break;              /* a symbol appears once in the table */
            }
        }
    }

    fclose(P);
    if (fclose(out) != 0)
        fprintf(stderr, "文件寫入失敗\n");
}
/* Number of nodes in the subtree rooted at t (0 for NULL). */
static int count_tree_nodes(TREE t)
{
    if (t == NULL)
        return 0;
    return 1 + count_tree_nodes(t->Left) + count_tree_nodes(t->Right);
}

/*
 * Assign a code to every node of the Huffman tree and write the coded file.
 *
 * Tree is the list head whose NEXT is the tree root. The tree is walked
 * breadth-first; each child receives its parent's code followed by '0'
 * (left) or '1' (right). The visited nodes are then passed to save_file().
 */
void update_tree(TREE Tree)
{
    if (!Tree) return;

    TREE root = Tree->NEXT;
    if (root == NULL)
    {
        /* Empty input: no codes to assign, save_file writes an empty file. */
        save_file(NULL, 0, Tree);
        return;
    }

    /* Size the work queue from the tree instead of using a fixed array. */
    int total = count_tree_nodes(root);
    TREE *a = malloc((size_t)total * sizeof *a);
    if (a == NULL)
    {
        fprintf(stderr, "內存分配失敗\n");
        exit(EXIT_FAILURE);
    }

    if (root->Left == NULL && root->Right == NULL)
    {
        /* A single distinct symbol would otherwise get an empty code and
         * could not be represented in the coded file. */
        root->code[0] = '0';
        root->code[1] = '\0';
        root->code_index = 1;
    }

    int left = 0, right = 0;
    a[right++] = root;
    while (left < right)
    {
        TREE cur = a[left++];
        TREE children[2] = { cur->Left, cur->Right };
        size_t len = strlen(cur->code);

        for (int side = 0; side < 2; side++)
        {
            TREE child = children[side];
            if (child == NULL)
                continue;
            if (len + 2 > sizeof child->code)
            {
                fprintf(stderr, "哈夫曼編碼過長\n");
                free(a);
                exit(EXIT_FAILURE);
            }
            memcpy(child->code, cur->code, len);
            child->code[len] = (side == 0) ? '0' : '1';
            child->code[len + 1] = '\0';    /* codes are used as C strings */
            child->code_index = (int)len + 1;
            a[right++] = child;
        }
    }

    save_file(a, right, Tree);
    free(a);
}
TREE tree_node_init(int high, int low, int weight)
{
TREE tmp = (TREE)malloc(sizeof(struct node));
tmp->high = high;
tmp->low = low;
tmp->weight = weight;
strcpy(tmp->code, "\0");
tmp->code_index = 0;
tmp->Right = NULL;
tmp->Left = NULL;
tmp->NEXT = NULL;
return tmp;
}
/*
 * Copy the frequency list into a fresh list of tree nodes.
 *
 * The returned head copies head's high/low counters; its weight is the
 * number of nodes copied (the head itself is not counted), which is also
 * the number of nodes waiting to be merged into the Huffman tree.
 */
TREE build_tree(LIST head)
{
    TREE Tree = tree_node_init(head->high, head->low, 0);
    TREE tail = Tree;

    for (LIST src = head->NEXT; src != NULL; src = src->NEXT)
    {
        TREE copy = tree_node_init(src->high, src->low, src->weight);
        tail->NEXT = copy;
        tail = copy;
        Tree->weight += 1;
    }

    return Tree;
}
/*
 * Merge the node list into a single Huffman tree.
 *
 * While more than one node is queued (Tree->weight), the first two nodes of
 * the list are removed and made the left and right children of a new node
 * whose weight is their sum; that node is put back with insert().
 */
void coding(TREE Tree)
{
    for (; Tree->weight > 1; Tree->weight--)
    {
        /* Detach the two front nodes. */
        TREE first = Tree->NEXT;
        TREE second = first->NEXT;
        Tree->NEXT = second->NEXT;

        /* Merged nodes carry no symbol: high and low are both -1. */
        TREE merged = tree_node_init(-1, -1, first->weight + second->weight);
        merged->Left = first;
        merged->Right = second;

        insert(Tree, merged);
    }
}
/*
 * Pre-order walk of the Huffman tree.
 *
 * Every node that carries a symbol (high != -1) is appended to the global
 * queue[] at queue_index; when flag is non-zero the symbol, its weight and
 * its code are printed as well. Terminates the program if queue[] is full,
 * because a truncated table could not decode the file.
 */
void print_huffman_pre(TREE Tree, int flag)
{
    if (!Tree) return;

    if (Tree->high != -1)
    {
        if ((size_t)queue_index >= sizeof queue / sizeof queue[0])
        {
            fprintf(stderr, "字符種類過多,超出隊列容量\n");
            exit(EXIT_FAILURE);
        }
        queue[queue_index++] = Tree;

        if (flag)
        {
            if ((char)Tree->high == '\n')
            {
                /* Show the newline symbolically instead of emitting it. */
                printf("\\n weight == %d coding = %s\n", Tree->weight, Tree->code);
            }
            else
            {
                putchar(Tree->high);
                if (Tree->low != -1)
                    putchar(Tree->low);     /* trail byte of a two-byte symbol */
                printf(" weight == %d coding = %s\n", Tree->weight, Tree->code);
            }
        }
    }

    print_huffman_pre(Tree->Left, flag);
    print_huffman_pre(Tree->Right, flag);
}
/*
 * Account for one symbol read from the input and return the node that must
 * be (re)inserted into the list with insert().
 *
 * head: list head; its low counter is incremented for a two-byte symbol and
 *       its high counter for a single-byte symbol.
 * tmp:  freshly allocated node describing the symbol. This function takes
 *       ownership of it.
 *
 * If the symbol is already in the list, its node is unlinked, its weight is
 * incremented, tmp is freed and the unlinked node is returned with NEXT set
 * to NULL. Otherwise tmp itself is returned. Either way the caller must not
 * use or free tmp afterwards, only the returned node.
 */
LIST find_and_insert(LIST head, LIST tmp)
{
    if (tmp->low != -1) head->low++;
    else head->high++;

    for (LIST prev = head; prev->NEXT != NULL; prev = prev->NEXT)
    {
        LIST cur = prev->NEXT;
        /* Equal low covers both cases: two single-byte symbols (-1 == -1)
         * and two two-byte symbols with the same trail byte. */
        if (cur->high == tmp->high && cur->low == tmp->low)
        {
            prev->NEXT = cur->NEXT;
            cur->NEXT = NULL;       /* insert() expects a detached node */
            cur->weight++;
            free(tmp);              /* the existing node replaces tmp */
            return cur;
        }
    }
    return tmp;
}
/*
 * Insert tmp into the list so that weights stay in ascending order.
 *
 * tmp is placed in front of the first node whose weight is strictly greater
 * than tmp's, i.e. after all nodes of equal weight. If there is no such
 * node, tmp is appended and its NEXT field is left as the caller set it.
 */
void insert(LIST head, LIST tmp)
{
    LIST prev = head;

    for (; prev->NEXT != NULL; prev = prev->NEXT)
    {
        if (prev->NEXT->weight > tmp->weight)
            break;
    }

    /* Link to the successor only when one exists. */
    if (prev->NEXT != NULL)
        tmp->NEXT = prev->NEXT;
    prev->NEXT = tmp;
}
/*
 * Print one line per symbol of the list (occurrences, bytes per symbol,
 * total bytes), followed by the byte total computed from the head's
 * counters. A newline symbol is shown as "\n".
 */
void output(LIST head)
{
    for (LIST cur = head->NEXT; cur != NULL; cur = cur->NEXT)
    {
        /* low == -1 marks a single-byte symbol. */
        int width = (cur->low == -1) ? 1 : 2;
        int bytes = cur->weight * width;

        if ((char)cur->high == '\n')
        {
            printf("字符 \\n 個數是%d\t佔用字節爲%d\t總字節爲%d\n", cur->weight, width, bytes);
            continue;
        }

        printf("字符 ");
        putchar(cur->high);
        if (cur->high > 128)
            putchar(cur->low);
        printf(" 個數是%d\t佔用字節爲%d\t總字節爲%d\n", cur->weight, width, bytes);
    }
    printf("總字節數爲%d\n", head->high + head->low * 2);
}
LIST init_LIST(int high, int low, int weight)
{
LIST tmp = (LIST)malloc(sizeof(struct node));
tmp->high = high;
tmp->low = low;
tmp->weight = weight;
tmp->NEXT = NULL;
return tmp;
}