huffman编码实现

大概思路:

1.计算目标文件A中的字符出现频率,从而确定权值

2.由字符权值构建huffman树,结构体数组作为此树的数据结构

3.由huffman树将目标文件A中所出现的每个字符编码

4.用字符编码将目标文件编码到新文件B

5.将编码文件B译码为文件C

/***************************************************************************
*   Copyright (C) 2006 by Lingkun         *
*   [email protected]            *
*           *
*   文件名称：huffman_code.c       *
*           *
*   摘要：利用Huffman编码目标文件,并完成译码                                *
*              *
*   使用方法: huffman_code.exe 待编码文件路径 编码文件路径 译码文件路径   *
*           *
***************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define N 1000

/*
 * One entry of the symbol-frequency table built from the input file.
 * The table is used 1-based; an entry whose count is 0 marks the end.
 */
typedef struct
{
    char  data;      /* the symbol (one byte of the input) */
    int   count;     /* how many times the symbol occurs */
    float frequent;  /* count / total input length; used as the Huffman weight */
} character;

/*
 * One node of the Huffman tree, stored in a 1-based array.
 * Links are array indices; index 0 means "no node".
 */
typedef struct
{
    char  data;    /* symbol carried by the node */
    float weight;  /* frequency of the symbol, or sum of both children */
    int   lchild;  /* index of the left child, 0 for a leaf */
    int   rchild;  /* index of the right child, 0 for a leaf */
    int   parent;  /* index of the parent, 0 while the node has none */
} hufmtree;

/*
 * Huffman code table. Entries are used from index 1; in each entry the
 * first byte is the symbol and the bytes after it are that symbol's
 * code as a string of '0'/'1' characters.
 */
typedef char **hufmcode;

/* Input handling and statistics. */
FILE *file_open(const char *dir);
character *Frequent_of_character(FILE *fp);

/* Tree construction and code assignment. */
hufmtree *Huffman_tree(int n, character *ch);
hufmcode Huffman_Coding(hufmtree *ht, int n);

/* Diagnostic output. */
int prn_char(character *ch);
void prn_tree(hufmtree *ht);

/* Encoding and decoding of whole files. */
void Coding_file(hufmcode hc, const char *dir, const char *dir_code);
void Uncoding_file(hufmtree *ht, const char *dir_code, const char *dir_re, int m);

int main(int argc, char *argv[])
{
FILE *fp;
character *ch;
hufmtree *ht;
hufmcode hc;
int m, n;

const char *dir = argv[1];   //待编码文件路径
const char *dir_code = argv[2];  //编码文件路径
const char *dir_re = argv[3];  //译码文件路径

fp = file_open(dir);
ch = Frequent_of_character(fp);
n = prn_char(ch);
m = 2 * n - 1;

ht = Huffman_tree(n, ch);
prn_tree(ht);
hc = Huffman_Coding(ht, n);

Coding_file( hc, dir, dir_code);
Uncoding_file(ht, dir_code, dir_re, m);
fclose(fp);

return 0;
}

/*
 * Build a Huffman tree from a symbol-frequency table.
 *
 * n   number of distinct symbols (leaves); must be at least 1
 * ch  frequency table; entries 1..n are read
 *
 * Returns a malloc'd array of 2*n - 1 nodes indexed from 1 (slot 0 is the
 * zeroed "no node" sentinel). Nodes 1..n are the leaves, node 2*n - 1 is
 * the root. The caller owns the array. Returns NULL when n < 1, ch is
 * NULL, or allocation fails.
 */
hufmtree *Huffman_tree(int n, character *ch)
{
    hufmtree *ht;
    int i, j, p1, p2, m;

    if (n < 1 || ch == NULL) {
        printf("Huffman_tree: no symbols to build a tree from.\n");
        return NULL;
    }

    m = 2 * n - 1;  /* a full binary tree with n leaves has 2n - 1 nodes */
    ht = (hufmtree *)malloc((m + 1) * sizeof(hufmtree));  /* +1: indexing starts at 1 */
    if (ht == NULL) {
        printf("Memory allocate error.\n");
        return NULL;
    }

    /* Only entries 1..n of ch describe symbols; every other node starts blank. */
    for (i = 0; i <= m; i++) {
        if (i >= 1 && i <= n) {
            ht[i].data = ch[i].data;
            ht[i].weight = ch[i].frequent;
        } else {
            ht[i].data = '\0';
            ht[i].weight = 0.0f;
        }
        ht[i].lchild = ht[i].rchild = ht[i].parent = 0;
    }

    putchar('\n');
    printf("构建哈夫曼树\n");
    printf("叶子结点个数 n = %d, 总结点个数 m = %d\n", n, m);

    for (i = n + 1; i <= m; i++) {
        /*
         * Pick the two lightest parentless nodes among 1..i-1. Index 0
         * doubles as "nothing chosen yet", so no sentinel weight is needed
         * and weights of any magnitude are handled.
         */
        p1 = 0;
        p2 = 0;
        for (j = 1; j < i; j++) {
            if (ht[j].parent != 0) {
                continue;
            }
            if (p1 == 0 || ht[j].weight < ht[p1].weight) {
                p2 = p1;
                p1 = j;
            } else if (p2 == 0 || ht[j].weight < ht[p2].weight) {
                p2 = j;
            }
        }
        ht[p1].parent = i;
        ht[p2].parent = i;
        ht[i].lchild = p1;  /* lightest node becomes the left child */
        ht[i].rchild = p2;  /* second lightest becomes the right child */
        ht[i].weight = ht[p1].weight + ht[p2].weight;
    }
    return ht;
}

/*
 * Derive the code of every leaf by walking from the leaf up to the root.
 *
 * ht  Huffman tree whose leaves are nodes 1..n
 * n   number of leaves
 *
 * Returns a malloc'd table with entries 1..n (entry 0 is NULL). In each
 * entry the first byte is the symbol and the NUL-terminated code string
 * follows it. The caller owns the table and each entry. Returns NULL when
 * ht is NULL, n < 1, or allocation fails.
 *
 * Note: with a single leaf the root has no parent, so that symbol's code
 * is the empty string.
 */
hufmcode Huffman_Coding(hufmtree *ht, int n)
{
    int i, c, start, f;
    hufmcode hc;
    char *cd;

    if (ht == NULL || n < 1) {
        return NULL;
    }

    hc = (hufmcode)malloc((n + 1) * sizeof(char *));
    cd = (char *)malloc(n * sizeof(char));  /* a code has at most n - 1 digits */
    if (hc == NULL || cd == NULL) {
        printf("Memory allocate error.\n");
        free(hc);
        free(cd);
        return NULL;
    }
    hc[0] = NULL;  /* slot 0 is unused */
    cd[n - 1] = '\0';

    printf("\n将以上字符进行哈夫曼编码:\n");
    for (i = 1; i <= n; i++) {
        /* Digits are produced leaf-to-root, so fill the buffer backwards. */
        start = n - 1;
        for (c = i, f = ht[i].parent; f != 0; c = f, f = ht[f].parent) {
            if (ht[f].lchild == c) {
                cd[--start] = '0';
            } else {
                cd[--start] = '1';
            }
        }
        /* symbol byte + (n - 1 - start) digits + terminating NUL */
        hc[i] = (char *)malloc((n - start + 1) * sizeof(char));
        if (hc[i] == NULL) {
            printf("Memory allocate error.\n");
            while (--i >= 1) {
                free(hc[i]);
            }
            free(hc);
            free(cd);
            return NULL;
        }
        hc[i][0] = ht[i].data;
        strcpy(&hc[i][1], &cd[start]);
    }
    free(cd);

    putchar('\n');
    for (i = 1; i <= n; i++) {
        printf("hc[%d]: %c    %s\n", i, hc[i][0], &hc[i][1]);
    }
    return hc;
}

/*
 * Echo the file to stdout, then count how often each byte value occurs.
 *
 * fp  stream opened for reading; it is read to the end, rewound, and read
 *     to the end again
 *
 * Returns a calloc'd table of N entries. Entries 1..k hold the k distinct
 * symbols in order of first appearance, with their counts and relative
 * frequencies; the entry after the last symbol has count 0 and marks the
 * end. The caller owns the table. Returns NULL when fp is NULL or
 * allocation fails.
 */
character *Frequent_of_character(FILE *fp)
{
    int i, last, length = 0;
    character *ch;
    int c;  /* int, not char: fgetc's EOF must stay distinct from byte 0xFF */

    if (fp == NULL) {
        return NULL;
    }

    /* Zero-filled so every unused entry has count 0 (the end marker). */
    ch = (character *)calloc(N, sizeof(character));
    if (ch == NULL) {
        printf("Memory allocate error.\n");
        return NULL;
    }

    printf("\n\n打印原文:\n");
    while ((c = fgetc(fp)) != EOF) {
        printf("%c", c);
        length++;
    }
    printf("\n\n原文件长度:\n");
    printf("\nlength = %d\n\n", length);

    fseek(fp, 0, SEEK_SET);
    last = 0;  /* number of distinct symbols seen so far */
    while ((c = fgetc(fp)) != EOF) {
        /* Linear search over the used entries 1..last. */
        for (i = 1; i <= last && ch[i].data != (char)c; i++) {
            ;
        }
        if (i <= last) {
            ch[i].count++;  /* symbol already known */
        } else if (last < N - 2) {
            /* New symbol; entry last + 1 must stay inside the table as the end marker. */
            last = i;
            ch[last].data = (char)c;
            ch[last].count = 1;
        } else {
            printf("字符种类大于N.\n");
        }
    }

    /* Relative frequency of each symbol becomes its Huffman weight. */
    for (i = 1; i <= last; i++) {
        ch[i].frequent = (float)ch[i].count / length;
    }
    return ch;
}

/*
 * Print the symbol-frequency table and count its entries.
 *
 * ch  frequency table used from index 1; the first entry whose count is 0
 *     ends the table
 *
 * Returns the number of entries printed, or 0 when ch is NULL.
 */
int prn_char(character *ch)
{
    int i;

    if (ch == NULL) {
        return 0;
    }

    printf("字符个数及其出现频率:\n");
    printf("序列号字符计数频率\n");
    for (i = 1; ch[i].count != 0; i++) {
        printf("ch[%d] : %c %d %f .\n", i, ch[i].data, ch[i].count, ch[i].frequent);
    }
    return i - 1;
}

/*
 * Open a file for reading in text mode.
 *
 * dir  path of the file
 *
 * Returns the open stream, or NULL after printing a message when dir is
 * NULL or the file cannot be opened. The caller closes the stream.
 */
FILE *file_open(const char *dir)
{
    FILE *fp;

    if (dir == NULL) {
        printf("File open Error: no path given\n");
        return NULL;
    }

    fp = fopen(dir, "r");
    if (fp == NULL) {
        printf("File %s open Error\n", dir);
    }
    return fp;
}

/*
 * Print every node of a Huffman tree, from node 1 up to the root.
 *
 * ht  tree array indexed from 1, as produced by Huffman_tree
 *
 * The array length is not passed in, so the walk stops at the first node
 * without a parent. In a finished tree that is the root, which is also the
 * last node; stopping there avoids reading past the end of the array.
 * Does nothing when ht is NULL.
 */
void prn_tree(hufmtree *ht)
{
    int i;

    if (ht == NULL) {
        return;
    }

    printf("\n打印哈夫曼树:\n");
    printf("序列号字符权值左孩子右孩子双亲\n");
    for (i = 1; ; i++) {
        printf("ht[%d] : %c %f lchild: %d rchild: %d parent: %d\n", i, ht[i].data, ht[i].weight, ht[i].lchild, ht[i].rchild, ht[i].parent);
        if (ht[i].parent == 0) {
            break;  /* the root has just been printed */
        }
    }
}

/*
 * Encode a file: replace every byte by its Huffman code, written as text
 * made of '0' and '1' characters.
 *
 * hc        code table used from index 1 (symbol byte, then code string)
 * dir       path of the file to encode
 * dir_code  path of the encoded output; an existing file is overwritten so
 *           that stale content cannot be mixed into the encoding
 *
 * The encoding is also echoed to stdout. Returns without encoding when hc
 * is NULL or either file cannot be opened.
 */
void Coding_file(hufmcode hc, const char *dir, const char *dir_code)
{
    int i;
    int c;  /* int, not char: fgetc's EOF must stay distinct from byte 0xFF */
    FILE *fp1, *fp2;

    if (hc == NULL) {
        return;
    }

    fp1 = file_open(dir);
    if (fp1 == NULL) {
        return;  /* file_open has already reported the failure */
    }
    fp2 = fopen(dir_code, "w");
    if (fp2 == NULL) {
        printf("File %s open ERROR.\n", dir_code);
        fclose(fp1);
        return;
    }

    printf("将原文件编码:\n");
    while ((c = fgetc(fp1)) != EOF) {
        /*
         * Look the symbol up in the code table. The table size is not
         * passed in, so this relies on the table having been built from
         * this same file, i.e. on every symbol being present.
         */
        for (i = 1; hc[i][0] != (char)c; i++) {
            ;
        }
        fputs(&hc[i][1], fp2);
        printf("%s", &hc[i][1]);
    }
    fclose(fp1);
    fclose(fp2);
}

/*
 * Decode a file of '0'/'1' characters back into the original symbols.
 *
 * ht        Huffman tree array indexed from 1
 * dir_code  path of the encoded file
 * dir_re    path of the decoded output; an existing file is overwritten
 * m         index of the root node
 *
 * Each '0' moves to the left child and each '1' to the right child. On
 * reaching a leaf its symbol is written to the output and to stdout, and
 * decoding restarts at the root. Any other character is reported and
 * skipped. Returns without decoding when ht is NULL, m < 1, or either
 * file cannot be opened.
 */
void Uncoding_file(hufmtree *ht, const char *dir_code, const char *dir_re, int m)
{
    int i;
    int c;  /* int, not char: fgetc's EOF must stay distinct from byte 0xFF */
    FILE *fp1, *fp2;

    if (ht == NULL || m < 1) {
        return;
    }

    fp1 = file_open(dir_code);
    if (fp1 == NULL) {
        return;  /* file_open has already reported the failure */
    }
    fp2 = fopen(dir_re, "w");
    if (fp2 == NULL) {
        printf("File %s open error.\n", dir_re);
        fclose(fp1);
        return;
    }

    printf("\n将编码文件翻译成明文:\n");
    i = m;  /* start at the root */
    while ((c = fgetc(fp1)) != EOF) {
        if (c == '0') {
            i = ht[i].lchild;
        } else if (c == '1') {
            i = ht[i].rchild;
        } else {
            printf("Uncoding ERROR.\n");
            continue;  /* not a code digit: keep the current position */
        }
        if (i == 0) {
            /* Stepped through a missing child link; restart at the root. */
            printf("Uncoding ERROR.\n");
            i = m;
            continue;
        }
        if (ht[i].lchild == 0) {  /* leaf reached: emit its symbol */
            putchar(ht[i].data);
            fputc(ht[i].data, fp2);
            i = m;  /* next code starts at the root again */
        }
    }
    fclose(fp1);
    fclose(fp2);
}

这是上学期数据结构课的学习成果,现在翻出来回顾一下,还是颇为复杂的,呵呵

amonkun

发布了25 篇原创文章 · 获赞 1 · 访问量 4万+

私信关注

huffman编码实现

《Python进阶》学习笔记

Leetcode 3161. 物块放置查询

leetcode 60 排列序列

一个docker容器暴露多个端口

微服务实践之使用 Visual Studio 2022 调试Dapr 应用程序

wpf附加属性理解 WPF附加属性

關於地址的迷思....

程序人生:一個程序員的奮鬥歷程

深圳兩青年在伊拉克炮火中淘金400萬

學習筆記(變量&基本類型)

操操操

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結