最近一段時間抽空看了霍夫曼壓縮和解壓縮的算法,覺得挺巧妙的,就決心自己動手用C實現。沒想到算法看起來挺簡單的,實際做起來卻處處都需要仔細考慮纔行,折騰了一週左右終於調試通過了,收穫挺大的,付出總是有回報的!
閒話少說,下面就貼上我的實現代碼。霍夫曼壓縮解壓縮算法需要用到位操作和堆的相關函數。堆的實現在我前面的博客裏已經給出實現代碼了,這裏就不重複貼了。
1.位實現代碼(轉載的,感謝!)
/*bit.c 位操作的實現*/
#include <stdlib.h>
/*
 * bit_get: return the state (0 or 1) of the bit at position pos in bits.
 *
 * Positions count from the most significant bit of bits[0]: position 0 is
 * mask 0x80 of the first byte, position 8 is mask 0x80 of the second byte.
 */
int bit_get(const unsigned char *bits, int pos)
{
    int offset = pos % 8;
    unsigned char mask = 0x80;

    /* Slide the mask right until it sits on the requested bit of its byte. */
    while (offset-- > 0) {
        mask >>= 1;
    }

    /* The mask has exactly one bit set, so a non-zero AND means "bit is 1". */
    return (bits[pos / 8] & mask) ? 1 : 0;
}
/*
 * bit_set: set the bit at position pos in bits to 1 when state is non-zero,
 * or clear it when state is zero.
 *
 * Positions count from the most significant bit of bits[0].
 */
void bit_set(unsigned char *bits, int pos, int state)
{
    unsigned char *target = &bits[pos / 8];
    unsigned char mask = 0x80;
    int shift;

    /* Build a single-bit mask for the requested position within its byte. */
    for (shift = pos % 8; shift > 0; shift--) {
        mask >>= 1;
    }

    if (state) {
        *target |= mask;
    } else {
        *target &= (unsigned char)~mask;
    }
}
/*
 * bit_xor: write the bitwise XOR of the first size bits of bits1 and bits2
 * into bitsx, one bit at a time.
 */
void bit_xor(const unsigned char *bits1,const unsigned char *bits2,unsigned char *bitsx,int size)
{
    int pos;

    for (pos = 0; pos < size; pos++) {
        /* XOR of two single bits is 1 exactly when they differ. */
        int differs = (bit_get(bits1, pos) != bit_get(bits2, pos));

        bit_set(bitsx, pos, differs);
    }
}
/*
 * bit_rot_left: rotate the buffer bits (holding size bits) left by count
 * positions, one position per outer iteration. Does nothing when size <= 0.
 */
void bit_rot_left(unsigned char *bits,int size,int count)
{
    int first_bit = 0;
    int carry;
    int last_byte;
    int round;
    int idx;

    if (size <= 0) {
        return;
    }

    last_byte = (size - 1) / 8;

    for (round = 0; round < count; round++) {
        for (idx = 0; idx <= last_byte; idx++) {
            /* Leftmost bit of this byte is about to be shifted out. */
            carry = bit_get(&bits[idx], 0);

            if (idx == 0) {
                /* Remember the bit leaving the front of the buffer. */
                first_bit = carry;
            } else {
                /* Carry it into the rightmost bit of the previous byte. */
                bit_set(&bits[idx - 1], 7, carry);
            }

            bits[idx] = bits[idx] << 1;
        }

        /* Wrap the bit that left the front around to position size-1. */
        bit_set(bits, size - 1, first_bit);
    }
}
/*
 * bit_print: print the first bit_size bits of bits to stdout as '0'/'1'
 * characters. A newline is emitted after every bit whose index is a
 * non-zero multiple of 5, and a tab plus newline ends the output.
 */
void bit_print(unsigned char *bits,int bit_size)
{
    int i = 0;

    while (i < bit_size) {
        printf("%d", bit_get(bits, i));
        if (i > 0 && i % 5 == 0) {
            printf("\n");
        }
        i++;
    }
    printf("\t\n");
}
2.霍夫曼相關數據結構:
/* Value stored in the `version` field of the compressed-file header. */
#define HUFFMAN_VER 0xAA
/* Number of entries in the symbol table (one per possible byte value). */
#define HUFFMAN_MAX_SYM 256
/* Values of HUFFMAN_NODE.type: a leaf carries a real symbol, a trunk node
 * is an internal node created by merging two subtrees. */
#define HUFFMAN_LEAF_NODE 1
#define HUFFMAN_TRUNK_NODE 2
/*
 * One node of the Huffman tree. Nodes are also the element type stored in
 * the priority heap while the tree is being built.
 */
typedef struct _huffman_node
{
/* Heap bookkeeping; its `pri` field holds the node weight (symbol
 * frequency for a leaf, sum of the children for a trunk node). */
HEAP_NODE heap_node;
/* Symbol byte for a leaf; trunk nodes are given the placeholder '@'.
 * NOTE(review): plain char may be signed, so cast to unsigned char before
 * using it as a table index. */
char sym;
/* Symbol frequency; set for leaves only. */
int freq;
/* HUFFMAN_LEAF_NODE or HUFFMAN_TRUNK_NODE. */
int type;
/* Code bits of the path from the root to this node, stored in bit_get /
 * bit_set order starting at the first byte.
 * NOTE(review): only 16 bits wide, so deeper paths cannot be represented. */
unsigned short bits;
/* Number of valid bits in `bits` (depth of the node). */
int bits_size;
/* Children (NULL for a leaf) and parent links. */
struct _huffman_node *left;
struct _huffman_node *right;
struct _huffman_node *parent;
} HUFFMAN_NODE;
/*
 * In-memory symbol table entry. The table is indexed by byte value.
 */
typedef struct _huffman_sym_entry
{
/* The symbol byte this entry describes. */
char sym;
/* Number of occurrences; 0 means the symbol is not present. */
int freq;
/* Huffman code assigned to the symbol, in bit_get / bit_set bit order. */
unsigned short code;
/* Number of valid bits in `code`. */
int code_size;
} HUFFMAN_SYM_ENTRY;
/*
 * Symbol table entry as stored in the compressed file: only the symbol and
 * its frequency are saved, and the decoder rebuilds the tree from them.
 */
typedef struct _huffman_sym_save_entry
{
/* The symbol byte. */
char sym;
/* Filler bytes between sym and freq; the visible code never writes them. */
char reserverd[3];
/* Number of occurrences of sym in the original data. */
int freq;
}HUFFMAN_SYM_SAVE_ENTRY;
/*
 * Header at the start of a compressed file. Writer and reader both compute
 * the header length as
 *   sizeof(HUFFMAN_FILE_HEADER) + sym_tbl_entry_num * sizeof(HUFFMAN_SYM_SAVE_ENTRY)
 * and the encoded bit stream starts immediately after that many bytes.
 */
typedef struct _huffman_file_header
{
/* Format version; the encoder writes HUFFMAN_VER. */
int version;
/* Number of valid bits in the encoded bit stream. */
int bit_size;
/* Number of entries stored in sym_save_tbl. */
int sym_tbl_entry_num;
/* Saved symbol table (variable length).
 * NOTE(review): a zero-length array that is not the last member is a
 * compiler extension; the entries appear to start at the same offset as
 * `reserved` below. Moving it would change the file layout - confirm
 * before touching. */
HUFFMAN_SYM_SAVE_ENTRY sym_save_tbl[0];
/* Reserved bytes; never written explicitly by the visible code. */
char reserved[8];
}HUFFMAN_FILE_HEADER;
void huffman_encode(char *file);
void huffman_decode(char *huffman_file,char *decoded_file);
void huffman_test(void);
3.霍夫曼實現代碼:
/*
This file implements the Huffman compression and decompression algorithms.
*/
#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "define.h"
#include "data_struct.h"
/* Symbol table indexed by byte value; entries with freq == 0 are unused. */
static HUFFMAN_SYM_ENTRY huffman_symtbl[HUFFMAN_MAX_SYM] = {0};
/* Min-heap of HUFFMAN_NODE used while building the tree. */
static HEAP *huffman_heap = NULL;
/* Root node of the Huffman tree (filled by huffman_tree_build). */
static HUFFMAN_NODE huffman_tree;
/* Number of distinct symbols present in huffman_symtbl. */
static int huffman_sym_num;
/*
 * huffman_code_print: print "size=<size>", a tab, then the first `size`
 * bits of `code` as '0'/'1' characters, followed by a newline.
 *
 * code: Huffman code in bit_get / bit_set bit order.
 * size: number of valid bits in code; at most the width of code is printed.
 */
static void huffman_code_print(unsigned short code,int size)
{
    /* bit_get() works on a byte buffer, so view the code as raw bytes
     * instead of passing an incompatible unsigned short pointer. */
    const unsigned char *code_bytes = (const unsigned char *)&code;
    const int max_bits = (int)(sizeof(code) * 8);
    int bit;

    printf("size=%d\t",size);
    /* Never read past the storage of `code`, even if size is bogus. */
    for(bit = 0; bit < size && bit < max_bits; bit++)
    {
        printf("%d",bit_get(code_bytes,bit));
    }
    printf("\n");
}
/*
 * huffman_node_print: dump a single node - its symbol, heap priority (NF)
 * and type on one line, then its code via huffman_code_print().
 */
static void huffman_node_print(HUFFMAN_NODE *node)
{
    printf("sym:%c,NF=%d,type=%d\n",
           node->sym,
           node->heap_node.pri,
           node->type);

    huffman_code_print(node->bits, node->bits_size);
}
/*
 * huffman_tree_print: walk the tree in pre-order and print every leaf:
 * its symbol, its parent's symbol (or ROOT), its heap priority (NF) and
 * its code. Trunk nodes produce no output of their own. NULL is accepted
 * and ignored.
 */
static void huffman_tree_print(HUFFMAN_NODE *root)
{
    if(!root)
        return;

    if(root->type == HUFFMAN_LEAF_NODE)
    {
        if(root->parent)
            printf("%c(%c),NF=%d,",root->sym,root->parent->sym,root->heap_node.pri);
        else
            printf("%c(ROOT),NF=%d,",root->sym,root->heap_node.pri);
        huffman_code_print(root->bits,root->bits_size);
        /* bit_print() takes a byte buffer; view the 16-bit code as bytes
         * rather than passing an incompatible unsigned short pointer. */
        bit_print((unsigned char *)&root->bits,root->bits_size);
    }

    /* The NULL check at the top makes per-child checks unnecessary. */
    huffman_tree_print(root->left);
    huffman_tree_print(root->right);
}
static void huffman_symtbl_print(void)
{
int i;
printf("total sym num=%d\n",huffman_sym_num);
for(i = 0; i < HUFFMAN_MAX_SYM; i++)
{
if(huffman_symtbl[i].freq)
{
printf("%c(%x),freq=%d,code=%x,code_size=%d\n",huffman_symtbl[i].sym,(unsigned char)huffman_symtbl[i].sym,huffman_symtbl[i].freq,huffman_symtbl[i].code,huffman_symtbl[i].code_size);
bit_print(&huffman_symtbl[i].code,huffman_symtbl[i].code_size);
}
}
return;
}
/*
 * huffman_node_new: allocate one zero-initialised HUFFMAN_NODE on the heap.
 *
 * Returns the new node. Allocation failure is treated as fatal via
 * assert(), matching how the rest of this file handles malloc failures;
 * with assertions disabled the function returns NULL instead of writing
 * through a NULL pointer.
 */
static HUFFMAN_NODE * huffman_node_new(void)
{
    HUFFMAN_NODE *node = calloc(1, sizeof(*node));

    assert(node);
    return node;
}
static void huffman_symtbl_build(char *buf)
{
char *p = buf;
while(*p)
{
unsigned char ch = (unsigned char)*p;
if(huffman_symtbl[ch].freq == 0)huffman_sym_num++;
huffman_symtbl[ch].sym = *p;
huffman_symtbl[ch].freq++;
p++;
}
return;
}
/*
 * huffman_heap_init: create the min-heap and insert one leaf node for
 * every symbol in huffman_symtbl that has a non-zero frequency. The heap
 * priority of each leaf is the symbol frequency.
 */
void huffman_heap_init(void)
{
    int i;

    huffman_heap = heap_init(HEAP_SMALL,sizeof(HUFFMAN_NODE),512);
    /* NOTE(review): assumes heap_init() reports failure by returning NULL. */
    assert(huffman_heap);

    for(i = 0; i < HUFFMAN_MAX_SYM; i++)
    {
        HUFFMAN_NODE node;

        if(!huffman_symtbl[i].freq)
            continue;

        /* Start from an all-zero node so that parent, bits and bits_size
         * are not left holding indeterminate stack contents: a tree made
         * of a single leaf is printed with its parent pointer tested. */
        memset(&node,0,sizeof(node));
        node.sym = huffman_symtbl[i].sym;
        node.freq = node.heap_node.pri = huffman_symtbl[i].freq;
        node.type = HUFFMAN_LEAF_NODE;
        node.left = node.right = NULL;
        heap_insert(huffman_heap,&node);
    }
}
/*
 * huffman_tree_merge: initialise tree_parent as a trunk node whose children
 * are tree_l (left) and tree_r (right). The parent's heap priority is the
 * sum of the children's priorities, its symbol is the placeholder '@', and
 * both children get their parent link set.
 */
static void huffman_tree_merge(HUFFMAN_NODE *tree_parent,HUFFMAN_NODE *tree_l,HUFFMAN_NODE *tree_r)
{
    static const char trunk_sym = '@';

    memset(tree_parent, 0, sizeof *tree_parent);

    tree_parent->sym = trunk_sym;
    tree_parent->type = HUFFMAN_TRUNK_NODE;
    tree_parent->heap_node.pri = tree_l->heap_node.pri + tree_r->heap_node.pri;

    tree_parent->left = tree_l;
    tree_parent->right = tree_r;

    tree_l->parent = tree_parent;
    tree_r->parent = tree_parent;
}
/*
 * huffman_tree_build: build the Huffman tree from huffman_symtbl.
 *
 * The heap is filled with one leaf per symbol; then the two lowest-priority
 * nodes are repeatedly removed, merged under a new trunk node, and the
 * trunk node is put back, until a single node is left. That node is copied
 * into huffman_tree as the root.
 *
 * Every heap_get() result is checked (0 means success, as the final check
 * in the original code establishes); a failure is fatal.
 */
static void huffman_tree_build(void)
{
    int rc;
    HUFFMAN_NODE *node_l,*node_r,*node_parent;

    /* 1. init huffman heap */
    huffman_heap_init();

    /* 2. begin build tree */
    while(HEAP_NODE_NUM(huffman_heap) >= 2)
    {
        node_l = huffman_node_new();
        node_r = huffman_node_new();
        node_parent = huffman_node_new();
        assert(node_l && node_r && node_parent);

        rc = heap_get(huffman_heap,node_l);
        assert(rc == 0);
        rc = heap_get(huffman_heap,node_r);
        assert(rc == 0);

        huffman_tree_merge(node_parent,node_l,node_r);
        heap_insert(huffman_heap,node_parent);
    }

    /* 3. the last remaining node is the root */
    rc = heap_get(huffman_heap,&huffman_tree);
    assert(rc == 0);
    (void)rc; /* keep builds with assertions disabled warning-free */
}
/*
 * huffman_symtbl_code: assign codes to the subtree rooted at node.
 *
 * node:      subtree root (must not be NULL).
 * bits:      code bits of the path from the tree root to node.
 * bits_size: number of valid bits in bits (0 for the tree root).
 *
 * Each node records its own path in node->bits / node->bits_size. Only
 * leaves publish their code into huffman_symtbl: trunk nodes carry the
 * placeholder symbol '@', and letting them write to the table would
 * overwrite the code of a real '@' byte. Going left appends a 0 bit and
 * going right appends a 1 bit.
 *
 * A tree consisting of a single leaf (input with one distinct byte) is
 * given a 1-bit code so that every encoded symbol occupies at least one bit.
 */
static void huffman_symtbl_code(HUFFMAN_NODE *node,unsigned short bits,int bits_size)
{
    int bit;

    assert(node);

    if(node->type == HUFFMAN_LEAF_NODE && bits_size == 0)
        bits_size = 1;

    node->bits = bits;
    node->bits_size = bits_size;

    if(node->type == HUFFMAN_LEAF_NODE)
    {
        /* Index by unsigned byte value: plain char may be negative. */
        HUFFMAN_SYM_ENTRY *entry = &huffman_symtbl[(unsigned char)node->sym];

        assert(entry->code == 0);
        entry->code = bits;
        entry->code_size = bits_size;
    }

    bit = bits_size;
    bits_size++;

    /* The code is kept in an unsigned short; refuse to write a child bit
     * beyond its storage instead of corrupting the stack. */
    if(node->left || node->right)
        assert(bit < (int)(sizeof(bits) * 8));

    if(node->left)
    {
        unsigned short l_bits = bits;
        bit_set((unsigned char *)&l_bits,bit,0);
        huffman_symtbl_code(node->left,l_bits,bits_size);
    }
    if(node->right)
    {
        unsigned short r_bits = bits;
        bit_set((unsigned char *)&r_bits,bit,1);
        huffman_symtbl_code(node->right,r_bits,bits_size);
    }
}
/*
 * huffman_char2code: copy the first code_size bits of code into the bit
 * buffer p, starting at bit position start_bit.
 */
static void huffman_char2code(unsigned char *p,int start_bit,unsigned short code,int code_size)
{
    /* bit_get() works on a byte buffer, so view the code as raw bytes
     * instead of passing an incompatible unsigned short pointer. */
    const unsigned char *code_bytes = (const unsigned char *)&code;
    int bit;

    for(bit = 0;bit < code_size;bit++)
    {
        bit_set(p,bit+start_bit,bit_get(code_bytes,bit));
    }
}
unsigned char* huffman_buf_encode(char *buf,int size,int *buf_bit_size)
{
assert(size > 0);
unsigned char *buf_encoded = malloc(size);
int start_bit = 0;
int i;
for(i = 0;i < size;i++)
{
char ch = buf[i];
huffman_char2code(buf_encoded,start_bit,huffman_symtbl[ch].code,huffman_symtbl[ch].code_size);
start_bit += huffman_symtbl[ch].code_size;
}
*buf_bit_size = start_bit;
return buf_encoded;
}
/*
 * huffman_file_create: write the compressed file "huffman_file".
 *
 * buf:      encoded bit stream.
 * bit_size: number of valid bits in buf (must be > 0).
 *
 * The file consists of a header (version, bit count, number of symbols and
 * the saved symbol/frequency table) followed by the encoded bytes. The
 * header is zero-initialised so that no uninitialised memory reaches the
 * file, and it is released once it has been written.
 */
static void huffman_file_create(unsigned char *buf,int bit_size)
{
    int encoded_size;
    int i;
    int j = 0;
    HUFFMAN_FILE_HEADER *file_header = NULL;
    int header_size = (int)(sizeof(HUFFMAN_FILE_HEADER) + huffman_sym_num*sizeof(HUFFMAN_SYM_SAVE_ENTRY));

    assert(bit_size > 0);

    /* Round the bit count up to whole bytes. */
    encoded_size = bit_size/8;
    if(bit_size % 8)
        encoded_size++;

    file_header = calloc(1,(size_t)header_size);
    assert(file_header);

    file_header->version = HUFFMAN_VER;
    file_header->bit_size = bit_size;
    printf("version=%x,bit size=%d\n",HUFFMAN_VER,bit_size);
    file_header->sym_tbl_entry_num = huffman_sym_num;

    /* Save every symbol that actually occurs, in ascending byte order. */
    for(i = 0;i < HUFFMAN_MAX_SYM;i++)
    {
        if(huffman_symtbl[i].freq == 0)
            continue;
        file_header->sym_save_tbl[j].sym = huffman_symtbl[i].sym;
        file_header->sym_save_tbl[j].freq = huffman_symtbl[i].freq;
        j++;
        printf("encoded sym=%c,freq=%d\n",huffman_symtbl[i].sym,huffman_symtbl[i].freq);
    }
    assert(j == huffman_sym_num);

    file_write("huffman_file",file_header,header_size);
    file_append("huffman_file",buf,encoded_size);

    free(file_header);
}
/*
 * huffman_encode: compress `file` and write the result to "huffman_file".
 *
 * Steps: read the file, count symbol frequencies, build the Huffman tree,
 * assign a code to each symbol, encode the data and write header plus
 * encoded data. Both the input buffer and the encoded buffer are released
 * before returning.
 *
 * NOTE(review): the frequency scan treats the data as a NUL-terminated
 * string, so input containing zero bytes is not handled.
 */
void huffman_encode(char *file)
{
    int size;
    int total_bit_size;
    unsigned short bits = 0;
    unsigned char *buf_encoded = NULL;

    /* 1. read file into buf */
    char *buf = file_read(file,&size);
    assert(buf);

    /* 2. scan buf into huffman_symtbl */
    huffman_symtbl_build(buf);

    /* 3. build huffman tree */
    huffman_tree_build();

    /* 4. set code for each sym */
    huffman_symtbl_code(&huffman_tree,bits,0);

    /* 5. begin compress buf with the code */
    buf_encoded = huffman_buf_encode(buf,size,&total_bit_size);

    /* 6. write encoded buf into HUFFMAN file */
    huffman_file_create(buf_encoded,total_bit_size);
    huffman_tree_print(&huffman_tree);

    /* huffman_buf_encode() hands back an allocated buffer we own. */
    free(buf_encoded);
    free(buf);
}
/*
 * huffman_decode_char: decode one symbol from the bit stream.
 *
 * node:      tree node to start from (the root for a fresh symbol).
 * pbits:     encoded bit stream.
 * pos:       position of the first bit to read.
 * char_bits: receives the bits_size of the leaf that was reached.
 *
 * Walks down the tree, taking the right child on a 1 bit and the left
 * child on a 0 bit, until a leaf is reached. Returns the leaf's symbol.
 */
static char huffman_decode_char(HUFFMAN_NODE *node,char *pbits,int pos,int *char_bits)
{
    HUFFMAN_NODE *cur = node;

    for (;;) {
        assert(cur);
        if (cur->type == HUFFMAN_LEAF_NODE) {
            break;
        }
        cur = bit_get(pbits,pos) ? cur->right : cur->left;
        pos++;
    }

    *char_bits = cur->bits_size;
    return cur->sym;
}
/*
 * huffman_symtbl_decoded: rebuild huffman_symtbl and huffman_sym_num from
 * the symbol table saved in a compressed file's header.
 *
 * The table is cleared first, so the decoder depends only on the file and
 * not on entries or codes left behind by an earlier encode in the same
 * process. The entry count comes from the file and is range-checked before
 * it is used as a loop bound.
 */
static void huffman_symtbl_decoded(HUFFMAN_FILE_HEADER *file_header)
{
    int i;

    assert(file_header);
    assert(file_header->sym_tbl_entry_num >= 0 &&
           file_header->sym_tbl_entry_num <= HUFFMAN_MAX_SYM);

    memset(huffman_symtbl,0,sizeof(huffman_symtbl));
    huffman_sym_num = file_header->sym_tbl_entry_num;

    for(i = 0;i < huffman_sym_num;i++)
    {
        const HUFFMAN_SYM_SAVE_ENTRY *saved = &file_header->sym_save_tbl[i];
        /* Index by unsigned byte value: plain char may be negative. */
        unsigned char idx = (unsigned char)saved->sym;

        printf("decoded sym=%c,freq=%d\n",saved->sym,saved->freq);
        huffman_symtbl[idx].sym = saved->sym;
        huffman_symtbl[idx].freq = saved->freq;
    }
}
void huffman_decode(char *huffman_file,char *decoded_file)
{
int buf_size,bit_size;
int decoded_buf_size;
char *buf = file_read(huffman_file,&buf_size);
assert(buf && buf_size > 0);
HUFFMAN_FILE_HEADER *file_header = (HUFFMAN_FILE_HEADER *)buf;
huffman_symtbl_decoded(file_header);
huffman_tree_build();
int header_size = sizeof(HUFFMAN_FILE_HEADER) + huffman_sym_num*sizeof(HUFFMAN_SYM_SAVE_ENTRY);
unsigned short bits = 0;
huffman_symtbl_code(&huffman_tree,bits,0);
huffman_tree_print(&huffman_tree);
bit_size = file_header->bit_size;
decoded_buf_size = bit_size/8 *10; //suppose compressed at max 10%
printf("decoded bit size=%d\n",bit_size);
char *decoded_buf = malloc(decoded_buf_size);
assert(decoded_buf);
char *pbits = buf + header_size;
int bit_pos = 0;
int decoded_char_num = 0;
bit_print(pbits,120);
while(bit_size > 0)
{
int char_bits = 0;
assert(decoded_char_num < decoded_buf_size-1);
decoded_buf[decoded_char_num] = huffman_decode_char(&huffman_tree,pbits,bit_pos,&char_bits);
printf("decoded char=%c\n",decoded_buf[decoded_char_num]);
decoded_char_num++;
bit_size -= char_bits;
bit_pos += char_bits;
}
decoded_buf[decoded_char_num] = '\0';
printf("decode finished!decoded file size=%d,mallocd size=%d\n",decoded_char_num,decoded_buf_size);
/* write to file */
file_write(decoded_file,decoded_buf,decoded_char_num);
free(decoded_buf);
return;
}