霍夫曼壓縮和解壓縮C語言實現

  最近一段時間抽空看了霍夫曼壓縮和解壓縮的算法,覺得挺巧妙的,就決心自己動手用C實現。沒想到算法看起來挺簡單,實際做起來卻處處都需要仔細考慮才行,折騰了一週左右終於調試通過了。收穫挺大的,付出總是有回報的!

  閒話少說,下面就貼上我的實現代碼。霍夫曼壓縮解壓縮算法需要用到位操作和堆的相關函數。堆的實現在我前面的博客裏已經給出實現代碼了,這裏就不重複貼了。

  1. 位操作實現代碼(轉載的,感謝!)

/*bit.c  位操作的實現*/
#include <stdio.h>
#include <stdlib.h>


/*
 * bit_get - read one bit from a byte buffer.
 *
 * bits: buffer to read from.
 * pos:  bit index; bit 0 is the most significant bit of bits[0],
 *       bit 8 is the most significant bit of bits[1], and so on.
 *
 * Returns 1 when the addressed bit is set, 0 otherwise.
 */
int bit_get(const unsigned char *bits, int pos)
{
    const int           offset = pos % 8;
    /* Single-bit mask, counted from the most significant bit of the byte. */
    const unsigned char mask = (unsigned char)(offset > 0 ? 0x80 >> offset : 0x80);

    return (bits[pos / 8] & mask) != 0;
}

/*
 * bit_set - write one bit of a byte buffer.
 *
 * bits:  buffer to modify.
 * pos:   bit index; bit 0 is the most significant bit of bits[0].
 * state: non-zero sets the bit, zero clears it.
 */
void bit_set(unsigned char *bits, int pos, int state)
{
    const int            offset = pos % 8;
    /* Single-bit mask, counted from the most significant bit of the byte. */
    const unsigned char  mask = (unsigned char)(offset > 0 ? 0x80 >> offset : 0x80);
    unsigned char       *target = &bits[pos / 8];

    if(state)
        *target |= mask;
    else
        *target &= (unsigned char)~mask;
}
/*
 * bit_xor - bitwise exclusive-or of two bit buffers.
 *
 * bits1, bits2: input buffers.
 * bitsx:        output buffer; bit i becomes 1 when bit i of the two
 *               inputs differs and 0 when they are equal.
 * size:         number of bits to process.
 */
void bit_xor(const unsigned char *bits1,const unsigned char *bits2,unsigned char *bitsx,int size)
{
    int pos = 0;

    while(pos < size)
    {
        const int differs = (bit_get(bits1,pos) != bit_get(bits2,pos));

        bit_set(bitsx,pos,differs);
        pos++;
    }
}
/*
 * bit_rot_left - rotate a bit buffer to the left.
 *
 * bits:  buffer holding the bits.
 * size:  number of bits in the buffer; nothing happens when size <= 0.
 * count: number of single-bit rotations to perform.
 *
 * Each rotation shifts every byte left by one, carries the top bit of each
 * byte into the bottom bit of the byte before it, and stores the bit that
 * left the first byte at bit position size-1.
 */
void bit_rot_left(unsigned char *bits,int size,int count)
{
    int wrapped_bit,carry_bit,idx,round;
    int last_byte;

    if(size <= 0)
        return;

    last_byte = (size - 1) / 8;

    for(round = 0; round < count; round++)
    {
        /* The leading bit of the first byte wraps around to the end. */
        wrapped_bit = bit_get(&bits[0],0);
        bits[0] = bits[0] << 1;

        for(idx = 1; idx <= last_byte; idx++)
        {
            /* Move the leading bit of this byte into the byte before it. */
            carry_bit = bit_get(&bits[idx],0);
            bit_set(&bits[idx-1],7,carry_bit);

            bits[idx] = bits[idx] << 1;
        }

        bit_set(bits,size-1,wrapped_bit);
    }
}

/*
 * bit_print - dump the first bit_size bits of a buffer to stdout.
 *
 * Bits are printed as '0'/'1' characters. A line break is emitted after
 * every bit whose index is a non-zero multiple of five, and the dump is
 * terminated with a tab followed by a newline.
 */
void bit_print(unsigned char *bits,int bit_size)
{
    int pos = 0;

    while(pos < bit_size)
    {
        printf("%d",bit_get(bits,pos));

        if(pos > 0 && pos % 5 == 0)
            printf("\n");

        pos++;
    }

    printf("\t\n");
}

霍夫曼相關數據結構:

/* Value written to HUFFMAN_FILE_HEADER.version by huffman_file_create(). */
#define HUFFMAN_VER         0xAA
/* Number of entries in the symbol table, which is indexed by byte value. */
#define HUFFMAN_MAX_SYM 256
/* HUFFMAN_NODE.type of a node that carries an input symbol. */
#define HUFFMAN_LEAF_NODE   1
/* HUFFMAN_NODE.type of a node created by merging two subtrees. */
#define HUFFMAN_TRUNK_NODE  2

/*
 * One node of the Huffman tree. The same structure is the element type
 * stored in the heap while the tree is being built.
 */
typedef struct _huffman_node
{
    /* Heap bookkeeping; heap_node.pri holds the frequency used as priority. */
    HEAP_NODE heap_node;
    /* Symbol byte for a leaf; '@' for nodes produced by a merge. */
    char sym;
    /* Occurrence count; only filled in for leaves when the heap is seeded. */
    int  freq;
    /* HUFFMAN_LEAF_NODE or HUFFMAN_TRUNK_NODE. */
    int  type;
    /* Code bits of the path from the root, written with bit_set(). */
    unsigned short bits;
    /* Number of valid bits in bits. */
    int bits_size;
    /* Child followed for a 0 bit. */
    struct _huffman_node *left;
    /* Child followed for a 1 bit. */
    struct _huffman_node *right;
    /* Node this one was merged under; NULL when never merged. */
    struct _huffman_node *parent;
} HUFFMAN_NODE;

/* In-memory symbol table entry; one slot per possible byte value. */
typedef struct _huffman_sym_entry
{
    /* The symbol byte itself. */
    char                sym;
    /* Number of occurrences; 0 marks an unused slot. */
    int                 freq;
    /* Code bits assigned to the symbol, written with bit_set(). */
    unsigned short      code;
    /* Number of valid bits in code. */
    int                 code_size;
} HUFFMAN_SYM_ENTRY;

/* Symbol/frequency pair as stored in the header of a compressed file. */
typedef struct _huffman_sym_save_entry
{
    /* The symbol byte. */
    char    sym;
    /* Explicit filler between sym and freq; never read by the code shown. */
    char    reserverd[3];
    /* Number of occurrences of sym in the original data. */
    int     freq;
}HUFFMAN_SYM_SAVE_ENTRY;


/*
 * Header written at the start of a compressed file. It is followed by
 * sym_tbl_entry_num symbol entries and then by the encoded bit stream.
 *
 * NOTE(review): sym_save_tbl is a zero-length array (a compiler extension)
 * that is not the last member, so its elements share storage with
 * reserved[]. Encoder and decoder both size the header as
 * sizeof(HUFFMAN_FILE_HEADER) + n * sizeof(HUFFMAN_SYM_SAVE_ENTRY), so they
 * agree with each other; moving reserved[] ahead of the table would be
 * cleaner but would change the on-disk layout.
 */
typedef struct _huffman_file_header
{
    /* Format marker; written as HUFFMAN_VER. */
    int version;
    /* Number of valid bits in the encoded stream. */
    int bit_size;
    /* Number of entries in sym_save_tbl. */
    int sym_tbl_entry_num;
    /* Start of the saved symbol table. */
    HUFFMAN_SYM_SAVE_ENTRY  sym_save_tbl[0];
    /* Counted in sizeof(HUFFMAN_FILE_HEADER); never accessed by name. */
    char reserved[8];
}HUFFMAN_FILE_HEADER;


void    huffman_encode(char *file);
void    huffman_decode(char *huffman_file,char *decoded_file);
void    huffman_test(void);

霍夫曼實現代碼:

/*
    This file implement Huffman compress and decompress algorithm
*/
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <assert.h>
#include "define.h"
#include "data_struct.h"

/* Symbol table indexed by the unsigned byte value of each symbol. */
static HUFFMAN_SYM_ENTRY huffman_symtbl[HUFFMAN_MAX_SYM] = {0};
/* Heap of HUFFMAN_NODE elements, created in huffman_heap_init(). */
static HEAP *huffman_heap = NULL;
/* Root of the Huffman tree, filled in by huffman_tree_build(). */
static HUFFMAN_NODE huffman_tree;
/* Number of distinct symbols currently recorded in huffman_symtbl. */
static int huffman_sym_num;


/*
 * Print a code as "size=<n>\t" followed by its first `size` bits and a
 * newline.
 *
 * code: code bits, laid out the way bit_set() wrote them (bit 0 is the
 *       most significant bit of the first byte of the value in memory).
 * size: number of bits to print; must not exceed the width of `code`.
 */
static void huffman_code_print(unsigned short code,int size)
{
    /* bit_get() works on bytes; view the code through a byte pointer
       rather than passing an incompatible unsigned short pointer. */
    const unsigned char *code_bytes = (const unsigned char *)&code;
    int bit;

    printf("size=%d\t",size);

    for(bit = 0; bit < size; bit++)
    {
        printf("%d",bit_get(code_bytes,bit));
    }

    printf("\n");
}

/*
 * Print one node: a line with its symbol, heap priority and type,
 * followed by its code as printed by huffman_code_print().
 */
static void huffman_node_print(HUFFMAN_NODE *node)
{
    const unsigned short code = node->bits;
    const int            code_size = node->bits_size;

    printf("sym:%c,NF=%d,type=%d\n",node->sym,node->heap_node.pri,node->type);

    huffman_code_print(code,code_size);
}

/*
 * Print every leaf of the tree rooted at `root` in pre-order (node, left
 * subtree, right subtree). For each leaf the symbol, its parent's symbol
 * (or "ROOT"), its heap priority and its code are printed.
 *
 * root: subtree to print; NULL is accepted and prints nothing.
 */
static void huffman_tree_print(HUFFMAN_NODE *root)
{
    if(!root)
        return;

    if(root->type == HUFFMAN_LEAF_NODE)
    {
        if(root->parent)
            printf("%c(%c),NF=%d,",root->sym,root->parent->sym,root->heap_node.pri);
        else
            printf("%c(ROOT),NF=%d,",root->sym,root->heap_node.pri);

        huffman_code_print(root->bits,root->bits_size);
        /* bit_print() takes a byte pointer; cast explicitly instead of
           passing an incompatible unsigned short pointer. */
        bit_print((unsigned char *)&root->bits,root->bits_size);
    }

    /* The NULL check at the top makes per-child checks unnecessary. */
    huffman_tree_print(root->left);
    huffman_tree_print(root->right);
}

static void huffman_symtbl_print(void)
{
    int i;

    printf("total sym num=%d\n",huffman_sym_num);

    for(i = 0; i < HUFFMAN_MAX_SYM; i++)
    {
        if(huffman_symtbl[i].freq)
        {
            printf("%c(%x),freq=%d,code=%x,code_size=%d\n",huffman_symtbl[i].sym,(unsigned char)huffman_symtbl[i].sym,huffman_symtbl[i].freq,huffman_symtbl[i].code,huffman_symtbl[i].code_size);
            bit_print(&huffman_symtbl[i].code,huffman_symtbl[i].code_size);
        }
    }

    return;
}

/*
 * Allocate a zero-filled tree node.
 *
 * Returns the new node. Allocation failure is treated as fatal via
 * assert(), matching how the rest of this file handles allocation errors;
 * the previous version passed an unchecked malloc() result to memset().
 */
static HUFFMAN_NODE * huffman_node_new(void)
{
    HUFFMAN_NODE *node = calloc(1,sizeof(*node));

    assert(node);

    return node;
}


static void huffman_symtbl_build(char *buf)
{
    char *p = buf;

    while(*p)
    {
        unsigned char ch = (unsigned char)*p;

        if(huffman_symtbl[ch].freq == 0)huffman_sym_num++;

        huffman_symtbl[ch].sym = *p;
        huffman_symtbl[ch].freq++;
        p++;
    }

    return;
}

/*
 * Create the heap used for tree construction and seed it with one leaf
 * node per symbol that has a non-zero frequency in huffman_symtbl.
 *
 * Each leaf is fully zeroed before its fields are set. The previous
 * version inserted a stack node whose parent, bits, bits_size and
 * remaining heap_node fields were indeterminate, so a tree consisting of a
 * single leaf ended up with a garbage parent pointer.
 */
void huffman_heap_init(void)
{
    int i;

    huffman_heap = heap_init(HEAP_SMALL,sizeof(HUFFMAN_NODE),512);
    /* NOTE(review): assumes heap_init() reports failure by returning NULL. */
    assert(huffman_heap);

    for(i = 0; i < HUFFMAN_MAX_SYM; i++)
    {
        HUFFMAN_NODE node;

        if(huffman_symtbl[i].freq == 0)
            continue;

        memset(&node,0,sizeof(node));

        node.sym = huffman_symtbl[i].sym;
        node.freq = node.heap_node.pri = huffman_symtbl[i].freq;
        node.type = HUFFMAN_LEAF_NODE;
        node.left = node.right = node.parent = NULL;

        /* The node lives on the stack, so heap_insert() presumably copies
           it into heap-owned storage. */
        heap_insert(huffman_heap,&node);
    }
}

/*
 * Turn tree_parent into a trunk node joining two subtrees.
 *
 * tree_parent: node to initialise; it is zeroed first, so any previous
 *              contents are discarded.
 * tree_l:      becomes the left child.
 * tree_r:      becomes the right child.
 *
 * The parent's heap priority is the sum of the children's priorities, its
 * symbol is the placeholder '@', and both children get their parent
 * pointer set to tree_parent.
 */
static void  huffman_tree_merge(HUFFMAN_NODE *tree_parent,HUFFMAN_NODE *tree_l,HUFFMAN_NODE *tree_r)
{
    /* Placeholder symbol carried by every merged node. */
    const char trunk_sym = '@';

    memset(tree_parent,0,sizeof(*tree_parent));

    tree_parent->heap_node.pri = tree_l->heap_node.pri + tree_r->heap_node.pri;
    tree_parent->sym = trunk_sym;
    tree_parent->type = HUFFMAN_TRUNK_NODE;

    tree_parent->left = tree_l;
    tree_parent->right = tree_r;

    tree_r->parent = tree_parent;
    tree_l->parent = tree_parent;
}

/*
 * Build the Huffman tree from the frequencies in huffman_symtbl and store
 * its root in huffman_tree.
 *
 * The heap is seeded with one leaf per symbol; the two lowest-priority
 * nodes are then repeatedly removed, merged under a new trunk node and
 * the trunk re-inserted, until fewer than two nodes remain. The last node
 * taken from the heap is the root.
 *
 * Every heap_get() result is now checked (0 is treated as success, as the
 * final extraction already assumed); previously failures inside the loop
 * were silently ignored and uninitialised nodes could be merged.
 *
 * NOTE(review): nodes handed to heap_insert() appear to be copied by the
 * heap, so the parent pointers set by huffman_tree_merge() refer to the
 * pre-insert copy of the trunk node rather than the copy that ends up in
 * the tree - confirm against the heap implementation. The heap itself is
 * not released here.
 */
static void huffman_tree_build(void)
{
    int rc;
    HUFFMAN_NODE *node_l,*node_r,*node_parent;

    /* 1. init huffman heap */
    huffman_heap_init();

    /* 2. merge the two smallest subtrees until one tree is left */
    while(HEAP_NODE_NUM(huffman_heap) >= 2)
    {
        node_l = huffman_node_new();
        node_r = huffman_node_new();
        node_parent = huffman_node_new();

        rc = heap_get(huffman_heap,node_l);
        assert(rc == 0);
        rc = heap_get(huffman_heap,node_r);
        assert(rc == 0);

        huffman_tree_merge(node_parent,node_l,node_r);

        heap_insert(huffman_heap,node_parent);
    }

    /* 3. the remaining node is the root */
    rc = heap_get(huffman_heap,&huffman_tree);
    assert(rc == 0);
    (void)rc; /* keeps builds with NDEBUG free of unused-variable warnings */
}

/*
 * Assign codes to the subtree rooted at `node` and record the code of
 * every leaf in huffman_symtbl.
 *
 * node:      subtree root; must not be NULL.
 * bits:      code bits accumulated on the path from the tree root.
 * bits_size: number of valid bits in `bits` (the depth of `node`).
 *
 * The left child extends the code with a 0 bit, the right child with a 1.
 *
 * Fixes over the previous version:
 *  - only leaves write to the symbol table; trunk nodes all carry the
 *    placeholder symbol '@' and used to overwrite the entry of a real '@'
 *    character with a trunk's partial code;
 *  - the table is indexed by the unsigned byte value, so symbols >= 0x80
 *    no longer produce a negative index where char is signed;
 *  - a tree that is a single leaf gets a 1-bit code instead of a 0-bit
 *    one, so an input with one distinct symbol encodes to a non-empty bit
 *    stream that the decoder can walk;
 *  - descending below the width of `bits` is rejected instead of letting
 *    bit_set() write past the end of the unsigned short;
 *  - bit_set() receives a byte pointer instead of an incompatible
 *    unsigned short pointer.
 */
static void   huffman_symtbl_code(HUFFMAN_NODE *node,unsigned short bits,int bits_size)
{
    const int max_code_bits = (int)(8 * sizeof(bits));

    assert(node);

    /* Only the root can be a leaf at depth 0. */
    if(node->type == HUFFMAN_LEAF_NODE && bits_size == 0)
        bits_size = 1;

    node->bits = bits;
    node->bits_size = bits_size;

    if(node->type == HUFFMAN_LEAF_NODE)
    {
        HUFFMAN_SYM_ENTRY *entry = &huffman_symtbl[(unsigned char)node->sym];

        /* A non-zero code here means the symbol was already coded. */
        assert(entry->code == 0);

        entry->code = bits;
        entry->code_size = bits_size;
    }

    if(node->left)
    {
        unsigned short l_bits = bits;

        assert(bits_size < max_code_bits);
        bit_set((unsigned char *)&l_bits,bits_size,0);
        huffman_symtbl_code(node->left,l_bits,bits_size + 1);
    }

    if(node->right)
    {
        unsigned short r_bits = bits;

        assert(bits_size < max_code_bits);
        bit_set((unsigned char *)&r_bits,bits_size,1);
        huffman_symtbl_code(node->right,r_bits,bits_size + 1);
    }
}

/*
 * Append one code to an output bit buffer.
 *
 * p:         destination buffer.
 * start_bit: bit position in `p` at which the first code bit is written.
 * code:      code bits, laid out the way bit_set() wrote them.
 * code_size: number of bits to copy from `code`.
 */
static void  huffman_char2code(unsigned char *p,int start_bit,unsigned short code,int code_size)
{
    /* bit_get() works on bytes; view the code through a byte pointer
       rather than passing an incompatible unsigned short pointer. */
    const unsigned char *code_bytes = (const unsigned char *)&code;
    int bit;

    for(bit = 0;bit < code_size;bit++)
    {
        bit_set(p,start_bit + bit,bit_get(code_bytes,bit));
    }
}


unsigned char* huffman_buf_encode(char *buf,int size,int *buf_bit_size)
{
    assert(size > 0);

    unsigned char *buf_encoded = malloc(size);
    int start_bit = 0;
    int i;

    for(i = 0;i < size;i++)
    {
        char ch = buf[i];
        huffman_char2code(buf_encoded,start_bit,huffman_symtbl[ch].code,huffman_symtbl[ch].code_size);
        start_bit += huffman_symtbl[ch].code_size;
    }

    *buf_bit_size = start_bit;

    return buf_encoded;
}

/*
 * Write the compressed file "huffman_file": a header carrying the version,
 * the bit count and the symbol/frequency table, followed by the encoded
 * bit stream rounded up to whole bytes.
 *
 * buf:      encoded bit stream.
 * bit_size: number of valid bits in `buf`; must be > 0.
 *
 * Fixes over the previous version: the header is zero-filled, so its
 * reserved and padding bytes no longer carry uninitialised heap contents
 * into the file, and the header is released after it has been written.
 */
static  void huffman_file_create(unsigned char *buf,int bit_size)
{
    int encoded_size;
    int i;
    int j = 0;
    HUFFMAN_FILE_HEADER *file_header = NULL;
    int header_size = sizeof(HUFFMAN_FILE_HEADER) + huffman_sym_num*sizeof(HUFFMAN_SYM_SAVE_ENTRY);

    assert(bit_size > 0);

    /* Round the bit count up to whole bytes. */
    encoded_size = bit_size/8;
    if(bit_size % 8)
        encoded_size++;

    file_header = calloc(1,(size_t)header_size);
    assert(file_header);

    file_header->version = HUFFMAN_VER;
    file_header->bit_size = bit_size;

    printf("version=%x,bit size=%d\n",HUFFMAN_VER,bit_size);

    file_header->sym_tbl_entry_num = huffman_sym_num;

    for(i = 0;i < HUFFMAN_MAX_SYM;i++)
    {
        if(huffman_symtbl[i].freq == 0)continue;

        file_header->sym_save_tbl[j].sym = huffman_symtbl[i].sym;
        file_header->sym_save_tbl[j].freq = huffman_symtbl[i].freq;
        j++;

        printf("encoded sym=%c,freq=%d\n",huffman_symtbl[i].sym,huffman_symtbl[i].freq);
    }

    assert(j == huffman_sym_num);

    file_write("huffman_file",file_header,header_size);
    file_append("huffman_file",buf,encoded_size);

    free(file_header);
}



/*
 * Compress a file.
 *
 * file: path of the input file.
 *
 * The input is read into memory, its symbol frequencies are counted, the
 * Huffman tree and per-symbol codes are built, the data is encoded and the
 * result is written by huffman_file_create(). The resulting tree is then
 * printed.
 *
 * Fixes over the previous version: the encoded buffer is held through an
 * unsigned char pointer, matching what huffman_buf_encode() returns and
 * huffman_file_create() expects, and it is released before returning.
 */
void huffman_encode(char *file)
{
    int size;
    int total_bit_size;
    unsigned short bits = 0;
    unsigned char *buf_encoded = NULL;

    /* 1. read file into buf */
    char *buf = file_read(file,&size);
    assert(buf);
    /* 2.scan buf into huffman_symtbl */
    huffman_symtbl_build(buf);
    /* 3. build huffman tree */
    huffman_tree_build();
    /* 4. set code for each sym */
    huffman_symtbl_code(&huffman_tree,bits,0);
    /* 5. begin compress buf with the code */
    buf_encoded = huffman_buf_encode(buf,size,&total_bit_size);
    /* 6. write encoded buf into HUFFMAN file */
    huffman_file_create(buf_encoded,total_bit_size);

    huffman_tree_print(&huffman_tree);

    free(buf_encoded);
    free(buf);
}

/*
 * Decode one symbol from a bit stream by walking the tree.
 *
 * node:      tree node to start from; must not be NULL.
 * pbits:     encoded bit stream.
 * pos:       position of the first bit to consume.
 * char_bits: receives the bits_size of the leaf that was reached.
 *
 * Starting at `node`, a 1 bit moves to the right child and a 0 bit to the
 * left child until a leaf is reached. Returns the symbol of that leaf.
 */
static char huffman_decode_char(HUFFMAN_NODE *node,char *pbits,int pos,int *char_bits)
{
    HUFFMAN_NODE *cur = node;
    int cursor = pos;

    assert(cur);

    while(cur->type != HUFFMAN_LEAF_NODE)
    {
        cur = bit_get(pbits,cursor) ? cur->right : cur->left;
        cursor++;

        assert(cur);
    }

    *char_bits = cur->bits_size;

    return cur->sym;
}

/*
 * Load the symbol table stored in a compressed file's header into
 * huffman_symtbl and set huffman_sym_num accordingly.
 *
 * file_header: header at the start of the compressed file; must not be
 *              NULL.
 *
 * Fixes over the previous version:
 *  - the table is cleared first, so frequencies and codes left over from
 *    an earlier encode or decode in the same process cannot leak into the
 *    tree built for this file;
 *  - the entry count read from the file is range-checked before use;
 *  - the table is indexed by the unsigned byte value (a plain char index
 *    is negative for symbols >= 0x80 where char is signed).
 */
static  void huffman_symtbl_decoded(HUFFMAN_FILE_HEADER *file_header)
{
    int i;

    assert(file_header);
    assert(file_header->sym_tbl_entry_num >= 0);
    assert(file_header->sym_tbl_entry_num <= HUFFMAN_MAX_SYM);

    memset(huffman_symtbl,0,sizeof(huffman_symtbl));
    huffman_sym_num = file_header->sym_tbl_entry_num;

    for(i = 0;i < huffman_sym_num;i++)
    {
        unsigned char idx = (unsigned char)file_header->sym_save_tbl[i].sym;

        printf("decoded sym=%c,freq=%d\n",file_header->sym_save_tbl[i].sym,file_header->sym_save_tbl[i].freq);
        huffman_symtbl[idx].sym = file_header->sym_save_tbl[i].sym;
        huffman_symtbl[idx].freq = file_header->sym_save_tbl[i].freq;
    }
}


void huffman_decode(char *huffman_file,char *decoded_file)
{
    int buf_size,bit_size;
    int decoded_buf_size;

    char *buf = file_read(huffman_file,&buf_size);
    assert(buf && buf_size > 0);

    HUFFMAN_FILE_HEADER *file_header = (HUFFMAN_FILE_HEADER *)buf;

    huffman_symtbl_decoded(file_header);

    huffman_tree_build();

    int header_size = sizeof(HUFFMAN_FILE_HEADER) + huffman_sym_num*sizeof(HUFFMAN_SYM_SAVE_ENTRY);

    unsigned short bits = 0;
    huffman_symtbl_code(&huffman_tree,bits,0);

    huffman_tree_print(&huffman_tree);

    bit_size = file_header->bit_size;
    decoded_buf_size = bit_size/8 *10; //suppose compressed at max 10%

    printf("decoded bit size=%d\n",bit_size);

    char *decoded_buf = malloc(decoded_buf_size);
    assert(decoded_buf);

    char *pbits = buf + header_size;

    int bit_pos = 0;
    int decoded_char_num = 0;

    bit_print(pbits,120);

    while(bit_size > 0)
    {
        int char_bits = 0;

        assert(decoded_char_num < decoded_buf_size-1);

        decoded_buf[decoded_char_num] = huffman_decode_char(&huffman_tree,pbits,bit_pos,&char_bits);

        printf("decoded char=%c\n",decoded_buf[decoded_char_num]);

        decoded_char_num++;
        bit_size -= char_bits;
        bit_pos += char_bits;
    }

    decoded_buf[decoded_char_num] = '\0';

    printf("decode finished!decoded file size=%d,mallocd size=%d\n",decoded_char_num,decoded_buf_size);

    /* write to file */
    file_write(decoded_file,decoded_buf,decoded_char_num);

    free(decoded_buf);

    return;
}

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章