哈夫曼編碼原理以及實現
哈夫曼編碼的主要用途:
哈夫曼編碼原理概述
哈夫曼編碼代碼實現過程
// One node of the Huffman tree. Nodes live in a flat list and refer to each other by list index.
public class Haff {
private int node; // value of this node: the occurrence count for a leaf, the sum of both children for an internal node
private int parent; // list index of the parent node; -1 while the node has no parent
private int llink; // list index of the left child; -1 when there is none
private int rlink; // list index of the right child; -1 when there is none
private int mark; // byte value this leaf stands for; written back out when decoding
}
下面是用Java寫的哈夫曼編碼二叉樹的生成過程:
public void generatorTree(List<Haff> list){
int length = (list.size()+1)/2;
for (int i = 0; i < length-1; i++) { // x,y爲最小兩個數組的下標,min1,min2爲Integer類型最大值,方便比較的大小 int min1 = MAXINT, min2 = MAXINT, x = ELEINDEX, y = ELEINDEX; // 找出指定鏈表長度內最小的兩個數 for (int j = 0; j < length + i; j++) { if (list.get(j).getParent() == -1 && min1 > list.get(j).getNode()) { y = x; min2 = min1; min1 = list.get(j).getNode(); x = j; } else if (list.get(j).getParent() == -1 && min2 > list.get(j).getNode()) { min2 = list.get(j).getNode(); y = j; } } list.get(x).setParent(length + i); list.get(y).setParent(length + i); list.get(length + i).setNode(min1 + min2); list.get(length + i).setLlink(x); list.get(length + i).setRlink(y); } }
從代碼中我們可以看出,每次循環都要在列表當前已使用的範圍內,找出兩個還沒有父結點且權值最小的結點,然後把它們合併成一個新的結點,寫入列表中預先留好的下一個位置。
下面是根據已經生成的哈夫曼樹生成哈夫曼編碼的過程(採用遞歸的方式實現):
利用遞歸的方法,生成Huffman編碼:
/**
 * Walks the tree depth-first from {@code index} and records the code of every leaf it reaches.
 *
 * <p>Going to the left child appends '0' to the current path and going to the right child
 * appends '1'. A node counts as a leaf when either of its child links is -1.
 *
 * @param list  the tree, with nodes linked by list index
 * @param index index of the node to start from (the root for a full traversal)
 * @param strb  path from the root to {@code index}; restored to its incoming content on return
 * @param map   receives one entry per leaf: leaf mark to code string
 */
public void generatorCode(List<Haff> list, int index, StringBuilder strb, Map<Integer, String> map) {
    Haff current = list.get(index);
    if (current.getRlink() == -1 || current.getLlink() == -1) {
        // A leaf reached with an empty path means the tree consists of this single node. An empty
        // code would make the encoder write nothing, so the symbol gets a one-bit code instead.
        String code = strb.length() == 0 ? "0" : strb.toString();
        map.put(current.getMark(), code);
        return;
    }
    int depth = strb.length();
    strb.append('0');
    generatorCode(list, current.getLlink(), strb, map);
    strb.setLength(depth);
    strb.append('1');
    generatorCode(list, current.getRlink(), strb, map);
    strb.setLength(depth);
}
也可以藉助棧,改用非遞歸的方式實現。
模擬哈夫曼編碼實現文件壓縮過程
package algo;
/**
 * One node of the array-backed Huffman tree.
 *
 * <p>Nodes are kept in a list and point at each other by list index, with -1 meaning "no such
 * node". A freshly created node has value 0, no parent, no children and no mark.
 */
public class Haff {
    /** Value of the node: an occurrence count for a leaf, the sum of the children otherwise. */
    private int node = 0;
    /** List index of the parent, or -1 while there is none. */
    private int parent = -1;
    /** List index of the left child, or -1 when there is none. */
    private int llink = -1;
    /** List index of the right child, or -1 when there is none. */
    private int rlink = -1;
    /** Byte value a leaf stands for, or -1 when not set. */
    private int mark = -1;

    /** Creates an unlinked node with value 0; the defaults come from the field initializers. */
    public Haff() {
    }

    // ----- value -----

    public int getNode() {
        return this.node;
    }

    public void setNode(int node) {
        this.node = node;
    }

    // ----- links -----

    public int getParent() {
        return this.parent;
    }

    public void setParent(int parent) {
        this.parent = parent;
    }

    public int getLlink() {
        return this.llink;
    }

    public void setLlink(int llink) {
        this.llink = llink;
    }

    public int getRlink() {
        return this.rlink;
    }

    public void setRlink(int rlink) {
        this.rlink = rlink;
    }

    // ----- mark -----

    public int getMark() {
        return this.mark;
    }

    public void setMark(int mark) {
        this.mark = mark;
    }
}
package algo;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Builds a Huffman tree over the bytes of a file and uses it to encode and decode files.
 *
 * <p>The tree is a flat list of {@link Haff} nodes: leaves first, internal nodes after them, root
 * last. Nodes refer to each other by list index, with -1 meaning "none". The encoded file is a
 * demonstration format that stores every code bit as the ASCII character '0' or '1', i.e. one
 * whole byte per bit.
 */
public class HaffTree {
    /** Number of distinct byte values, hence the maximum number of leaves. */
    private static final int SYMBOL_COUNT = 256;
    private static final int MAXINT = Integer.MAX_VALUE;
    private static final int ELEINDEX = -1;
    /** Code given to the only symbol of a single-leaf tree, which has no edges to derive one from. */
    private static final String SINGLE_SYMBOL_CODE = "0";

    /**
     * Counts how often each byte value occurs in {@code file} and lays out the node list.
     *
     * <p>The counts are local to the call, so repeated calls do not influence each other. If
     * reading fails, the exception is printed to {@code System.err} and the list is built from
     * whatever was counted up to that point.
     *
     * @param file file to analyse
     * @return one leaf per byte value that occurs (in ascending byte order), followed by
     *         {@code leaves - 1} blank nodes reserved for the internal nodes; empty when no byte
     *         was read
     */
    public List<Haff> initTree(File file) {
        int[] counts = new int[SYMBOL_COUNT];
        try (InputStream in = new BufferedInputStream(new FileInputStream(file))) {
            int c;
            while ((c = in.read()) != -1) {
                counts[c]++;
            }
        } catch (IOException ex) {
            System.err.println(ex);
        }

        List<Haff> list = new ArrayList<>(2 * SYMBOL_COUNT);
        for (int value = 0; value < SYMBOL_COUNT; value++) {
            if (counts[value] != 0) {
                Haff leaf = new Haff();
                leaf.setNode(counts[value]);
                leaf.setMark(value);
                list.add(leaf);
            }
        }
        // A binary tree in which every internal node has two children has one internal node
        // fewer than it has leaves; reserve those slots now so they can be filled by index later.
        int leaves = list.size();
        for (int i = 0; i < leaves - 1; i++) {
            list.add(new Haff());
        }
        return list;
    }

    /**
     * Links the nodes of {@code list} into a Huffman tree and returns the resulting code table.
     *
     * <p>Each pass picks the two parentless nodes with the smallest values among the slots used
     * so far and joins them under the next free internal slot; the root ends up in the last slot.
     *
     * @param list node list as produced by {@link #initTree(File)}; modified in place
     * @return byte value to code string of '0'/'1' characters; empty when {@code list} is empty
     */
    public Map<Integer, String> generatorTree(List<Haff> list) {
        Map<Integer, String> map = new HashMap<>();
        if (list.isEmpty()) {
            // Nothing was counted, so there is no root to start the traversal from.
            return map;
        }
        int length = (list.size() + 1) / 2;
        for (int i = 0; i < length - 1; i++) {
            // x / y: indices of the smallest and second-smallest nodes; min1 / min2: their values,
            // starting at the largest int so that any real value compares smaller.
            int min1 = MAXINT, min2 = MAXINT, x = ELEINDEX, y = ELEINDEX;
            int used = length + i;
            for (int j = 0; j < used; j++) {
                Haff candidate = list.get(j);
                if (candidate.getParent() != -1) {
                    continue; // already merged into a subtree
                }
                int weight = candidate.getNode();
                if (weight < min1) {
                    // New minimum: the previous minimum becomes the runner-up.
                    y = x;
                    min2 = min1;
                    min1 = weight;
                    x = j;
                } else if (weight < min2) {
                    min2 = weight;
                    y = j;
                }
            }
            Haff merged = list.get(used);
            list.get(x).setParent(used);
            list.get(y).setParent(used);
            merged.setNode(min1 + min2);
            merged.setLlink(x);
            merged.setRlink(y);
        }
        generatorCode(list, list.size() - 1, new StringBuilder(), map);
        return map;
    }

    /**
     * Walks the tree depth-first from {@code index} and records the code of every leaf reached.
     *
     * <p>Going left appends '0' to the path and going right appends '1'. A node counts as a leaf
     * when either of its child links is -1.
     *
     * @param list  the linked tree
     * @param index index of the node to start from (the root for a full traversal)
     * @param strb  path from the root to {@code index}; restored to its incoming content on return
     * @param map   receives one entry per leaf: leaf mark to code string
     */
    public void generatorCode(List<Haff> list, int index, StringBuilder strb, Map<Integer, String> map) {
        Haff current = list.get(index);
        if (current.getRlink() == -1 || current.getLlink() == -1) {
            // A leaf reached with an empty path means the tree is a single node. An empty code
            // would make the encoder write nothing, so that symbol gets a one-bit code instead.
            map.put(current.getMark(), strb.length() == 0 ? SINGLE_SYMBOL_CODE : strb.toString());
            return;
        }
        int depth = strb.length();
        strb.append('0');
        generatorCode(list, current.getLlink(), strb, map);
        strb.setLength(depth);
        strb.append('1');
        generatorCode(list, current.getRlink(), strb, map);
        strb.setLength(depth);
    }

    /**
     * Encodes {@code inFile} into {@code outFile} by replacing every byte with its code string.
     *
     * <p>Bytes that have no entry in {@code map} are skipped. I/O errors are printed to
     * {@code System.err}.
     *
     * @param inFile  file to encode
     * @param outFile destination; receives the codes as ASCII '0' and '1' characters
     * @param map     code table as returned by {@link #generatorTree(List)}
     */
    public void getNewFile(File inFile, File outFile, Map<Integer, String> map) {
        try (InputStream in = new BufferedInputStream(new FileInputStream(inFile));
             OutputStream out = new BufferedOutputStream(new FileOutputStream(outFile))) {
            int c;
            while ((c = in.read()) != -1) {
                String code = map.get(c);
                if (code != null) {
                    // Codes contain only '0' and '1'; pin the charset so the output does not
                    // depend on the platform default.
                    out.write(code.getBytes(StandardCharsets.US_ASCII));
                }
            }
        } catch (IOException ex) {
            System.err.println(ex);
        }
    }

    /**
     * Decodes a file written by {@link #getNewFile(File, File, Map)} back into the original bytes.
     *
     * <p>Starting at the root, every character read selects a child; when a leaf is reached its
     * mark is written out and the walk restarts at the root. I/O errors are printed to
     * {@code System.err}.
     *
     * @param file   destination for the decoded bytes
     * @param inFile encoded input consisting of '0' and '1' characters
     * @param list   the linked tree that was used for encoding
     */
    public void enGeneratorCode(File file, File inFile, List<Haff> list) {
        try (InputStream in = new BufferedInputStream(new FileInputStream(inFile));
             OutputStream out = new BufferedOutputStream(new FileOutputStream(file))) {
            int root = list.size() - 1;
            if (root < 0) {
                return; // no tree, nothing can be decoded; the output stays empty
            }
            // In a single-node tree the root is itself the leaf and has no children to follow,
            // so every code bit simply stands for one occurrence of that symbol.
            boolean singleLeaf = list.get(root).getLlink() == -1;
            int index = root;
            int bit;
            while ((bit = in.read()) != -1) {
                if (singleLeaf) {
                    out.write(list.get(root).getMark());
                    continue;
                }
                int next = getNextNode(list, index, bit);
                if (list.get(next).getLlink() == -1) {
                    out.write(list.get(next).getMark());
                    index = root;
                } else {
                    index = next;
                }
            }
        } catch (IOException ex) {
            System.err.println(ex);
        }
    }

    /**
     * Returns the child of {@code index} selected by one encoded character.
     *
     * @param list  the linked tree
     * @param index index of the current node
     * @param node  character read from the encoded file; '0' selects the left child and any other
     *              value selects the right child
     * @return list index of the selected child, or -1 if the current node has no such child
     */
    public int getNextNode(List<Haff> list, int index, int node) {
        Haff current = list.get(index);
        return node == '0' ? current.getLlink() : current.getRlink();
    }
}
package algo;
import java.io.File;
import java.util.List;
import java.util.Map;
/**
 * Command-line demo: encodes a file with {@link HaffTree} and decodes it again.
 *
 * <p>Usage: {@code readFile [input [encoded [decoded]]]}. Any path that is not given falls back
 * to the default below.
 */
public class readFile {
    private static final String DEFAULT_INPUT = "E:/user.txt";
    private static final String DEFAULT_ENCODED = "E:/userCode.txt";
    private static final String DEFAULT_DECODED = "E:/default.txt";

    /**
     * Runs the encode/decode round trip.
     *
     * @param args optional paths: input file, encoded output, decoded output
     */
    public static void main(String[] args) {
        File inFile = new File(argOrDefault(args, 0, DEFAULT_INPUT));
        File outFile = new File(argOrDefault(args, 1, DEFAULT_ENCODED));
        File file = new File(argOrDefault(args, 2, DEFAULT_DECODED));

        HaffTree haffTree = new HaffTree();
        // Count the bytes of the input and lay out the node list.
        List<Haff> list = haffTree.initTree(inFile);
        if (list.isEmpty()) {
            // An empty list has no root, so there is no tree to build and nothing to encode.
            System.err.println("Nothing to encode: " + inFile + " is empty or could not be read");
            return;
        }
        // Build the tree and obtain the code table.
        Map<Integer, String> map = haffTree.generatorTree(list);
        // Write the encoded file.
        haffTree.getNewFile(inFile, outFile, map);
        // Decode it again using the same tree.
        haffTree.enGeneratorCode(file, outFile, list);
    }

    /** Returns {@code args[position]} when present, otherwise {@code fallback}. */
    private static String argOrDefault(String[] args, int position, String fallback) {
        return args != null && args.length > position ? args[position] : fallback;
    }
}