1、赫夫曼樹概述
赫夫曼樹又稱爲最優二叉樹。那什麼叫做最優二叉樹呢?最優二叉樹是指由 n 個帶權葉子節點構成的所有二叉樹中,帶權路徑長度最小的那一棵。我們用 wpl 來表示樹的帶權路徑長度(即每個葉子節點的權值乘以它到根節點的路徑長度,再求和),如下圖所示:
圖a的帶權路徑長度 $wpl = 9\times2 + 4\times2 + 5\times2 + 2\times2 = 40$
圖b的帶權路徑長度 $wpl = 9\times1 + 5\times2 + 4\times3 + 2\times3 = 37$
圖c的帶權路徑長度 $wpl = 4\times1 + 2\times2 + 5\times3 + 9\times3 = 50$
由結果我們可以知道,權值越大的節點越靠近根節點,那麼帶權路徑長度就越小。
2、赫夫曼樹的編碼實現
根據赫夫曼樹的概述我們可以知道,想要創建赫夫曼樹,我們必須先對元素進行排序,然後按照權值越大越靠近根節點的方式,就可以生成一棵赫夫曼樹。
如下圖所示我們有這樣一組已排好序的元素
按照以下原則去生成赫夫曼樹
1、取出節點權值最小的兩棵二叉樹
2、組成一棵新的二叉樹,步驟1取出的兩棵二叉樹是該樹的兩棵子樹
3、根節點的權值是取出的兩棵二叉樹根節點的權值之和
生成的赫夫曼樹如圖所示:
代碼實現如下:
package huofuman;
/**
 * A binary-tree node that stores an integer weight together with references
 * to its left and right children.
 */
public class Node {

    /** Weight carried by this node; assigned once in the constructor. */
    private final int value;

    /** Left child, or {@code null} when none has been set. */
    private Node left;

    /** Right child, or {@code null} when none has been set. */
    private Node right;

    /**
     * Creates a node without children.
     *
     * @param value the weight to store in the node
     */
    public Node(int value) {
        this.value = value;
    }

    /**
     * @return the weight stored in this node
     */
    public int getValue() {
        return this.value;
    }

    /**
     * @return the left child, or {@code null} if it has not been set
     */
    public Node getLeft() {
        return this.left;
    }

    /**
     * @param left the node to attach as the left child
     */
    public void setLeft(Node left) {
        this.left = left;
    }

    /**
     * @return the right child, or {@code null} if it has not been set
     */
    public Node getRight() {
        return this.right;
    }

    /**
     * @param right the node to attach as the right child
     */
    public void setRight(Node right) {
        this.right = right;
    }

    /**
     * Renders only the weight of this node; children are not included.
     *
     * @return a string of the form {@code Node{value=N}}
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Node{");
        text.append("value=").append(value);
        text.append('}');
        return text.toString();
    }
}
package huofuman;
import java.util.*;
/**
 * Builds a Huffman (optimal binary) tree from an array of integer weights.
 */
public class HuffmanTree {

    /**
     * Orders nodes from the heaviest to the lightest, so that after sorting the
     * two lightest nodes are the last two elements of the list.
     */
    private static final Comparator<Node> HEAVIEST_FIRST = new Comparator<Node>() {
        @Override
        public int compare(Node o1, Node o2) {
            // Integer.compare cannot overflow, unlike subtracting one weight from the other.
            return Integer.compare(o2.getValue(), o1.getValue());
        }
    };

    /**
     * Creates a Huffman tree whose leaves carry the given weights.
     *
     * <p>The two lightest trees are repeatedly merged under a new parent whose
     * weight is the sum of their weights, until a single tree remains. The
     * lightest tree becomes the left child and the second lightest the right child.
     *
     * @param arr the leaf weights; may be {@code null} or empty
     * @return the root of the tree, or {@code null} when {@code arr} is
     *         {@code null} or empty (there is nothing to build a tree from)
     */
    public Node creatHumanTree(int[] arr) {
        if (arr == null || arr.length == 0) {
            return null;
        }

        // Start with a forest of single-node trees, one per weight.
        List<Node> forest = new ArrayList<>(arr.length);
        for (int weight : arr) {
            forest.add(new Node(weight));
        }

        while (forest.size() > 1) {
            Collections.sort(forest, HEAVIEST_FIRST);

            // After a descending sort the two lightest trees sit at the tail;
            // removing by index takes exactly those two elements.
            Node left = forest.remove(forest.size() - 1);
            Node right = forest.remove(forest.size() - 1);

            Node parent = new Node(left.getValue() + right.getValue());
            parent.setLeft(left);
            parent.setRight(right);

            // The merged tree competes with the remaining ones in the next round.
            forest.add(parent);
        }
        return forest.get(0);
    }
}
3 、赫夫曼編碼概述
赫夫曼編碼在通信領域中有着非常重要的應用:通過赫夫曼編碼對通信的信息進行壓縮,可以使相同的一句話用更少的數據來傳輸。我們知道,相同時間內需要傳輸的數據越少,就意味着傳輸效率越高。下面我們來看看赫夫曼編碼到底是如何對數據進行壓縮的。
假設我們想要在網絡上傳輸這樣一句話: canyoucanacar
我們先統計這段話中一些字母出現的次數:y:1 u:1 r:1 o:1 n:2 c:3 a:4
我們將每一個統計的結果作爲一個二叉樹的節點,可以得到如下的赫夫曼樹:
赫夫曼樹的任何一個葉子節點的路徑都是唯一的,這樣我們可以得到唯一識別的赫夫曼編碼表
這樣生成的二進制文件就是這個樣子的:
0001100101110111
而我們最開始的那一句話轉成的二進制文件是這個樣子的(假設我們按照八位傳輸):
我們將其轉成 ASCII碼
99 97 110 121 111 117 99 97 110 97 99 97 114
對應的二進制
01100011 01100001 01101110 01111001 01101111 01110101 01100011 01100001 01101110 01100001 01100011 01100001 01110010
對比一下我們壓縮的長度,壓縮率達到 72%,根據赫夫曼編碼的性質我們可以知道,數據重複度越高,赫夫曼壓縮的壓縮率就越大。
4、赫夫曼編碼代碼實現
4.1 數據壓縮與解壓
赫夫曼數據壓縮需要經過四個過程
我們來看代碼的實現:
1、節點創建
package hfm;
import javafx.util.Pair;
/**
 * A node of a Huffman tree. Each node wraps a {@code Pair<Byte, Integer>}
 * and holds references to a left and a right child.
 */
public class HuffmanNode {

    /** The pair wrapped by this node; assigned once in the constructor. */
    private final Pair<Byte, Integer> value;

    /** Left child, or {@code null} when none has been set. */
    private HuffmanNode leftNode;

    /** Right child, or {@code null} when none has been set. */
    private HuffmanNode rightNode;

    /**
     * Creates a node without children.
     *
     * @param value the pair to store in the node
     */
    public HuffmanNode(Pair<Byte, Integer> value) {
        this.value = value;
    }

    /**
     * @return the pair stored in this node
     */
    public Pair<Byte, Integer> getValue() {
        return this.value;
    }

    /**
     * @return the left child, or {@code null} if it has not been set
     */
    public HuffmanNode getLeftNode() {
        return this.leftNode;
    }

    /**
     * @param leftNode the node to attach as the left child
     */
    public void setLeftNode(HuffmanNode leftNode) {
        this.leftNode = leftNode;
    }

    /**
     * @return the right child, or {@code null} if it has not been set
     */
    public HuffmanNode getRightNode() {
        return this.rightNode;
    }

    /**
     * @param rightNode the node to attach as the right child
     */
    public void setRightNode(HuffmanNode rightNode) {
        this.rightNode = rightNode;
    }
}
2、解壓縮方法
package hfm;
import javafx.util.Pair;
import java.util.*;
/**
 * Static helpers for Huffman coding of byte arrays: counting byte frequencies,
 * building the Huffman tree, deriving the code table, compressing and decoding.
 *
 * <p>Compressed layout produced by {@link #zip} and consumed by {@link #decode}:
 * the concatenated code bits packed eight per byte (the last data byte is
 * zero-padded on the right), followed by ONE trailer byte holding the number
 * of meaningful bits (1-8) in the last data byte. Empty input compresses to an
 * empty array. The trailer is required because, without it, leading zeros of
 * the final chunk cannot be recovered and the data does not round-trip.
 * Data written by an implementation without this trailer is not compatible.
 *
 * <p>The class keeps no state; every method works only on its arguments.
 */
public abstract class HuffmanUtil {

    /** Number of code bits packed into each compressed data byte. */
    private static final int BITS_PER_BYTE = 8;

    /**
     * Orders nodes from the heaviest to the lightest, so that after sorting the
     * two lightest nodes are the last two elements of the list.
     */
    private static final Comparator<HuffmanNode> HEAVIEST_FIRST = new Comparator<HuffmanNode>() {
        @Override
        public int compare(HuffmanNode o1, HuffmanNode o2) {
            // Integer.compare cannot overflow, unlike subtracting one weight from the other.
            return Integer.compare(o2.getValue().getValue(), o1.getValue().getValue());
        }
    };

    /**
     * Creates one leaf node per distinct byte value, weighted by how often the
     * value occurs in the input.
     *
     * @param bytes the data to analyse; may be {@code null} or empty
     * @return a new mutable list of leaf nodes (empty for {@code null} or empty input)
     */
    public static List<HuffmanNode> getNode(byte[] bytes) {
        List<HuffmanNode> huffmanNodeList = new ArrayList<>();
        if (bytes == null || bytes.length == 0) {
            return huffmanNodeList;
        }

        // Count how many times each byte value occurs.
        Map<Byte, Integer> counts = new HashMap<>();
        for (byte by : bytes) {
            Integer count = counts.get(by);
            counts.put(by, count == null ? 1 : count + 1);
        }

        // Turn every (byte, count) entry into a leaf node.
        for (Map.Entry<Byte, Integer> entry : counts.entrySet()) {
            huffmanNodeList.add(new HuffmanNode(new Pair<>(entry.getKey(), entry.getValue())));
        }
        return huffmanNodeList;
    }

    /**
     * Builds a Huffman tree from the given leaf nodes.
     *
     * <p>The two lightest trees are repeatedly merged under a new parent whose
     * key is {@code null} and whose weight is the sum of both children's
     * weights. The lightest tree becomes the left child. The supplied list is
     * copied and left unmodified.
     *
     * @param huffmanNodeList the leaf nodes; may be {@code null} or empty
     * @return the root of the tree, or {@code null} when there are no nodes
     */
    public static HuffmanNode creatHumanTree(List<HuffmanNode> huffmanNodeList) {
        if (huffmanNodeList == null || huffmanNodeList.isEmpty()) {
            return null;
        }

        List<HuffmanNode> forest = new ArrayList<>(huffmanNodeList);
        while (forest.size() > 1) {
            Collections.sort(forest, HEAVIEST_FIRST);

            // After a descending sort the two lightest trees sit at the tail.
            HuffmanNode leftNode = forest.remove(forest.size() - 1);
            HuffmanNode rightNode = forest.remove(forest.size() - 1);

            // The parent's weight must combine BOTH children.
            int weight = leftNode.getValue().getValue() + rightNode.getValue().getValue();
            HuffmanNode parent = new HuffmanNode(new Pair<Byte, Integer>(null, weight));
            parent.setLeftNode(leftNode);
            parent.setRightNode(rightNode);
            forest.add(parent);
        }
        return forest.get(0);
    }

    /**
     * Derives the code table from a Huffman tree: going to a left child
     * contributes {@code '0'}, going to a right child contributes {@code '1'}.
     *
     * <p>A tree consisting of a single leaf (the input held only one distinct
     * byte value) has no edges, so its symbol is given the one-bit code
     * {@code "0"}; an empty code could not be written to the bit stream.
     *
     * @param node the root of the tree; may be {@code null}
     * @return a new map from byte value to its code (empty when {@code node} is {@code null})
     */
    public static Map<Byte, String> getHuffmanMap(HuffmanNode node) {
        Map<Byte, String> codes = new HashMap<>();
        if (node == null) {
            return codes;
        }

        Byte onlySymbol = node.getValue().getKey();
        if (onlySymbol != null) {
            codes.put(onlySymbol, "0");
            return codes;
        }

        getCodes(node.getLeftNode(), "0", codes);
        getCodes(node.getRightNode(), "1", codes);
        return codes;
    }

    /**
     * Compresses the data with the given code table.
     *
     * @param bytes     the data to compress; may be {@code null} or empty
     * @param huffCodes code table containing a non-empty code for every byte value in {@code bytes}
     * @return the packed code bits followed by one trailer byte (see the class
     *         description); an empty array when there is nothing to compress
     * @throws IllegalArgumentException if a byte value has no code in {@code huffCodes},
     *                                  or a code contains characters other than '0' and '1'
     */
    public static byte[] zip(byte[] bytes, Map<Byte, String> huffCodes) {
        if (bytes == null || bytes.length == 0) {
            return new byte[0];
        }

        StringBuilder bits = new StringBuilder();
        for (byte by : bytes) {
            String code = huffCodes.get(by);
            if (code == null || code.isEmpty()) {
                throw new IllegalArgumentException("No Huffman code for byte value " + by);
            }
            bits.append(code);
        }

        int dataLength = (bits.length() + BITS_PER_BYTE - 1) / BITS_PER_BYTE;
        int remainder = bits.length() % BITS_PER_BYTE;
        int bitsInLastByte = remainder == 0 ? BITS_PER_BYTE : remainder;

        // Pad the final chunk on the right so every data byte carries exactly eight bits.
        for (int pad = bitsInLastByte; pad < BITS_PER_BYTE; pad++) {
            bits.append('0');
        }

        byte[] newBytes = new byte[dataLength + 1];
        for (int i = 0; i < dataLength; i++) {
            int from = i * BITS_PER_BYTE;
            newBytes[i] = (byte) Integer.parseInt(bits.substring(from, from + BITS_PER_BYTE), 2);
        }
        // Trailer: how many bits of the last data byte are real code bits.
        newBytes[dataLength] = (byte) bitsInLastByte;
        return newBytes;
    }

    /**
     * Walks the subtree rooted at {@code node} and records the code of every leaf.
     * A node whose key is {@code null} is treated as an internal node.
     *
     * @param node  the current node; {@code null} is ignored
     * @param path  the complete bit path from the root to {@code node}
     * @param codes the table that receives the codes
     */
    private static void getCodes(HuffmanNode node, String path, Map<Byte, String> codes) {
        if (node == null) {
            return;
        }
        Byte symbol = node.getValue().getKey();
        if (symbol == null) {
            getCodes(node.getLeftNode(), path + "0", codes);
            getCodes(node.getRightNode(), path + "1", codes);
        } else {
            codes.put(symbol, path);
        }
    }

    /**
     * Decodes data produced by {@link #zip} using the same code table.
     *
     * @param codeMap  the code table that was used for compression
     * @param zipBytes the compressed data including its trailer byte; may be {@code null} or empty
     * @return the original bytes (an empty array for {@code null} or empty input)
     * @throws IllegalArgumentException if the trailer byte is missing or out of range, or
     *                                  the bit stream ends in the middle of a code
     */
    public static byte[] decode(Map<Byte, String> codeMap, byte[] zipBytes) {
        if (zipBytes == null || zipBytes.length == 0) {
            return new byte[0];
        }
        if (zipBytes.length < 2) {
            throw new IllegalArgumentException("Compressed data is missing its data or trailer byte");
        }

        int dataLength = zipBytes.length - 1;
        int bitsInLastByte = zipBytes[dataLength];
        if (bitsInLastByte < 1 || bitsInLastByte > BITS_PER_BYTE) {
            throw new IllegalArgumentException("Invalid trailer byte: " + bitsInLastByte);
        }

        // Expand every data byte to eight characters, then drop the padding of the last one.
        StringBuilder bits = new StringBuilder(dataLength * BITS_PER_BYTE);
        for (int i = 0; i < dataLength; i++) {
            bits.append(byteToString(zipBytes[i]));
        }
        bits.setLength(bits.length() - (BITS_PER_BYTE - bitsInLastByte));

        // Invert the table: code -> byte value.
        Map<String, Byte> deCodeMap = new HashMap<>();
        for (Map.Entry<Byte, String> entry : codeMap.entrySet()) {
            deCodeMap.put(entry.getValue(), entry.getKey());
        }

        // Huffman codes are prefix-free, so the shortest match starting at a
        // code boundary is always the correct one.
        List<Byte> byteList = new ArrayList<>();
        int start = 0;
        for (int end = 1; end <= bits.length(); end++) {
            Byte by = deCodeMap.get(bits.substring(start, end));
            if (by != null) {
                byteList.add(by);
                start = end;
            }
        }
        if (start != bits.length()) {
            throw new IllegalArgumentException("Compressed data ends in the middle of a code");
        }

        byte[] byt = new byte[byteList.size()];
        for (int i = 0; i < byt.length; i++) {
            byt[i] = byteList.get(i);
        }
        return byt;
    }

    /**
     * Formats a byte as exactly eight binary digits.
     *
     * @param by the byte to format
     * @return an eight-character string of '0' and '1'
     */
    private static String byteToString(byte by) {
        // Masking removes sign extension; setting bit 8 forces a nine-digit
        // result whose first digit is then dropped, preserving leading zeros.
        return Integer.toBinaryString((by & 0xFF) | 0x100).substring(1);
    }
}
4.2 赫夫曼編碼壓縮解壓文件
package hfm;
import java.io.*;
import java.util.List;
import java.util.Map;
/**
 * File-level helpers that compress and decompress files with {@code HuffmanUtil}.
 *
 * <p>A compressed file is a Java object stream holding two objects in this
 * order: the compressed {@code byte[]} and the {@code Map<Byte, String>} code table.
 */
public abstract class HuffmanFileUtil {

    /** Size of the buffer used when reading a file into memory. */
    private static final int READ_BUFFER_SIZE = 8192;

    /**
     * Compresses a file with Huffman coding.
     *
     * <p>The source file is read completely before the destination is opened,
     * so a failure while reading does not truncate an existing destination.
     * I/O errors are not propagated: they are reported with
     * {@link Throwable#printStackTrace()} and the method returns normally.
     *
     * @param str path of the file to compress
     * @param dst path of the compressed file to write
     */
    public static void zipFile(String str, String dst) {
        try {
            byte[] bytes = readAll(str);

            byte[] zipByte;
            Map<Byte, String> huffmanMap;
            if (bytes.length == 0) {
                // An empty file has no symbols to build a tree from; store an
                // empty payload with an empty code table instead.
                zipByte = new byte[0];
                huffmanMap = new java.util.HashMap<Byte, String>();
            } else {
                List<HuffmanNode> nodes = HuffmanUtil.getNode(bytes);
                HuffmanNode node = HuffmanUtil.creatHumanTree(nodes);
                huffmanMap = HuffmanUtil.getHuffmanMap(node);
                zipByte = HuffmanUtil.zip(bytes, huffmanMap);
            }

            // Both streams are declared as resources so each is closed even if
            // constructing or using the other one fails.
            try (OutputStream outputStream = new FileOutputStream(dst);
                 ObjectOutputStream objectOutput = new ObjectOutputStream(outputStream)) {
                objectOutput.writeObject(zipByte);
                objectOutput.writeObject(huffmanMap);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Decompresses a file written by {@link #zipFile(String, String)}.
     *
     * <p>I/O errors are reported with {@link Throwable#printStackTrace()} and
     * not propagated; other failures (for example
     * {@link ClassNotFoundException} or {@link ClassCastException} when the
     * file does not have the expected content) are thrown to the caller.
     *
     * <p>NOTE(review): this uses Java native deserialization. Only open files
     * from a trusted source; deserializing untrusted data is unsafe.
     *
     * @param src path of the compressed file
     * @param dst path of the decompressed file to write
     * @throws Exception if the file content cannot be deserialized or decoded
     */
    public static void unzip(String src, String dst) throws Exception {
        try {
            byte[] by;
            Map<Byte, String> hufman;
            try (InputStream inputStream = new FileInputStream(src);
                 ObjectInputStream objectInputStream = new ObjectInputStream(inputStream)) {
                // Compressed payload first, then the code table - same order as written.
                by = (byte[]) objectInputStream.readObject();
                @SuppressWarnings("unchecked") // zipFile writes a Map<Byte, String> here
                Map<Byte, String> table = (Map<Byte, String>) objectInputStream.readObject();
                hufman = table;
            }

            byte[] bytes = HuffmanUtil.decode(hufman, by);

            try (OutputStream outputStream = new FileOutputStream(dst)) {
                outputStream.write(bytes);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads the whole file into memory.
     *
     * <p>A read loop is used because a single {@code read} call is not
     * guaranteed to fill the buffer, and {@code available()} is only an
     * estimate of the remaining bytes.
     *
     * @param path path of the file to read
     * @return the complete file content
     * @throws IOException if the file cannot be opened or read
     */
    private static byte[] readAll(String path) throws IOException {
        try (InputStream inputStream = new FileInputStream(path);
             ByteArrayOutputStream buffer = new ByteArrayOutputStream()) {
            byte[] chunk = new byte[READ_BUFFER_SIZE];
            int read;
            while ((read = inputStream.read(chunk)) != -1) {
                buffer.write(chunk, 0, read);
            }
            return buffer.toByteArray();
        }
    }
}