import java.nio.charset.StandardCharsets;
import java.util.*;
/**
 * Demonstrates Huffman compression of a byte array and the matching decompression.
 *
 * <p>Compression runs in four steps: count how often each byte occurs ({@link #getNodes}),
 * build the Huffman tree ({@link #createHuffmanTree}), derive the code table
 * ({@link #getCodes(Node)}) and pack the resulting bit string into bytes ({@link #zip}).
 *
 * <p>The code table and the bit count of the most recent {@link #zip} result live in static
 * fields. The class is therefore not safe for concurrent use, and decoding only works for the
 * data that was compressed last.
 */
public class Main {

    /** Huffman code table filled by {@link #getCodes(Node)}, e.g. 97 ('a') -> "001". */
    public static Map<Byte, String> huffmanCodes = new HashMap<>();

    /** Prefix handed to the recursive code builder; it is empty unless a caller appends to it. */
    public static StringBuilder stringbuilder = new StringBuilder();

    /**
     * Number of meaningful bits produced by the most recent {@link #zip} call. The packed bytes
     * do not record how many bits of the final byte are real, so the decoder needs this value to
     * restore leading zeros of the last chunk.
     */
    private static int encodedBitCount;

    /**
     * Compresses a fixed sample string, prints the packed bytes and prints the decoded text.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String str = "This is the test data";
        // Fixed charset so the bytes do not depend on the platform default.
        byte[] bytes = str.getBytes(StandardCharsets.UTF_8);
        byte[] huffmanCodesBytes = huffmanZip(bytes);
        System.out.println("原數據爲:");
        System.out.println(str);
        System.out.println("壓縮後的數據爲:");
        System.out.println(Arrays.toString(huffmanCodesBytes));
        System.out.println("解壓後的字符串爲:");
        System.out.println(new String(decode(huffmanCodes, huffmanCodesBytes), StandardCharsets.UTF_8));
    }

    /**
     * Renders the lowest {@code width} bits of a byte as a string of '0' and '1' characters.
     *
     * @param b     the packed byte
     * @param width number of bits to keep, between 1 and 8
     * @return exactly {@code width} binary digits, leading zeros included
     */
    private static String byteToBitString(byte b, int width) {
        // Masking removes the sign extension; OR-ing in bit 8 forces a 9-character string so
        // that leading zeros of the byte survive Integer.toBinaryString.
        String bits = Integer.toBinaryString((b & 0xFF) | 0x100);
        return bits.substring(bits.length() - width);
    }

    /**
     * Decodes bytes produced by the most recent {@link #zip} call back into the original bytes.
     *
     * @param huffmanCodes code table used for compression (byte -> bit string)
     * @param huffmanBytes packed bytes returned by {@link #zip}
     * @return the decompressed bytes
     * @throws IllegalStateException    if {@code huffmanBytes} does not have the length of the
     *                                  most recent {@link #zip} result
     * @throws IllegalArgumentException if some bits do not match any code in the table
     */
    private static byte[] decode(Map<Byte, String> huffmanCodes, byte[] huffmanBytes) {
        if ((encodedBitCount + 7) / 8 != huffmanBytes.length) {
            throw new IllegalStateException(
                    "Recorded bit count " + encodedBitCount + " does not fit "
                            + huffmanBytes.length + " compressed byte(s)");
        }
        if (huffmanBytes.length == 0) {
            return new byte[0];
        }

        // 1. Rebuild the bit string. Every byte but the last holds 8 bits; the last one holds
        //    whatever remains, and its leading zeros must be restored too.
        int lastIndex = huffmanBytes.length - 1;
        StringBuilder bits = new StringBuilder(encodedBitCount);
        for (int i = 0; i < lastIndex; i++) {
            bits.append(byteToBitString(huffmanBytes[i], 8));
        }
        bits.append(byteToBitString(huffmanBytes[lastIndex], encodedBitCount - 8 * lastIndex));

        // 2. Invert the code table: bit string -> byte.
        Map<String, Byte> bytesByCode = new HashMap<String, Byte>();
        for (Map.Entry<Byte, String> entry : huffmanCodes.entrySet()) {
            bytesByCode.put(entry.getValue(), entry.getKey());
        }

        // 3. Walk the bit string, extending the candidate one bit at a time until it is a code.
        List<Byte> decoded = new ArrayList<Byte>();
        int start = 0;
        while (start < bits.length()) {
            int end = start + 1;
            Byte symbol = bytesByCode.get(bits.substring(start, end));
            while (symbol == null) {
                if (end == bits.length()) {
                    throw new IllegalArgumentException(
                            "Bits starting at index " + start + " match no Huffman code");
                }
                end++;
                symbol = bytesByCode.get(bits.substring(start, end));
            }
            decoded.add(symbol);
            start = end;
        }

        byte[] result = new byte[decoded.size()];
        for (int i = 0; i < result.length; i++) {
            result[i] = decoded.get(i);
        }
        return result;
    }

    /**
     * Runs the whole compression pipeline on {@code bytes}.
     *
     * @param bytes data to compress; an empty array yields an empty result
     * @return the Huffman-encoded bits packed into bytes
     */
    public static byte[] huffmanZip(byte[] bytes) {
        List<Node> nodes = getNodes(bytes);
        Node root = createHuffmanTree(nodes);
        Map<Byte, String> codes = getCodes(root);
        return zip(bytes, codes);
    }

    /**
     * Replaces every input byte by its Huffman code and packs the bit string into bytes, eight
     * bits per byte. The final byte holds the remaining bits, parsed as a plain binary number.
     *
     * @param bytes        data to compress
     * @param huffmanCodes code table (byte -> bit string); may be {@code null} only when
     *                     {@code bytes} is empty
     * @return the packed bytes, {@code ceil(bitCount / 8)} of them
     * @throws IllegalArgumentException if a byte of {@code bytes} has no code in the table
     */
    public static byte[] zip(byte[] bytes, Map<Byte, String> huffmanCodes) {
        StringBuilder bits = new StringBuilder();
        for (byte b : bytes) {
            String code = huffmanCodes.get(b);
            if (code == null) {
                throw new IllegalArgumentException("No Huffman code for byte " + b);
            }
            bits.append(code);
        }

        // Remember the exact bit count for decode; the packed form cannot express it.
        encodedBitCount = bits.length();

        byte[] huffmanCodesBytes = new byte[(bits.length() + 7) / 8];
        int index = 0;
        for (int i = 0; i < bits.length(); i += 8) {
            int end = Math.min(i + 8, bits.length());
            // The int-to-byte cast keeps the low 8 bits, so "10101000" is stored as -88.
            huffmanCodesBytes[index++] = (byte) Integer.parseInt(bits.substring(i, end), 2);
        }
        return huffmanCodesBytes;
    }

    /**
     * Builds the code table for the tree rooted at {@code node} and stores it in
     * {@link #huffmanCodes}, discarding the codes of any earlier call.
     *
     * @param node root of the Huffman tree, or {@code null} for an empty tree
     * @return {@link #huffmanCodes}, or {@code null} when {@code node} is {@code null}
     */
    public static Map<Byte, String> getCodes(Node node) {
        // Codes of a previous tree must not leak into this table.
        huffmanCodes.clear();
        if (node == null) {
            return null;
        }
        if (node.data != null) {
            // The root itself is a leaf (only one distinct byte): give it the one-bit code "0",
            // otherwise it would end up with no code at all.
            getCodes(node, "0", stringbuilder);
            return huffmanCodes;
        }
        getCodes(node.left, "0", stringbuilder);
        getCodes(node.right, "1", stringbuilder);
        return huffmanCodes;
    }

    /**
     * Records the codes of all leaves below {@code node} in {@link #huffmanCodes}.
     *
     * @param node          current node; {@code null} is ignored
     * @param code          branch taken to reach {@code node}: "0" for left, "1" for right
     * @param stringbuilder bits of the path leading to the parent of {@code node}; not modified
     */
    public static void getCodes(Node node, String code, StringBuilder stringbuilder) {
        if (node == null) {
            return;
        }
        // Work on a copy so that sibling branches do not see each other's bits.
        StringBuilder path = new StringBuilder(stringbuilder).append(code);
        if (node.data == null) {
            // Inner node: descend into both children.
            getCodes(node.left, "0", path);
            getCodes(node.right, "1", path);
        } else {
            // Leaf: the accumulated path is the code of this byte.
            huffmanCodes.put(node.data, path.toString());
        }
    }

    /**
     * Counts how often each byte occurs and wraps every distinct byte in a leaf node whose
     * weight is that count.
     *
     * @param bytes data to analyse
     * @return one node per distinct byte, in the iteration order of a {@link HashMap}
     */
    public static List<Node> getNodes(byte[] bytes) {
        Map<Byte, Integer> counts = new HashMap<>();
        for (byte b : bytes) {
            Integer count = counts.get(b);
            counts.put(b, count == null ? 1 : count + 1);
        }

        List<Node> nodes = new ArrayList<Node>(counts.size());
        for (Map.Entry<Byte, Integer> entry : counts.entrySet()) {
            nodes.add(new Node(entry.getKey(), entry.getValue()));
        }
        return nodes;
    }

    /**
     * Builds a Huffman tree by repeatedly merging the two lightest nodes. The given list is
     * consumed in the process: afterwards it holds only the root.
     *
     * @param nodes leaf nodes; must be a modifiable list
     * @return the root of the tree, or {@code null} when {@code nodes} is {@code null} or empty
     */
    public static Node createHuffmanTree(List<Node> nodes) {
        if (nodes == null || nodes.isEmpty()) {
            return null;
        }
        while (nodes.size() > 1) {
            // Ascending by weight; the sort is stable, so ties keep their current order.
            Collections.sort(nodes);
            Node leftNode = nodes.remove(0);
            Node rightNode = nodes.remove(0);
            // The merged node carries no byte, only the combined weight.
            Node parent = new Node(null, leftNode.value + rightNode.value);
            parent.left = leftNode;
            parent.right = rightNode;
            nodes.add(parent);
        }
        return nodes.get(0);
    }

    /**
     * Prints the tree in pre-order, or a notice when there is no tree.
     *
     * @param root root of the tree, may be {@code null}
     */
    public static void preOrder(Node root) {
        if (root != null) {
            root.preOrder();
        } else {
            System.out.println("該二叉樹爲空");
        }
    }
}
//實現Comparable接口
/**
 * Node of a Huffman tree. A node with non-null {@code data} stands for one byte value; nodes
 * created with {@code data == null} only carry a weight and link to children.
 *
 * <p>Nodes order by weight, lightest first. Nodes with equal weight compare as equal, so the
 * ordering is not consistent with {@code equals}.
 */
class Node implements Comparable<Node> {
    Byte data; // the byte this node stands for; null when the node carries only a weight
    int value; // weight of the node
    Node left;
    Node right;

    /**
     * @param data  the byte this node stands for, or {@code null}
     * @param value weight of the node
     */
    public Node(Byte data, int value) {
        this.data = data;
        this.value = value;
    }

    @Override
    public String toString() {
        return "Node{" +
                "data=" + data +
                ", value=" + value +
                '}';
    }

    /**
     * Orders nodes by ascending weight.
     *
     * @param o the node to compare with
     * @return a negative number, zero or a positive number when this node is lighter than, as
     *         heavy as, or heavier than {@code o}
     */
    @Override
    public int compareTo(Node o) {
        // Integer.compare cannot overflow, unlike subtracting the two weights.
        return Integer.compare(this.value, o.value);
    }

    /** Prints this node, then its left subtree, then its right subtree. */
    public void preOrder() {
        System.out.println(this);
        if (this.left != null) {
            this.left.preOrder();
        }
        if (this.right != null) {
            this.right.preOrder();
        }
    }
}
/*
Sample output of Main.main:

原數據爲:
This is the test data
壓縮後的數據爲:
[-124, -18, 119, 116, 119, 55, -23, 28, 0]
解壓後的字符串爲:
This is the test data
*/