1.壓縮:使用赫夫曼編碼進行壓縮
題目
構建赫夫曼樹
package tree.huffmantree;
import java.util.*;
/**
 * First step of Huffman compression: counts how often each byte occurs in a sample
 * string, builds the Huffman tree from those counts and prints the tree in pre-order.
 */
public class HuffmanCode {
    public static void main(String[] args) {
        String content = "i like like like java do you like a java";
        // Name the charset explicitly so the byte values do not depend on the platform default.
        byte[] contentBytes = content.getBytes(java.nio.charset.StandardCharsets.UTF_8);
        System.out.println(contentBytes.length);

        List<Node1> nodes = getNodes(contentBytes);
        // Build the tree and dump it to verify the construction.
        Node1 huffmanTree = createHuffmanTree(nodes);
        preOrder(huffmanTree);
    }

    /**
     * Prints the tree rooted at {@code root} in pre-order, or a notice when there is no tree.
     *
     * @param root root of the Huffman tree; may be {@code null}
     */
    public static void preOrder(Node1 root) {
        if (root != null) {
            root.preOrder();
        } else {
            System.out.println("赫夫曼樹爲空");
        }
    }

    /**
     * Counts the occurrences of every distinct byte and wraps each (byte, count) pair in a leaf node.
     *
     * @param bytes the data to analyse
     * @return one leaf per distinct byte value, weighted by its number of occurrences
     */
    private static List<Node1> getNodes(byte[] bytes) {
        Map<Byte, Integer> counts = new HashMap<>();
        for (byte b : bytes) {
            Integer seen = counts.get(b);
            // First sighting of this byte starts its counter at 1.
            counts.put(b, seen == null ? 1 : seen + 1);
        }

        List<Node1> leaves = new ArrayList<>(counts.size());
        for (Map.Entry<Byte, Integer> entry : counts.entrySet()) {
            leaves.add(new Node1(entry.getKey(), entry.getValue()));
        }
        return leaves;
    }

    /**
     * Builds a Huffman tree by repeatedly merging the two lightest trees in {@code nodes}.
     * The list is consumed: on return it holds only the root.
     *
     * @param nodes the leaves to combine
     * @return the root of the tree, or {@code null} when there are no nodes to combine
     */
    private static Node1 createHuffmanTree(List<Node1> nodes) {
        // Nothing to merge: report "no tree" instead of failing on nodes.get(0) below.
        if (nodes == null || nodes.isEmpty()) {
            return null;
        }
        while (nodes.size() > 1) {
            // Ascending by weight, so the two lightest trees sit at the front.
            Collections.sort(nodes);
            Node1 leftNode = nodes.remove(0);
            Node1 rightNode = nodes.remove(0);

            // Internal nodes carry no byte value, only the combined weight.
            Node1 parent = new Node1(null, leftNode.weight + rightNode.weight);
            parent.left = leftNode;
            parent.right = rightNode;
            nodes.add(parent);
        }
        // The single remaining entry is the root of the Huffman tree.
        return nodes.get(0);
    }
}
/**
 * Node of a Huffman tree, ordered by weight so that a list of nodes can be sorted
 * lightest-first.
 */
class Node1 implements Comparable<Node1> {
    /** Byte value held by this node; {@code null} when the node carries no value. */
    Byte data;
    /** Weight of the node, e.g. the number of occurrences of {@link #data}. */
    int weight;
    /** Left child, or {@code null} when absent. */
    Node1 left;
    /** Right child, or {@code null} when absent. */
    Node1 right;

    public Node1(Byte data, int weight) {
        this.data = data;
        this.weight = weight;
    }

    /** Prints this node, then its left subtree, then its right subtree. */
    public void preOrder() {
        System.out.println(this);
        if (this.left != null) {
            this.left.preOrder();
        }
        if (this.right != null) {
            this.right.preOrder();
        }
    }

    /** Orders nodes by ascending weight. */
    @Override
    public int compareTo(Node1 o) {
        // Integer.compare avoids the overflow that "this.weight - o.weight" can produce.
        return Integer.compare(this.weight, o.weight);
    }

    @Override
    public String toString() {
        return "Node1{" +
                "data=" + data +
                ", weight=" + weight +
                '}';
    }
}
// Generating the Huffman code table that corresponds to the Huffman tree.
// Approach:
// 1. Keep the code table in a Map<Byte,String>,
//    e.g. 32->01 97->100 100->11000 and so on.
static Map<Byte,String> huffmanCodes = new HashMap<>();
// 2. Building the table requires concatenating path bits, so a StringBuilder is created to hold the path to a leaf.
static StringBuilder stringBuilder = new StringBuilder();
/**
 * Walks the subtree rooted at {@code node} and stores the Huffman code of every leaf
 * in {@code huffmanCodes}.
 *
 * @param node          current node; {@code null} is ignored
 * @param code          edge taken to reach {@code node}: "0" for a left child, "1" for a
 *                      right child, "" for the root
 * @param stringBuilder path accumulated up to the parent of {@code node}; it is copied,
 *                      never modified
 */
private static void getCondes(Node1 node, String code, StringBuilder stringBuilder) {
    if (node == null) {
        return;
    }
    // Work on a copy so sibling branches never see each other's appended bits.
    StringBuilder path = new StringBuilder(stringBuilder).append(code);
    if (node.data == null) {
        // Internal node: descend, left edge = 0, right edge = 1.
        getCondes(node.left, "0", path);
        getCondes(node.right, "1", path);
    } else {
        // Leaf. An empty path means the whole tree is this one leaf (only one distinct
        // byte in the input); give it the one-bit code "0", because an empty code would
        // encode the input as zero bits and make it unrecoverable.
        huffmanCodes.put(node.data, path.length() == 0 ? "0" : path.toString());
    }
}
/**
 * Encodes {@code bytes} with the given Huffman code table and packs the resulting bit
 * string into bytes, eight bits per byte.
 *
 * <p>Each 8-character chunk of the bit string is parsed as a binary number and narrowed
 * to a byte, e.g. the chunk "10101000" parses to 168 and is stored as the byte -88
 * (two's complement). The final chunk may be shorter than eight bits; it is stored as
 * its numeric value, so the returned array alone does not record how many bits that
 * last chunk had.
 *
 * @param bytes        the original data
 * @param huffmanCodes code table mapping each byte value to its bit string
 * @return the packed Huffman-encoded data
 * @throws IllegalArgumentException if a byte in {@code bytes} has no entry in {@code huffmanCodes}
 */
private static byte[] zip(byte[] bytes, Map<Byte, String> huffmanCodes) {
    // 1. Translate every input byte into its code and concatenate the codes.
    StringBuilder bits = new StringBuilder();
    for (byte b : bytes) {
        String code = huffmanCodes.get(b);
        if (code == null) {
            // Appending null would insert the text "null" and fail later while parsing.
            throw new IllegalArgumentException("No Huffman code for byte value " + b);
        }
        bits.append(code);
    }
    // The bit string itself is larger than the input; it only becomes smaller once packed.
    System.out.println(bits.toString());

    // 2. Pack the bit string: ceil(length / 8) bytes are needed.
    byte[] huffmanCodeBytes = new byte[(bits.length() + 7) / 8];
    int index = 0;
    for (int i = 0; i < bits.length(); i += 8) {
        // The last chunk may hold fewer than eight bits.
        int end = Math.min(i + 8, bits.length());
        huffmanCodeBytes[index] = (byte) Integer.parseInt(bits.substring(i, end), 2);
        index++;
    }
    return huffmanCodeBytes;
}
完整代碼
package tree.huffmantree;
import java.util.*;
/**
 * Huffman compressor for in-memory data: counts byte frequencies, builds the Huffman
 * tree, derives the code table from it and packs the encoded bits into a byte array.
 */
public class HuffmanCode {
    public static void main(String[] args) {
        String content = "i like like like java do you like a java";
        // Name the charset explicitly so the byte values do not depend on the platform default.
        byte[] contentBytes = content.getBytes(java.nio.charset.StandardCharsets.UTF_8);
        byte[] bytes = huffmanZip(contentBytes);
        System.out.println("壓縮後的結果: " + Arrays.toString(bytes));
    }

    /**
     * Runs the whole pipeline: frequency count, tree construction, code-table
     * generation and bit packing.
     *
     * @param bytes the original data
     * @return the Huffman-encoded, packed data
     */
    private static byte[] huffmanZip(byte[] bytes) {
        // Step 1: one leaf per distinct byte.
        List<Node1> nodes = getNodes(bytes);
        // Step 2: merge the leaves into a Huffman tree.
        Node1 huffmanTree = createHuffmanTree(nodes);
        // Step 3: derive the code table from the tree.
        Map<Byte, String> codes = getCondes(huffmanTree);
        // Step 4: encode and pack.
        return zip(bytes, codes);
    }

    /**
     * Encodes {@code bytes} with the given Huffman code table and packs the resulting
     * bit string into bytes, eight bits per byte.
     *
     * <p>Each 8-character chunk of the bit string is parsed as a binary number and
     * narrowed to a byte, e.g. the chunk "10101000" parses to 168 and is stored as the
     * byte -88 (two's complement). The final chunk may be shorter than eight bits; it
     * is stored as its numeric value, so the returned array alone does not record how
     * many bits that last chunk had.
     *
     * @param bytes        the original data
     * @param huffmanCodes code table mapping each byte value to its bit string
     * @return the packed Huffman-encoded data
     * @throws IllegalArgumentException if a byte in {@code bytes} has no entry in {@code huffmanCodes}
     */
    private static byte[] zip(byte[] bytes, Map<Byte, String> huffmanCodes) {
        StringBuilder bits = new StringBuilder();
        for (byte b : bytes) {
            String code = huffmanCodes.get(b);
            if (code == null) {
                // Appending null would insert the text "null" and fail later while parsing.
                throw new IllegalArgumentException("No Huffman code for byte value " + b);
            }
            bits.append(code);
        }
        // The bit string itself is larger than the input; it only becomes smaller once packed.
        System.out.println(bits.toString());

        // ceil(length / 8) bytes are needed.
        byte[] huffmanCodeBytes = new byte[(bits.length() + 7) / 8];
        int index = 0;
        for (int i = 0; i < bits.length(); i += 8) {
            // The last chunk may hold fewer than eight bits.
            int end = Math.min(i + 8, bits.length());
            huffmanCodeBytes[index] = (byte) Integer.parseInt(bits.substring(i, end), 2);
            index++;
        }
        return huffmanCodeBytes;
    }

    /** Code table of the most recent {@link #getCondes(Node1)} call, e.g. 32->01, 97->100, 100->11000. */
    static Map<Byte, String> huffmanCodes = new HashMap<>();
    /** Empty path prefix handed to the recursive code generation for the root. */
    static StringBuilder stringBuilder = new StringBuilder();

    /**
     * Convenience overload: generates the code table for the whole tree.
     *
     * @param root root of the Huffman tree; may be {@code null}
     * @return the code table, empty when {@code root} is {@code null}
     */
    private static Map<Byte, String> getCondes(Node1 root) {
        // Start from a fresh table so codes of an earlier input cannot leak into this one.
        huffmanCodes = new HashMap<>();
        if (root != null) {
            getCondes(root, "", stringBuilder);
        }
        return huffmanCodes;
    }

    /**
     * Walks the subtree rooted at {@code node} and stores the Huffman code of every
     * leaf in {@link #huffmanCodes}.
     *
     * @param node          current node; {@code null} is ignored
     * @param code          edge taken to reach {@code node}: "0" for a left child, "1"
     *                      for a right child, "" for the root
     * @param stringBuilder path accumulated up to the parent of {@code node}; it is
     *                      copied, never modified
     */
    private static void getCondes(Node1 node, String code, StringBuilder stringBuilder) {
        if (node == null) {
            return;
        }
        // Work on a copy so sibling branches never see each other's appended bits.
        StringBuilder path = new StringBuilder(stringBuilder).append(code);
        if (node.data == null) {
            // Internal node: descend, left edge = 0, right edge = 1.
            getCondes(node.left, "0", path);
            getCondes(node.right, "1", path);
        } else {
            // Leaf. An empty path means the whole tree is this one leaf; give it the
            // one-bit code "0", because an empty code would encode the input as zero bits.
            huffmanCodes.put(node.data, path.length() == 0 ? "0" : path.toString());
        }
    }

    /**
     * Prints the tree rooted at {@code root} in pre-order, or a notice when there is no tree.
     *
     * @param root root of the Huffman tree; may be {@code null}
     */
    public static void preOrder(Node1 root) {
        if (root != null) {
            root.preOrder();
        } else {
            System.out.println("赫夫曼樹爲空");
        }
    }

    /**
     * Counts the occurrences of every distinct byte and wraps each (byte, count) pair in a leaf node.
     *
     * @param bytes the data to analyse
     * @return one leaf per distinct byte value, weighted by its number of occurrences
     */
    private static List<Node1> getNodes(byte[] bytes) {
        Map<Byte, Integer> counts = new HashMap<>();
        for (byte b : bytes) {
            Integer seen = counts.get(b);
            // First sighting of this byte starts its counter at 1.
            counts.put(b, seen == null ? 1 : seen + 1);
        }

        List<Node1> leaves = new ArrayList<>(counts.size());
        for (Map.Entry<Byte, Integer> entry : counts.entrySet()) {
            leaves.add(new Node1(entry.getKey(), entry.getValue()));
        }
        return leaves;
    }

    /**
     * Builds a Huffman tree by repeatedly merging the two lightest trees in {@code nodes}.
     * The list is consumed: on return it holds only the root.
     *
     * @param nodes the leaves to combine
     * @return the root of the tree, or {@code null} when there are no nodes to combine
     */
    private static Node1 createHuffmanTree(List<Node1> nodes) {
        // Nothing to merge: report "no tree" instead of failing on nodes.get(0) below.
        if (nodes == null || nodes.isEmpty()) {
            return null;
        }
        while (nodes.size() > 1) {
            // Ascending by weight, so the two lightest trees sit at the front.
            Collections.sort(nodes);
            Node1 leftNode = nodes.remove(0);
            Node1 rightNode = nodes.remove(0);

            // Internal nodes carry no byte value, only the combined weight.
            Node1 parent = new Node1(null, leftNode.weight + rightNode.weight);
            parent.left = leftNode;
            parent.right = rightNode;
            nodes.add(parent);
        }
        // The single remaining entry is the root of the Huffman tree.
        return nodes.get(0);
    }
}
/**
 * Node of a Huffman tree, ordered by weight so that a list of nodes can be sorted
 * lightest-first.
 */
class Node1 implements Comparable<Node1> {
    /** Byte value held by this node; {@code null} when the node carries no value. */
    Byte data;
    /** Weight of the node, e.g. the number of occurrences of {@link #data}. */
    int weight;
    /** Left child, or {@code null} when absent. */
    Node1 left;
    /** Right child, or {@code null} when absent. */
    Node1 right;

    public Node1(Byte data, int weight) {
        this.data = data;
        this.weight = weight;
    }

    /** Prints this node, then its left subtree, then its right subtree. */
    public void preOrder() {
        System.out.println(this);
        if (this.left != null) {
            this.left.preOrder();
        }
        if (this.right != null) {
            this.right.preOrder();
        }
    }

    /** Orders nodes by ascending weight. */
    @Override
    public int compareTo(Node1 o) {
        // Integer.compare avoids the overflow that "this.weight - o.weight" can produce.
        return Integer.compare(this.weight, o.weight);
    }

    @Override
    public String toString() {
        return "Node1{" +
                "data=" + data +
                ", weight=" + weight +
                '}';
    }
}
2.解壓(解碼)
// Decompression.
// Approach:
// 1. Turn huffmanCodeBytes [-88, -65, ...] back into the Huffman bit string.
// 2. Translate the bit string back into the original bytes using the code table.
/**
 * Decodes Huffman-compressed data.
 *
 * <p>Every byte except the last is expanded to exactly eight bits. The last byte is
 * expanded by {@code byteToBitString(false, ...)}, i.e. without padding, because the
 * number of bits it originally held is not part of the input.
 *
 * @param huffmanCodes code table mapping each byte value to its bit string
 * @param huffmanBytes the compressed data
 * @return the decoded bytes
 * @throws IllegalArgumentException if the remaining bits at some position do not match
 *                                  any code in the table
 */
private static byte[] decode(Map<Byte, String> huffmanCodes, byte[] huffmanBytes) {
    // 1. Rebuild the bit string from the packed bytes.
    StringBuilder bits = new StringBuilder();
    for (int i = 0; i < huffmanBytes.length; i++) {
        boolean isLast = (i == huffmanBytes.length - 1);
        bits.append(byteToBitString(!isLast, huffmanBytes[i]));
    }
    System.out.println("赫夫曼 解碼後 對應的二進制字符串:" + bits.toString());

    // 2. Invert the table (code -> byte) for reverse lookup.
    Map<String, Byte> byCode = new HashMap<>();
    for (Map.Entry<Byte, String> entry : huffmanCodes.entrySet()) {
        byCode.put(entry.getValue(), entry.getKey());
    }

    // 3. Scan the bit string: grow the window [start, end) until it matches a code.
    List<Byte> decoded = new ArrayList<>();
    int start = 0;
    while (start < bits.length()) {
        int end = start + 1;
        Byte symbol = byCode.get(bits.substring(start, end));
        while (symbol == null) {
            if (end == bits.length()) {
                // Ran out of bits without a match; fail with a clear message instead of
                // letting substring() run past the end of the string.
                throw new IllegalArgumentException(
                        "Bits from offset " + start + " do not match any Huffman code");
            }
            end++;
            symbol = byCode.get(bits.substring(start, end));
        }
        decoded.add(symbol);
        start = end;
    }

    // 4. Unbox the decoded symbols into the result array.
    byte[] result = new byte[decoded.size()];
    for (int i = 0; i < result.length; i++) {
        result[i] = decoded.get(i);
    }
    return result;
}
/**
 * Converts a byte to its binary-digit string, treating the byte as an unsigned
 * 8-bit value.
 *
 * @param flag {@code true} to left-pad with zeros to exactly eight digits;
 *             {@code false} (used for the last byte) to return the shortest form
 *             without leading zeros
 * @param b    the byte to convert
 * @return the binary digits of {@code b}
 */
private static String byteToBitString(boolean flag, byte b) {
    // Keep only the low eight bits: widening a negative byte to int sign-extends it,
    // and Integer.toBinaryString would then return 32 digits instead of 8.
    int unsigned = b & 0xFF;
    if (flag) {
        // OR-ing in bit 8 (256) forces nine digits, so the last eight are the
        // zero-padded byte: 1 0000 0000 | 0000 0001 => 1 0000 0001.
        String padded = Integer.toBinaryString(unsigned | 256);
        return padded.substring(padded.length() - 8);
    }
    return Integer.toBinaryString(unsigned);
}
3.對文件進行壓縮(加入io,通過對象流把赫夫曼編碼傳入,解壓的時候需要用)
/**
 * Compresses {@code srcFile} with Huffman coding and writes the result to {@code dstFile}.
 *
 * <p>The output is an object stream holding the compressed {@code byte[]} followed by
 * the code table; the table must be stored, otherwise the file cannot be restored.
 * Any failure is reported by printing the exception message; nothing is thrown.
 *
 * @param srcFile path of the file to compress
 * @param dstFile path of the compressed file to create
 */
public static void zipFile(String srcFile, String dstFile) {
    try {
        // Read the whole source file. A single read() sized by available() is not
        // guaranteed to return the complete content, so read until end of stream.
        byte[] source;
        try (InputStream is = new FileInputStream(srcFile)) {
            ByteArrayOutputStream buffer = new ByteArrayOutputStream();
            byte[] chunk = new byte[8192];
            int read;
            while ((read = is.read(chunk)) != -1) {
                buffer.write(chunk, 0, read);
            }
            source = buffer.toByteArray();
        }

        byte[] huffmanBytes = huffmanZip(source);

        // try-with-resources closes only streams that were actually opened, so a
        // failure above can no longer cause a NullPointerException during cleanup.
        try (OutputStream os = new FileOutputStream(dstFile);
             ObjectOutputStream oos = new ObjectOutputStream(os)) {
            oos.writeObject(huffmanBytes);
            // The code table is needed to restore the original file.
            oos.writeObject(huffmanCodes);
        }
    } catch (Exception e) {
        System.out.println(e.getMessage());
    }
}
4.對文件進行解壓
/**
 * Restores a file written by {@code zipFile}: reads the compressed bytes and the code
 * table from {@code zipFile}, decodes them and writes the result to {@code dstFile}.
 * Any failure is reported by printing the exception message; nothing is thrown.
 *
 * <p>NOTE(review): this uses Java native deserialization; only open files from a
 * trusted source.
 *
 * @param zipFile path of the compressed file
 * @param dstFile path of the restored file to create
 */
public static void unzipFile(String zipFile, String dstFile) {
    // try-with-resources closes only streams that were actually opened, so a missing
    // input file can no longer cause a NullPointerException during cleanup.
    try (InputStream is = new FileInputStream(zipFile);
         ObjectInputStream ois = new ObjectInputStream(is)) {
        // Same order as written: compressed bytes first, then the code table.
        byte[] huffmanBytes = (byte[]) ois.readObject();
        @SuppressWarnings("unchecked") // the writer stores a Map<Byte,String> here
        Map<Byte, String> huffmanCode = (Map<Byte, String>) ois.readObject();

        byte[] bytes = decode(huffmanCode, huffmanBytes);

        // Create the target only after decoding succeeded.
        try (OutputStream os = new FileOutputStream(dstFile)) {
            os.write(bytes);
        }
    } catch (Exception e) {
        System.out.println(e.getMessage());
    }
}
赫夫曼編碼可以壓縮所有類型的文件,因爲是通過字節進行壓縮
完整代碼
package tree.huffmantree.ZipFile;
import java.io.*;
import java.util.*;
/**
 * Huffman-based file compressor and decompressor.
 *
 * <p>File layout (Java object stream): the packed {@code byte[]}, the code table
 * ({@code Map<Byte,String>}), then an {@code int} with the exact number of encoded bits.
 * The trailing bit count lets the decoder restore leading zeros of the final,
 * possibly partial, byte. Files without it (written by earlier versions of this class)
 * are still readable and are decoded with the old best-effort rule for the last byte.
 */
public class HuffmanZipFile {
    /** Marker for "bit count not stored in the file". */
    private static final int UNKNOWN_BIT_LENGTH = -1;

    public static void main(String[] args) {
        // Compression test.
        String srcFile = "D:\\薛豔春\\桌面\\新建文件夾 (3)\\薛豔春2.pdf";
        String dstFile = "D:\\薛豔春\\桌面\\新建文件夾 (3)\\薛豔春2.zip";
        zipFile(srcFile, dstFile);
        System.out.println("壓縮成功~~");

        // Decompression test, disabled by default.
        String zipFile = "D:\\薛豔春\\桌面\\新建文件夾 (3)\\dst.zip";
        String dstFile2 = "D:\\薛豔春\\桌面\\新建文件夾 (3)\\src2.png";
        //unzipFile(zipFile,dstFile2);
    }

    /**
     * Restores a file written by {@link #zipFile(String, String)}.
     * Any failure is reported by printing the exception message; nothing is thrown.
     *
     * <p>NOTE(review): this uses Java native deserialization; only open files from a
     * trusted source.
     *
     * @param zipFile path of the compressed file
     * @param dstFile path of the restored file to create
     */
    public static void unzipFile(String zipFile, String dstFile) {
        // try-with-resources closes only streams that were actually opened, so a missing
        // input file can no longer cause a NullPointerException during cleanup.
        try (InputStream is = new FileInputStream(zipFile);
             ObjectInputStream ois = new ObjectInputStream(is)) {
            // Same order as written: compressed bytes, code table, optional bit count.
            byte[] huffmanBytes = (byte[]) ois.readObject();
            @SuppressWarnings("unchecked") // the writer stores a Map<Byte,String> here
            Map<Byte, String> huffmanCode = (Map<Byte, String>) ois.readObject();
            int bitLength = readBitLength(ois);

            byte[] bytes = decode(huffmanCode, huffmanBytes, bitLength);

            // Create the target only after decoding succeeded.
            try (OutputStream os = new FileOutputStream(dstFile)) {
                os.write(bytes);
            }
        } catch (Exception e) {
            System.out.println(e.getMessage());
        }
    }

    /** Reads the trailing bit count, or reports {@link #UNKNOWN_BIT_LENGTH} for files that end before it. */
    private static int readBitLength(ObjectInputStream ois) throws IOException {
        try {
            return ois.readInt();
        } catch (EOFException legacyFile) {
            // Older files stop after the code table.
            return UNKNOWN_BIT_LENGTH;
        }
    }

    /**
     * Compresses {@code srcFile} with Huffman coding and writes the result to {@code dstFile}.
     * Any failure is reported by printing the exception message; nothing is thrown.
     *
     * @param srcFile path of the file to compress
     * @param dstFile path of the compressed file to create
     */
    public static void zipFile(String srcFile, String dstFile) {
        try {
            byte[] source = readAllBytes(srcFile);
            byte[] huffmanBytes = huffmanZip(source);
            // huffmanZip has just rebuilt the table for this input.
            int bitLength = countEncodedBits(source, huffmanCodes);

            try (OutputStream os = new FileOutputStream(dstFile);
                 ObjectOutputStream oos = new ObjectOutputStream(os)) {
                oos.writeObject(huffmanBytes);
                // The code table is needed to restore the original file.
                oos.writeObject(huffmanCodes);
                // Exact bit count, so the width of the last byte is known when decoding.
                oos.writeInt(bitLength);
            }
        } catch (Exception e) {
            System.out.println(e.getMessage());
        }
    }

    /** Reads the complete content of {@code path}; a single read() is not guaranteed to do so. */
    private static byte[] readAllBytes(String path) throws IOException {
        try (InputStream in = new FileInputStream(path)) {
            ByteArrayOutputStream buffer = new ByteArrayOutputStream();
            byte[] chunk = new byte[8192];
            int read;
            while ((read = in.read(chunk)) != -1) {
                buffer.write(chunk, 0, read);
            }
            return buffer.toByteArray();
        }
    }

    /** Sums the code lengths of all bytes in {@code bytes}, i.e. the length of the encoded bit string. */
    private static int countEncodedBits(byte[] bytes, Map<Byte, String> codes) {
        int total = 0;
        for (byte b : bytes) {
            total += codes.get(b).length();
        }
        return total;
    }

    /**
     * Decodes data whose exact bit count is unknown (legacy behaviour): the last byte
     * is expanded without leading zeros.
     *
     * @param huffmanCodes code table mapping each byte value to its bit string
     * @param huffmanBytes the compressed data
     * @return the decoded bytes
     */
    private static byte[] decode(Map<Byte, String> huffmanCodes, byte[] huffmanBytes) {
        return decode(huffmanCodes, huffmanBytes, UNKNOWN_BIT_LENGTH);
    }

    /**
     * Decodes Huffman-compressed data.
     *
     * @param huffmanCodes code table mapping each byte value to its bit string
     * @param huffmanBytes the compressed data
     * @param bitLength    exact number of encoded bits, or {@link #UNKNOWN_BIT_LENGTH}
     * @return the decoded bytes
     * @throws IllegalArgumentException if {@code bitLength} does not fit the number of
     *                                  bytes, or the bits do not match the code table
     */
    private static byte[] decode(Map<Byte, String> huffmanCodes, byte[] huffmanBytes, int bitLength) {
        boolean lengthKnown = bitLength != UNKNOWN_BIT_LENGTH;
        if (lengthKnown && (bitLength < 0 || (bitLength + 7L) / 8 != huffmanBytes.length)) {
            throw new IllegalArgumentException(
                    "Bit count " + bitLength + " does not match " + huffmanBytes.length + " data bytes");
        }

        // 1. Rebuild the bit string from the packed bytes.
        StringBuilder bits = new StringBuilder();
        int lastIndex = huffmanBytes.length - 1;
        for (int i = 0; i < huffmanBytes.length; i++) {
            if (i < lastIndex) {
                bits.append(byteToBitString(true, huffmanBytes[i]));
            } else if (lengthKnown) {
                // Keep exactly as many low-order bits as the encoder put into the last byte.
                int width = bitLength - 8 * lastIndex;
                bits.append(byteToBitString(true, huffmanBytes[i]).substring(8 - width));
            } else {
                bits.append(byteToBitString(false, huffmanBytes[i]));
            }
        }

        // 2. Invert the table (code -> byte) for reverse lookup.
        Map<String, Byte> byCode = new HashMap<>();
        for (Map.Entry<Byte, String> entry : huffmanCodes.entrySet()) {
            byCode.put(entry.getValue(), entry.getKey());
        }

        // 3. Scan the bit string: grow the window [start, end) until it matches a code.
        List<Byte> decoded = new ArrayList<>();
        int start = 0;
        while (start < bits.length()) {
            int end = start + 1;
            Byte symbol = byCode.get(bits.substring(start, end));
            while (symbol == null) {
                if (end == bits.length()) {
                    // Ran out of bits without a match; fail with a clear message instead
                    // of letting substring() run past the end of the string.
                    throw new IllegalArgumentException(
                            "Bits from offset " + start + " do not match any Huffman code");
                }
                end++;
                symbol = byCode.get(bits.substring(start, end));
            }
            decoded.add(symbol);
            start = end;
        }

        // 4. Unbox the decoded symbols into the result array.
        byte[] result = new byte[decoded.size()];
        for (int i = 0; i < result.length; i++) {
            result[i] = decoded.get(i);
        }
        return result;
    }

    /**
     * Converts a byte to its binary-digit string, treating the byte as an unsigned
     * 8-bit value.
     *
     * @param flag {@code true} to left-pad with zeros to exactly eight digits;
     *             {@code false} to return the shortest form without leading zeros
     * @param b    the byte to convert
     * @return the binary digits of {@code b}
     */
    private static String byteToBitString(boolean flag, byte b) {
        // Keep only the low eight bits: widening a negative byte to int sign-extends it,
        // and Integer.toBinaryString would then return 32 digits instead of 8.
        int unsigned = b & 0xFF;
        if (flag) {
            // OR-ing in bit 8 (256) forces nine digits, so the last eight are the
            // zero-padded byte: 1 0000 0000 | 0000 0001 => 1 0000 0001.
            String padded = Integer.toBinaryString(unsigned | 256);
            return padded.substring(padded.length() - 8);
        }
        return Integer.toBinaryString(unsigned);
    }

    /**
     * Runs the whole compression pipeline: frequency count, tree construction,
     * code-table generation and bit packing.
     *
     * @param bytes the original data
     * @return the Huffman-encoded, packed data
     */
    private static byte[] huffmanZip(byte[] bytes) {
        // Step 1: one leaf per distinct byte.
        List<Node1> nodes = getNodes(bytes);
        // Step 2: merge the leaves into a Huffman tree.
        Node1 huffmanTree = createHuffmanTree(nodes);
        // Step 3: derive the code table from the tree.
        Map<Byte, String> codes = getCondes(huffmanTree);
        // Step 4: encode and pack.
        return zip(bytes, codes);
    }

    /**
     * Encodes {@code bytes} with the given Huffman code table and packs the resulting
     * bit string into bytes, eight bits per byte.
     *
     * <p>Each 8-character chunk of the bit string is parsed as a binary number and
     * narrowed to a byte, e.g. the chunk "10101000" parses to 168 and is stored as the
     * byte -88 (two's complement). The final chunk may be shorter than eight bits and
     * is stored as its numeric value.
     *
     * @param bytes        the original data
     * @param huffmanCodes code table mapping each byte value to its bit string
     * @return the packed Huffman-encoded data
     * @throws IllegalArgumentException if a byte in {@code bytes} has no entry in {@code huffmanCodes}
     */
    private static byte[] zip(byte[] bytes, Map<Byte, String> huffmanCodes) {
        StringBuilder bits = new StringBuilder();
        for (byte b : bytes) {
            String code = huffmanCodes.get(b);
            if (code == null) {
                // Appending null would insert the text "null" and fail later while parsing.
                throw new IllegalArgumentException("No Huffman code for byte value " + b);
            }
            bits.append(code);
        }

        // ceil(length / 8) bytes are needed.
        byte[] huffmanCodeBytes = new byte[(bits.length() + 7) / 8];
        int index = 0;
        for (int i = 0; i < bits.length(); i += 8) {
            // The last chunk may hold fewer than eight bits.
            int end = Math.min(i + 8, bits.length());
            huffmanCodeBytes[index] = (byte) Integer.parseInt(bits.substring(i, end), 2);
            index++;
        }
        return huffmanCodeBytes;
    }

    /** Code table of the most recent {@link #getCondes(Node1)} call, e.g. 32->01, 97->100, 100->11000. */
    static Map<Byte, String> huffmanCodes = new HashMap<>();
    /** Empty path prefix handed to the recursive code generation for the root. */
    static StringBuilder stringBuilder = new StringBuilder();

    /**
     * Convenience overload: generates the code table for the whole tree.
     *
     * @param root root of the Huffman tree; may be {@code null}
     * @return the code table, empty when {@code root} is {@code null}
     */
    private static Map<Byte, String> getCondes(Node1 root) {
        // Start from a fresh table so codes of an earlier input cannot leak into this one.
        huffmanCodes = new HashMap<>();
        if (root != null) {
            getCondes(root, "", stringBuilder);
        }
        return huffmanCodes;
    }

    /**
     * Walks the subtree rooted at {@code node} and stores the Huffman code of every
     * leaf in {@link #huffmanCodes}.
     *
     * @param node          current node; {@code null} is ignored
     * @param code          edge taken to reach {@code node}: "0" for a left child, "1"
     *                      for a right child, "" for the root
     * @param stringBuilder path accumulated up to the parent of {@code node}; it is
     *                      copied, never modified
     */
    private static void getCondes(Node1 node, String code, StringBuilder stringBuilder) {
        if (node == null) {
            return;
        }
        // Work on a copy so sibling branches never see each other's appended bits.
        StringBuilder path = new StringBuilder(stringBuilder).append(code);
        if (node.data == null) {
            // Internal node: descend, left edge = 0, right edge = 1.
            getCondes(node.left, "0", path);
            getCondes(node.right, "1", path);
        } else {
            // Leaf. An empty path means the whole tree is this one leaf; give it the
            // one-bit code "0", because an empty code would encode the input as zero bits.
            huffmanCodes.put(node.data, path.length() == 0 ? "0" : path.toString());
        }
    }

    /**
     * Prints the tree rooted at {@code root} in pre-order, or a notice when there is no tree.
     *
     * @param root root of the Huffman tree; may be {@code null}
     */
    public static void preOrder(Node1 root) {
        if (root != null) {
            root.preOrder();
        } else {
            System.out.println("赫夫曼樹爲空");
        }
    }

    /**
     * Counts the occurrences of every distinct byte and wraps each (byte, count) pair in a leaf node.
     *
     * @param bytes the data to analyse
     * @return one leaf per distinct byte value, weighted by its number of occurrences
     */
    private static List<Node1> getNodes(byte[] bytes) {
        Map<Byte, Integer> counts = new HashMap<>();
        for (byte b : bytes) {
            Integer seen = counts.get(b);
            // First sighting of this byte starts its counter at 1.
            counts.put(b, seen == null ? 1 : seen + 1);
        }

        List<Node1> leaves = new ArrayList<>(counts.size());
        for (Map.Entry<Byte, Integer> entry : counts.entrySet()) {
            leaves.add(new Node1(entry.getKey(), entry.getValue()));
        }
        return leaves;
    }

    /**
     * Builds a Huffman tree by repeatedly merging the two lightest trees in {@code nodes}.
     * The list is consumed: on return it holds only the root.
     *
     * @param nodes the leaves to combine
     * @return the root of the tree, or {@code null} when there are no nodes to combine
     */
    private static Node1 createHuffmanTree(List<Node1> nodes) {
        // Nothing to merge: report "no tree" instead of failing on nodes.get(0) below.
        if (nodes == null || nodes.isEmpty()) {
            return null;
        }
        while (nodes.size() > 1) {
            // Ascending by weight, so the two lightest trees sit at the front.
            Collections.sort(nodes);
            Node1 leftNode = nodes.remove(0);
            Node1 rightNode = nodes.remove(0);

            // Internal nodes carry no byte value, only the combined weight.
            Node1 parent = new Node1(null, leftNode.weight + rightNode.weight);
            parent.left = leftNode;
            parent.right = rightNode;
            nodes.add(parent);
        }
        // The single remaining entry is the root of the Huffman tree.
        return nodes.get(0);
    }
}
/**
 * Node of a Huffman tree, ordered by weight so that a list of nodes can be sorted
 * lightest-first.
 */
class Node1 implements Comparable<Node1> {
    /** Byte value held by this node; {@code null} when the node carries no value. */
    Byte data;
    /** Weight of the node, e.g. the number of occurrences of {@link #data}. */
    int weight;
    /** Left child, or {@code null} when absent. */
    Node1 left;
    /** Right child, or {@code null} when absent. */
    Node1 right;

    public Node1(Byte data, int weight) {
        this.data = data;
        this.weight = weight;
    }

    /** Prints this node, then its left subtree, then its right subtree. */
    public void preOrder() {
        System.out.println(this);
        if (this.left != null) {
            this.left.preOrder();
        }
        if (this.right != null) {
            this.right.preOrder();
        }
    }

    /** Orders nodes by ascending weight. */
    @Override
    public int compareTo(Node1 o) {
        // Integer.compare avoids the overflow that "this.weight - o.weight" can produce.
        return Integer.compare(this.weight, o.weight);
    }

    @Override
    public String toString() {
        return "Node1{" +
                "data=" + data +
                ", weight=" + weight +
                '}';
    }
}