GO實現文件壓縮算法

實現原理

   讀取文件,統計每個字節出現的次數作爲權值,據此構建哈夫曼樹,獲取每個字節的哈夫曼編碼,最後把文件頭、字符頻率表和按位壓縮後的數據寫入輸出文件。

 

壓縮文件頭定義

// compressHead describes the header of a compressed file.
type compressHead struct {
	// srclen is the number of bytes in the source file.
	srclen uint32
	// dstlen is the number of bytes in the compressed payload.
	dstlen uint32
	// keymapLen is the number of symbol entries in the Huffman mapping.
	keymapLen uint32
	// patchBit is the number of zero bits appended to fill the last byte.
	patchBit uint8
	// keysMap holds the per-symbol occurrence counts used to build the
	// Huffman tree.
	keysMap map[interface{}]uint32
}

壓縮實現過程如下

// getCompressedBytes serialises the header followed by the compressed payload
// in little-endian order and returns the resulting bytes.
//
// Layout: srclen, dstlen, keymapLen, patchBit, then every keysMap entry as
// (key, count), then data. The entries are emitted in map iteration order.
// Write errors are printed to stdout; if the very first write fails nothing
// else is attempted, while later failures are reported and skipped.
func getCompressedBytes(pHead *compressHead, data []byte) []byte {
	var out bytes.Buffer

	// put appends one value and reports whether the write succeeded.
	put := func(v interface{}) bool {
		err := binary.Write(&out, binary.LittleEndian, v)
		if err != nil {
			fmt.Println(err.Error())
		}
		return err == nil
	}

	if !put(pHead.srclen) {
		return out.Bytes()
	}
	put(pHead.dstlen)
	put(pHead.keymapLen)
	put(pHead.patchBit)

	for symbol, count := range pHead.keysMap {
		put(symbol)
		put(count)
	}

	put(data)
	return out.Bytes()
}

func Compress(strInFileName, strOutFileName string) bool {
	if data, err := ioutil.ReadFile(strInFileName); err != nil {
		fmt.Println(err.Error())
		return false
	} else {
		keys := make(map[interface{}]uint32)
		for i := 0; i < len(data); i++ {
			if _, ok := keys[data[i]]; ok {
				keys[data[i]]++
			} else {
				keys[data[i]] = 1
			}
		}

		if pTree := CreatHuffman(keys); pTree != nil {
			pHead := &compressHead{srclen: (uint32(len(data))), keysMap: keys}
			compressdata := make([]byte, 0)
			factor := make(map[byte][]byte)
			for i := 0; i < len(data); i++ {
				if value, ok := factor[data[i]]; ok {
					compressdata = append(compressdata, value...)
				} else {
					if code, codeok := GetHuffmanCode(data[i], pTree); codeok {
						factor[data[i]] = code
						compressdata = append(compressdata, code...)
					}
				}
			}

			pHead.keymapLen = (uint32(len(pHead.keysMap)))
			pHead.patchBit = (uint8)(8 - len(compressdata)%8)
			for i := uint8(0); i < pHead.patchBit; i++ {
				compressdata = append(compressdata, 0)
			}

			afterdata := make([]byte, 0)
			for ; len(compressdata) >= 8; compressdata = compressdata[8:] {
				var b byte = 0
				for i := 0; i < 8; i++ {
					b |= compressdata[i] << (7 - i)
				}

				afterdata = append(afterdata, b)
			}

			fmt.Printf("after data:%v\n", afterdata)
			pHead.dstlen = (uint32(len(afterdata)))
			buf := getCompressedBytes(pHead, afterdata)
			ioutil.WriteFile(strOutFileName, buf, 0666)
		}
	}

	return true
}

哈夫曼樹的構造(即上文用到的 CreatHuffman、GetHuffmanCode,以及解壓縮時用到的 GetHuffmanValue)不在本文範圍內,可以參考「哈夫曼樹構建」一文。

解壓縮過程

解壓縮時先讀取壓縮文件的頭部信息,根據其中的字符頻率表重新構建哈夫曼樹,然後逐bit遍歷壓縮數據:每走到一個葉子節點就還原出一個字符,再回到根節點繼續。

// getCompressedHead parses a compressed file image into its header and its
// payload.
//
// data is expected to hold, in little-endian order: srclen, dstlen and
// keymapLen (uint32 each), patchBit (uint8), keymapLen entries of one symbol
// byte plus a uint32 count, and finally dstlen payload bytes. The returned
// payload is a copy and does not alias data.
//
// The length fields come from the file, so they are checked against the
// number of bytes actually present before anything is allocated or looped
// over. The signature has no error result; when data is truncated or
// inconsistent the problem is printed to stdout and a zeroed header (with an
// empty, non-nil keysMap) and an empty payload are returned. The returned
// header is never nil.
func getCompressedHead(data []byte) (*compressHead, []byte) {
	// Each table entry is one symbol byte followed by a uint32 count.
	const entrySize = 5

	malformed := func(reason string) (*compressHead, []byte) {
		fmt.Println("compressed data: " + reason)
		return &compressHead{keysMap: make(map[interface{}]uint32)}, []byte{}
	}

	buf := bytes.NewBuffer(data)
	pHead := new(compressHead)

	fixed := []interface{}{&pHead.srclen, &pHead.dstlen, &pHead.keymapLen, &pHead.patchBit}
	for _, field := range fixed {
		if err := binary.Read(buf, binary.LittleEndian, field); err != nil {
			return malformed("header is truncated: " + err.Error())
		}
	}

	// Reject a table size the remaining bytes cannot hold before looping.
	if uint64(pHead.keymapLen)*entrySize > uint64(buf.Len()) {
		return malformed("symbol table is truncated")
	}
	pHead.keysMap = make(map[interface{}]uint32, pHead.keymapLen)
	for i := uint32(0); i < pHead.keymapLen; i++ {
		var key byte
		var value uint32
		if err := binary.Read(buf, binary.LittleEndian, &key); err != nil {
			return malformed("symbol table is truncated: " + err.Error())
		}
		if err := binary.Read(buf, binary.LittleEndian, &value); err != nil {
			return malformed("symbol table is truncated: " + err.Error())
		}
		pHead.keysMap[key] = value
	}

	// Same for the payload: never allocate more than is actually present.
	if uint64(pHead.dstlen) > uint64(buf.Len()) {
		return malformed("payload is shorter than the header claims")
	}
	dstdata := make([]byte, pHead.dstlen)
	copy(dstdata, buf.Next(int(pHead.dstlen)))

	return pHead, dstdata
}

// UnCompress decodes the compressed file strInFileName and writes the
// restored bytes to strOutFileName.
//
// The Huffman tree is rebuilt from the symbol counts in the header and the
// payload is walked bit by bit, most significant bit first: a set bit goes to
// the right child, a clear bit to the left child, and reaching a node that
// carries a value emits one byte and restarts from the root.
//
// Decoding stops as soon as srclen bytes have been produced, so trailing
// padding bits (whatever their number) are never interpreted as data. It
// returns true only when exactly srclen bytes were decoded and written; any
// failure is printed to stdout and reported as false.
func UnCompress(strInFileName, strOutFileName string) bool {
	data, err := ioutil.ReadFile(strInFileName)
	if err != nil {
		fmt.Println(err.Error())
		return false
	}

	pHead, dst := getCompressedHead(data)

	var src []byte
	if pHead.srclen > 0 {
		pTree := CreatHuffman(pHead.keysMap)
		if pTree == nil {
			fmt.Println("uncompress: could not rebuild the Huffman tree")
			return false
		}

		if pTree.Value != nil {
			// The root itself carries a value, so there is nothing to walk.
			// NOTE(review): presumably this is how a one-symbol input is
			// represented; confirm against CreatHuffman.
			v, ok := GetHuffmanValue(pTree).(byte)
			if !ok {
				fmt.Println("uncompress: Huffman leaf does not hold a byte")
				return false
			}
			src = make([]byte, pHead.srclen)
			for i := range src {
				src[i] = v
			}
		} else {
			// Every decoded byte consumes at least one bit, which bounds the
			// useful capacity independently of the srclen stored in the file.
			capHint := uint64(len(dst)) * 8
			if uint64(pHead.srclen) < capHint {
				capHint = uint64(pHead.srclen)
			}
			src = make([]byte, 0, capHint)

			node := pTree
		decode:
			for _, b := range dst {
				for i := 7; i >= 0; i-- {
					if (b>>i)&1 != 0 {
						node = node.GetRight()
					} else {
						node = node.GetLeft()
					}
					if node == nil {
						fmt.Println("uncompress: bit stream leaves the Huffman tree")
						return false
					}
					if node.Value == nil {
						continue
					}

					v, ok := GetHuffmanValue(node).(byte)
					if !ok {
						fmt.Println("uncompress: Huffman leaf does not hold a byte")
						return false
					}
					src = append(src, v)
					if uint32(len(src)) == pHead.srclen {
						break decode
					}
					node = pTree
				}
			}
		}

		if uint32(len(src)) != pHead.srclen {
			fmt.Println("uncompress: payload ended before all bytes were decoded")
			return false
		}
	}

	if err := ioutil.WriteFile(strOutFileName, src, 0666); err != nil {
		fmt.Println(err.Error())
		return false
	}
	return true
}

 

發佈了58 篇原創文章 · 獲贊 8 · 訪問量 2萬+
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章