壓縮excel報表時excel內容差異越大,壓縮比越小

          最近要處理從頁面導出交易數據的excel報表過大的問題,首先想到的是在生成excel文件時將其壓縮成zip格式,然後導出壓縮文件。於是在本地做實驗:將同一行數據重複寫了60多萬行,文件大小爲118M,壓縮輸出後只有5M多,差不多只有原來的1/20,以爲效果還不錯。結果放到生產環境後,同樣大小的文件壓縮效果並不理想,一個118M的excel文件壓縮後還有115M,幾乎沒有作用。原來是本地的測試方式有誤:excel文件的壓縮效果不只跟文件本身的大小有關,還和裏面數據內容的差異程度有關。數據內容差異越大,壓縮比越小。

            最後只好還是將交易數據寫入csv格式文件中,這種純文本文件可以被壓縮得很小。

修正後的實驗對比(同樣寫入600000行,每一個單元格爲6位數字。文件一:都爲666666,文件二:都爲6位隨機數),測試對比效果很明顯:

附測試代碼:


import com.csvreader.CsvWriter;
import de.schlichtherle.util.zip.ZipEntry;
import de.schlichtherle.util.zip.ZipOutputStream;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;

import java.io.*;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;

/**
 * Experiment comparing how well ZIP compression shrinks exported reports.
 *
 * <p>Four files are generated with the same number of rows and columns: an
 * xlsx and a csv filled with one repeated value, and an xlsx and a csv filled
 * with random six-digit numbers. Each file is then zipped next to the original
 * so the resulting sizes can be compared by hand.
 */
public class Demo {
	/** Number of data rows written to every generated file. */
	private static final int ROW_COUNT = 600000;

	/** Number of cells in every generated data row. */
	private static final int COLUMN_COUNT = 6;

	/** Directory (with trailing separator) that receives all generated files. */
	private static final String OUTPUT_DIR = "D:\\E_disk\\testReport\\";

	/** Size in bytes of the copy buffer used when zipping a file. */
	private static final int BUFFER_SIZE = 2048;

	/**
	 * Generates the four sample files and zips each of them.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		String title = "測試1";
		String[] headers = new String[] { "列A", "列B", "列C", "列D", "列E", "列F" };
		// File 1 -- every cell holds the same value
		List<String[]> repeatedRows = buildRepeatedRows();
		// File 2 -- every cell holds a random number
		List<String[]> randomRows = buildRandomRows();

		String repeatedXlsx = OUTPUT_DIR + "test1.xlsx";
		writeExcel(title, headers, repeatedRows, repeatedXlsx);
		createZipFile(new File(repeatedXlsx), new File(OUTPUT_DIR + "test1.zip"));

		String randomXlsx = OUTPUT_DIR + "test2.xlsx";
		writeExcel(title, headers, randomRows, randomXlsx);
		createZipFile(new File(randomXlsx), new File(OUTPUT_DIR + "test2.zip"));

		// Same data again, this time as csv files
		String repeatedCsv = OUTPUT_DIR + "test3.csv";
		writeCsv(headers, repeatedRows, repeatedCsv);
		createZipFile(new File(repeatedCsv), new File(OUTPUT_DIR + "test3.zip"));

		String randomCsv = OUTPUT_DIR + "test4.csv";
		writeCsv(headers, randomRows, randomCsv);
		createZipFile(new File(randomCsv), new File(OUTPUT_DIR + "test4.zip"));
	}

	/** Builds ROW_COUNT rows in which every cell is the literal "666666". */
	private static List<String[]> buildRepeatedRows() {
		List<String[]> rows = new ArrayList<>(ROW_COUNT);
		for (int r = 0; r < ROW_COUNT; r++) {
			String[] row = new String[COLUMN_COUNT];
			for (int c = 0; c < row.length; c++) {
				row[c] = "666666";
			}
			rows.add(row);
		}
		return rows;
	}

	/** Builds ROW_COUNT rows in which every cell is a random number in [100000, 999999]. */
	private static List<String[]> buildRandomRows() {
		List<String[]> rows = new ArrayList<>(ROW_COUNT);
		for (int r = 0; r < ROW_COUNT; r++) {
			String[] row = new String[COLUMN_COUNT];
			for (int c = 0; c < row.length; c++) {
				row[c] = String.valueOf((int) ((Math.random() * 9 + 1) * 100000));
			}
			rows.add(row);
		}
		return rows;
	}

	/**
	 * Writes a header row followed by the data rows into a single-sheet xlsx
	 * file, using the streaming workbook so that only a window of rows is kept
	 * in memory.
	 *
	 * <p>I/O failures are printed to stderr and not rethrown.
	 *
	 * @param title      name of the sheet to create
	 * @param headers    cell values for the first row
	 * @param dataList   data rows; each array becomes one sheet row
	 * @param outputPath path of the xlsx file to create or overwrite
	 */
	public static void writeExcel(String title, String[] headers, List<String[]> dataList, String outputPath) {
		// Keep at most 1000 rows in memory; older rows are flushed to temp files.
		SXSSFWorkbook workbook = new SXSSFWorkbook(1000);
		try {
			Sheet sheet = workbook.createSheet(title);
			// Default column width of 15 characters
			sheet.setDefaultColumnWidth(15);

			// Header row
			Row row = sheet.createRow(0);
			for (int i = 0; i < headers.length; i++) {
				Cell cell = row.createCell(i);
				cell.setCellValue(new XSSFRichTextString(headers[i]));
			}

			// One sheet row per data array, placed below the header
			for (int i = 0; i < dataList.size(); i++) {
				String[] dataArr = dataList.get(i);
				row = sheet.createRow(i + 1);
				for (int k = 0; k < dataArr.length; k++) {
					Cell cell = row.createCell(k);
					cell.setCellValue(dataArr[k]);
				}
			}

			// try-with-resources closes the streams even when opening or
			// writing fails, without the null dereference a manual finally risks.
			try (FileOutputStream os = new FileOutputStream(outputPath);
					BufferedOutputStream bos = new BufferedOutputStream(os)) {
				workbook.write(bos);
			}
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			// Remove the temporary files backing the streaming workbook.
			workbook.dispose();
		}
	}

	/**
	 * Writes a header record followed by the data rows into a GBK-encoded,
	 * comma-separated file. Every data value is written with a leading tab
	 * character; the input arrays themselves are left unmodified.
	 *
	 * <p>Failures are printed to stderr and not rethrown.
	 *
	 * @param headers    values of the first record
	 * @param dataList   data rows; each array becomes one record
	 * @param outputPath path of the csv file to create or overwrite
	 */
	public static void writeCsv(String[] headers, List<String[]> dataList, String outputPath) {
		try (FileOutputStream fos = new FileOutputStream(outputPath);
				BufferedOutputStream bos = new BufferedOutputStream(fos)) {
			CsvWriter cwriter = new CsvWriter(bos, ',', Charset.forName("GBK"));
			try {
				cwriter.writeRecord(headers);
				for (String[] dataArr : dataList) {
					// Prefix into a copy so the caller's rows are not altered.
					String[] record = new String[dataArr.length];
					for (int i = 0; i < dataArr.length; i++) {
						record[i] = "\t" + dataArr[i];
					}
					// preserveSpaces = true so the leading tab is not trimmed
					cwriter.writeRecord(record, true);
				}
				cwriter.flush();
			} finally {
				cwriter.close();
			}
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/**
	 * Compresses one file into a new zip archive containing a single entry
	 * named after the input file. The input file is left in place.
	 *
	 * <p>Failures are printed to stderr and not rethrown.
	 *
	 * @param inputFile file to compress
	 * @param zipFile   archive to create or overwrite
	 */
	public static void createZipFile(File inputFile, File zipFile) {
		// Resources are closed in reverse order, each one independently, so a
		// failing close on the input side can no longer prevent the archive
		// from being finalised.
		try (FileInputStream fis = new FileInputStream(inputFile);
				BufferedInputStream bis = new BufferedInputStream(fis, BUFFER_SIZE);
				FileOutputStream fos = new FileOutputStream(zipFile);
				BufferedOutputStream bos = new BufferedOutputStream(fos);
				ZipOutputStream zipOut = new ZipOutputStream(bos)) {
			zipOut.putNextEntry(new ZipEntry(inputFile.getName()));
			byte[] buf = new byte[BUFFER_SIZE];
			int len;
			while ((len = bis.read(buf, 0, buf.length)) != -1) {
				zipOut.write(buf, 0, len);
			}
			zipOut.flush();
		} catch (Exception e) {
			// Exception rather than Throwable: JVM errors such as
			// OutOfMemoryError should not be swallowed here.
			e.printStackTrace();
		}
	}
}

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章