最近要處理從頁面導出交易數據的excel報表過大問題,首先想到的是生成excel文件時壓縮成zip格式文件,然後導出壓縮文件。於是本地實驗,將一行重複寫了60多萬行,大小爲118M,壓縮輸出後,只有5M多,差不多隻有當初的1/20了,以爲還不錯。結果放到生產環境上後,同樣大小的文件壓縮效果不理想,一個118M的excel文件壓縮後還有115M,幾乎沒有作用。原來本地做的測試方式有誤。excel文件的壓縮不只跟本身大小有關,還和裏面的數據內容差異化有關。數據內容差異越大,壓縮比越小。
最後只好還是將交易數據寫入csv格式文件中,這種純文本文件可以被壓縮得很小。
修正後的實驗對比(同樣寫入600000行,每一個單元格爲6位數字。文件一:都爲666666,文件二:都爲6位隨機數),測試對比效果很明顯:
附測試代碼:
import de.schlichtherle.util.zip.ZipEntry;
import de.schlichtherle.util.zip.ZipOutputStream;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import com.csvreader.CsvWriter;
import java.io.*;
import java.util.ArrayList;
import java.util.List;
public class Demo {
private static int times = 600000;
public static void main(String[] args) {
String title = "測試1";
String[] headers = new String[] { "列A", "列B", "列C", "列D", "列E", "列F" };
// 文件1--數據重複
List<String[]> dataList = new ArrayList<>();
for (int k = 0; k < times; k++) {
String[] dataArr = new String[6];
for (int i = 0; i < dataArr.length; i++) {
dataArr[i] = "666666";
}
dataList.add(dataArr);
}
// 文件2--全爲隨機數
List<String[]> dataList2 = new ArrayList<>();
for (int k = 0; k < times; k++) {
String[] dataArr = new String[6];
for (int i = 0; i < dataArr.length; i++) {
dataArr[i] = String.valueOf((int) ((Math.random() * 9 + 1) * 100000));
}
dataList2.add(dataArr);
}
String outputPath1 = "D:\\E_disk\\testReport\\test1.xlsx";
String outputPath2 = "D:\\E_disk\\testReport\\test1.zip";
writeExcel(title, headers, dataList, outputPath1);
File file1 = new File(outputPath1);
File file2 = new File(outputPath2);
createZipFile(file1, file2);
String outputPath3 = "D:\\E_disk\\testReport\\test2.xlsx";
String outputPath4 = "D:\\E_disk\\testReport\\test2.zip";
writeExcel(title, headers, dataList2, outputPath3);
File file3 = new File(outputPath3);
File file4 = new File(outputPath4);
createZipFile(file3, file4);
// 測試csv文件
String outputPath5 = "D:\\E_disk\\testReport\\test3.csv";
String outputPath6 = "D:\\E_disk\\testReport\\test3.zip";
writeCsv(headers, dataList, outputPath5);
File file5 = new File(outputPath5);
File file6 = new File(outputPath6);
createZipFile(file5, file6);
String outputPath7 = "D:\\E_disk\\testReport\\test4.csv";
String outputPath8 = "D:\\E_disk\\testReport\\test4.zip";
writeCsv(headers, dataList2, outputPath7);
File file7 = new File(outputPath7);
File file8 = new File(outputPath8);
createZipFile(file7, file8);
}
public static void writeExcel(String title, String[] headers, List<String[]> dataList, String outputPath) {
FileOutputStream os = null;
BufferedOutputStream bos = null;
Workbook workbook = null;
try {
// 聲明一個工作薄
workbook = new SXSSFWorkbook(1000);
// 生成一個表格
Sheet sheet = workbook.createSheet(title);
// 設置表格默認列寬度爲15個字節
sheet.setDefaultColumnWidth(15);
// 產生表格標題行
Row row = sheet.createRow(0);
for (int i = 0; i < headers.length; i++) {
Cell cell = row.createCell(i);
XSSFRichTextString text = new XSSFRichTextString(headers[i]);
cell.setCellValue(text);
}
// 遍歷集合數據,產生數據行
for (int i = 0; i < dataList.size(); i++) {
String[] dataArr = dataList.get(i);
row = sheet.createRow(i + 1);
for (int k = 0; k < dataArr.length; k++) {
Cell cell0 = row.createCell(k);
cell0.setCellValue(dataArr[k]);
}
}
File outFile = new File(outputPath);
os = new FileOutputStream(outFile);
bos = new BufferedOutputStream(os);
workbook.write(bos);
bos.flush();
} catch (IOException e) {
e.printStackTrace();
} finally {
try {
bos.close();
os.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
public static void writeCsv(String[] headers, List<String[]> dataList, String outputPath) {
FileOutputStream fos = null;
BufferedOutputStream bos = null;
CsvWriter cwriter = null;
try {
File outputfile = new File(outputPath);
fos = new FileOutputStream(outputfile);
bos = new BufferedOutputStream(fos);
cwriter = new CsvWriter(bos, ',', Charset.forName("GBK"));
cwriter.writeRecord(headers);
for (String[] strarr : dataList) {
for (int i = 0; i < strarr.length; i++) {
strarr[i] = "\t" + strarr[i];
}
cwriter.writeRecord(strarr, true);
}
cwriter.flush();
} catch (Exception e) {
e.printStackTrace();
} finally {
try {
if (cwriter != null)
cwriter.close();
if (bos != null)
bos.close();
if (fos != null)
fos.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
public static void createZipFile(File inputFile, File zipFile) {
FileInputStream fis = null;
BufferedInputStream bis = null;
FileOutputStream fos = null;
ZipOutputStream zipOut = null;
BufferedOutputStream bos = null;
try {
fis = new FileInputStream(inputFile);
bis = new BufferedInputStream(fis, 2048);
fos = new FileOutputStream(zipFile);
bos = new BufferedOutputStream(fos);
zipOut = new ZipOutputStream(bos);
zipOut.putNextEntry(new ZipEntry(inputFile.getName()));
byte[] buf = new byte[2048];
int len;
while ((len = bis.read(buf, 0, 2048)) != -1) {
zipOut.write(buf, 0, len);
}
zipOut.flush();
} catch (Throwable e) {
e.printStackTrace();
} finally {
try {
if (bis != null)
bis.close();
if (fis != null)
fis.close();
if (zipOut != null)
zipOut.close();
if (bos != null)
bos.close();
if (fos != null)
fos.close();
// 壓縮完成後刪除原始文件
// if(inputFile.exists())inputFile.delete();
} catch (IOException e) {
e.printStackTrace();
}
}
}
}