寫Excel 2007大文件的一個取巧方法

1.問題
用[url=http://poi.apache.org/]POI[/url]來讀寫excel文件相信大家都不陌生了。最近項目中有個需求,需要讀取一個excel2007格式的模板,然後再寫10000條記錄到這個excel裏,最後輸出。本人一開始用poi的user model來寫excel,但是測試下來發現內存消耗極大(2G),且需要10分鐘才能完成excel的生成。那有沒有性能更好更快的方法呢?poi還提供了2種方法,一種是event model,但只支持讀,還有一種sxsff,只支持寫。由於我們原來excel模板裏有許多格式還有控件,vba宏在,需要同時具有讀和寫的功能。所以這2種方法都不行。

2.取巧方法
本人在這裏找到的方法:[url]http://vikramvkamath.blogspot.com/2010/07/writing-large-excel-files-excel-2007.html[/url]

原理:excel2007格式文件其實就是一個zip壓縮包。可以用7zip等壓縮軟件打開。解壓出來結構如下:
/
- ..
- _rels
- .rels
- docProps
- app.xml
- core.xml
- xl
- _rels
- worksheets
- sheet1.xml
- [color=red][b]sheet2.xml[/b][/color]
- sharedStrings.xml
- styles.xml
- workbook.xml
- [Content_Types].xml


如上紅色的sheet2.xml打開看一下,你會發現這是第二個sheet的數據文件。所以這個取巧的方法就是用java來寫這個xml文件,然後再重新打回壓縮包裏。寫xml的話就比寫xlsx快許多了。

上個代碼示例,來源是去下載poi的源代碼包,找到裏面的examples。或者去[url=http://svn.apache.org/repos/asf/poi/trunk/src/examples/src/org/apache/poi/xssf/usermodel/examples/BigGridDemo.java]這裏[/url]查看


import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.Calendar;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipOutputStream;

import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.IndexedColors;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFDataFormat;
import org.apache.poi.xssf.usermodel.XSSFFont;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

/**
* Demonstrates a workaround you can use to generate large workbooks and avoid OutOfMemory exception.
*
* The trick is as follows:
* 1. create a template workbook, create sheets and global objects such as cell styles, number formats, etc.
* 2. create an application that streams data in a text file
* 3. Substitute the sheet in the template with the generated data
*
* @author Yegor Kozlov
*/
public class BigGridDemo {
private static final String XML_ENCODING = "UTF-8";

public static void main(String[] args) throws Exception {

// Step 1. Create a template file. Setup sheets and workbook-level objects such as
// cell styles, number formats, etc.

XSSFWorkbook wb = new XSSFWorkbook();
XSSFSheet sheet = wb.createSheet("Big Grid");

Map<String, XSSFCellStyle> styles = createStyles(wb);
//name of the zip entry holding sheet data, e.g. /xl/worksheets/sheet1.xml
String sheetRef = sheet.getPackagePart().getPartName().getName();

//save the template
FileOutputStream os = new FileOutputStream("template.xlsx");
wb.write(os);
os.close();

//Step 2. Generate XML file.
File tmp = new File("sheet.xml");
Writer fw = new OutputStreamWriter(new FileOutputStream(tmp), XML_ENCODING);
generate(fw, styles);
fw.close();

//Step 3. Substitute the template entry with the generated data
FileOutputStream out = new FileOutputStream("big-grid.xlsx");
substitute(new File("template.xlsx"), tmp, sheetRef.substring(1), out);
out.close();
}

/**
* Create a library of cell styles.
*/
private static Map<String, XSSFCellStyle> createStyles(XSSFWorkbook wb){
Map<String, XSSFCellStyle> styles = new HashMap<String, XSSFCellStyle>();
XSSFDataFormat fmt = wb.createDataFormat();

XSSFCellStyle style1 = wb.createCellStyle();
style1.setAlignment(XSSFCellStyle.ALIGN_RIGHT);
style1.setDataFormat(fmt.getFormat("0.0%"));
styles.put("percent", style1);

XSSFCellStyle style2 = wb.createCellStyle();
style2.setAlignment(XSSFCellStyle.ALIGN_CENTER);
style2.setDataFormat(fmt.getFormat("0.0X"));
styles.put("coeff", style2);

XSSFCellStyle style3 = wb.createCellStyle();
style3.setAlignment(XSSFCellStyle.ALIGN_RIGHT);
style3.setDataFormat(fmt.getFormat("$#,##0.00"));
styles.put("currency", style3);

XSSFCellStyle style4 = wb.createCellStyle();
style4.setAlignment(XSSFCellStyle.ALIGN_RIGHT);
style4.setDataFormat(fmt.getFormat("mmm dd"));
styles.put("date", style4);

XSSFCellStyle style5 = wb.createCellStyle();
XSSFFont headerFont = wb.createFont();
headerFont.setBold(true);
style5.setFillForegroundColor(IndexedColors.GREY_25_PERCENT.getIndex());
style5.setFillPattern(XSSFCellStyle.SOLID_FOREGROUND);
style5.setFont(headerFont);
styles.put("header", style5);

return styles;
}

private static void generate(Writer out, Map<String, XSSFCellStyle> styles) throws Exception {

Random rnd = new Random();
Calendar calendar = Calendar.getInstance();

SpreadsheetWriter sw = new SpreadsheetWriter(out);
sw.beginSheet();

//insert header row
sw.insertRow(0);
int styleIndex = styles.get("header").getIndex();
sw.createCell(0, "Title", styleIndex);
sw.createCell(1, "% Change", styleIndex);
sw.createCell(2, "Ratio", styleIndex);
sw.createCell(3, "Expenses", styleIndex);
sw.createCell(4, "Date", styleIndex);

sw.endRow();

//write data rows
for (int rownum = 1; rownum < 100000; rownum++) {
sw.insertRow(rownum);

sw.createCell(0, "Hello, " + rownum + "!");
sw.createCell(1, (double)rnd.nextInt(100)/100, styles.get("percent").getIndex());
sw.createCell(2, (double)rnd.nextInt(10)/10, styles.get("coeff").getIndex());
sw.createCell(3, rnd.nextInt(10000), styles.get("currency").getIndex());
sw.createCell(4, calendar, styles.get("date").getIndex());

sw.endRow();

calendar.roll(Calendar.DAY_OF_YEAR, 1);
}
sw.endSheet();
}

/**
*
* @param zipfile the template file
* @param tmpfile the XML file with the sheet data
* @param entry the name of the sheet entry to substitute, e.g. xl/worksheets/sheet1.xml
* @param out the stream to write the result to
*/
private static void substitute(File zipfile, File tmpfile, String entry, OutputStream out) throws IOException {
ZipFile zip = new ZipFile(zipfile);

ZipOutputStream zos = new ZipOutputStream(out);

@SuppressWarnings("unchecked")
Enumeration<ZipEntry> en = (Enumeration<ZipEntry>) zip.entries();
while (en.hasMoreElements()) {
ZipEntry ze = en.nextElement();
if(!ze.getName().equals(entry)){
zos.putNextEntry(new ZipEntry(ze.getName()));
InputStream is = zip.getInputStream(ze);
copyStream(is, zos);
is.close();
}
}
zos.putNextEntry(new ZipEntry(entry));
InputStream is = new FileInputStream(tmpfile);
copyStream(is, zos);
is.close();

zos.close();
}

private static void copyStream(InputStream in, OutputStream out) throws IOException {
byte[] chunk = new byte[1024];
int count;
while ((count = in.read(chunk)) >=0 ) {
out.write(chunk,0,count);
}
}

/**
* Writes spreadsheet data in a Writer.
* (YK: in future it may evolve in a full-featured API for streaming data in Excel)
*/
public static class SpreadsheetWriter {
private final Writer _out;
private int _rownum;

public SpreadsheetWriter(Writer out){
_out = out;
}

public void beginSheet() throws IOException {
_out.write("<?xml version=\"1.0\" encoding=\""+XML_ENCODING+"\"?>" +
"<worksheet xmlns=\"http://schemas.openxmlformats.org/spreadsheetml/2006/main\">" );
_out.write("<sheetData>\n");
}

public void endSheet() throws IOException {
_out.write("</sheetData>");
_out.write("</worksheet>");
}

/**
* Insert a new row
*
* @param rownum 0-based row number
*/
public void insertRow(int rownum) throws IOException {
_out.write("<row r=\""+(rownum+1)+"\">\n");
this._rownum = rownum;
}

/**
* Insert row end marker
*/
public void endRow() throws IOException {
_out.write("</row>\n");
}

public void createCell(int columnIndex, String value, int styleIndex) throws IOException {
String ref = new CellReference(_rownum, columnIndex).formatAsString();
_out.write("<c r=\""+ref+"\" t=\"inlineStr\"");
if(styleIndex != -1) _out.write(" s=\""+styleIndex+"\"");
_out.write(">");
_out.write("<is><t>"+value+"</t></is>");
_out.write("</c>");
}

public void createCell(int columnIndex, String value) throws IOException {
createCell(columnIndex, value, -1);
}

public void createCell(int columnIndex, double value, int styleIndex) throws IOException {
String ref = new CellReference(_rownum, columnIndex).formatAsString();
_out.write("<c r=\""+ref+"\" t=\"n\"");
if(styleIndex != -1) _out.write(" s=\""+styleIndex+"\"");
_out.write(">");
_out.write("<v>"+value+"</v>");
_out.write("</c>");
}

public void createCell(int columnIndex, double value) throws IOException {
createCell(columnIndex, value, -1);
}

public void createCell(int columnIndex, Calendar value, int styleIndex) throws IOException {
createCell(columnIndex, DateUtil.getExcelDate(value, false), styleIndex);
}
}
}


3.性能比較
使用了這個詭計以後,讀取excel模板並寫10000條記錄到excel並生成從原來的10分鐘變成了3秒!性能有了飛躍。
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章