//整體excel加載 問題出現在第一步文件加載,經測試5M文件的加載基本已經到我電腦4G內存的極限了,
//當然如果電腦配置更好的話可以優化jvm,應該會再大一些
一、小excel處理
import com.yeepay.prop.common.utils.excel.HxlsReader;
import com.yeepay.prop.common.utils.excel.IRowReader;
import com.yeepay.prop.common.utils.excel.XxlsReader;
import com.yeepay.prop.common.utils.exception.SysOperCustomException;
import org.apache.poi.hssf.usermodel.*;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import java.io.File;
import java.io.FileInputStream;
import java.text.SimpleDateFormat;
import java.util.*;
public class POIUtil {
/**
* 導入Excel文件
* 內容以List<Map<String K,String V>>的方式存放
* @param excelFile : Excel文件對象
* @param strKeys : Map的Key列表,Value爲相應的sheet一行中各列的值
* @return
*/
public static List<Map<String,String>> importExcelToMap(File excelFile, String strKeys) {
String[] strKey = strKeys.split(",");
List<Map<String,String>> listMap = new ArrayList<Map<String,String>>();
String fileName = excelFile.getPath();
int i = 0;
try {
Workbook workbook = null;
String fileType = fileName.substring(fileName.lastIndexOf(".")+1).toLowerCase();
try {
if (fileType.equals("xls")) {
<span style="color:#FF0000;">workbook = new XSSFWorkbook(new FileInputStream(excelFile))</span>;//<span style="font-size:18px;color:#FF0000;">如果文件過大 一定會在裏卡死,造成內存溢出</span>
} else if (fileType.equals("xlsx")) {
<span style="color:#FF0000;"> workbook = new HSSFWorkbook(new FileInputStream(excelFile));</span>
}
} catch (Exception e) {
throw new SysOperCustomException("導入文件格式錯誤: "+e.getMessage());
}
Sheet sheet = workbook.getSheetAt(0);
while (true) {
Row row = sheet.getRow(i);
if (row == null)
break;
Map<String,String> map = new HashMap<String,String>();
Boolean flag = false;
for(int keyIndex = 0; keyIndex < strKey.length; keyIndex++){
//System.out.println(keyIndex+" "+row.getCell(keyIndex));
Cell cell = row.getCell(keyIndex);
//null 不可以 設置格式爲cell_type_string
if(null != row.getCell(keyIndex))
{
if(cell.getCellType() == Cell.CELL_TYPE_NUMERIC && HSSFDateUtil.isCellDateFormatted(cell))
{
// 數值、日期類型
double d = cell.getNumericCellValue();
// 日期類型
Date date = HSSFDateUtil.getJavaDate(d);
cell.setCellType(HSSFCell.CELL_TYPE_STRING);
cell.setCellValue(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
.format(date));
}
else
{
cell.setCellType(HSSFCell.CELL_TYPE_STRING);
}
}
String cellValue = "";
if(null != cell){
cellValue = row.getCell(keyIndex).getStringCellValue();
}
//如果行數據存在不爲空 或 null的數據,則視爲有效數據
if(null != cellValue && !cellValue.isEmpty())
flag = true;
map.put(strKey[keyIndex], cellValue.trim());
}
//只添加有效數據
if(flag)
listMap.add(map);
i++;
}
} catch (Exception e) {
e.printStackTrace();
System.out.println("導入中斷,錯誤位置:第"+ i +"行數據!");
}
return listMap;
}
}
調用方法如下
import java.io.File;
import java.util.Date;
import java.util.List;
import java.util.Map;
public class Test {
@org.junit.Test
public void testImport() {
File image = new File("D:\\test.xls");
String titelStr = "bankCustomerNo,cardNumber,tradeAmount,tradeTime,requestId";//實體屬性
System.out.println(new Date());
List<Map<String,String>> list = POIUtil.importExcelToMap(image, titelStr);
//模板的判定
//在這裏處理業務邏輯與數據處理 實體的創建於保存
for(Map<String, String> map : list) {
User user = new User();
user.setName(map.get("name"));
.
.
.
}
}
}
在我以前所有的excel數據導入都是通過poi的簡單應用,直接加載整個excel進行數據返回統計,第一步直接造成了不可能對於大文件、大數據的的處理。因此通過參考http://blog.sina.com.cn/s/blog_628d4dd101017quk.html並在以上的基礎上編寫了如下工具類,之所以工具類不支持大數據量的處理是因爲其中方法optRows編寫代碼問題,該方法爲每一行數據的存儲都會調用該方法,當數據量達到10萬條數據的時候就會調用十萬次,list中存在十萬個map數據,map中又存在十萬個key、10萬個value,並且這些對象會長期持有不會釋放掉,因此會造成gc回收出現文件,最後超出內存 異常爆出out of memory
之所以保留了此方法,是因爲數據一起返回操作會更簡便,業務書寫會更方便(相對於大excel處理)
二、中等excel處理
import com.yeepay.prop.common.utils.excel.HxlsReader;
import com.yeepay.prop.common.utils.excel.IRowReader;
import com.yeepay.prop.common.utils.excel.XxlsReader;
import com.yeepay.prop.common.utils.exception.SysOperCustomException;
import org.apache.poi.hssf.usermodel.*;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import java.io.File;
import java.io.FileInputStream;
import java.text.SimpleDateFormat;
import java.util.*;
public class POIUtil {
/**
* 導入Excel文件
* 內容以List<Map<String K,String V>>的方式存放
* @param excelFile : Excel文件對象
* @param strKeys : Map的Key列表,Value爲相應的sheet一行中各列的值
* @return
*/
public static List<Map<String,String>> importExcelToMap(File excelFile, String strKeys) {
String[] strKey = strKeys.split(",");
List<Map<String,String>> listMap = new ArrayList<Map<String,String>>();
String fileName = excelFile.getPath();
int i = 0;
try {
Workbook workbook = null;
String fileType = fileName.substring(fileName.lastIndexOf(".")+1).toLowerCase();
try {
if (fileType.equals("xls")) {
workbook = new XSSFWorkbook(new FileInputStream(excelFile));
} else if (fileType.equals("xlsx")) {
workbook = new HSSFWorkbook(new FileInputStream(excelFile));
}
} catch (Exception e) {
throw new SysOperCustomException("導入文件格式錯誤: "+e.getMessage());
}
Sheet sheet = workbook.getSheetAt(0);
while (true) {
Row row = sheet.getRow(i);
if (row == null)
break;
Map<String,String> map = new HashMap<String,String>();
Boolean flag = false;
for(int keyIndex = 0; keyIndex < strKey.length; keyIndex++){
//System.out.println(keyIndex+" "+row.getCell(keyIndex));
Cell cell = row.getCell(keyIndex);
//null 不可以 設置格式爲cell_type_string
if(null != row.getCell(keyIndex))
{
if(cell.getCellType() == Cell.CELL_TYPE_NUMERIC && HSSFDateUtil.isCellDateFormatted(cell))
{
// 數值、日期類型
double d = cell.getNumericCellValue();
// 日期類型
Date date = HSSFDateUtil.getJavaDate(d);
cell.setCellType(HSSFCell.CELL_TYPE_STRING);
cell.setCellValue(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
.format(date));
}
else
{
cell.setCellType(HSSFCell.CELL_TYPE_STRING);
}
}
String cellValue = "";
if(null != cell){
cellValue = row.getCell(keyIndex).getStringCellValue();
}
//如果行數據存在不爲空 或 null的數據,則視爲有效數據
if(null != cellValue && !cellValue.isEmpty())
flag = true;
map.put(strKey[keyIndex], cellValue.trim());
}
//只添加有效數據
if(flag)
listMap.add(map);
i++;
}
} catch (Exception e) {
e.printStackTrace();
System.out.println("導入中斷,錯誤位置:第"+ i +"行數據!");
}
return listMap;
}
/**
* 導入Excel文件 暫時可以讀取10萬以內數據
* 內容以List<Map<String K,String V>>的方式存放
* @param excelFile : Excel文件對象
* @param strKeys : Map的Key列表,Value爲相應的sheet一行中各列的值
* @return
*/
public static List<Map<String,String>> importBigExcelToMap(File excelFile, String strKeys) {
String filename = excelFile.getPath();
String fileType = filename.substring(filename.lastIndexOf(".")+1).toLowerCase();
List<Map<String, String>> list = new ArrayList<Map<String, String>>();
try {
if (fileType.equals("xls")) {
list = <strong>HxlsUtil.importExcelToMap</strong>( excelFile, strKeys);
} else if (fileType.equals("xlsx")) {
list = <strong>XxlsUtil.importExcelToMap</strong>( excelFile,strKeys);
}
} catch (Exception e) {
System.out.println("導入文件格式錯誤: "+e.getMessage());
}
return list;
}
}
1.excel2003工具類
package com.yeepay.prop.common.utils;
import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener;
import org.apache.poi.hssf.eventusermodel.*;
import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord;
import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord;
import org.apache.poi.hssf.model.HSSFFormulaParser;
import org.apache.poi.hssf.record.*;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* excel 2003 文件處理
*/
public class HxlsUtil implements HSSFListener {
private int minColumns;
private POIFSFileSystem fs;
private PrintStream output;
private int lastRowNumber;
private int lastColumnNumber;
/** Should we output the formula, or the value it has? */
private boolean outputFormulaValues = true;
/** For parsing Formulas */
private SheetRecordCollectingListener workbookBuildingListener;
private HSSFWorkbook stubWorkbook;
// Records we pick up as we process
private SSTRecord sstRecord;
private FormatTrackingHSSFListener formatListener;
/** So we known which sheet we're on */
private int sheetIndex = -1;
private BoundSheetRecord[] orderedBSRs;
private ArrayList boundSheetRecords = new ArrayList();
// For handling formulas with string results
private int nextRow;
private int nextColumn;
private boolean outputNextStringRecord;
private int curRow;
private List<String> rowlist;
@SuppressWarnings( "unused")
private String sheetName;
private String title;
private List<Map<String, String>> mapList;
private HxlsUtil(POIFSFileSystem fs, String title)
throws SQLException {
this.fs = fs;
this.output = System.out;
this.minColumns = -1;
this.curRow = 0;
this.rowlist = new ArrayList<String>();
this.title = title;
this.mapList = new ArrayList<Map<String, String>>();
}
private HxlsUtil(String filename, String title) throws IOException, SQLException {
this(new POIFSFileSystem(new FileInputStream(filename)), title);
}
<strong>public Map<String, String> optRows(int sheetIndex,int curRow, List<String> rowlist) throws SQLException {
<span style="color:#FF0000;">String[] titleStr = title.split(",");</span>
<span style="color:#FF0000;">int col</span> = rowlist.size();
<span style="color:#FF0000;">Map<String, String> map = new HashMap<String, String>();</span>
for(int i = 0; i < titleStr.length; i++) {
if(col >= titleStr.length)
map.put(titleStr[i], rowlist.get(i).trim());
else
map.put(titleStr[i], "");
}
return map.size() > 0 ? map : null;
}</strong>
/**
* 遍歷 excel 文件
*/
public void process() throws IOException {
MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(
this);
formatListener = new FormatTrackingHSSFListener(listener);
HSSFEventFactory factory = new HSSFEventFactory();
HSSFRequest request = new HSSFRequest();
if (outputFormulaValues) {
request.addListenerForAllRecords(formatListener);
} else {
workbookBuildingListener = new SheetRecordCollectingListener(
formatListener);
request.addListenerForAllRecords(workbookBuildingListener);
}
factory.processWorkbookEvents(request, fs);
}
/**
* HSSFListener 監聽方法,處理 Record
*/
@SuppressWarnings("unchecked")
public void processRecord(Record record) {
int thisRow = -1;
int thisColumn = -1;
String thisStr = null;
String value = null;
switch (record.getSid()) {
case BoundSheetRecord.sid:
boundSheetRecords.add(record);
break;
case BOFRecord.sid:
BOFRecord br = (BOFRecord) record;
if (br.getType() == BOFRecord.TYPE_WORKSHEET) {
// 如果有需要,則建立子工作薄
if (workbookBuildingListener != null && stubWorkbook == null) {
stubWorkbook = workbookBuildingListener
.getStubHSSFWorkbook();
}
sheetIndex++;
if (orderedBSRs == null) {
orderedBSRs = BoundSheetRecord
.orderByBofPosition(boundSheetRecords);
}
sheetName = orderedBSRs[sheetIndex].getSheetname();
}
break;
case SSTRecord.sid:
sstRecord = (SSTRecord) record;
break;
case BlankRecord.sid:
BlankRecord brec = (BlankRecord) record;
thisRow = brec.getRow();
thisColumn = brec.getColumn();
thisStr = "";
rowlist.add(thisColumn, thisStr);
break;
case BoolErrRecord.sid: //單元格爲布爾類型
BoolErrRecord berec = (BoolErrRecord) record;
thisRow = berec.getRow();
thisColumn = berec.getColumn();
thisStr = berec.getBooleanValue()+"";
rowlist.add(thisColumn, thisStr);
break;
case FormulaRecord.sid: //單元格爲公式類型
FormulaRecord frec = (FormulaRecord) record;
thisRow = frec.getRow();
thisColumn = frec.getColumn();
if (outputFormulaValues) {
if (Double.isNaN(frec.getValue())) {
// Formula result is a string
// This is stored in the next record
outputNextStringRecord = true;
nextRow = frec.getRow();
nextColumn = frec.getColumn();
} else {
thisStr = formatListener.formatNumberDateCell(frec);
}
} else {
thisStr = '"' + HSSFFormulaParser.toFormulaString(stubWorkbook,
frec.getParsedExpression()) + '"';
}
rowlist.add(thisColumn,thisStr);
break;
case StringRecord.sid://單元格中公式的字符串
if (outputNextStringRecord) {
// String for formula
StringRecord srec = (StringRecord) record;
thisStr = srec.getString();
thisRow = nextRow;
thisColumn = nextColumn;
outputNextStringRecord = false;
}
break;
case LabelRecord.sid:
LabelRecord lrec = (LabelRecord) record;
curRow = thisRow = lrec.getRow();
thisColumn = lrec.getColumn();
value = lrec.getValue().trim();
value = value.equals("")?" ":value;
this.rowlist.add(thisColumn, value);
break;
case LabelSSTRecord.sid: //單元格爲字符串類型
LabelSSTRecord lsrec = (LabelSSTRecord) record;
curRow = thisRow = lsrec.getRow();
thisColumn = lsrec.getColumn();
if (sstRecord == null) {
rowlist.add(thisColumn, " ");
} else {
value = sstRecord
.getString(lsrec.getSSTIndex()).toString().trim();
value = value.equals("")?" ":value;
rowlist.add(thisColumn,value);
}
break;
case NumberRecord.sid: //單元格爲數字類型
NumberRecord numrec = (NumberRecord) record;
curRow = thisRow = numrec.getRow();
thisColumn = numrec.getColumn();
value = formatListener.formatNumberDateCell(numrec).trim();
value = value.equals("")?" ":value;
// 向容器加入列值
rowlist.add(thisColumn, value);
break;
default:
break;
}
// 遇到新行的操作
if (thisRow != -1 && thisRow != lastRowNumber) {
lastColumnNumber = -1;
}
// 空值的操作
if (record instanceof MissingCellDummyRecord) {
MissingCellDummyRecord mc = (MissingCellDummyRecord) record;
curRow = thisRow = mc.getRow();
thisColumn = mc.getColumn();
rowlist.add(thisColumn," ");
}
// 更新行和列的值
if (thisRow > -1)
lastRowNumber = thisRow;
if (thisColumn > -1)
lastColumnNumber = thisColumn;
// 行結束時的操作
if (record instanceof LastCellOfRowDummyRecord) {
if (minColumns > 0) {
// 列值重新置空
if (lastColumnNumber == -1) {
lastColumnNumber = 0;
}
}
// 行結束時, 調用 optRows() 方法
lastColumnNumber = -1;
try {
mapList.add(optRows(sheetIndex,curRow, rowlist));
} catch (SQLException e) {
e.printStackTrace();
}
rowlist.clear();
}
}
/**
* 導入Excel文件
* 內容以List<Map<String K,String V>>的方式存放
* @param excelFile : Excel文件對象
* @param title : Map的Key列表,Value爲相應的sheet一行中各列的值
* @return
*/
public static List<Map<String,String>> importExcelToMap(File excelFile, String title) throws IOException, SQLException {
HxlsUtil hxls = new HxlsUtil(excelFile.getPath(), title);
hxls.process();
return hxls.mapList;
}
}
2.2007excel工具類
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
import java.io.File;
import java.io.InputStream;
import java.sql.SQLException;
import java.util.*;
/**
* Excel 2007採用SAX解析文件,注意JDK版本1.5以上
*/
public class XxlsUtil extends DefaultHandler {
private SharedStringsTable sst;
private String lastContents;
private boolean nextIsString;
private int sheetIndex = -1;
private List<String> rowlist = new ArrayList<String>();
private int curRow = 0;
private int curCol = 0;
private String title;
private List<Map<String, String>> mapList;
private XxlsUtil(String fileName, String title) throws Exception {
this.title = title;
this.mapList = new ArrayList<Map<String, String>>();
processOneSheet(fileName, 1);
}
public Map<String, String> optRows(int sheetIndex,int curRow, List<String> rowlist) throws SQLException {
String[] titleStr = title.split(",");
int col = rowlist.size();
Map<String, String> map = new HashMap<String, String>();
for(int i = 0; i < titleStr.length; i++) {
if(col >= titleStr.length)
map.put(titleStr[i], rowlist.get(i).trim());
else
map.put(titleStr[i], "");
// System.out.print(rowlist.get(i) + " ");
}
return map.size() > 0 ? map : null;
}
//只遍歷一個sheet,其中sheetId爲要遍歷的sheet索引,從1開始,1-3
public void processOneSheet(String filename,int sheetId) throws Exception {
OPCPackage pkg = OPCPackage.open(filename);
XSSFReader r = new XSSFReader(pkg);
SharedStringsTable sst = r.getSharedStringsTable();
XMLReader parser = fetchSheetParser(sst);
// rId2 found by processing the Workbook
// 根據 rId# 或 rSheet# 查找sheet
InputStream sheet2 = r.getSheet("rId"+sheetId);
sheetIndex++;
InputSource sheetSource = new InputSource(sheet2);
parser.parse(sheetSource);
sheet2.close();
}
/**
* 遍歷 excel 文件
*/
public void process(String filename) throws Exception {
OPCPackage pkg = OPCPackage.open(filename);
XSSFReader r = new XSSFReader(pkg);
SharedStringsTable sst = r.getSharedStringsTable();
XMLReader parser = fetchSheetParser(sst);
Iterator<InputStream> sheets = r.getSheetsData();
while (sheets.hasNext()) {
curRow = 0;
sheetIndex++;
InputStream sheet = sheets.next();
InputSource sheetSource = new InputSource(sheet);
parser.parse(sheetSource);
sheet.close();
}
}
public XMLReader fetchSheetParser(SharedStringsTable sst)
throws SAXException {
XMLReader parser = XMLReaderFactory.createXMLReader();
// .createXMLReader("org.apache.xerces.parsers.SAXParser");
this.sst = sst;
parser.setContentHandler(this);
return parser;
}
public void startElement(String uri, String localName, String name,
Attributes attributes) throws SAXException {
// c => 單元格
if (name.equals("c")) {
// 如果下一個元素是 SST 的索引,則將nextIsString標記爲true
String cellType = attributes.getValue("t");
if (cellType != null && cellType.equals("s")) {
nextIsString = true;
} else {
nextIsString = false;
}
}
// 置空
lastContents = "";
}
public void endElement(String uri, String localName, String name)
throws SAXException {
// 根據SST的索引值的到單元格的真正要存儲的字符串
// 這時characters()方法可能會被調用多次
if (nextIsString) {
try {
int idx = Integer.parseInt(lastContents);
lastContents = new XSSFRichTextString(sst.getEntryAt(idx))
.toString();
} catch (Exception e) {
}
}
// v => 單元格的值,如果單元格是字符串則v標籤的值爲該字符串在SST中的索引
// 將單元格內容加入rowlist中,在這之前先去掉字符串前後的空白符
if (name.equals("v")) {
String value = lastContents.trim();
value = value.equals("")?" ":value;
rowlist.add(curCol, value);
curCol++;
}else {
//如果標籤名稱爲 row ,這說明已到行尾,調用 optRows() 方法
if (name.equals("row")) {
try {
mapList.add(optRows(sheetIndex, curRow, rowlist));
} catch (SQLException e) {
e.printStackTrace();
}
rowlist.clear();
curRow++;
curCol = 0;
}
}
}
public void characters(char[] ch, int start, int length)
throws SAXException {
//得到單元格內容的值
lastContents += new String(ch, start, length);
}
/**
* 導入Excel文件
* 內容以List<Map<String K,String V>>的方式存放
* @param excelFile : Excel文件對象
* @param title : Map的Key列表,Value爲相應的sheet一行中各列的值
* @return
*/
public static List<Map<String,String>> importExcelToMap(File excelFile, String title){
try {
XxlsUtil xxls = new XxlsUtil(excelFile.getPath(), title);
return xxls.mapList;
}catch (Exception e) {
e.printStackTrace();
}
return null;
}
}
調用方法如下
import com.yeepay.prop.common.utils.POIUtil;
import java.io.File;
import java.util.Date;
import java.util.List;
import java.util.Map;
/**
* Description
* PackageName: PACKAGE_NAME
*
* @author hua.jin
* @since 14-12-22 上午9:26
*/
public class Test {
@org.junit.Test
public void testImport() {
File image = new File("D:\\test.xls");
String titelStr = "name,age,gender";
System.out.println(new Date());
List<Map<String,String>> list = POIUtil.<strong>importBigExcelToMap</strong>(image, titelStr); <pre name="code" class="java"> //模板的判定
//在這裏處理業務邏輯與數據處理 實體的創建於保存
for(Map<String, String> map : list) {
User user = new User();
user.setName(map.get("name"));
.
.
.
}
}}
三、大excel文件的處理
這個就沒什麼好說的了,該方法 主要參考了http://blog.csdn.net/goodkuang2012/article/details/7350985,將行數據進行了接口處理,因爲一行行數據進行處理也就缺失了整體文件的把控,在異常處理這塊 該好好考慮一下了,在此基礎上的改動沒有太大,也就不再敘述了,有得有失吧
經測試大文件處理80M沒有問題