前言:前段時間,項目中有一個需要導入大量數據的功能,整個Excel文件的大小大概有200M左右,用以前的方法讀取很慢,甚至會內存溢出,所以後來改用另外一種方式(事件驅動模式)。其實我也不是很懂,是借鑑了一位前輩的思路(https://www.cnblogs.com/swordfall/p/8298386.html),下面直接上代碼。
1.引入jar包,POI的依賴包
<!-- poi office -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.16</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.16</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>3.16</version>
</dependency>
2.解析Excel2003的類ExcelXlsReader
public class ExcelXlsReader implements HSSFListener {
private int minColums = -1;
private POIFSFileSystem fs;
/**
* 總行數
*/
private int totalRows=0;
/**
* 上一行row的序號
*/
private int lastRowNumber;
/**
* 上一單元格的序號
*/
private int lastColumnNumber;
/**
* 是否輸出formula,還是它對應的值
*/
private boolean outputFormulaValues = true;
/**
* 用於轉換formulas
*/
private EventWorkbookBuilder.SheetRecordCollectingListener workbookBuildingListener;
//excel2003工作簿
private HSSFWorkbook stubWorkbook;
private SSTRecord sstRecord;
private FormatTrackingHSSFListener formatListener;
private final HSSFDataFormatter formatter = new HSSFDataFormatter();
/**
* 文件的絕對路徑
*/
private String filePath = "";
//表索引
private int sheetIndex = 0;
private BoundSheetRecord[] orderedBSRs;
@SuppressWarnings("unchecked")
private ArrayList boundSheetRecords = new ArrayList();
private int nextRow;
private int nextColumn;
private boolean outputNextStringRecord;
//當前行
private int curRow = 0;
//存儲一行記錄所有單元格的容器
private List<String> cellList = new ArrayList<String>();
//第一個sheet列表
private static List<List<String>> firstSheetList = new ArrayList<>();
//第二個sheet列表
private static List<List<String>> secondSheetList = new ArrayList<>();
//第三個sheet列表
private static List<List<String>> thirdSheetList = new ArrayList<>();
private static Map<String,List<List<String>>> allListMap = new HashMap<>();
/**
* 判斷整行是否爲空行的標記
*/
private boolean flag = false;
@SuppressWarnings("unused")
private String sheetName;
/**
* 遍歷excel下所有的sheet
*
* @param file
* @throws Exception
*/
public Map<String,List<List<String>>> process(File file) throws Exception {
// filePath = fileName;
this.fs = new POIFSFileSystem(new FileInputStream(file));
MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this);
formatListener = new FormatTrackingHSSFListener(listener);
HSSFEventFactory factory = new HSSFEventFactory();
HSSFRequest request = new HSSFRequest();
if (outputFormulaValues) {
request.addListenerForAllRecords(formatListener);
} else {
workbookBuildingListener = new EventWorkbookBuilder.SheetRecordCollectingListener(formatListener);
request.addListenerForAllRecords(workbookBuildingListener);
}
factory.processWorkbookEvents(request, fs);
allListMap.put("first",new ArrayList<>(firstSheetList));
allListMap.put("second",new ArrayList<>(secondSheetList));
allListMap.put("third",new ArrayList<>(thirdSheetList));
firstSheetList.clear();
secondSheetList.clear();
thirdSheetList.clear();
return allListMap;
}
/**
* HSSFListener 監聽方法,處理Record
* 處理每個單元格
* @param record
*/
@SuppressWarnings("unchecked")
public void processRecord(Record record) {
int thisRow = -1;
int thisColumn = -1;
String thisStr = null;
String value = null;
switch (record.getSid()) {
case BoundSheetRecord.sid:
boundSheetRecords.add(record);
break;
case BOFRecord.sid: //開始處理每個sheet
BOFRecord br = (BOFRecord) record;
if (br.getType() == BOFRecord.TYPE_WORKSHEET) {
//如果有需要,則建立子工作簿
if (workbookBuildingListener != null && stubWorkbook == null) {
stubWorkbook = workbookBuildingListener.getStubHSSFWorkbook();
}
if (orderedBSRs == null) {
orderedBSRs = BoundSheetRecord.orderByBofPosition(boundSheetRecords);
}
sheetName = orderedBSRs[sheetIndex].getSheetname();
sheetIndex++;
}
break;
case SSTRecord.sid:
sstRecord = (SSTRecord) record;
break;
case BlankRecord.sid: //單元格爲空白
BlankRecord brec = (BlankRecord) record;
thisRow = brec.getRow();
thisColumn = brec.getColumn();
thisStr = "";
cellList.add(thisColumn, thisStr);
break;
case BoolErrRecord.sid: //單元格爲布爾類型
BoolErrRecord berec = (BoolErrRecord) record;
thisRow = berec.getRow();
thisColumn = berec.getColumn();
thisStr = berec.getBooleanValue() + "";
cellList.add(thisColumn, thisStr);
checkRowIsNull(thisStr); //如果裏面某個單元格含有值,則標識該行不爲空行
break;
case FormulaRecord.sid://單元格爲公式類型
FormulaRecord frec = (FormulaRecord) record;
thisRow = frec.getRow();
thisColumn = frec.getColumn();
if (outputFormulaValues) {
if (Double.isNaN(frec.getValue())) {
outputNextStringRecord = true;
nextRow = frec.getRow();
nextColumn = frec.getColumn();
} else {
thisStr = '"' + HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression()) + '"';
}
} else {
thisStr = '"' + HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression()) + '"';
}
cellList.add(thisColumn, thisStr);
checkRowIsNull(thisStr); //如果裏面某個單元格含有值,則標識該行不爲空行
break;
case StringRecord.sid: //單元格中公式的字符串
if (outputNextStringRecord) {
StringRecord srec = (StringRecord) record;
thisStr = srec.getString();
thisRow = nextRow;
thisColumn = nextColumn;
outputNextStringRecord = false;
}
break;
case LabelRecord.sid:
LabelRecord lrec = (LabelRecord) record;
curRow = thisRow = lrec.getRow();
thisColumn = lrec.getColumn();
value = lrec.getValue().trim();
value = value.equals("") ? "" : value;
cellList.add(thisColumn, value);
checkRowIsNull(value); //如果裏面某個單元格含有值,則標識該行不爲空行
break;
case LabelSSTRecord.sid: //單元格爲字符串類型
LabelSSTRecord lsrec = (LabelSSTRecord) record;
curRow = thisRow = lsrec.getRow();
thisColumn = lsrec.getColumn();
if (sstRecord == null) {
cellList.add(thisColumn, "");
} else {
value = sstRecord.getString(lsrec.getSSTIndex()).toString().trim();
value = value.equals("") ? "" : value;
cellList.add(thisColumn, value);
checkRowIsNull(value); //如果裏面某個單元格含有值,則標識該行不爲空行
}
break;
case NumberRecord.sid: //單元格爲數字類型
NumberRecord numrec = (NumberRecord) record;
curRow = thisRow = numrec.getRow();
thisColumn = numrec.getColumn();
//第一種方式
//value = formatListener.formatNumberDateCell(numrec).trim();//這個被寫死,採用的m/d/yy h:mm格式,不符合要求
//第二種方式,參照formatNumberDateCell裏面的實現方法編寫
Double valueDouble=((NumberRecord)numrec).getValue();
String formatString=formatListener.getFormatString(numrec);
if (formatString.contains("m/d/yy")){
formatString="yyyy-MM-dd hh:mm:ss";
}
int formatIndex=formatListener.getFormatIndex(numrec);
value=formatter.formatRawCellContents(valueDouble, formatIndex, formatString).trim();
value = value.equals("") ? "" : value;
//向容器加入列值
cellList.add(thisColumn, value);
checkRowIsNull(value); //如果裏面某個單元格含有值,則標識該行不爲空行
break;
default:
break;
}
//遇到新行的操作
if (thisRow != -1 && thisRow != lastRowNumber) {
lastColumnNumber = -1;
}
//空值的操作
if (record instanceof MissingCellDummyRecord) {
MissingCellDummyRecord mc = (MissingCellDummyRecord) record;
curRow = thisRow = mc.getRow();
thisColumn = mc.getColumn();
cellList.add(thisColumn, "");
}
//更新行和列的值
if (thisRow > -1)
lastRowNumber = thisRow;
if (thisColumn > -1)
lastColumnNumber = thisColumn;
//行結束時的操作
if (record instanceof LastCellOfRowDummyRecord) {
if (minColums > 0) {
//列值重新置空
if (lastColumnNumber == -1) {
lastColumnNumber = 0;
}
}
lastColumnNumber = -1;
if (flag&&curRow!=0) { //該行不爲空行且該行不是第一行,發送(第一行爲列名,不需要)
switch(sheetIndex){
case 1 :
//第一個sheet
List<String> a = new ArrayList<>(cellList);
firstSheetList.add(a);
break;
case 2 :
//第二個sheet
List<String> b = new ArrayList<>(cellList);
secondSheetList.add(b);
break;
case 3 :
//第三個sheet
List<String> c = new ArrayList<>(cellList);
thirdSheetList.add(c);
break;
default :
break;
}
totalRows++;
}
//清空容器
cellList.clear();
flag=false;
}
}
/**
* 如果裏面某個單元格含有值,則標識該行不爲空行
* @param value
*/
public void checkRowIsNull(String value){
if (value != null && !"".equals(value)) {
flag = true;
}
}
}
3.解析Excel2007的類ExcelXlsxReader
public class ExcelXlsxReader extends DefaultHandler {
/**
* 單元格中的數據可能的數據類型
*/
enum CellDataType {
BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER, DATE, NULL
}
/**
* 共享字符串表
*/
private SharedStringsTable sst;
/**
* 上一次的索引值
*/
private String lastIndex;
/**
* 文件的絕對路徑
*/
private String filePath = "";
/**
* 工作表索引
*/
private int sheetIndex = 0;
/**
* sheet名
*/
private String sheetName = "";
/**
* 總行數
*/
private int totalRows=0;
/**
* 一行內cell集合
*/
private static List<String> cellList = new ArrayList<>();
//第一個sheet列表
private static List<List<String>> firstSheetList = new ArrayList<>();
//第二個sheet列表
private static List<List<String>> secondSheetList = new ArrayList<>();
//第三個sheet列表
private static List<List<String>> thirdSheetList = new ArrayList<>();
private static Map<String,List<List<String>>> allListMap = new HashMap<>();
/**
* 判斷整行是否爲空行的標記
*/
private boolean flag = false;
/**
* 當前行
*/
private int curRow = 1;
/**
* 當前列
*/
private int curCol = 0;
/**
* T元素標識
*/
private boolean isTElement;
/**
* 異常信息,如果爲空則表示沒有異常
*/
private String exceptionMessage;
/**
* 單元格數據類型,默認爲字符串類型
*/
private CellDataType nextDataType = CellDataType.SSTINDEX;
private final DataFormatter formatter = new DataFormatter();
/**
* 單元格日期格式的索引
*/
private short formatIndex;
/**
* 日期格式字符串
*/
private String formatString;
//定義前一個元素和當前元素的位置,用來計算其中空的單元格數量,如A6和A8等
private String preRef = null, ref = null;
//定義該文檔一行最大的單元格數,用來補全一行最後可能缺失的單元格
private String maxRef = null;
/**
* 單元格
*/
private StylesTable stylesTable;
/**
* 遍歷工作簿中所有的電子表格
* 並緩存在mySheetList中
*
* @param file
* @throws Exception
*/
public Map<String,List<List<String>>> process(File file) throws Exception {
// filePath = filename;
OPCPackage pkg = OPCPackage.open(file);
XSSFReader xssfReader = new XSSFReader(pkg);
stylesTable = xssfReader.getStylesTable();
SharedStringsTable sst = xssfReader.getSharedStringsTable();
XMLReader parser = XMLReaderFactory.createXMLReader("com.sun.org.apache.xerces.internal.parsers.SAXParser");
this.sst = sst;
parser.setContentHandler(this);
XSSFReader.SheetIterator sheets = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
while (sheets.hasNext()) { //遍歷sheet
curRow = 1; //標記初始行爲第一行
sheetIndex++;
InputStream sheet = sheets.next(); //sheets.next()和sheets.getSheetName()不能換位置,否則sheetName報錯
sheetName = sheets.getSheetName();
InputSource sheetSource = new InputSource(sheet);
parser.parse(sheetSource); //解析excel的每條記錄,在這個過程中startElement()、characters()、endElement()這三個函數會依次執行
sheet.close();
}
allListMap.put("first",new ArrayList<>(firstSheetList));
allListMap.put("second",new ArrayList<>(secondSheetList));
allListMap.put("third",new ArrayList<>(thirdSheetList));
firstSheetList.clear();
secondSheetList.clear();
thirdSheetList.clear();
return allListMap; //返回該excel文件的總行數,不包括首列和空行
}
/**
* 第一個執行
*
* @param uri
* @param localName
* @param name
* @param attributes
* @throws SAXException
*/
@Override
public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {
//c => 單元格
if ("c".equals(name)) {
//前一個單元格的位置
if (preRef == null) {
preRef = attributes.getValue("r");
} else {
preRef = ref;
}
//當前單元格的位置
ref = attributes.getValue("r");
//設定單元格類型
this.setNextDataType(attributes);
}
//當元素爲t時
if ("t".equals(name)) {
isTElement = true;
} else {
isTElement = false;
}
//置空
lastIndex = "";
}
/**
* 第二個執行
* 得到單元格對應的索引值或是內容值
* 如果單元格類型是字符串、INLINESTR、數字、日期,lastIndex則是索引值
* 如果單元格類型是布爾值、錯誤、公式,lastIndex則是內容值
* @param ch
* @param start
* @param length
* @throws SAXException
*/
@Override
public void characters(char[] ch, int start, int length) throws SAXException {
lastIndex += new String(ch, start, length);
}
/**
* 第三個執行
*
* @param uri
* @param localName
* @param name
* @throws SAXException
*/
@Override
public void endElement(String uri, String localName, String name) throws SAXException {
//t元素也包含字符串
if (isTElement) {//這個程序沒經過
//將單元格內容加入rowlist中,在這之前先去掉字符串前後的空白符
String value = lastIndex.trim();
cellList.add(curCol, value);
curCol++;
isTElement = false;
//如果裏面某個單元格含有值,則標識該行不爲空行
if (value != null && !"".equals(value)) {
flag = true;
}
} else if ("v".equals(name)) {
//v => 單元格的值,如果單元格是字符串,則v標籤的值爲該字符串在SST中的索引
String value = this.getDataValue(lastIndex.trim(), "");//根據索引值獲取對應的單元格值
//補全單元格之間的空單元格
if (!ref.equals(preRef)) {
int len = countNullCell(ref, preRef);
for (int i = 0; i < len; i++) {
cellList.add(curCol, "");
curCol++;
}
}
cellList.add(curCol, value);
curCol++;
//如果裏面某個單元格含有值,則標識該行不爲空行
if (value != null && !"".equals(value)) {
flag = true;
}
} else {
//如果標籤名稱爲row,這說明已到行尾,調用optRows()方法
if ("row".equals(name)) {
//默認第一行爲表頭,以該行單元格數目爲最大數目
if (curRow == 1) {
maxRef = ref;
}
//補全一行尾部可能缺失的單元格
if (maxRef != null) {
int len = countNullCell(maxRef, ref);
for (int i = 0; i <= len; i++) {
cellList.add(curCol, "");
curCol++;
}
}
if (flag&&curRow!=1){ //該行不爲空行且該行不是第一行,則發送(第一行爲列名,不需要)
switch(sheetIndex){
case 1 :
//第一個sheet
List<String> a = new ArrayList<>(cellList);
firstSheetList.add(a);
break;
case 2 :
//第二個sheet
List<String> b = new ArrayList<>(cellList);
secondSheetList.add(b);
break;
case 3 :
//第三個sheet
List<String> c = new ArrayList<>(cellList);
thirdSheetList.add(c);
break;
default :
break;
}
// ExcelReaderUtil.sendRows(filePath, sheetName, sheetIndex, curRow, cellList);
totalRows++;
}
cellList.clear();
curRow++;
curCol = 0;
preRef = null;
ref = null;
flag=false;
}
}
}
/**
* 處理數據類型
*
* @param attributes
*/
public void setNextDataType(Attributes attributes) {
nextDataType = CellDataType.NUMBER; //cellType爲空,則表示該單元格類型爲數字
formatIndex = -1;
formatString = null;
String cellType = attributes.getValue("t"); //單元格類型
String cellStyleStr = attributes.getValue("s"); //
String columnData = attributes.getValue("r"); //獲取單元格的位置,如A1,B1
if ("b".equals(cellType)) { //處理布爾值
nextDataType = CellDataType.BOOL;
} else if ("e".equals(cellType)) { //處理錯誤
nextDataType = CellDataType.ERROR;
} else if ("inlineStr".equals(cellType)) {
nextDataType = CellDataType.INLINESTR;
} else if ("s".equals(cellType)) { //處理字符串
nextDataType = CellDataType.SSTINDEX;
} else if ("str".equals(cellType)) {
nextDataType = CellDataType.FORMULA;
}
if (cellStyleStr != null) { //處理日期
int styleIndex = Integer.parseInt(cellStyleStr);
XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);
formatIndex = style.getDataFormat();
formatString = style.getDataFormatString();
if (formatString.contains("m/d/yy")) {
nextDataType = CellDataType.DATE;
formatString = "yyyy-MM-dd hh:mm:ss";
}
if (formatString == null) {
nextDataType = CellDataType.NULL;
formatString = BuiltinFormats.getBuiltinFormat(formatIndex);
}
}
}
/**
* 對解析出來的數據進行類型處理
* @param value 單元格的值,
* value代表解析:BOOL的爲0或1, ERROR的爲內容值,FORMULA的爲內容值,INLINESTR的爲索引值需轉換爲內容值,
* SSTINDEX的爲索引值需轉換爲內容值, NUMBER爲內容值,DATE爲內容值
* @param thisStr 一個空字符串
* @return
*/
@SuppressWarnings("deprecation")
public String getDataValue(String value, String thisStr) {
switch (nextDataType) {
// 這幾個的順序不能隨便交換,交換了很可能會導致數據錯誤
case BOOL: //布爾值
char first = value.charAt(0);
thisStr = first == '0' ? "FALSE" : "TRUE";
break;
case ERROR: //錯誤
thisStr = "\"ERROR:" + value.toString() + '"';
break;
case FORMULA: //公式
thisStr = '"' + value.toString() + '"';
break;
case INLINESTR:
XSSFRichTextString rtsi = new XSSFRichTextString(value.toString());
thisStr = rtsi.toString();
rtsi = null;
break;
case SSTINDEX: //字符串
String sstIndex = value.toString();
try {
int idx = Integer.parseInt(sstIndex);
XSSFRichTextString rtss = new XSSFRichTextString(sst.getEntryAt(idx));//根據idx索引值獲取內容值
thisStr = rtss.toString();
rtss = null;
} catch (NumberFormatException ex) {
thisStr = value.toString();
}
break;
case NUMBER: //數字
if (formatString != null) {
thisStr = formatter.formatRawCellContents(Double.parseDouble(value), formatIndex, formatString).trim();
} else {
thisStr = value;
}
thisStr = thisStr.replace("_", "").trim();
break;
case DATE: //日期
thisStr = formatter.formatRawCellContents(Double.parseDouble(value), formatIndex, formatString);
// 對日期字符串作特殊處理,去掉T
thisStr = thisStr.replace("T", " ");
break;
default:
thisStr = " ";
break;
}
return thisStr;
}
public int countNullCell(String ref, String preRef) {
//excel2007最大行數是1048576,最大列數是16384,最後一列列名是XFD
String xfd = ref.replaceAll("\\d+", "");
String xfd_1 = preRef.replaceAll("\\d+", "");
xfd = fillChar(xfd, 3, '@', true);
xfd_1 = fillChar(xfd_1, 3, '@', true);
char[] letter = xfd.toCharArray();
char[] letter_1 = xfd_1.toCharArray();
int res = (letter[0] - letter_1[0]) * 26 * 26 + (letter[1] - letter_1[1]) * 26 + (letter[2] - letter_1[2]);
return res - 1;
}
public String fillChar(String str, int len, char let, boolean isPre) {
int len_1 = str.length();
if (len_1 < len) {
if (isPre) {
for (int i = 0; i < (len - len_1); i++) {
str = let + str;
}
} else {
for (int i = 0; i < (len - len_1); i++) {
str = str + let;
}
}
}
return str;
}
/**
* @return the exceptionMessage
*/
public String getExceptionMessage() {
return exceptionMessage;
}
}
4.Excel輔助工具類ExcelReaderUtil,我把這些代碼做了一下處理,現在可以直接一次性讀取3個sheet的內容,直接調用readExcel()方法即可
/**
 * Helper that reads an uploaded Excel file ({@code .xls} or {@code .xlsx}) through the
 * event-model readers and returns the rows of up to three sheets.
 */
public class ExcelReaderUtil {
    /** File extension of Excel 2003 workbooks. */
    public static final String EXCEL03_EXTENSION = ".xls";
    /** File extension of Excel 2007 workbooks. */
    public static final String EXCEL07_EXTENSION = ".xlsx";

    /**
     * Reads the content of the uploaded Excel file. At most three sheets are read.
     *
     * <p>The upload is copied to a temporary file, which is removed again before the method
     * returns, whether parsing succeeds or fails.
     *
     * @param file the uploaded file; may be {@code null}
     * @return the rows of the first three sheets, or {@code null} if {@code file} is {@code null}
     * @throws Exception if the file name does not end in {@code .xls} or {@code .xlsx}
     *                   (compared case-insensitively), or if the file cannot be read or parsed
     * @author cjw
     */
    public static ImportExcelData readExcel(MultipartFile file) throws Exception {
        if (file == null) {
            return null;
        }
        // The original file name may be missing or may have no extension at all.
        String fileName = file.getOriginalFilename();
        int dot = fileName == null ? -1 : fileName.lastIndexOf('.');
        String extension = dot < 0 ? "" : fileName.substring(dot).toLowerCase(java.util.Locale.ROOT);
        boolean isXls = EXCEL03_EXTENSION.equals(extension);
        boolean isXlsx = EXCEL07_EXTENSION.equals(extension);
        if (!isXls && !isXlsx) {
            throw new Exception("文件格式錯誤,fileName的擴展名只能是xls或xlsx。");
        }
        // The timestamp prefix keeps temporary file names apart.
        final File excelFile = File.createTempFile(System.currentTimeMillis() + "", extension);
        try {
            file.transferTo(excelFile);
            Map<String, List<List<String>>> result;
            if (isXls) { // Excel 2003
                result = new ExcelXlsReader().process(excelFile);
            } else { // Excel 2007
                result = new ExcelXlsxReader().process(excelFile);
            }
            ImportExcelData data = new ImportExcelData();
            data.setFirstSheetList(result.get("first"));
            data.setSecondSheetList(result.get("second"));
            data.setThirdSheetList(result.get("third"));
            return data;
        } finally {
            // Remove the temporary copy even when parsing failed; if it cannot be deleted
            // right now, ask the JVM to delete it on exit.
            if (excelFile.exists() && !excelFile.delete()) {
                excelFile.deleteOnExit();
            }
        }
    }
}
4.1sheet內容封裝類
/**
 * Holder for the rows read from up to three sheets of one Excel file.
 *
 * <p>Each sheet is a list of rows and each row is a list of cell strings. The class declares
 * no accessors itself; they are presumably generated by Lombok's {@code @Data} (confirm the
 * Lombok dependency in the build).
 */
@Data
public class ImportExcelData {
// Rows of the first sheet
private List<List<String>> firstSheetList;
// Rows of the second sheet
private List<List<String>> secondSheetList;
// Rows of the third sheet
private List<List<String>> thirdSheetList;
}
5.測試
上傳了一個200M左右的Excel,可以看到,整個解析時間大概是一分多鐘