// poi事件模式讀取excel很高效:讀取速度快、能讀取大文件、佔用內存少。話不多說,上代碼(用戶模式讀取的實現也有保留)。
package net.bi.util.base;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import javax.xml.XMLConstants;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import org.apache.commons.compress.utils.Lists;
import org.apache.log4j.Logger;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
/**
* excel讀寫工具類
*
* @ClassName ExcelToolkit.java
* @author herb
* @date 2019年05月30日
*/
public class ExcelToolkit {
// log4j logger shared by all static helpers of this class.
private static Logger logger = Logger.getLogger(ExcelToolkit.class);
// File-name suffix of Excel 97-2003 workbooks; compared with String.endsWith, so it has no leading dot.
private final static String xls = "xls";
// File-name suffix of Excel 2007+ (OOXML) workbooks; compared with String.endsWith, so it has no leading dot.
private final static String xlsx = "xlsx";
/**********************************************************************************
*
*
* poi用戶模式讀取excel(代碼易讀,效率低,捨棄)
*
*
* *********************************************************************************
*/
/**
 * Reads an Excel file with the POI user model (the whole workbook is loaded
 * into memory) and returns the content of every sheet.
 *
 * <p>The outer list holds one entry per sheet, each sheet is a list of rows,
 * and each row is a {@code String[]} whose index is the column index. Columns
 * that hold no cell are returned as empty strings, and the array is as long
 * as the last used column of that row, so rows with gaps are not truncated.
 * Rows that do not exist in the sheet are skipped.
 *
 * @param file the {@code xls} or {@code xlsx} file to read
 * @return the data of all sheets; an empty list when the workbook could not be opened
 * @throws IOException if the file fails validation or the workbook cannot be closed
 * @author herb
 * @date 2019-05-30
 */
public static List<List<String[]>> readExcel(File file) throws IOException {
    checkFile(file);
    List<List<String[]>> excelData = new ArrayList<List<String[]>>();
    // try-with-resources accepts a null resource and guarantees that the
    // workbook is closed even when reading a cell throws.
    try (Workbook workbook = getWorkBook(file)) {
        if (workbook == null) {
            return excelData;
        }
        for (int sheetNum = 0; sheetNum < workbook.getNumberOfSheets(); sheetNum++) {
            Sheet sheet = workbook.getSheetAt(sheetNum);
            if (sheet == null) {
                continue;
            }
            excelData.add(readSheet(sheet));
        }
    }
    return excelData;
}

/** Collects every existing row of one sheet, in row order. */
private static List<String[]> readSheet(Sheet sheet) {
    List<String[]> sheetData = new ArrayList<String[]>();
    int lastRowNum = sheet.getLastRowNum();
    for (int rowNum = sheet.getFirstRowNum(); rowNum <= lastRowNum; rowNum++) {
        Row row = sheet.getRow(rowNum);
        if (row != null) {
            sheetData.add(readRow(row));
        }
    }
    return sheetData;
}

/** Converts one row into an array indexed by column number. */
private static String[] readRow(Row row) {
    // getLastCellNum() is the index of the last cell PLUS ONE, or -1 when the
    // row has no cells; getPhysicalNumberOfCells() only counts defined cells
    // and therefore cannot be used as an upper column bound.
    int columnCount = Math.max(row.getLastCellNum(), 0);
    String[] cells = new String[columnCount];
    for (int cellNum = 0; cellNum < columnCount; cellNum++) {
        // Missing cells come back as null, which getCellValue maps to "".
        cells[cellNum] = getCellValue(row.getCell(cellNum));
    }
    return cells;
}
/**
 * Validates a file before it is handed to one of the Excel readers.
 *
 * @param file the candidate file; may be {@code null}
 * @throws FileNotFoundException if {@code file} is {@code null} or does not exist on disk
 * @throws IOException if the file name does not end with {@code xls} or {@code xlsx}
 */
private static void checkFile(File file) throws IOException {
    // A null reference and a path that points at nothing are both "file does not exist".
    if (null == file || !file.exists()) {
        logger.error("文件不存在!");
        throw new FileNotFoundException("文件不存在!");
    }
    String fileName = file.getName();
    boolean excelFile = fileName.endsWith(xls) || fileName.endsWith(xlsx);
    if (!excelFile) {
        logger.error(fileName + "不是excel文件");
        throw new IOException(fileName + "不是excel文件");
    }
}
/**
 * Opens the file as a POI user-model workbook, choosing the implementation
 * from the file-name suffix ({@code xls} -> HSSF, {@code xlsx} -> XSSF).
 *
 * @param file the Excel file to open
 * @return the workbook, or {@code null} when the suffix is neither of the two
 *         or when reading the file failed with an {@link IOException} (the failure is logged)
 */
private static Workbook getWorkBook(File file) {
    String fileName = file.getName();
    Workbook workbook = null;
    // The stream is only needed while the workbook is being constructed, so
    // it is released here instead of being leaked.
    // NOTE(review): relies on the InputStream constructors buffering the whole
    // file, as described in the POI documentation - confirm for the POI version in use.
    try (InputStream is = new FileInputStream(file)) {
        if (fileName.endsWith(xls)) {
            // Excel 97-2003
            workbook = new HSSFWorkbook(is);
        } else if (fileName.endsWith(xlsx)) {
            // Excel 2007+
            workbook = new XSSFWorkbook(is);
        }
    } catch (IOException e) {
        // Keep the stack trace; the caller only sees a null workbook.
        logger.error(e.getMessage(), e);
    }
    return workbook;
}
/**
 * Renders a cell as text.
 *
 * <p>Numeric cells holding a whole number are first switched to the string
 * cell type so that 1 is not rendered as "1.0". Formula cells yield the
 * formula text, blank or missing cells yield an empty string.
 *
 * <p>NOTE(review): switching the cell type modifies the in-memory workbook as
 * a side effect of reading - confirm that no caller writes the workbook back.
 *
 * @param cell the cell to read; may be {@code null}
 * @return the textual value, never {@code null} for a {@code null} cell
 */
private static String getCellValue(Cell cell) {
    if (cell == null) {
        return "";
    }
    if (cell.getCellType() == CellType.NUMERIC) {
        double numeric = cell.getNumericCellValue();
        long rounded = Math.round(numeric);
        // Whole numbers are read back through the string cell type.
        if (Double.parseDouble(rounded + ".0") == numeric) {
            cell.setCellType(CellType.STRING);
        }
    }
    switch (cell.getCellType()) {
        case NUMERIC:
            return String.valueOf(cell.getNumericCellValue());
        case STRING:
            return String.valueOf(cell.getStringCellValue());
        case BOOLEAN:
            return String.valueOf(cell.getBooleanCellValue());
        case FORMULA:
            return String.valueOf(cell.getCellFormula());
        case BLANK:
            return "";
        case ERROR:
            return "非法字符";
        default:
            return "未知類型";
    }
}
/**
 * Converts an Excel column designator into a zero-based index.
 *
 * <p>Two notations are accepted: a one-based number accepted by
 * {@code NumberToolkit.isInteger} ("1" -> 0) and ASCII column letters in
 * either case ("A" -> 0, "z" -> 25, "AA" -> 26).
 *
 * @param chars the column number or column letters; may be {@code null}
 * @return the zero-based index, or -1 when the input is {@code null}, empty,
 *         contains anything other than ASCII letters, or does not fit in an {@code int}
 * @author herb
 * @date 2019-05-09
 */
public static int excelLine2Dec(String chars) {
    if (null == chars) {
        return -1;
    }
    if (NumberToolkit.isInteger(chars)) {
        try {
            return Integer.parseInt(chars) - 1;
        } catch (NumberFormatException e) {
            // Integer-looking but outside the int range: same "invalid" result as bad letters.
            return -1;
        }
    }
    int lines = 0;
    try {
        for (int i = 0; i < chars.length(); i++) {
            char letter = chars.charAt(i);
            // Upper-case ASCII letters only, per character: String.toUpperCase()
            // is locale-sensitive and can change the string length.
            if (letter >= 'a' && letter <= 'z') {
                letter = (char) (letter - 'a' + 'A');
            }
            if (letter < 'A' || letter > 'Z') {
                return -1;
            }
            // Base-26 accumulation in exact integer arithmetic; overflow is
            // reported instead of silently saturating or wrapping.
            lines = Math.addExact(Math.multiplyExact(lines, 26), letter - 'A' + 1);
        }
    } catch (ArithmeticException e) {
        return -1;
    }
    return lines - 1;
}
/**
 * Reads an {@code xlsx} workbook with the POI event (SAX) model, which streams
 * each sheet instead of loading the whole workbook and is therefore much
 * cheaper for large files.
 *
 * <p>The result has one list per non-empty sheet; each sheet is a list of
 * rows and each row a {@code String[]} of cell texts.
 *
 * <p>Failures are reported to the caller instead of being printed and
 * answered with {@code null} or partially filled data.
 *
 * <p>NOTE(review): {@code checkFile} also lets {@code xls} names through, but
 * an OLE2 file is presumably rejected by {@code OPCPackage.open} with an
 * unchecked POI exception - confirm whether {@code xls} should be refused earlier.
 *
 * @param filename path of the workbook to read
 * @return the data of all non-empty sheets, never {@code null}
 * @throws IOException if the file fails validation, cannot be read, or its
 *         package or sheet XML cannot be parsed (the original failure is the cause)
 */
public static List<List<String[]>> readExcel(String filename) throws IOException {
    checkFile(new File(filename));
    List<List<String[]>> excelList = Lists.newArrayList();
    // The package is closed by try-with-resources on every path, so no
    // 'finally' block (and no 'return' inside one) is needed.
    try (OPCPackage pkg = OPCPackage.open(filename, PackageAccess.READ)) {
        XSSFReader reader = new XSSFReader(pkg);
        SharedStringsTable sst = reader.getSharedStringsTable();
        StylesTable styles = reader.getStylesTable();
        XMLReader parser = fetchSheetParser(sst, excelList, styles);
        // The iterator hands out one stream per sheet; a stream is only
        // opened when it is fetched and is closed once the sheet is parsed.
        XSSFReader.SheetIterator sheets = (XSSFReader.SheetIterator) reader.getSheetsData();
        while (sheets.hasNext()) {
            try (InputStream sheetStream = sheets.next()) {
                parser.parse(new InputSource(sheetStream));
            }
        }
    } catch (InvalidOperationException | OpenXML4JException | SAXException e) {
        throw new IOException("Failed to read Excel file " + filename, e);
    }
    return excelList;
}
/**
 * Creates the SAX reader used to parse sheet XML and wires a new
 * {@code SheetHandler} to it.
 *
 * <p>The parser is obtained through JAXP rather than by naming a specific
 * Xerces driver class, so it works with whatever SAX implementation the
 * runtime provides. Secure processing is switched on because workbook
 * content comes from outside the application.
 *
 * @param sst shared strings table of the workbook
 * @param excelList list that the handler fills with one entry per non-empty sheet
 * @param styles styles table of the workbook
 * @return a namespace-aware reader whose content handler collects the sheet data
 * @throws SAXException if no suitable parser can be created or configured
 */
private static XMLReader fetchSheetParser(SharedStringsTable sst, List<List<String[]>> excelList, StylesTable styles) throws SAXException {
    XMLReader parser;
    try {
        SAXParserFactory factory = SAXParserFactory.newInstance();
        factory.setNamespaceAware(true);
        factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
        parser = factory.newSAXParser().getXMLReader();
    } catch (ParserConfigurationException e) {
        // Keep the declared exception type of this method.
        throw new SAXException("Unable to create a SAX parser for the sheet XML", e);
    }
    ContentHandler handler = new SheetHandler(sst, excelList, styles);
    parser.setContentHandler(handler);
    return parser;
}
/**
 * How the text of a cell value has to be interpreted. The type is indicated
 * by the "t" attribute on the cell ("c") element; the value itself is usually
 * in a "v" element within the cell.
 */
enum xssfDataType {
// As assigned by the sheet handler: BOOL for t="b", ERROR for t="e", FORMULA for t="str",
// INLINESTR for t="inlineStr", SSTINDEX for t="s" (index into the shared strings table),
// NUMBER for every other cell.
BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER,
}
private static class SheetHandler extends DefaultHandler {
// Shared strings table used to resolve cells of type SSTINDEX.
private SharedStringsTable sst;
// Set when V start element is seen; while true, characters() collects text into 'value'.
private boolean vIsOpen;
// Set when cell start element is seen;
// used when cell close element is seen.
private xssfDataType nextDataType;
// Gathers characters as they are seen.
private StringBuffer value;
// Used to format numeric cell values: data-format index of the cell style, -1 when the cell has none.
private short formatIndex;
// Data-format string of the cell style, null when the cell has none.
private String formatString;
// Formats raw numeric values according to formatIndex/formatString.
private final DataFormatter formatter;
// Zero-based column of the cell currently being read.
private int thisColumn = -1;
// Column of the last value added to the current row; -1 at the start of a row.
private int lastColumnNumber = -1;
/**
* Table with styles
*/
private StylesTable stylesTable;
//private boolean isCellNull = false;
// Passed through to optRow(); never changed from -1 in this class.
private int sheetIndex = -1;
// Cell texts of the row currently being read.
private List<String> rowList = Lists.newArrayList();
// Rows of the sheet currently being read.
private List<String[]> sheetList = Lists.newArrayList();
// Result: one entry per non-empty sheet; replaced in the constructor by the caller's list.
private List<List<String[]>> excelList = Lists.newArrayList();
/**
 * Creates a handler that appends the rows of every non-empty sheet it parses
 * to {@code excelList}.
 *
 * @param sst shared strings table used to resolve string cells
 * @param excelList result list supplied by the caller
 * @param styles styles table used to look up number formats
 */
private SheetHandler(SharedStringsTable sst, List<List<String[]>> excelList, StylesTable styles) {
    // Collaborators handed in by the caller.
    this.sst = sst;
    this.stylesTable = styles;
    this.excelList = excelList;
    // Parsing state.
    this.formatter = new DataFormatter();
    this.nextDataType = xssfDataType.NUMBER;
    this.value = new StringBuffer();
    this.sheetList.clear();
}
/**
 * SAX callback for an opening tag of the sheet XML.
 *
 * <p>A "v" element starts a new value buffer. A "c" (cell) element determines
 * the cell's column, its data type (from the "t" attribute) and, for numeric
 * cells with a style ("s" attribute), the number format to apply later.
 *
 * <p>Cells without a usable "r" reference are assumed to follow the previous
 * cell of the same row instead of failing with a runtime exception.
 *
 * <p>NOTE(review): the {@code "inlineStr".equals(name)} test compares an
 * element name with a cell-type value and looks like it can never match;
 * inline strings are stored in an "is" element - confirm and handle if needed.
 */
@Override
public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {
    if ("inlineStr".equals(name) || "v".equals(name)) {
        vIsOpen = true;
        // Clear contents cache
        value.setLength(0);
    } else if ("row".equals(name)) {
        // Restart column counting so cells without a reference are numbered from 0.
        thisColumn = -1;
    } else if ("c".equals(name)) {
        thisColumn = columnOf(attributes.getValue("r"), thisColumn + 1);
        // Set up defaults.
        this.nextDataType = xssfDataType.NUMBER;
        this.formatIndex = -1;
        this.formatString = null;
        String cellType = attributes.getValue("t");
        String cellStyleStr = attributes.getValue("s");
        if ("b".equals(cellType)) {
            nextDataType = xssfDataType.BOOL;
        } else if ("e".equals(cellType)) {
            nextDataType = xssfDataType.ERROR;
        } else if ("inlineStr".equals(cellType)) {
            nextDataType = xssfDataType.INLINESTR;
        } else if ("s".equals(cellType)) {
            nextDataType = xssfDataType.SSTINDEX;
        } else if ("str".equals(cellType)) {
            nextDataType = xssfDataType.FORMULA;
        } else if (cellStyleStr != null) {
            // It's a number, but almost certainly one with a special style or format
            applyNumberStyle(cellStyleStr);
        }
    }
}

/**
 * Extracts the zero-based column from a cell reference such as "C7".
 *
 * @param cellRef value of the "r" attribute; may be {@code null}
 * @param fallback column to use when the reference is missing or has no column letters
 */
private int columnOf(String cellRef, int fallback) {
    if (cellRef == null) {
        return fallback;
    }
    int letters = 0;
    while (letters < cellRef.length() && !Character.isDigit(cellRef.charAt(letters))) {
        letters++;
    }
    return letters == 0 ? fallback : nameToColumn(cellRef.substring(0, letters));
}

/** Loads the number format of the given style index; leaves the defaults when the index is unusable. */
private void applyNumberStyle(String cellStyleStr) {
    XSSFCellStyle style;
    try {
        style = stylesTable.getStyleAt(Integer.parseInt(cellStyleStr));
    } catch (NumberFormatException e) {
        // Malformed style index: treat the cell as an unformatted number.
        return;
    }
    if (style == null) {
        return;
    }
    this.formatIndex = style.getDataFormat();
    this.formatString = style.getDataFormatString();
    if (this.formatString == null) {
        this.formatString = BuiltinFormats.getBuiltinFormat(this.formatIndex);
    }
}
/**
 * SAX callback for character data; appends the text to the value buffer once
 * a value element has been opened. May be called several times per value.
 */
@Override
public void characters(char[] ch, int start, int length) throws SAXException {
    if (!vIsOpen) {
        return;
    }
    value.append(ch, start, length);
}
/**
 * SAX callback for a closing tag of the sheet XML.
 *
 * <ul>
 * <li>{@code </v>}: converts the collected text according to the cell type,
 * pads skipped columns with empty strings and appends the text to the row;</li>
 * <li>{@code </row>}: hands the finished row to {@link #optRow} and resets the row state;</li>
 * <li>{@code </worksheet>}: stores the sheet in the result unless it has no rows.</li>
 * </ul>
 *
 * @param uri namespace URI (unused)
 * @param localName local name (unused)
 * @param name qualified element name
 */
@Override
public void endElement(String uri, String localName, String name) throws SAXException {
    // v => contents of a cell
    if ("v".equals(name)) {
        // Convert now, as characters() may have been called more than once.
        String thisStr = cellText();
        // Fill the columns skipped since the previous value of this row.
        for (int i = lastColumnNumber; i < thisColumn - 1; ++i) {
            rowList.add("");
        }
        if (thisColumn > -1) {
            lastColumnNumber = thisColumn;
        }
        rowList.add(thisStr);
    } else if ("row".equals(name)) {
        lastColumnNumber = -1;
        optRow(sheetIndex, rowList);
        rowList.clear();
    } else if ("worksheet".equals(name)) {
        // Sheet finished; empty sheets are dropped.
        if (sheetList.size() > 0) {
            excelList.add(sheetList);
        }
        sheetList = Lists.newArrayList();
    }
}

/** Converts the buffered value text according to the type of the current cell. */
private String cellText() {
    String raw = value.toString();
    switch (nextDataType) {
        case BOOL:
            // An empty value has no first character to inspect.
            if (raw.isEmpty()) {
                return "";
            }
            return raw.charAt(0) == '0' ? "FALSE" : "TRUE";
        case ERROR:
            return "\"ERROR:" + raw + '"';
        case FORMULA:
            // A formula could result in a string value; it is returned unquoted.
            return raw;
        case INLINESTR:
            return new XSSFRichTextString(raw).toString();
        case SSTINDEX:
            try {
                int idx = Integer.parseInt(raw);
                return new XSSFRichTextString(sst.getEntryAt(idx)).toString();
            } catch (NumberFormatException ex) {
                // Unusable shared-string index: emit an empty cell rather than a null entry.
                return "";
            }
        case NUMBER:
            return numberText(raw);
        default:
            return "(TODO: Unexpected type: " + nextDataType + ")";
    }
}

/**
 * Renders a numeric value: dates as "yyyy-MM-dd HH:mm:ss", other formatted
 * numbers through the cell's number format, everything else unchanged.
 */
private String numberText(String raw) {
    // The date check needs the FORMAT string of the style, not the cell value.
    boolean dateFormat = HSSFDateUtil.isADateFormat(this.formatIndex, this.formatString);
    if (!dateFormat && this.formatString == null) {
        return raw;
    }
    double number;
    try {
        number = Double.parseDouble(raw);
    } catch (NumberFormatException e) {
        // Not a parsable number: keep the text as it is in the file.
        return raw;
    }
    if (dateFormat) {
        Date date = HSSFDateUtil.getJavaDate(number);
        // NOTE(review): getJavaDate presumably returns null for values that
        // are not valid Excel dates - such values fall through to number formatting.
        if (date != null) {
            return formateDateToString(date);
        }
    }
    if (this.formatString == null) {
        return raw;
    }
    return formatter.formatRawCellContents(number, this.formatIndex, this.formatString);
}
/**
 * Called once for every row that has been read; stores a copy of the row's
 * cells in the current sheet. Business logic for a row can be placed here.
 * Empty rows are stored as well.
 *
 * @param sheetIndex index of the sheet (not used by this implementation)
 * @param rowList cell texts of the row that has just been completed
 */
public void optRow(int sheetIndex, List<String> rowList) {
    // Copy into an array so that the caller can clear and reuse rowList.
    String[] cells = new String[rowList.size()];
    rowList.toArray(cells);
    sheetList.add(cells);
}
/**
 * Converts an Excel column name such as "C" into a zero-based index
 * ("A" is 0, "Z" is 25, "AA" is 26). An empty name yields -1.
 *
 * @param name column letters, expected in upper case
 * @return zero-based index of the column
 */
private int nameToColumn(String name) {
    int column = -1;
    for (char letter : name.toCharArray()) {
        // Shift the columns seen so far by one base-26 digit, then add this letter.
        column = (column + 1) * 26 + (letter - 'A');
    }
    return column;
}
/**
 * Formats a date with the pattern "yyyy-MM-dd HH:mm:ss".
 *
 * @param date the date to format
 * @return the formatted date
 */
private String formateDateToString(Date date) {
    // A new formatter per call, as SimpleDateFormat instances must not be shared between threads.
    return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(date);
}
}
/**
 * Manual smoke test: reads a workbook with the event-model reader and prints
 * how many sheets were returned.
 *
 * @param args optional; {@code args[0]} is the path of the workbook to read,
 *        the sample path below is used when it is absent
 * @throws Exception if reading the workbook fails
 */
public static void main(String[] args) throws Exception {
    String path = (args != null && args.length > 0) ? args[0] : "D:/6家老店上線資料9.16.xlsx";
    System.out.println("start read");
    // readExcel is static, so no ExcelToolkit instance is needed.
    List<List<String[]>> sheets = ExcelToolkit.readExcel(path);
    int sheetCount = (sheets == null) ? 0 : sheets.size();
    System.out.println("read " + sheetCount + " sheet(s) from " + path);
}
}