一、首先在 pom.xml 中配置相關的 Maven 依賴:
<!-- Apache POI core artifact. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.1.0</version>
</dependency>
<!--
  Apache POI OOXML artifact, declared at the same version as the core artifact above.
  NOTE(review): the XLSX2CSV class imports org.apache.poi.xssf.* and org.apache.poi.ooxml.*,
  presumably supplied by this artifact; confirm before changing either version.
-->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.0</version>
</dependency>
二、用於解析大數據量 Excel 文件的 XLSX2CSV.java 類:
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.poi.ooxml.util.SAXHelper;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.util.CellAddress;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
/**
 * Reads a single named sheet of an XLSX workbook through Apache POI's XSSF event (SAX) helpers
 * and collects the formatted cell text into an in-memory table.
 *
 * <p>The result is a list of rows; each row is a list of cell strings in column order. Cells and
 * rows that are absent from the sheet XML are represented by {@code null} entries so that list
 * indices line up with the sheet's row/column indices.
 *
 * <p>An instance keeps the rows of the most recent {@link #process(String)} call in mutable
 * fields, so it should not be shared between threads without external synchronization.
 */
public class XLSX2CSV {

    /**
     * Receives row/cell callbacks from {@link XSSFSheetXMLHandler} and appends the values to the
     * enclosing instance's {@code output} table, filling gaps with {@code null}.
     */
    private class SheetToCSV implements SheetContentsHandler {
        /** Index of the row most recently started, or -1 before the first row. */
        private int currentRow = -1;
        /** Index of the last column written in the current row, or -1 if none yet. */
        private int currentCol = -1;

        /**
         * Appends {@code number} placeholder rows, each holding {@code minColumns} nulls.
         *
         * <p>Uses a local list on purpose: reassigning the shared {@code curStr} here would make
         * the following real row write its cells into the last placeholder row.
         *
         * @param number how many placeholder rows to add; values &lt;= 0 add nothing
         */
        private void outputMissingRows(int number) {
            for (int i = 0; i < number; i++) {
                ArrayList<String> blankRow = new ArrayList<>();
                for (int j = 0; j < minColumns; j++) {
                    blankRow.add(null);
                }
                output.add(blankRow);
            }
        }

        @Override
        public void startRow(int rowNum) {
            // If there were gaps, output the missing rows before starting this one.
            outputMissingRows(rowNum - currentRow - 1);
            // Prepare a fresh buffer for this row.
            curStr = new ArrayList<>();
            currentRow = rowNum;
            currentCol = -1;
        }

        @Override
        public void endRow(int rowNum) {
            // Pad up to the minimum number of columns. Start after the last written column so a
            // row that is already wide enough does not receive an extra trailing null.
            for (int i = currentCol + 1; i < minColumns; i++) {
                curStr.add(null);
            }
            output.add(curStr);
        }

        @Override
        public void cell(String cellReference, String formattedValue, XSSFComment comment) {
            // A cell without an explicit reference is taken to occupy the next column.
            String reference = cellReference;
            if (reference == null) {
                reference = new CellAddress(currentRow, currentCol + 1).formatAsString();
            }
            // Fill any columns skipped between the previous cell and this one.
            int thisCol = new CellReference(reference).getCol();
            int missedCols = thisCol - currentCol - 1;
            for (int i = 0; i < missedCols; i++) {
                curStr.add(null);
            }
            currentCol = thisCol;
            // Numbers and strings are stored the same way: as the formatted text (may be null).
            curStr.add(formattedValue);
        }

        @Override
        public void headerFooter(String text, boolean isHeader, String tagName) {
            // Skip, headers and footers are not part of the tabular data.
        }
    }

    /** The opened XLSX package to read from; owned and released by the caller. */
    private final OPCPackage xlsxPackage;

    /** Minimum number of columns per output row; shorter rows are padded with {@code null}. */
    private final int minColumns;

    /** Rows collected for the sheet currently or most recently processed. */
    private ArrayList<ArrayList<String>> output;

    /** Buffer for the row currently being read. */
    private ArrayList<String> curStr;

    /**
     * Returns the rows collected by the most recent {@link #process(String)} call.
     *
     * @return the internal row table, or {@code null} if no sheet has been visited yet
     */
    public ArrayList<ArrayList<String>> getOutput() {
        return output;
    }

    /**
     * Creates a new reader for the given package.
     *
     * @param pkg        the XLSX package to process
     * @param minColumns the minimum number of columns to output per row, or -1 for no minimum
     */
    public XLSX2CSV(OPCPackage pkg, int minColumns) {
        this.xlsxPackage = pkg;
        this.minColumns = minColumns;
    }

    /**
     * Parses one sheet's XML using the given styles and shared-strings tables and feeds the
     * contents to {@code sheetHandler}.
     *
     * @param styles           the workbook's styles table
     * @param strings          the workbook's shared-strings table
     * @param sheetHandler     callback that receives rows and cells
     * @param sheetInputStream stream over the sheet XML; not closed by this method
     * @throws ParserConfigurationException declared for compatibility with existing callers
     * @throws SAXException                 if the sheet XML cannot be parsed or read; a read
     *                                      failure is wrapped so that a truncated sheet is not
     *                                      mistaken for a complete one
     */
    public void processSheet(
            StylesTable styles,
            ReadOnlySharedStringsTable strings,
            SheetContentsHandler sheetHandler,
            InputStream sheetInputStream)
            throws ParserConfigurationException, SAXException {
        DataFormatter formatter = new DataFormatter();
        InputSource sheetSource = new InputSource(sheetInputStream);
        try {
            XMLReader sheetParser = SAXHelper.newXMLReader();
            ContentHandler handler = new XSSFSheetXMLHandler(
                    styles, null, strings, sheetHandler, formatter, false);
            sheetParser.setContentHandler(handler);
            sheetParser.parse(sheetSource);
        } catch (ParserConfigurationException e) {
            // Keep the original exception as the cause so the root problem stays visible.
            throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage(), e);
        } catch (IOException io) {
            throw new SAXException("I/O error while reading sheet XML", io);
        }
    }

    /**
     * Reads the sheet with the given name into a table of rows.
     *
     * @param sheetName name of the sheet to read
     * @return the rows of the matching sheet, or {@code null} if no sheet has that name or the
     *         workbook could not be read (the error is printed to standard error)
     */
    public ArrayList<ArrayList<String>> process(String sheetName) {
        try {
            ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage);
            XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);
            StylesTable styles = xssfReader.getStylesTable();
            XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
            while (iter.hasNext()) {
                output = new ArrayList<>();
                InputStream stream = iter.next();
                try {
                    if (iter.getSheetName().equals(sheetName)) {
                        processSheet(styles, strings, new SheetToCSV(), stream);
                        return output;
                    }
                } finally {
                    // Always release the sheet stream, including when parsing fails.
                    close(stream);
                }
            }
        } catch (OpenXML4JException | ParserConfigurationException | SAXException | IOException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Closes the stream, printing (not propagating) any failure.
     *
     * @param stream the stream to close
     */
    private void close(InputStream stream) {
        try {
            stream.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Demo entry point: reads a fixed sheet from a fixed file and validates its header row.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        File xlsxFile = new File("F:\\files\\test.xlsx");
        if (!xlsxFile.isFile()) {
            System.err.println("Not found or not a file: " + xlsxFile.getPath());
            return;
        }
        // The package open is instantaneous, as it should be.
        OPCPackage p;
        try {
            p = OPCPackage.open(xlsxFile.getPath(), PackageAccess.READ);
        } catch (InvalidFormatException e) {
            e.printStackTrace();
            return; // nothing to read without a package
        }
        try {
            String[] array = {"序號","名稱","年齡","性別","班級"};
            String sheetName = "測試";
            XLSX2CSV xlsx2csv = new XLSX2CSV(p, array.length);
            ArrayList<ArrayList<String>> data = xlsx2csv.process(sheetName);
            if (data == null || data.isEmpty()) {
                System.err.println("Sheet not found, empty or unreadable: " + sheetName);
            } else if (!checkFormat(data.get(0), array)) {
                System.out.println("文檔標題格式不正確!");
            }
        } finally {
            // The package was opened read-only, so discard it instead of saving via close().
            p.revert();
        }
    }

    /**
     * Checks whether the leading cells of a header row match the expected titles.
     *
     * @param list  the header row as read from the sheet; may contain {@code null} cells
     * @param array the expected titles, in column order
     * @return {@code true} if {@code list} has at least {@code array.length} cells and each of
     *         the first {@code array.length} cells equals the corresponding expected title;
     *         {@code false} otherwise, including when either argument is {@code null}
     */
    private static boolean checkFormat(ArrayList<String> list, String[] array) {
        if (list == null || array == null || list.size() < array.length) {
            return false;
        }
        for (int i = 0; i < array.length; i++) {
            String expected = array[i];
            String actual = list.get(i);
            // Null-safe comparison: padded cells are stored as null.
            if (expected == null ? actual != null : !expected.equals(actual)) {
                return false;
            }
        }
        return true;
    }
}