Java使用POI讀取Excel數據轉換成CSV再讀取

一、首先配置相關Maven依賴包

		<!-- Apache POI core artifact (provides e.g. org.apache.poi.ss.* used by the class below). -->
		<dependency>
			<groupId>org.apache.poi</groupId>
			<artifactId>poi</artifactId>
			<version>4.1.0</version>
		</dependency>

		<!-- Apache POI OOXML artifact (provides the org.apache.poi.xssf.* and openxml4j classes
		     used by the class below). Declared with the same version as the core artifact above. -->
		<dependency>
			<groupId>org.apache.poi</groupId>
			<artifactId>poi-ooxml</artifactId>
			<version>4.1.0</version>
		</dependency>

二、解析處理大數據量Excel文件的XLSX2CSV.java類

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;

import javax.xml.parsers.ParserConfigurationException;

import org.apache.poi.ooxml.util.SAXHelper;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.util.CellAddress;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
/**
 * Reads a single named sheet of an XLSX workbook through POI's streaming
 * (SAX event) API and collects the formatted cell values as a list of rows,
 * each row being a list of strings.
 *
 * <p>Missing rows and missing cells are represented by {@code null} entries so
 * that column positions in every returned row line up with the sheet.
 */
public class XLSX2CSV {

    /**
     * Receives the row/cell events emitted while a sheet is parsed and appends
     * the values to the enclosing instance's {@code output}.
     */
    private class SheetToCSV implements SheetContentsHandler {
        /** Index of the row most recently started, or -1 before the first row. */
        private int currentRow = -1;
        /** Index of the column most recently written in the current row, or -1 if none. */
        private int currentCol = -1;

        /**
         * Appends {@code number} filler rows, each holding {@code minColumns}
         * null cells, for rows that were skipped in the sheet XML.
         */
        private void outputMissingRows(int number) {
            for (int i = 0; i < number; i++) {
                // Use a local list: the shared curStr field belongs to the row being parsed.
                ArrayList<String> fillerRow = new ArrayList<String>();
                for (int j = 0; j < minColumns; j++) {
                    fillerRow.add(null);
                }
                output.add(fillerRow);
            }
        }

        @Override
        public void startRow(int rowNum) {
            // If there were gaps, output the missing rows first
            outputMissingRows(rowNum - currentRow - 1);
            // Prepare a fresh list for this row
            curStr = new ArrayList<String>();
            currentRow = rowNum;
            currentCol = -1;
        }

        @Override
        public void endRow(int rowNum) {
            // Pad up to the minimum number of columns; currentCol is the last
            // column already written, so padding starts at the next one.
            for (int i = currentCol + 1; i < minColumns; i++) {
                curStr.add(null);
            }
            output.add(curStr);
        }

        @Override
        public void cell(String cellReference, String formattedValue,
                         XSSFComment comment) {
            // A cell without a reference is taken to be the next cell in the row.
            if (cellReference == null) {
                cellReference = new CellAddress(currentRow, currentCol + 1).formatAsString();
            }
            // Fill any columns skipped between the previous cell and this one
            int thisCol = (new CellReference(cellReference)).getCol();
            int missedCols = thisCol - currentCol - 1;
            for (int i = 0; i < missedCols; i++) {
                curStr.add(null);
            }
            currentCol = thisCol;
            // Numbers and strings are stored the same way (no CSV quoting is applied).
            curStr.add(formattedValue);
        }

        @Override
        public void headerFooter(String text, boolean isHeader, String tagName) {
            // Skip, no headers or footers in the output
        }
    }

    private final OPCPackage xlsxPackage;

    /**
     * Minimum number of columns per output row; shorter rows are padded with
     * null entries. A value of -1 disables padding.
     */
    private final int minColumns;

    /**
     * Destination for data: one inner list per row.
     */
    private ArrayList<ArrayList<String>> output;
    /** The row currently being filled by the sheet handler. */
    private ArrayList<String> curStr;

    /**
     * Returns the rows collected by the most recent {@link #process(String)} call.
     *
     * @return the collected rows, or {@code null} if no sheet has been visited yet
     */
    public ArrayList<ArrayList<String>> getOutput(){
        return output;
    }

    /**
     * Creates a new XLSX reader.
     *
     * @param pkg        The XLSX package to process
     * @param minColumns The minimum number of columns to output, or -1 for no minimum
     */
    public XLSX2CSV(OPCPackage pkg, int minColumns) {
        this.xlsxPackage = pkg;
        this.minColumns = minColumns;
    }

    /**
     * Parses the content of one sheet using the specified styles and
     * shared-strings tables, forwarding row/cell events to {@code sheetHandler}.
     *
     * @param styles           styles table of the workbook
     * @param strings          shared-strings table of the workbook
     * @param sheetHandler     receiver of the parsed rows and cells
     * @param sheetInputStream raw XML stream of the sheet; not closed by this method
     * @throws ParserConfigurationException declared for compatibility with existing callers
     * @throws SAXException if the sheet XML cannot be parsed or the stream cannot be read
     */
    public void processSheet(
            StylesTable styles,
            ReadOnlySharedStringsTable strings,
            SheetContentsHandler sheetHandler,
            InputStream sheetInputStream)
            throws ParserConfigurationException, SAXException {
        DataFormatter formatter = new DataFormatter();
        InputSource sheetSource = new InputSource(sheetInputStream);
        try {
            XMLReader sheetParser = SAXHelper.newXMLReader();
            ContentHandler handler = new XSSFSheetXMLHandler(
                    styles, null, strings, sheetHandler, formatter, false);
            sheetParser.setContentHandler(handler);
            sheetParser.parse(sheetSource);
        } catch (ParserConfigurationException e) {
            // Keep the original exception as the cause so the failure can be diagnosed.
            throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage(), e);
        } catch (IOException io) {
            // Report read failures instead of silently returning a truncated sheet.
            throw new SAXException("Failed to read sheet data - " + io.getMessage(), io);
        }
    }

    /**
     * Reads the sheet with the given name from the workbook.
     *
     * @param sheetName name of the sheet to read
     * @return the rows of the sheet, or {@code null} if no sheet with that name
     *         exists or the workbook could not be read
     */
    public ArrayList<ArrayList<String>> process(String sheetName) {
        try{
            ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage);
            XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);
            StylesTable styles = xssfReader.getStylesTable();
            XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
            while (iter.hasNext()) {
                output = new ArrayList<ArrayList<String>> ();
                InputStream stream = iter.next();
                try {
                    if(iter.getSheetName().equals(sheetName)){
                        processSheet(styles, strings, new SheetToCSV(), stream);
                        return output;
                    }
                } finally {
                    // Always release the sheet stream, including when parsing fails.
                    close(stream);
                }
            }
        } catch (OpenXML4JException open) {
            open.printStackTrace();
        } catch (ParserConfigurationException parser) {
            parser.printStackTrace();
        } catch (SAXException sax) {
            sax.printStackTrace();
        } catch (IOException io) {
            io.printStackTrace();
        }
        return null;
    }

    /**
     * Closes the stream, printing (rather than propagating) any failure.
     */
    private void close(InputStream stream){
        try {
            stream.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }


    /**
     * Demo entry point: reads one sheet of a fixed workbook and validates its
     * header row against the expected column titles.
     */
    public static void main(String[] args) {
        File xlsxFile = new File("F:\\files\\test.xlsx");
        if (!xlsxFile.exists()) {
            System.err.println("Not found or not a file: " + xlsxFile.getPath());
            return;
        }
        String[] array = {"序號","名稱","年齡","性別","班級"};
        // try-with-resources releases the package on every path, and the body
        // is skipped entirely if the package cannot be opened.
        try (OPCPackage p = OPCPackage.open(xlsxFile.getPath(), PackageAccess.READ)) {
            XLSX2CSV xlsx2csv = new XLSX2CSV(p, array.length);
            ArrayList<ArrayList<String>> data = xlsx2csv.process("測試");
            // A missing/unreadable sheet (null) or an empty sheet has no valid header either.
            if (data == null || data.isEmpty() || !checkFormat(data.get(0),array)) {
                System.out.println("文檔標題格式不正確!");
            }
        } catch (InvalidFormatException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Checks whether the header row of the sheet matches the expected titles.
     *
     * @param list  the header row read from the sheet; cells may be {@code null}
     * @param array the expected column titles, in order
     * @return {@code true} if the first {@code array.length} cells of {@code list}
     *         equal the expected titles; {@code false} otherwise, including when
     *         either argument is {@code null} or the row is too short
     */
    private static boolean checkFormat(ArrayList<String> list,String[] array){
        if (list == null || array == null) {
            return false;
        }
        if (list.size() < array.length) {
            return false;
        }
        for(int i = 0; i < array.length; i++){
            String expected = array[i];
            String actual = list.get(i);
            // Null-safe comparison: padded cells in the row are null.
            if (expected == null ? actual != null : !expected.equals(actual)) {
                return false;
            }
        }
        return true;
    }

}

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章