【utils】XLSXCovertCSVReader

/* ====================================================================
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements.  See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License.  You may obtain a copy of the License at

 http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==================================================================== */

import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

/**
 * 使用CVS模式解決XLSX文件,可以有效解決用戶模式內存溢出的問題
 * 該模式是POI官方推薦的讀取大數據的模式,在用戶模式下,數據量較大、Sheet較多、或者是有很多無用的空行的情況
 * ClassName: XLSXCovertCSVReader 
 * @author liweiyan
 * @date 2015年12月24日
 * @version V1.0
 */
public class XLSXCovertCSVReader {
    /**
     * 定義枚舉類excel數據類型
     * ClassName: xssfDataType 
     * @author 王茂
     * @date 2015年12月24日
     * @version V1.0
     */
    enum xssfDataType {
        BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER,
    }

    /**
     * xssf_sax_API處理Excel
     * ClassName: MyXSSFSheetHandler 
     * @author 王茂
     * @date 2015年12月24日
     * @version V1.0
     */
    class MyXSSFSheetHandler extends DefaultHandler {

        /**
         * Table with styles
         */
        private StylesTable stylesTable;

        /**
         * Table with unique strings
         */
        private ReadOnlySharedStringsTable sharedStringsTable;

        /**
         * Destination for data
         */
        private final PrintStream output;

        /**
         * Number of columns to read starting with leftmost
         */
        private final int minColumnCount;
        // Set when V start element is seen
        private boolean vIsOpen;
        // Set when cell start element is seen;
        // used when cell close element is seen.
        private xssfDataType nextDataType;
        // Used to format numeric cell values.
        private short formatIndex;
        private String formatString;
        private final DataFormatter formatter;
        private int thisColumn = -1;
        // The last column printed to the output stream
        private int lastColumnNumber = -1;
        // Gathers characters as they are seen.
        private StringBuffer value;
        private String[] record;
        private List<String[]> rows = new ArrayList<String[]>();
        private boolean isCellNull = false;

        /**
         * 構造器初始化自定義類型
         * <p>Description: </p>

         * @param styles
         * @param strings
         * @param cols
         * @param target
         */
        public MyXSSFSheetHandler(StylesTable styles,
                                  ReadOnlySharedStringsTable strings, int cols, PrintStream target) {
            this.stylesTable = styles;
            this.sharedStringsTable = strings;
            this.minColumnCount = cols;
            this.output = target;
            this.value = new StringBuffer();
            this.nextDataType = xssfDataType.NUMBER;
            this.formatter = new DataFormatter();
            record = new String[this.minColumnCount];
            rows.clear();// 每次讀取都清空行集合
        }

        /**
         * 讀取excel行
         */
        public void startElement(String uri, String localName, String name,
                Attributes attributes) throws SAXException {
            if ("inlineStr".equals(name) || "v".equals(name)) {
                vIsOpen = true;
                // Clear contents cache
                value.setLength(0);
            }
            // c => cell
            else if ("c".equals(name)) {
                // Get the cell reference
                String r = attributes.getValue("r");
                int firstDigit = -1;
                for (int c = 0; c < r.length(); ++c) {
                    if (Character.isDigit(r.charAt(c))) {
                        firstDigit = c;
                        break;
                    }
                }
                thisColumn = nameToColumn(r.substring(0, firstDigit));
                // Set up defaults.
                this.nextDataType = xssfDataType.NUMBER;
                this.formatIndex = -1;
                this.formatString = null;
                String cellType = attributes.getValue("t");
                String cellStyleStr = attributes.getValue("s");
                if ("b".equals(cellType))
                    nextDataType = xssfDataType.BOOL;
                else if ("e".equals(cellType))
                    nextDataType = xssfDataType.ERROR;
                else if ("inlineStr".equals(cellType))
                    nextDataType = xssfDataType.INLINESTR;
                else if ("s".equals(cellType))
                    nextDataType = xssfDataType.SSTINDEX;
                else if ("str".equals(cellType))
                    nextDataType = xssfDataType.FORMULA;
                else if (cellStyleStr != null) {
                    // It's a number, but almost certainly one
                    // with a special style or format
                    int styleIndex = Integer.parseInt(cellStyleStr);
                    XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);
                    this.formatIndex = style.getDataFormat();
                    this.formatString = style.getDataFormatString();
                    if (this.formatString == null)
                        this.formatString = BuiltinFormats
                                .getBuiltinFormat(this.formatIndex);
                }
            }
        }

        /**
         * 行讀取完成後將數據存入數組
         */
        public void endElement(String uri, String localName, String name)
                throws SAXException {
            String thisStr = null;
            // v => contents of a cell
            if ("v".equals(name)) {
                // Process the value contents as required.
                // Do now, as characters() may be called more than once
                switch (nextDataType) {

                case BOOL:
                    char first = value.charAt(0);
                    thisStr = first == '0' ? "FALSE" : "TRUE";
                    break;
                case ERROR:
                    thisStr = "\"ERROR:" + value.toString() + '"';
                    break;
                case FORMULA:
                    // A formula could result in a string value,
                    // so always add double-quote characters.
                    thisStr = value.toString();
                    break;
                case INLINESTR:
                    // TODO: have seen an example of this, so it's untested.
                    XSSFRichTextString rtsi = new XSSFRichTextString(
                            value.toString());
                    thisStr = rtsi.toString();
                    break;
                case SSTINDEX:
                    String sstIndex = value.toString();
                    try {
                        int idx = Integer.parseInt(sstIndex);
                        XSSFRichTextString rtss = new XSSFRichTextString(
                                sharedStringsTable.getEntryAt(idx));
                        thisStr = rtss.toString();
                    } catch (NumberFormatException ex) {
                        output.println("Failed to parse SST index '" + sstIndex
                                + "': " + ex.toString());
                    }
                    break;
                case NUMBER:
                    String n = value.toString();
                    // 判斷是否是日期格式
                    if (HSSFDateUtil.isADateFormat(this.formatIndex, n)) {
                        Double d = Double.parseDouble(n);
                        Date date = HSSFDateUtil.getJavaDate(d);
                        thisStr = formateDateToString(date);
                    } else if (this.formatString != null)
                        thisStr = formatter.formatRawCellContents(
                                Double.parseDouble(n), this.formatIndex,
                                this.formatString);
                    else
                        thisStr = n;
                    break;
                default:
                    thisStr = "(TODO: Unexpected type: " + nextDataType + ")";
                    break;
                }
                // Output after we've seen the string contents
                // Emit commas for any fields that were missing on this row
                if (lastColumnNumber == -1) {
                    lastColumnNumber = 0;
                }
                // 判斷單元格的值是否爲空
                if (thisStr == null || "".equals(isCellNull)) {
                    isCellNull = true;// 設置單元格是否爲空值
                }
                record[thisColumn] = thisStr;
                // Update column
                if (thisColumn > -1)
                    lastColumnNumber = thisColumn;

            } else if ("row".equals(name)) {

                // Print out any missing commas if needed
                if (minColumns > 0) {
                    // Columns are 0 based
                    if (lastColumnNumber == -1) {
                        lastColumnNumber = 0;
                    }
                    if (isCellNull == false && record[0] != null
                            && record[1] != null)// 判斷是否空行
                    {
                        rows.add(record.clone());
                        isCellNull = false;
                        for (int i = 0; i < record.length; i++) {
                            record[i] = null;
                        }
                    }
                }
                lastColumnNumber = -1;
            }
        }
        public List<String[]> getRows() {
            return rows;
        }

        public void setRows(List<String[]> rows) {
            this.rows = rows;
        }
        /**
         * Captures characters only if a suitable element is open. Originally
         * was just "v"; extended for inlineStr also.
         */
        public void characters(char[] ch, int start, int length)
                throws SAXException {
            if (vIsOpen)
                value.append(ch, start, length);
        }

        /**
         * Converts an Excel column name like "C" to a zero-based index.
         * 
         * @param name
         * @return Index corresponding to the specified name
         */
        private int nameToColumn(String name) {
            int column = -1;
            for (int i = 0; i < name.length(); ++i) {
                int c = name.charAt(i);
                column = (column + 1) * 26 + c - 'A';
            }
            return column;
        }

        private String formateDateToString(Date date) {
            SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");// 格式化日期
            return sdf.format(date);

        }

    }
    private OPCPackage xlsxPackage;
    private int minColumns;
    private PrintStream output;

    /**
     * Cew XLSX -> CSV converter
     * 
     * @param pkg
     *            The XLSX package to process
     * @param output
     *            The PrintStream to output the CSV to
     * @param minColumns
     *            The minimum number of columns to output, or -1 for no minimum
     */
    public XLSXCovertCSVReader(OPCPackage pkg, PrintStream output,
                               int minColumns) {
        this.xlsxPackage = pkg;
        this.output = output;
        this.minColumns = minColumns;
    }

    /**
     * Parses and shows the content of one sheet using the specified styles and
     * shared-strings tables.
     * 
     * @param styles
     * @param strings
     * @param sheetInputStream
     */
    public List<String[]> processSheet(StylesTable styles,
                                       ReadOnlySharedStringsTable strings, InputStream sheetInputStream)
            throws IOException, ParserConfigurationException, SAXException {

        InputSource sheetSource = new InputSource(sheetInputStream);
        SAXParserFactory saxFactory = SAXParserFactory.newInstance();
        SAXParser saxParser = saxFactory.newSAXParser();
        XMLReader sheetParser = saxParser.getXMLReader();
        MyXSSFSheetHandler handler = new MyXSSFSheetHandler(styles, strings,
                this.minColumns, this.output);
        sheetParser.setContentHandler(handler);
        sheetParser.parse(sheetSource);
        return handler.getRows();
    }

    /**
     * 
     * @Description: 讀取excel返回對應集合
     * @param @return
     * @param @throws IOException
     * @param @throws OpenXML4JException
     * @param @throws ParserConfigurationException
     * @param @throws SAXException   
     * @return List<String[]> 
     * @throws
     * @author 王茂
     * @date 2015年12月24日
     */
    public List<String[]> process() throws IOException, OpenXML4JException,
            ParserConfigurationException, SAXException {

        ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage);
        XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);
        List<String[]> list = null;
        StylesTable styles = xssfReader.getStylesTable();
        XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
        int index = 0;
        // 獲取sheetIter迭代,並遍歷
        while (iter.hasNext()) {
            InputStream stream = iter.next();
            // 判斷如果爲需要獲取的sheet則執行
            if (0 == index) {
                list = processSheet(styles, strings, stream);
                stream.close();
                ++index;
            }
        }
        return list;
    }

    /**
     * 
     * @Description: 讀取Excel
     * @param @param path   文件地址
     * @param @param minColumns excel行數
     * @param @return
     * @param @throws IOException
     * @param @throws OpenXML4JException
     * @param @throws ParserConfigurationException
     * @param @throws SAXException   
     * @return List<String[]> 
     * @throws
     * @author 王茂
     * @date 2015年12月24日
     */
    public static List<String[]> readerExcel(String path, int minColumns)
            throws IOException, OpenXML4JException,
            ParserConfigurationException, SAXException {
        OPCPackage p = OPCPackage.open(path, PackageAccess.READ);
        XLSXCovertCSVReader xlsx2csv = new XLSXCovertCSVReader(p, System.out,
                minColumns);
        List<String[]> list = xlsx2csv.process();
        p.close();
        return list;
    }

    public static List<String[]> readerExcel(InputStream inputStream, int minColumns) throws IOException, OpenXML4JException, ParserConfigurationException, SAXException{
        OPCPackage p = OPCPackage.open(inputStream);
        XLSXCovertCSVReader xlsx2csv = new XLSXCovertCSVReader(p, System.out,
                minColumns);
        List<String[]> list = xlsx2csv.process();
        p.close();
        return list;
    }

    public static void main(String[] args) throws IOException, OpenXML4JException, ParserConfigurationException, SAXException {
        //String path  = "C:\\Users\\keith\\AppData\\Local\\VcreditChatData\\MessageFile\\推薦碼.xlsx";
        String path = "C:\\Users\\keith\\Desktop\\用戶.xlsx";
        File file = new File(path);
        InputStream inputStream = new FileInputStream(file);
        List<String[]> list = readerExcel(inputStream, 10);
        print(list);
    }

    public static void print(List<String[]> list) {
        if(list == null){
            return ;
        }
        int i = 0;
        for(String[] strings : list){
            StringBuilder stringBuilder = new StringBuilder();
            for(String string : strings){
                stringBuilder.append(string).append(",");
            }
            stringBuilder.append(i++);
            System.out.println(stringBuilder);
        }
    }
}
發佈了64 篇原創文章 · 獲贊 18 · 訪問量 5萬+
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章