【java】poi事件模式驅動高效讀取excel

POI 事件模式讀取 Excel 非常高效:讀取速度快、可處理大文件、佔用內存少。話不多說,直接上代碼(用戶模式的讀取方式也一併保留)。

package net.bi.util.base;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.apache.commons.compress.utils.Lists;
import org.apache.log4j.Logger;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

/**
 * excel讀寫工具類
 * 
 * @ClassName ExcelToolkit.java
 * @author herb 
 * @date 2019年05月30日
 */
public class ExcelToolkit {

    /** Class-wide log4j logger; final so it cannot be reassigned after class initialization. */
    private static final Logger logger = Logger.getLogger(ExcelToolkit.class);
    /** File-name suffix (without dot) that checkFile/getWorkBook treat as a legacy binary workbook. */
    private static final String xls = "xls";
    /** File-name suffix (without dot) that checkFile/getWorkBook treat as an OOXML workbook. */
    private static final String xlsx = "xlsx";

    /**********************************************************************************
     * 
     * 
     * 							poi用戶模式讀取excel(代碼易讀,效率低,捨棄)
     * 
     * 
     * *********************************************************************************
     */
    /**
     * Reads every sheet of a workbook using the POI user model (the whole
     * workbook is loaded into memory).
     *
     * <p>Each sheet becomes one inner list and each existing row one
     * {@code String[]}; rows for which POI returns {@code null} are skipped.
     * Array index {@code i} holds the text of column {@code i}. Columns that
     * have no cell, up to the row's last cell, are returned as empty strings,
     * and a row without any cells yields an empty array.
     *
     * @param file workbook to read; validated by {@code checkFile} first
     * @return one list of rows per sheet, in workbook order; an empty list when
     *         {@code getWorkBook} could not open the file
     * @throws IOException if {@code checkFile} rejects the file or closing the
     *                     workbook fails
     * @author herb
     * @date 2019-05-30
     */
    public static List<List<String[]>> readExcel(File file) throws IOException {
        checkFile(file);
        List<List<String[]>> excelData = new ArrayList<List<String[]>>();
        Workbook workbook = getWorkBook(file);
        if (workbook == null) {
            // getWorkBook returns null when the file could not be opened.
            return excelData;
        }
        try {
            for (int sheetNum = 0; sheetNum < workbook.getNumberOfSheets(); sheetNum++) {
                Sheet sheet = workbook.getSheetAt(sheetNum);
                if (sheet == null) {
                    continue;
                }
                List<String[]> sheetData = new ArrayList<String[]>();
                int lastRowNum = sheet.getLastRowNum();
                for (int rowNum = sheet.getFirstRowNum(); rowNum <= lastRowNum; rowNum++) {
                    Row row = sheet.getRow(rowNum);
                    if (row == null) {
                        continue;
                    }
                    // getLastCellNum() is the index of the last cell PLUS ONE, or -1
                    // for a row without cells. Sizing by the physical cell count
                    // (as before) truncated rows containing gaps and indexed
                    // cells[-1] for rows without cells.
                    int cellCount = Math.max(row.getLastCellNum(), 0);
                    String[] cells = new String[cellCount];
                    for (int cellNum = 0; cellNum < cellCount; cellNum++) {
                        // Missing cells come back as null and render as "".
                        cells[cellNum] = getCellValue(row.getCell(cellNum));
                    }
                    sheetData.add(cells);
                }
                excelData.add(sheetData);
            }
        } finally {
            // Release the workbook even when reading a sheet fails.
            workbook.close();
        }
        return excelData;
    }

    /**
     * Validates that {@code file} refers to an existing regular file whose name
     * ends with one of the accepted Excel suffixes.
     *
     * <p>The suffix comparison is case-sensitive and does not require a leading
     * dot, matching the test used by {@code getWorkBook}.
     *
     * @param file candidate workbook file, may be {@code null}
     * @throws FileNotFoundException if {@code file} is {@code null} or is not an
     *                               existing regular file
     * @throws IOException           if the file name has neither accepted suffix
     */
    private static void checkFile(File file) throws IOException {
        // Previously only null was rejected although the message says the file
        // does not exist; a missing path now fails here instead of later.
        if (null == file || !file.isFile()) {
            logger.error("文件不存在!");
            throw new FileNotFoundException("文件不存在!");
        }
        String fileName = file.getName();
        boolean hasExcelSuffix = fileName.endsWith(xls) || fileName.endsWith(xlsx);
        if (!hasExcelSuffix) {
            logger.error(fileName + "不是excel文件");
            throw new IOException(fileName + "不是excel文件");
        }
    }

    /**
     * Opens {@code file} as a POI user-model workbook, choosing the
     * implementation from the file-name suffix.
     *
     * @param file workbook file; must not be {@code null}
     * @return the opened workbook (the caller must close it), or {@code null}
     *         when an {@link IOException} occurred or the name has neither
     *         accepted suffix
     */
    private static Workbook getWorkBook(File file) {
        String fileName = file.getName();
        Workbook workbook = null;
        // The stream is only needed while the constructor runs: both workbook
        // constructors consume the stream up front, so it is closed right after
        // instead of being leaked.
        try (InputStream is = new FileInputStream(file)) {
            if (fileName.endsWith(xls)) {
                // Excel 97-2003 binary format
                workbook = new HSSFWorkbook(is);
            } else if (fileName.endsWith(xlsx)) {
                // Excel 2007+ OOXML format
                workbook = new XSSFWorkbook(is);
            }
        } catch (IOException e) {
            // Keep the "null on failure" contract, but log the cause with its
            // stack trace at error level.
            logger.error("Failed to open workbook: " + file, e);
        }
        return workbook;
    }

    /**
     * Renders a cell as text without modifying the cell.
     *
     * <p>Numeric cells whose value is a whole number are rendered without a
     * fractional part (for example {@code 1} instead of {@code 1.0}); other
     * numbers use {@link String#valueOf(double)}. The previous implementation
     * obtained the same effect by switching the cell's type to STRING, which
     * changed the workbook as a side effect of reading it. Very large whole
     * numbers are now printed with all digits rather than in the cell's own
     * notation.
     *
     * <p>Formula cells return the formula text, not the evaluated result.
     *
     * @param cell the cell to render, may be {@code null}
     * @return the textual value; an empty string for {@code null} or blank cells
     */
    private static String getCellValue(Cell cell) {
        if (cell == null) {
            return "";
        }
        switch (cell.getCellType()) {
            case NUMERIC: {
                double number = cell.getNumericCellValue();
                long rounded = Math.round(number);
                // Same integrality test as before: the value survives a round
                // trip through long.
                if ((double) rounded == number) {
                    return String.valueOf(rounded);
                }
                return String.valueOf(number);
            }
            case STRING:
                return String.valueOf(cell.getStringCellValue());
            case BOOLEAN:
                return String.valueOf(cell.getBooleanCellValue());
            case FORMULA:
                return String.valueOf(cell.getCellFormula());
            case BLANK:
                return "";
            case ERROR:
                return "非法字符";
            default:
                return "未知類型";
        }
    }

    /**
     * Converts an Excel row number or column label to a zero-based index.
     *
     * <p>If {@code NumberToolkit.isInteger} accepts the text, it is parsed as a
     * one-based number and decremented. Otherwise the text is read as a
     * bijective base-26 column label ({@code A}=0, {@code Z}=25, {@code AA}=26),
     * ignoring ASCII case.
     *
     * <p>Only the ASCII letters are accepted. The earlier version upper-cased
     * the text with the default locale after measuring its length, so the
     * outcome depended on the JVM locale and on case mappings that change the
     * string length. It also accumulated the result through floating-point
     * {@code Math.pow}; labels too long for an {@code int} now return -1 instead
     * of a saturated value.
     *
     * @param chars row number or column label, may be {@code null}
     * @return the zero-based index, or -1 when {@code chars} is {@code null},
     *         empty, contains a character outside A-Z/a-z, or overflows
     * @author herb
     * @date 2019-05-09
     */
    public static int excelLine2Dec(String chars) {
        if (null == chars) {
            return -1;
        }
        if (NumberToolkit.isInteger(chars)) {
            return Integer.parseInt(chars) - 1;
        }
        int column = 0;
        for (int i = 0; i < chars.length(); i++) {
            char c = chars.charAt(i);
            if (c >= 'a' && c <= 'z') {
                // ASCII-only upper-casing keeps the result locale independent.
                c = (char) (c - 'a' + 'A');
            }
            if (c < 'A' || c > 'Z') {
                return -1;
            }
            int digit = c - 'A' + 1;
            if (column > (Integer.MAX_VALUE - digit) / 26) {
                // column * 26 + digit would overflow int.
                return -1;
            }
            column = column * 26 + digit;
        }
        // An empty label leaves column at 0 and therefore yields -1.
        return column - 1;
    }
    
    /**********************************************************************************
     *
     *              POI event-mode (SAX streaming) Excel reading (efficient)
     *
     **********************************************************************************/

    /**
     * Reads a workbook in POI event mode: each sheet's XML is streamed through
     * a SAX handler instead of loading the whole workbook into memory.
     *
     * <p>Failure handling keeps the established contract. If the package
     * cannot be opened the method returns {@code null}. If reading fails after
     * the package was opened, the error is logged and the sheets completed so
     * far are returned. The former {@code return} inside {@code finally} is
     * gone, because it also discarded any in-flight exception without a trace.
     *
     * <p>NOTE: {@code checkFile} also accepts names ending in "xls", but the
     * file is opened as an OOXML package here, so such files are expected to
     * fail to open and yield {@code null}.
     *
     * @param filename path of the workbook
     * @return one list of rows per sheet that produced at least one row;
     *         {@code null} when the package could not be opened
     * @throws IOException if {@code checkFile} rejects the file
     */
    public static List<List<String[]>> readExcel(String filename) throws IOException {
        checkFile(new File(filename));
        List<List<String[]>> excelList = Lists.newArrayList();

        OPCPackage pkg;
        try {
            pkg = OPCPackage.open(filename, PackageAccess.READ);
        } catch (OpenXML4JException | RuntimeException e) {
            logger.error("Cannot open workbook package: " + filename, e);
            return null;
        }

        try {
            XSSFReader reader = new XSSFReader(pkg);
            SharedStringsTable sst = reader.getSharedStringsTable();
            StylesTable styles = reader.getStylesTable();
            XMLReader parser = fetchSheetParser(sst, excelList, styles);
            // The iterator hands out one InputStream per sheet; each stream is
            // closed as soon as that sheet has been parsed.
            XSSFReader.SheetIterator sheets = (XSSFReader.SheetIterator) reader.getSheetsData();
            while (sheets.hasNext()) {
                try (InputStream sheetStream = sheets.next()) {
                    parser.parse(new InputSource(sheetStream));
                }
            }
        } catch (InvalidOperationException | IOException | OpenXML4JException | SAXException e) {
            logger.error("Failed while reading workbook: " + filename, e);
        } finally {
            try {
                pkg.close();
            } catch (IOException e) {
                logger.error("Failed to close workbook package: " + filename, e);
            }
        }
        return excelList;
    }

	/**
	 * Creates the SAX reader used to stream one worksheet at a time into a new
	 * {@code SheetHandler}.
	 *
	 * <p>The Xerces reader is still preferred when it is on the classpath. When
	 * it cannot be loaded, the default reader chosen by
	 * {@link XMLReaderFactory#createXMLReader()} is used, so the method no longer
	 * fails just because the Xerces jar is absent. External entity resolution is
	 * switched off where the parser supports it, because workbooks may come from
	 * untrusted sources.
	 *
	 * @param sst       shared strings table handed to the handler
	 * @param excelList result list the handler appends finished sheets to
	 * @param styles    styles table handed to the handler
	 * @return a reader whose content handler is a new {@code SheetHandler}
	 * @throws SAXException if no XML reader can be created
	 */
	private static XMLReader fetchSheetParser(SharedStringsTable sst, List<List<String[]>> excelList,StylesTable styles) throws SAXException {
		XMLReader parser;
		try {
			parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
		} catch (SAXException xercesUnavailable) {
			// The named class could not be loaded; use the default reader.
			parser = XMLReaderFactory.createXMLReader();
		}
		String[] externalEntityFeatures = {
			"http://xml.org/sax/features/external-general-entities",
			"http://xml.org/sax/features/external-parameter-entities"
		};
		for (String feature : externalEntityFeatures) {
			try {
				parser.setFeature(feature, false);
			} catch (SAXException ignored) {
				// The parser does not recognise or allow changing this feature;
				// hardening is best effort, so keep the parser's default.
			}
		}
		ContentHandler handler = new SheetHandler(sst, excelList, styles);
		parser.setContentHandler(handler);
		return parser;
	}
	
	/**
	 * Kinds of cell content the sheet handler distinguishes. The kind is chosen
	 * from the {@code t} attribute of a cell element; the value itself usually
	 * sits in a {@code v} element inside the cell.
	 */
    enum xssfDataType {
        /** Cell type {@code "b"}. */
        BOOL,
        /** Cell type {@code "e"}. */
        ERROR,
        /** Cell type {@code "str"}. */
        FORMULA,
        /** Cell type {@code "inlineStr"}. */
        INLINESTR,
        /** Cell type {@code "s"}: the value is an index into the shared strings table. */
        SSTINDEX,
        /** Any other or missing cell type. */
        NUMBER
    }
    
	/**
	 * SAX content handler that converts worksheet XML into rows of strings.
	 *
	 * <p>One instance handles every sheet of a workbook in turn. Each finished
	 * row is stored as a {@code String[]}; when a worksheet ends, its rows are
	 * appended to the shared result list unless the sheet produced no rows.
	 *
	 * <p>Gaps before a cell that carries a value are filled with empty strings,
	 * so array index equals column index up to the last value of the row. Cells
	 * without a value element are not emitted, and rows are not padded on the
	 * right, so rows may differ in length.
	 *
	 * <p>The handler keeps mutable parsing state and is not meant to be shared
	 * between concurrent parses.
	 */
	private static class SheetHandler extends DefaultHandler {

		/** Shared strings table used to resolve cells of type {@code "s"}. */
		private final SharedStringsTable sst;
		/** Styles table used to look up the number format of styled cells. */
		private final StylesTable stylesTable;
		/** Formats numeric values according to the cell's format string. */
		private final DataFormatter formatter = new DataFormatter();
		/** Output pattern for date cells; created once per handler instead of once per cell. */
		private final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
		/** Collects character data of the current value element. */
		private final StringBuilder value = new StringBuilder();
		/** Result list supplied by the caller; one entry per sheet with rows. */
		private final List<List<String[]>> excelList;
		/** Values of the row currently being read. */
		private final List<String> rowList = Lists.newArrayList();
		/** Rows of the sheet currently being read. */
		private List<String[]> sheetList = Lists.newArrayList();

		/** True only while inside a value element ({@code v} or {@code is}). */
		private boolean vIsOpen;
		/** Kind of the current cell, decided when the cell element starts. */
		private xssfDataType nextDataType = xssfDataType.NUMBER;
		/** Number format index of the current cell, -1 when unknown. */
		private short formatIndex = -1;
		/** Number format string of the current cell, {@code null} when unknown. */
		private String formatString;
		/** Zero-based column of the current cell, -1 before the first cell of a row. */
		private int thisColumn = -1;
		/** Zero-based column of the last value added to the current row. */
		private int lastColumnNumber = -1;
		/** Zero-based ordinal of the worksheet being parsed, -1 before the first. */
		private int sheetIndex = -1;

		/**
		 * @param sst       shared strings table of the workbook
		 * @param excelList list that receives one row list per non-empty sheet
		 * @param styles    styles table of the workbook
		 */
		private SheetHandler(SharedStringsTable sst, List<List<String[]>> excelList,StylesTable styles) {
			this.stylesTable = styles;
			this.sst = sst;
			this.excelList = excelList;
		}

		/**
		 * Tracks the start of value, cell and worksheet elements.
		 *
		 * @param name qualified element name; the handler matches on this only
		 */
		@Override
		public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {
			if ("v".equals(name) || "is".equals(name)) {
				// "is" wraps the text of inline-string cells. The handler used
				// to look for an element named "inlineStr", which is a cell
				// type rather than an element, so inline strings were lost.
				vIsOpen = true;
				value.setLength(0);
			} else if ("c".equals(name)) {
				thisColumn = resolveColumn(attributes.getValue("r"));

				// Defaults for a plain numeric cell.
				nextDataType = xssfDataType.NUMBER;
				formatIndex = -1;
				formatString = null;

				String cellType = attributes.getValue("t");
				String cellStyleStr = attributes.getValue("s");
				if ("b".equals(cellType)) {
					nextDataType = xssfDataType.BOOL;
				} else if ("e".equals(cellType)) {
					nextDataType = xssfDataType.ERROR;
				} else if ("inlineStr".equals(cellType)) {
					nextDataType = xssfDataType.INLINESTR;
				} else if ("s".equals(cellType)) {
					nextDataType = xssfDataType.SSTINDEX;
				} else if ("str".equals(cellType)) {
					nextDataType = xssfDataType.FORMULA;
				} else if (cellStyleStr != null) {
					// A number with a style: remember its format so the value
					// can be rendered as a date or formatted number later.
					int styleIndex = Integer.parseInt(cellStyleStr);
					XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);
					if (style != null) {
						formatIndex = style.getDataFormat();
						formatString = style.getDataFormatString();
						if (formatString == null) {
							formatString = BuiltinFormats.getBuiltinFormat(formatIndex);
						}
					}
				}
			} else if ("worksheet".equals(name)) {
				sheetIndex++;
			}
		}

		/**
		 * Appends character data to the current value; text outside a value
		 * element is ignored.
		 */
		@Override
		public void characters(char[] ch, int start, int length) throws SAXException {
			if (vIsOpen) {
				value.append(ch, start, length);
			}
		}

		/**
		 * Finishes a value, a row or a worksheet.
		 *
		 * @param name qualified element name; the handler matches on this only
		 */
		@Override
		public void endElement(String uri, String localName, String name) throws SAXException {
			if ("v".equals(name) || "is".equals(name)) {
				// Converted here because characters() may be called several
				// times for one element.
				vIsOpen = false;
				String thisStr = convertValue();
				// Fill the columns skipped since the previous value of this row.
				for (int i = lastColumnNumber + 1; i < thisColumn; i++) {
					rowList.add("");
				}
				if (thisColumn > -1) {
					lastColumnNumber = thisColumn;
				}
				rowList.add(thisStr);
			} else if ("row".equals(name)) {
				lastColumnNumber = -1;
				thisColumn = -1;
				optRow(sheetIndex, rowList);
				rowList.clear();
			} else if ("worksheet".equals(name)) {
				// Sheet finished; sheets without rows are dropped.
				if (sheetList.size() > 0) {
					excelList.add(sheetList);
				}
				sheetList = Lists.newArrayList();
			}
		}

		/**
		 * Called once per finished row; stores a copy of the row in the current
		 * sheet. Rows without values are stored as empty arrays.
		 *
		 * @param sheetIndex zero-based ordinal of the worksheet being parsed
		 * @param rowList    values of the finished row; copied, because the
		 *                   caller clears the list afterwards
		 */
		public void optRow(int sheetIndex, List<String> rowList) {
			sheetList.add(rowList.toArray(new String[rowList.size()]));
		}

		/**
		 * Determines the zero-based column of a cell from its reference.
		 *
		 * @param ref cell reference such as {@code "C7"}, may be {@code null}
		 * @return the column of the leading letters, or the column after the
		 *         previous cell of the row when the reference is missing or has
		 *         no leading letters
		 */
		private int resolveColumn(String ref) {
			if (ref != null) {
				int letters = 0;
				while (letters < ref.length() && !Character.isDigit(ref.charAt(letters))) {
					letters++;
				}
				if (letters > 0) {
					return nameToColumn(ref.substring(0, letters));
				}
			}
			return thisColumn + 1;
		}

		/**
		 * Converts the collected text of the current value according to the
		 * kind of the current cell.
		 *
		 * @return the text to store for the cell, never {@code null}
		 */
		private String convertValue() {
			String raw = value.toString();
			switch (nextDataType) {
			case BOOL:
				if (raw.isEmpty()) {
					// An empty value used to throw from charAt(0).
					return "";
				}
				return raw.charAt(0) == '0' ? "FALSE" : "TRUE";
			case ERROR:
				return "\"ERROR:" + raw + '"';
			case FORMULA:
				// Cached string result of a formula, returned as is.
				return raw;
			case INLINESTR:
				return new XSSFRichTextString(raw).toString();
			case SSTINDEX:
				try {
					int idx = Integer.parseInt(raw);
					return new XSSFRichTextString(sst.getEntryAt(idx)).toString();
				} catch (NumberFormatException ex) {
					// Malformed index: store an empty string instead of the
					// null the row previously received.
					return "";
				}
			case NUMBER:
				return formatNumber(raw);
			default:
				return "(TODO: Unexpected type: " + nextDataType + ")";
			}
		}

		/**
		 * Renders a numeric cell: as a date when the cell's format is a date
		 * format, through the cell's format string when it has one, otherwise
		 * as the raw text.
		 *
		 * @param raw text of the value element
		 * @return the rendered value; {@code raw} itself when it is empty or
		 *         not parseable as a number
		 */
		private String formatNumber(String raw) {
			if (raw.isEmpty()) {
				return raw;
			}
			// The date test takes the format string. The cell value used to be
			// passed instead, so only built-in date formats were detected.
			boolean isDate = HSSFDateUtil.isADateFormat(formatIndex, formatString);
			if (!isDate && formatString == null) {
				return raw;
			}
			double number;
			try {
				number = Double.parseDouble(raw);
			} catch (NumberFormatException ex) {
				// Not numeric text: keep it rather than aborting the parse.
				return raw;
			}
			if (isDate) {
				return formateDateToString(HSSFDateUtil.getJavaDate(number));
			}
			return formatter.formatRawCellContents(number, formatIndex, formatString);
		}

		/**
		 * Converts an Excel column name like "C" to a zero-based index.
		 *
		 * @param name column letters, expected to be upper-case A-Z
		 * @return index corresponding to the name; -1 for an empty name
		 */
		private int nameToColumn(String name) {
			int column = -1;
			for (int i = 0; i < name.length(); ++i) {
				int c = name.charAt(i);
				column = (column + 1) * 26 + c - 'A';
			}
			return column;
		}

		/**
		 * Formats a date with the pattern {@code yyyy-MM-dd HH:mm:ss}.
		 */
		private String formateDateToString(Date date) {
			return dateFormat.format(date);
		}

	}
	

    /**
     * Manual smoke test: reads a workbook in event mode and prints how many
     * sheets were returned.
     *
     * @param args optional; {@code args[0]} is the workbook path. Without
     *             arguments the original sample path is used.
     * @throws Exception if reading the workbook fails
     */
    public static void main(String[] args) throws Exception {
        String path = args.length > 0 ? args[0] : "D:/6家老店上線資料9.16.xlsx";
        System.out.println("start read");
        // readExcel is static; no instance is needed to call it.
        List<List<String[]>> sheets = readExcel(path);
        // readExcel returns null when the package could not be opened.
        System.out.println("sheets read: " + (sheets == null ? 0 : sheets.size()));
    }

}
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章