【第02篇】利用POI框架的SAX方式之讀取大數據2007版Excel(xlsx)【第2版】

【第1版】地址

https://blog.csdn.net/rainyspring4540/article/details/50747122

針對老早寫的POI處理Excel的大數據讀取問題,看到好多人關注,感覺自己還是應該更新一版,畢竟雖然是自己備份,但是如果新手能少走彎路,也算欣慰。下面的版本是我的項目迭代過程中個人認爲比較穩定和健壯的,算作【第2版】吧,裏面修復了【第1版】的很多bug,諸如計算前後單元格差值的函數getLevel,以及識別新行的正則上也略有優化,最後希望,這段代碼能爲你爭取更多的學習時間,而不是懶惰的藉口。

package com.fulong.utils.poi;

import java.io.File;

/* ====================================================================
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements.  See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
==================================================================== */

import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

import com.fulong.utils.report.tool.MathUtil;

/**
 * XSSF and SAX (Event API) basic example. See {@link XLSX2CSV} for a fuller
 * example of doing XSLX processing with the XSSF Event code.
 * 目前函數又個缺陷,便是starElement函數中發現新行的策略有問題
 */
public class MyExcel2007ForPaging_high {

	/**
	 * 代表Excel中必須有值的起始列(A、B、C....AA、AB...)
	 */
	private static final String indexC4Data = "A";
	
	/**
	 * 存儲所有行的值
	 */
	public List<List<IndexValue>> dataList = new ArrayList<List<IndexValue>>();
	/*
	 * 臨時存儲當前行的值
	 */
	private List<IndexValue> rowData;
	
	private final int startRow;
	private final int endRow;
	private int currentRow = 0;
	
	private final String filename;
	

	public MyExcel2007ForPaging_high(String filename, int startRow, int endRow) throws Exception {
		if (StringUtils.isBlank(filename))
			throw new Exception("文件名不能空");
		this.filename = filename;
		this.startRow = startRow;
		this.endRow = endRow;
		processFirstSheet();
	}

	/**
	 * 指定獲取第一個sheet
	 * 
	 * @param filename
	 * @throws Exception
	 */
	private void processFirstSheet() throws Exception {
		OPCPackage pkg = OPCPackage.open(filename);
		XSSFReader r = new XSSFReader(pkg);
		SharedStringsTable sst = r.getSharedStringsTable();

		XMLReader parser = fetchSheetParser(sst);

		// To look up the Sheet Name / Sheet Order / rID,
		// you need to process the core Workbook stream.
		// Normally it's of the form rId# or rSheet#
		InputStream sheet1 = r.getSheet("rId1");
		InputSource sheetSource = new InputSource(sheet1);
		parser.parse(sheetSource);
		sheet1.close();
		pkg.close();
	}

	private XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
		XMLReader parser = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser");
		ContentHandler handler = new PagingHandler(sst);
		parser.setContentHandler(handler);
		return parser;
	}

	/**
	 * See org.xml.sax.helpers.DefaultHandler javadocs
	 */
	private class PagingHandler extends DefaultHandler {
		private SharedStringsTable sst;
		private String lastContents;
		private boolean nextIsString;
		private String index = null;

		private PagingHandler(SharedStringsTable sst) {
			this.sst = sst;
		}

		/**
		 * 每個單元格開始時的處理
		 */
		@Override
		public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {
			// c => cell
			if (name.equals("c")) {
				// Print the cell reference
				// System.out.print(attributes.getValue("r") + " - ");

				index = attributes.getValue("r");
				System.out.println(index);
				if (index.contains("N")) {
					System.out.println("##" + attributes + "##");
				}

				// 這是一個新行
				if (Pattern.compile("^"+indexC4Data+"[0-9]+$").matcher(index).find()) {

					// 存儲上一行數據
					if (rowData != null && isAccess() && !rowData.isEmpty()) {
						dataList.add(rowData);
					}
					rowData = new ArrayList<IndexValue>();
					;// 新行要先清除上一行的數據
					currentRow++;// 當前行+1
					// System.out.println(currentRow);
				}
				if (isAccess()) {
					// Figure out if the value is an index in the SST
					String cellType = attributes.getValue("t");
					if (cellType != null && cellType.equals("s")) {
						nextIsString = true;
					} else {
						nextIsString = false;
					}
				}

			}
			// Clear contents cache
			lastContents = "";
		}

		/**
		 * 每個單元格結束時的處理
		 */
		@Override
		public void endElement(String uri, String localName, String name) throws SAXException {
			if (isAccess()) {
				// Process the last contents as required.
				// Do now, as characters() may be called more than once
				if (nextIsString) {
					int idx = Integer.parseInt(lastContents);
					lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
					nextIsString = false;
				}

				// v => contents of a cell
				// Output after we've seen the string contents
				if (name.equals("v")) {
					// System.out.println(lastContents);

					rowData.add(new IndexValue(index, lastContents));

				}
			}

		}

		/**
		 * 目前流的方式值支持  Excel單元格是文本  格式;日期、數字、公式不支持
		 */
		@Override
		public void characters(char[] ch, int start, int length) throws SAXException {
			if (isAccess()) {
				lastContents += new String(ch, start, length);
			}

		}

		/**
		 * 如果文檔結束後,發現讀取的末尾行正處在當前行中,存儲下這行
		 * (存在這樣一種情況,當待讀取的末尾行正好是文檔最後一行時,最後一行無法存到集合中,
		 * 因爲最後一行沒有下一行了,所以不爲啓動starElement()方法, 當然我們可以通過指定最大列來處理,但不想那麼做,擴展性不好)
		 */
		@Override
		public void endDocument() throws SAXException {
			if (rowData != null && isAccess() && !rowData.isEmpty()) {
				dataList.add(rowData);
				System.out.println("--end");
			}

		}

	}

	private boolean isAccess() {
		if (currentRow >= startRow && currentRow <= endRow) {
			return true;
		}
		return false;
	}

	private class IndexValue {
		String v_index;
		String v_value;

		public IndexValue(String v_index, String v_value) {
			super();
			this.v_index = v_index;
			this.v_value = v_value;
		}

		@Override
		public String toString() {
			return "IndexValue [v_index=" + v_index + ", v_value=" + v_value + "]";
		}

		/**
		 * 去掉數字部分(行信息),直接比較英文部分(列信息),計算前後兩個值相距多少空列
		 * @param p
		 * @return
		 */
		public int getLevel(IndexValue p) {
			
			/*char[] other = p.v_index.replaceAll("[0-9]", "").toCharArray();
			char[] self = this.v_index.replaceAll("[0-9]", "").toCharArray();
			if (other.length != self.length)
				return -1;
			for (int i = 0; i < other.length; i++) {
				if (i == other.length - 1) {
					return self[i] - other[i];
				} else {
					if (self[i] != other[i]) {
						return -1;
					}
				}

			}
			return -1;*/
			
			String other = p.v_index.replaceAll("[0-9]", "");
			String self = this.v_index.replaceAll("[0-9]", "");
			return MathUtil.fromNumberSystem26(self)-MathUtil.fromNumberSystem26(other);

		}
	}

	/**
	 * 獲取真實的數據(處理空格)
	 * 
	 * @return
	 * @throws Exception
	 */
	public List<List<String>> getMyDataList() throws Exception {

		List<List<String>> myDataList = new ArrayList<List<String>>();
		if (dataList == null || dataList.size() <= 0)
			return myDataList;
		/*
		 * 是否是最後一行的數據
		 */
		boolean islastRow = false;
		for (int i = 0; i < dataList.size(); i++) {
			List<IndexValue> i_list = dataList.get(i);
			List<String> row = new ArrayList<String>();
			int j = 0;
			
			for (; j < i_list.size() - 1; j++) {
				// 獲取當前值,並存儲
				IndexValue current = i_list.get(j);
				//去掉空格
				String tempV = current.v_value!=null?current.v_value.trim():current.v_value;
				row.add(tempV);
				// 預存下一個
				IndexValue next = i_list.get(j + 1);
				// 獲取差值
				int level = next.getLevel(current);
				/*if(i==2214){
					System.out.println("--"+i);
				}*/
				if (level <= 0){
					System.err.println("---!!!到達最後一行,行號:"+(i+1)+";level:"+level+"[超出處理範圍]");
					islastRow = true;
					break;
				}
				//將差值補充爲null,
				for (int k = 0; k < level - 1; k++) {
					row.add(null);
				}
			}
			/*
			 * 每行的最後一個值,留在最後插入
			 * 但最後一行除外
			 */
			if(!islastRow){
				row.add(i_list.get(j).v_value);
			}
			myDataList.add(row);

		}
		return myDataList;
	}
	
	public static void main(String[] args) throws Exception {
		File file = new File("e:/a.xlsx");
		System.out.println(new MyExcel2007ForPaging_high(file.getPath(), 1, 50).getMyDataList());
	}
}

輔助類

package com.fulong.utils.report.tool;

import org.apache.commons.lang.StringUtils;

/**
 * Conversions between positive integers and Excel-style column letters
 * (bijective base 26: 1 -> A, 26 -> Z, 27 -> AA ...).
 */
public class MathUtil {

	/**
	 * Converts a positive integer to its column-letter representation,
	 * mapping [1-26] to [A-Z].
	 *
	 * @param n the number to convert
	 * @return the letters for {@code n}, or an empty string when {@code n <= 0}
	 */
	public static String toNumberSystem26(int n){
		StringBuilder letters = new StringBuilder();
		while (n > 0) {
			int m = n % 26;
			// There is no zero digit: a remainder of 0 stands for 'Z' (26).
			if (m == 0) {
				m = 26;
			}
			letters.append((char) ('A' + m - 1));
			n = (n - m) / 26;
		}
		// Digits were produced least-significant first.
		return letters.reverse().toString();
	}

	/**
	 * Converts column letters to the corresponding positive integer,
	 * mapping [A-Z] to [1-26]. ASCII lower-case letters are accepted as well.
	 *
	 * @param s the letters to convert
	 * @return the number for {@code s}; 0 when {@code s} is null, empty,
	 *         contains anything other than ASCII letters, or denotes a value
	 *         larger than {@code Integer.MAX_VALUE}
	 */
	public static int fromNumberSystem26(String s){
		if (s == null || s.isEmpty()) {
			return 0;
		}
		long n = 0;
		for (int i = 0; i < s.length(); i++) {
			char c = s.charAt(i);
			// Fold case by hand: String.toUpperCase() depends on the default
			// locale (Turkish 'i') and can change the string length ('ß').
			if (c >= 'a' && c <= 'z') {
				c = (char) (c - ('a' - 'A'));
			}
			if (c < 'A' || c > 'Z') {
				return 0;
			}
			n = n * 26 + (c - 'A' + 1);
			// Treat values that do not fit into an int as invalid instead of wrapping.
			if (n > Integer.MAX_VALUE) {
				return 0;
			}
		}
		return (int) n;
	}


	public static void main(String[] args) {
		System.out.println(fromNumberSystem26("aa"));
		System.out.println(toNumberSystem26(27));
	}
}

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章