1.前言
項目中要求讀取excel文件內容,並將其轉化爲xml格式。常見讀取excel文檔一般使用POI和JExcelAPI這兩個工具。這裏我們介紹使用POI實現讀取excel文檔。
2.代碼實例:
package edu.sjtu.erplab.poi;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
/**
* 操作Excel表格的功能類
*/
public class ExcelReader {
private POIFSFileSystem fs;
private HSSFWorkbook wb;
private HSSFSheet sheet;
private HSSFRow row;
/**
* 讀取Excel表格表頭的內容
* @param InputStream
* @return String 表頭內容的數組
*/
public String[] readExcelTitle(InputStream is) {
try {
fs = new POIFSFileSystem(is);
wb = new HSSFWorkbook(fs);
} catch (IOException e) {
e.printStackTrace();
}
sheet = wb.getSheetAt(0);
row = sheet.getRow(0);
// 標題總列數
int colNum = row.getPhysicalNumberOfCells();
System.out.println("colNum:" + colNum);
String[] title = new String[colNum];
for (int i = 0; i < colNum; i++) {
//title[i] = getStringCellValue(row.getCell((short) i));
title[i] = getCellFormatValue(row.getCell((short) i));
}
return title;
}
/**
* 讀取Excel數據內容
* @param InputStream
* @return Map 包含單元格數據內容的Map對象
*/
public Map<Integer, String> readExcelContent(InputStream is) {
Map<Integer, String> content = new HashMap<Integer, String>();
String str = "";
try {
fs = new POIFSFileSystem(is);
wb = new HSSFWorkbook(fs);
} catch (IOException e) {
e.printStackTrace();
}
sheet = wb.getSheetAt(0);
// 得到總行數
int rowNum = sheet.getLastRowNum();
row = sheet.getRow(0);
int colNum = row.getPhysicalNumberOfCells();
// 正文內容應該從第二行開始,第一行爲表頭的標題
for (int i = 1; i <= rowNum; i++) {
row = sheet.getRow(i);
int j = 0;
while (j < colNum) {
// 每個單元格的數據內容用"-"分割開,以後需要時用String類的replace()方法還原數據
// 也可以將每個單元格的數據設置到一個javabean的屬性中,此時需要新建一個javabean
// str += getStringCellValue(row.getCell((short) j)).trim() +
// "-";
str += getCellFormatValue(row.getCell((short) j)).trim() + " ";
j++;
}
content.put(i, str);
str = "";
}
return content;
}
/**
* 獲取單元格數據內容爲字符串類型的數據
*
* @param cell Excel單元格
* @return String 單元格數據內容
*/
private String getStringCellValue(HSSFCell cell) {
String strCell = "";
switch (cell.getCellType()) {
case HSSFCell.CELL_TYPE_STRING:
strCell = cell.getStringCellValue();
break;
case HSSFCell.CELL_TYPE_NUMERIC:
strCell = String.valueOf(cell.getNumericCellValue());
break;
case HSSFCell.CELL_TYPE_BOOLEAN:
strCell = String.valueOf(cell.getBooleanCellValue());
break;
case HSSFCell.CELL_TYPE_BLANK:
strCell = "";
break;
default:
strCell = "";
break;
}
if (strCell.equals("") || strCell == null) {
return "";
}
if (cell == null) {
return "";
}
return strCell;
}
/**
* 獲取單元格數據內容爲日期類型的數據
*
* @param cell
* Excel單元格
* @return String 單元格數據內容
*/
private String getDateCellValue(HSSFCell cell) {
String result = "";
try {
int cellType = cell.getCellType();
if (cellType == HSSFCell.CELL_TYPE_NUMERIC) {
Date date = cell.getDateCellValue();
result = (date.getYear() + 1900) + "-" + (date.getMonth() + 1)
+ "-" + date.getDate();
} else if (cellType == HSSFCell.CELL_TYPE_STRING) {
String date = getStringCellValue(cell);
result = date.replaceAll("[年月]", "-").replace("日", "").trim();
} else if (cellType == HSSFCell.CELL_TYPE_BLANK) {
result = "";
}
} catch (Exception e) {
System.out.println("日期格式不正確!");
e.printStackTrace();
}
return result;
}
/**
* 根據HSSFCell類型設置數據
* @param cell
* @return
*/
private String getCellFormatValue(HSSFCell cell) {
String cellvalue = "";
if (cell != null) {
// 判斷當前Cell的Type
switch (cell.getCellType()) {
// 如果當前Cell的Type爲NUMERIC
case HSSFCell.CELL_TYPE_NUMERIC:
case HSSFCell.CELL_TYPE_FORMULA: {
// 判斷當前的cell是否爲Date
if (HSSFDateUtil.isCellDateFormatted(cell)) {
// 如果是Date類型則,轉化爲Data格式
//方法1:這樣子的data格式是帶時分秒的:2011-10-12 0:00:00
//cellvalue = cell.getDateCellValue().toLocaleString();
//方法2:這樣子的data格式是不帶帶時分秒的:2011-10-12
Date date = cell.getDateCellValue();
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
cellvalue = sdf.format(date);
}
// 如果是純數字
else {
// 取得當前Cell的數值
cellvalue = String.valueOf(cell.getNumericCellValue());
}
break;
}
// 如果當前Cell的Type爲STRIN
case HSSFCell.CELL_TYPE_STRING:
// 取得當前的Cell字符串
cellvalue = cell.getRichStringCellValue().getString();
break;
// 默認的Cell值
default:
cellvalue = " ";
}
} else {
cellvalue = "";
}
return cellvalue;
}
public static void main(String[] args) {
try {
// 對讀取Excel表格標題測試
InputStream is = new FileInputStream("d:\\test2.xls");
ExcelReader excelReader = new ExcelReader();
String[] title = excelReader.readExcelTitle(is);
System.out.println("獲得Excel表格的標題:");
for (String s : title) {
System.out.print(s + " ");
}
// 對讀取Excel表格內容測試
InputStream is2 = new FileInputStream("d:\\test2.xls");
Map<Integer, String> map = excelReader.readExcelContent(is2);
System.out.println("獲得Excel表格的內容:");
for (int i = 1; i <= map.size(); i++) {
System.out.println(map.get(i));
}
} catch (FileNotFoundException e) {
System.out.println("未找到指定路徑的文件!");
e.printStackTrace();
}
}
}
3.總結
因爲excel單元格中的內容往往都有一定的格式,比如日期型,數字型,字符串型,因此在讀取的時候要進行格式判斷,不然會出現錯誤。常見的就是不能正常讀取日期。在代碼實例中有一個方法:
getCellFormatValue(HSSFCell cell)
往這個方法中傳入excel單元格就能識別單元格格式,並轉化爲正確的格式。
ps:2012-2-23
代碼實例中有一段代碼:
int colNum = row.getPhysicalNumberOfCells();
其中的HSSFRow.getPhysicalNumberOfCells();這個方法是用於獲取一行中存在的單元格數,POI的官方API中有給出getPhysicalNumberOfCells方法的解釋
getPhysicalNumberOfCells
public int getPhysicalNumberOfCells()
- gets the number of defined cells (NOT number of cells in the actual row!). That is to say if only columns 0,4,5 have values then there would be 3.
-
- Specified by:
getPhysicalNumberOfCells
in interfaceRow
-
- Returns:
- int representing the number of defined cells in the row.