这里使用的是 Maven 项目,需要在 pom.xml 里面添加以下依赖:
<!-- xls解析 -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.14</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.14</version>
</dependency>
程序代码:
package com.zttech.demo.excel;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.math.BigDecimal;
import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.junit.Test;
/**
* Java读取Excel内容
* @author OrangQcer
* @date 2018年3月20日
*/
public class ExcelOperate {
@Test
public void test() throws IOException {
String filePath = "E:\\项目相关\\kms\\安全职责\\test01.xls";
boolean isExcel2003 = true;
if (isExcel2007(filePath)) {
isExcel2003 = false;
}
File file = new File(filePath);
String[][] result = getData(file, 1, isExcel2003);
int rowLength = result.length;
for (int i = 0; i < rowLength; i++) {
for (int j = 0; j < result[i].length; j++) {
System.out.print(result[i][j] + "|");
}
System.out.println("\t\t");
}
}
/**
* 读取Excel的内容,第一维数组存储的是一行中格列的值,二维数组存储的是多少个行
* @param file 读取数据的源Excel
* @param ignoreRows 读取数据忽略的行数,比如行头不需要读入 忽略的行数为1
* @param isExcel2003 判断Excel的格式,true为xls,false为xlsx
* @return 读出的Excel中数据的内容
* @throws IOException
*/
private String[][] getData(File file, int ignoreRows, boolean isExcel2003) throws IOException {
List<String[]> result = new ArrayList<>();
int rowSize = 0;
BufferedInputStream in = new BufferedInputStream(new FileInputStream(file));
/** 打开Workbook */
Workbook wb = null;
if (isExcel2003) {
wb = new HSSFWorkbook(in);
}
else {
wb = new XSSFWorkbook(in);
}
Cell cell = null;
/** 循环所有Sheet */
for (int sheetIndex = 0; sheetIndex <wb.getNumberOfSheets(); sheetIndex++ ) {
Sheet st = wb.getSheetAt(sheetIndex);
/** 循环行,跳过标题 */
for (int rowIndex = ignoreRows; rowIndex <= st.getLastRowNum(); rowIndex++ ) {
Row row = st.getRow(rowIndex);
if (row == null) {
continue;
}
int tempRowSize = row.getLastCellNum();
if (tempRowSize > rowSize) {
rowSize = tempRowSize;
}
String[] values = new String[rowSize];
Arrays.fill(values, "");
boolean hasValue = false;
for (int columnIndex = 0; columnIndex < row.getLastCellNum(); columnIndex ++) {
String value = "";
cell = row.getCell(columnIndex);
if (cell != null) {
switch (cell.getCellType()) {
case HSSFCell.CELL_TYPE_STRING:
value = cell.getStringCellValue();
break;
case HSSFCell.CELL_TYPE_NUMERIC:
if (HSSFDateUtil.isCellDateFormatted(cell)) {
Date date = cell.getDateCellValue();
if (date != null) {
value = new SimpleDateFormat("yyyy-MM-dd").format(date);
} else {
value = "";
}
} else {
value = new DecimalFormat("0").format(cell.getNumericCellValue());
}
break;
case HSSFCell.CELL_TYPE_FORMULA:
/** 入时如果为公式生成的数据则无值 */
if (!cell.getStringCellValue().equals("")) {
value = cell.getStringCellValue();
} else {
value = cell.getNumericCellValue() + "";
}
break;
case HSSFCell.CELL_TYPE_BLANK:
value = "";
break;
case HSSFCell.CELL_TYPE_ERROR:
value = "";
break;
case HSSFCell.CELL_TYPE_BOOLEAN:
value = (cell.getBooleanCellValue() == true ? "Y" : "N");
break;
default:
value = "";
}
}
if (columnIndex == 0 && value.trim().equals("")) {
continue;
}
values[columnIndex] = value.trim();
hasValue = true;
}
if (hasValue) {
result.add(values);
}
}
}
in.close();
String[][] returnArray = new String[result.size()][rowSize];
for (int i = 0; i <returnArray.length; i++) {
returnArray[i] = result.get(i);
}
return returnArray;
}
public static boolean isExcel2003(String filePath) {
return filePath.matches("^.+\\.(?i)(xls)$");
}
public static boolean isExcel2007(String filePath) {
return filePath.matches("^.+\\.(?i)(xlsx)$");
}
}
这个是要读取的文件
下面是输出的值,可以看到标题行被跳过了,第4行也被省略。
A2|B2|C2|
|B3|C3|
A5||C5|
之前看了网上的代码,发现有两点小问题(上面的代码已经修正):
第一,如果一行的第一格为空,那么一整行都不会被读取;
第二,每行的读取都会多出一格。
另外,它会先判断要读取的文件是xls还是xlsx,避免版本不同报错。