一、POI對Word處理
1、讀取Word
2、讀取Excel
3、下載地址:http://www.apache.org/dyn/closer.cgi/poi/dev/
代碼實現:
package com.qianyan.test;
import java.io.File;
import java.io.FileInputStream;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFChart.HSSFSeries;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.junit.Ignore;
import org.junit.Test;
/**
 * JUnit samples that read a Word document and an Excel workbook with Apache POI.
 *
 * <p>Hierarchy of an Excel file: file --> worksheet --> row --> cell, which maps
 * to workbook -> sheet -> row -> cell in POI.
 */
public class TestPOI {

    /**
     * Reads {@code e:/test.doc} and prints the whole text of the document.
     * Any failure is printed to stderr rather than propagated.
     */
    @Test
    // @Ignore
    public void testRead1() {
        File file = new File("e:/test.doc");
        // try-with-resources: the file handle is released even when extraction fails.
        try (FileInputStream fis = new FileInputStream(file)) {
            WordExtractor wordExtractor = new WordExtractor(fis);
            System.out.println(wordExtractor.getText());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads {@code e:/test.doc} paragraph by paragraph and prints each paragraph
     * prefixed with its zero-based index.
     * Any failure is printed to stderr rather than propagated.
     */
    @Test
    @Ignore
    public void testPara() {
        File file = new File("e:/test.doc");
        try (FileInputStream fis = new FileInputStream(file)) {
            WordExtractor wordExtractor = new WordExtractor(fis);
            String[] paras = wordExtractor.getParagraphText();
            for (int i = 0; i < paras.length; i++) {
                System.out.print("第" + i + "段-->");
                System.out.println(paras[i]);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads {@code e:/test.xls} and walks every cell of the first sheet, printing
     * the value of each numeric, date and string cell.
     *
     * <p>Index conventions:
     * <ol>
     *   <li>sheets run from 0 to {@code workbook.getNumberOfSheets() - 1};</li>
     *   <li>rows run from {@code getFirstRowNum()} to {@code getLastRowNum()} inclusive;</li>
     *   <li>cells run from {@code getFirstCellNum()} to {@code getLastCellNum() - 1},
     *       because {@code getLastCellNum()} is the last cell index plus one.</li>
     * </ol>
     *
     * @throws Exception if the file cannot be opened or parsed
     */
    @Test
    @Ignore
    public void testReadExcel() throws Exception {
        // Path of the Excel workbook to read.
        String fileToBeReade = "e:/test.xls";
        try (FileInputStream fis = new FileInputStream(fileToBeReade)) {
            HSSFWorkbook workbook = new HSSFWorkbook(fis);
            int sheetCount = workbook.getNumberOfSheets();
            System.out.println("===SheetNum===" + sheetCount);
            // getSheetAt throws for an out-of-range index instead of returning null,
            // so guard on the sheet count rather than on a null result.
            if (sheetCount > 0) {
                printSheet(workbook.getSheetAt(0));
            }
        }
    }

    /** Prints the row range of the sheet, then every populated row and cell in it. */
    private static void printSheet(HSSFSheet aSheet) {
        System.out.println("+++getFirstRowNum+++" + aSheet.getFirstRowNum());
        int lastRowNum = aSheet.getLastRowNum();
        System.out.println("+++getLastRowNum+++" + lastRowNum);
        for (int rowNumOfSheet = 0; rowNumOfSheet <= lastRowNum; rowNumOfSheet++) {
            HSSFRow aRow = aSheet.getRow(rowNumOfSheet);
            if (aRow == null) {
                continue; // row was never created in the file
            }
            System.out.println("row" + rowNumOfSheet + "->");
            // getLastCellNum() is already "last index + 1" (or -1 for an empty row),
            // so it is an exclusive upper bound.
            int cellEnd = aRow.getLastCellNum();
            for (int cellNumOfRow = 0; cellNumOfRow < cellEnd; cellNumOfRow++) {
                HSSFCell aCell = aRow.getCell(cellNumOfRow);
                if (aCell != null) {
                    printCell(aCell);
                }
            }
        }
    }

    /**
     * Prints one cell: numeric cells as a number (or as a date when the cell is
     * date-formatted), string cells as text, anything else as an empty line.
     */
    private static void printCell(HSSFCell aCell) {
        String strCell;
        switch (aCell.getCellType()) {
            case HSSFCell.CELL_TYPE_NUMERIC:
                if (HSSFDateUtil.isCellDateFormatted(aCell)) {
                    // Date-formatted cell: convert the numeric value to a Date.
                    strCell = HSSFDateUtil.getJavaDate(aCell.getNumericCellValue()).toString();
                } else {
                    strCell = aCell.getNumericCellValue() + "";
                }
                System.out.println(strCell + " ");
                break;
            case HSSFCell.CELL_TYPE_STRING:
                strCell = aCell.getRichStringCellValue().toString();
                System.out.println(strCell + " ");
                break;
            default:
                // Other cell types are not rendered.
                System.out.println("");
        }
    }
}
二、使用Jacob來處理Word文檔
1、官方的解釋是Java COM Bridge,即java和com組件間的橋樑
2、com一般表現爲dll或exe等二進制文件
3、office是建立在windows平臺之上的,本身是一個軟件,除了它自己提供的宏,似乎沒有什麼能對它進行直接的操作;在windows平臺上,爲了解決這類不同應用軟件之間通信缺乏通用api的問題,推出了com的解決方案;我們使用dll中的一組或多組相關的函數存取組件數據,這些函數總的合稱爲接口,具體到每個細節的實現稱爲方法;如果我們需要調用接口裏的方法,唯一的途徑就是調用指向接口的指針。所以總的來說,就是使用dll完成api的轉換。
4、Jacob就是通過一個接口來操作word的activex對象。現在的版本是1.15
5、下載地址:http://sourceforge.net/projects/jacob-project/
代碼實現:(注意:除添加jar包外,還需將jacob-1.17-M2-x64.dll、jacob-1.17-M2-x86.dll兩個文件複製到C:\WINDOWS\SYSTEM32目錄或者工程目錄下)
package com.qianyan.test;
import com.jacob.activeX.ActiveXComponent;
import com.jacob.com.Dispatch;
import com.jacob.com.Variant;
/**
 * Sample that drives Word through JACOB to save a document in another format.
 */
public class TestJacob {

    /**
     * Opens {@code inputFIle} in a hidden Word instance, saves it to
     * {@code outputFile} using format code 8, closes the document and quits Word.
     * Prints a success or failure line to stdout; exceptions raised while
     * converting are printed to stderr and not rethrown.
     *
     * @param inputFIle  path of the Word document to open
     * @param outputFile path the converted document is saved to
     */
    public static void extractDoc(String inputFIle, String outputFile) {
        boolean succeeded = false;
        // Start the Word application.
        ActiveXComponent word = new ActiveXComponent("Word.Application");
        try {
            // Keep the Word window hidden.
            word.setProperty("Visible", new Variant(false));

            // Open the input document through the Documents collection.
            // NOTE(review): the two boolean arguments are presumably
            // ConfirmConversions=false and ReadOnly=true — confirm against the
            // Word Documents.Open documentation.
            Dispatch documents = word.getProperty("Documents").toDispatch();
            Object[] openArgs = { inputFIle, new Variant(false), new Variant(true) };
            Dispatch document = Dispatch
                    .invoke(documents, "Open", Dispatch.Method, openArgs, new int[1])
                    .toDispatch();

            // Format code 7 saves as txt, 8 saves as html.
            Object[] saveArgs = { outputFile, new Variant(8) };
            Dispatch.invoke(document, "SaveAs", Dispatch.Method, saveArgs, new int[1]);

            // Close the document.
            Variant closeArg = new Variant(false);
            Dispatch.call(document, "Close", closeArg);
            succeeded = true;
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Always shut the Word process down.
            word.invoke("Quit", new Variant[0]);
        }
        System.out.println(succeeded ? "Transformed Successfully" : "Transform Failed");
    }

    /** Converts {@code E:/test.doc} to {@code E:/test.htm}. */
    public static void main(String[] args) {
        extractDoc("E:/test.doc", "E:/test.htm");
    }
}