一.安裝xpdf
1.下載
我們需要下載的xpdf安裝包主要有三個:xpdf主程序包,以及下面兩個中文支持包:
簡體中文支持:ftp://ftp.foolabs.com/pub/xpdf/xpdf-chinese-simplified.tar.gz
繁體中文支持:ftp://ftp.foolabs.com/pub/xpdf/xpdf-chinese-traditional.tar.gz
(1)進入下載目錄,將主程序解壓至/usr,也可以是其他地方,根據個人情況而定。
#cd usr
#tar zvfx xpdf-chinese-simplified.tar.gz -C /usr/xpdf
#mv /usr/xpdf/xpdf-chinese-simplified /usr/xpdf/chinese-simplified
#tar zvfx xpdf-chinese-traditional.tar.gz -C /usr/xpdf
#mv /usr/xpdf/xpdf-chinese-traditional /usr/xpdf/chinese-traditional
export PATH=/usr/xpdf/:$PATH
確保重啓機器後,在控制檯輸入xpdf不會提示找不到命令或文件即可。
(4)資源配置
#cp sample-xpdfrc xpdfrc
#vi xpdfrc
#----- begin Chinese Simplified support package
cidToUnicode Adobe-GB1 "/usr/xpdf/chinese-simplified/Adobe-GB1.cidToUnicode"
unicodeMap ISO-2022-CN "/usr/xpdf/chinese-simplified/ISO-2022-CN.unicodeMap"
unicodeMap EUC-CN "/usr/xpdf/chinese-simplified/EUC-CN.unicodeMap"
unicodeMap GBK "/usr/xpdf/chinese-simplified/GBK.unicodeMap"
cMapDir Adobe-GB1 "/usr/xpdf/chinese-simplified/CMap"
toUnicodeDir "/usr/xpdf/chinese-simplified/CMap"
#displayCIDFontTT Adobe-GB1 /usr/..../gkai00mp.ttf
#----- end Chinese Simplified support package
#----- begin Chinese Traditional support package (2004-jul-27)
cidToUnicode Adobe-CNS1 "/usr/xpdf/chinese-traditional/Adobe-CNS1.cidToUnicode"
unicodeMap Big5 "/usr/xpdf/chinese-traditional/Big5.unicodeMap"
unicodeMap Big5ascii "/usr/xpdf/chinese-traditional/Big5ascii.unicodeMap"
cMapDir Adobe-CNS1 "/usr/xpdf/chinese-traditional/CMap"
toUnicodeDir "/usr/xpdf/chinese-traditional/CMap"
#displayCIDFontTT Adobe-CNS1 /usr/..../bkai00mp.ttf
#----- end Chinese Traditional support package
* @param filePath pdf文件路徑
* @return
*/
public String getPdfContent(String filePath){
String excute="pdftotext";
String[] cmd=new String[]{excute, "-enc", "UTF-8", "-q", filePath,"-"};
Process p=null;
try {
p=Runtime.getRuntime().exec(cmd);
} catch (IOException e) {
e.printStackTrace();
}
BufferedInputStream bis=new BufferedInputStream(p.getInputStream());
InputStreamReader reader=null;
try {
reader=new InputStreamReader(bis,"UTF-8");
} catch (UnsupportedEncodingException e1) {
e1.printStackTrace();
}
StringBuffer sb=new StringBuffer();
try {
BufferedReader br = new BufferedReader(reader);
String line = br.readLine();
sb = new StringBuffer();
while (line != null) {
sb.append(line);
sb.append(" ");
line = br.readLine();
}
} catch (Exception e) {
e.printStackTrace();
}
return sb.toString();
}