動機:使用 Java 將 PDF 論文分割成若干個每份最多 4 頁的 PDF 文件,便於使用有道翻譯免費翻譯 PDF 文檔(因爲有道翻譯每次只免費翻譯 4 頁;試用過不少翻譯軟件,還是有道翻譯好用、翻譯質量好,只是要收費);順便練習寫代碼。開發時使用的是 IDEA 平臺,並導出了可執行的 jar 包。
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.PdfContentByte;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfWriter;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
/**
 * Command-line utility that splits PDF files into consecutive chunks of at most
 * four pages each. Output files are written next to the input file.
 */
public class PdfTools {

    /** Maximum number of pages placed in each output file. */
    private static final int SPLIT_SIZE = 4;

    /** Extension removed from the input name and appended to every output name. */
    private static final String PDF_EXTENSION = ".pdf";

    /**
     * Splits every file named on the command line and prints one line per file:
     * the value returned by {@link #splitPdf(String)}, a tab, and the file name.
     * Prints a usage hint when no arguments are given.
     *
     * @param args paths of the PDF files to split
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            System.out.println("請輸入PDF文件名");
            return;
        }
        for (String filename : args) {
            System.out.println(splitPdf(filename) + "\t" + filename);
        }
    }

    /**
     * Splits the PDF {@code filename} into files of at most four pages each.
     *
     * <p>Output files are named {@code <base>_NN.pdf}, where {@code <base>} is
     * {@code filename} without a trailing {@code .pdf} (matched case-insensitively)
     * and {@code NN} is the zero-based, two-digit chunk index.
     *
     * @param filename path of the PDF file to split
     * @return the number of files written on success; otherwise a negative code:
     *         {@code -1} if the input could not be opened (or {@code filename} is
     *         {@code null}), {@code -2 - 10 * k} if output file {@code k} could not
     *         be created, {@code -3 - 10 * k} if the PDF writer for output file
     *         {@code k} could not be created
     */
    public static int splitPdf(String filename) {
        if (filename == null) {
            return -1;
        }
        PdfReader reader;
        try {
            reader = new PdfReader(filename);
        } catch (IOException e) {
            return -1;
        }
        try {
            String baseName = stripPdfExtension(filename);
            int numberOfPages = reader.getNumberOfPages();
            int numberOfNewFiles = 0;
            int pageNumber = 1; // the author checked the iText source: page numbers are 1-based
            while (pageNumber <= numberOfPages) {
                // Only the numeric suffix goes through String.format, so a '%' in the
                // user-supplied path cannot be misread as a format specifier.
                String outputFilename =
                        baseName + String.format("_%02d", numberOfNewFiles) + PDF_EXTENSION;
                FileOutputStream out;
                try {
                    out = new FileOutputStream(outputFilename);
                } catch (FileNotFoundException e) {
                    return -2 - numberOfNewFiles * 10;
                }
                // NOTE(review): Document() uses the library's default page size; source
                // pages of a different size may not fit exactly - TODO confirm.
                Document doc = new Document();
                PdfWriter writer;
                try {
                    writer = PdfWriter.getInstance(doc, out);
                } catch (DocumentException e) {
                    closeQuietly(out);
                    return -3 - numberOfNewFiles * 10;
                }
                doc.open();
                PdfContentByte cb = writer.getDirectContent();
                // Compute the last page of this chunk once, so the loop condition
                // does not need to re-check the document end on every iteration.
                int lastPage = Math.min(pageNumber + SPLIT_SIZE - 1, numberOfPages);
                for (; pageNumber <= lastPage; pageNumber++) {
                    doc.newPage();
                    cb.addTemplate(writer.getImportedPage(reader, pageNumber), 0, 0);
                }
                doc.close();
                writer.close();
                numberOfNewFiles++;
            }
            return numberOfNewFiles;
        } finally {
            // Release the input file on every exit path, including error returns.
            reader.close();
        }
    }

    /** Returns {@code filename} without a trailing ".pdf" (any case), or unchanged if absent. */
    private static String stripPdfExtension(String filename) {
        int cut = filename.length() - PDF_EXTENSION.length();
        if (cut >= 0
                && filename.regionMatches(true, cut, PDF_EXTENSION, 0, PDF_EXTENSION.length())) {
            return filename.substring(0, cut);
        }
        return filename;
    }

    /** Closes {@code out}, ignoring failures; used only on an error path that already reports a code. */
    private static void closeQuietly(FileOutputStream out) {
        try {
            out.close();
        } catch (IOException ignored) {
            // The caller is already returning an error code; nothing more to report.
        }
    }
}