因爲一個需求,需要把pdf的第一頁做成專輯封面,
然後數量很大,就用python來做這個事情,但是遇到了很多困難:
首先說一下具體怎麼做吧。
我的環境 win10 64位, python 2.7 32位
需要安裝以下包:
pyPdf(pip可以安裝)
wand(pip可以安裝)
http://ftp.icm.edu.pl/packages/ImageMagick/binaries/ImageMagick-6.7.7-6-Q16-windows-dll.exe
https://mirrors.netix.net/sourceforge/g/gh/ghostscript/GPL%20Ghostscript/9.05/gs905w64.exe
注意:後兩個包(ImageMagick 和 Ghostscript)的版本一定不要裝錯,這是踩過無數坑之後得出的經驗。下面是 python 代碼:
# coding=utf8
"""Turn one page of a PDF into a PNG image (e.g. for use as a cover).

Workflow: ``getPagesOfPdf`` writes the chosen page into a temporary
single-page PDF, then ``createPng`` renders that temporary PDF to a PNG
and deletes the temporary file.
"""
import os

from wand.image import Image
from pyPdf import PdfFileReader, PdfFileWriter

path = r'C:\Users\guanjia\Desktop\2.PDF'


def _split_pdf_path(pdf_path):
    """Return *pdf_path* without its extension, requiring a ``.pdf`` suffix.

    ``os.path.splitext`` is used instead of ``str.split('.')`` so that paths
    containing extra dots (or no dot at all) are handled instead of failing
    with an unpacking ``ValueError``.

    Raises:
        Exception: if the extension is not ``.pdf`` (case-insensitive).
    """
    stem, ext = os.path.splitext(pdf_path)
    if ext.lower() != '.pdf':
        raise Exception('文件必須是pdf')
    return stem


def convert_all_pdf_pages_to_png(pdf_file_blob):
    """Render every page of a PDF into one tall PNG and return its bytes.

    The pages are stacked vertically, top to bottom. Because *all* pages end
    up in the same picture, callers that want a single page must first cut
    the PDF down with ``getPagesOfPdf``.

    Args:
        pdf_file_blob: the complete binary content of a PDF file.

    Returns:
        The PNG image as a binary blob.
    """
    # Both Image objects are used as context managers so their underlying
    # ImageMagick resources are released even when compositing fails.
    with Image(blob=pdf_file_blob) as pdf:
        pages = len(pdf.sequence)
        # The canvas height is the loaded image's height times the page
        # count, i.e. every page is given a slot of the same height.
        with Image(width=pdf.width, height=pdf.height * pages) as image:
            for i in range(pages):
                image.composite(
                    pdf.sequence[i],
                    top=pdf.height * i,
                    left=0
                )
            return image.make_blob('png')


def getPagesOfPdf(path, page_index=0):
    """Copy one page of the PDF at *path* into a new ``*_thumb.pdf`` file.

    Args:
        path: path of the source PDF; must end in ``.pdf`` (any case).
        page_index: zero-based index of the page to extract. Defaults to 0,
            the first page.

    Returns:
        The path of the newly written single-page PDF, which is the source
        path with ``_thumb.pdf`` in place of the original extension.

    Raises:
        Exception: if *path* does not have a ``.pdf`` extension.
    """
    tmp_pdf_path = _split_pdf_path(path) + '_thumb.pdf'
    # The source stays open until the writer has finished, so the page data
    # is still readable while it is being written; both handles are closed
    # even on error.
    with open(path, 'rb') as source:
        reader = PdfFileReader(source)
        writer = PdfFileWriter()
        writer.addPage(reader.getPage(page_index))
        with open(tmp_pdf_path, 'wb') as target:
            writer.write(target)
    return tmp_pdf_path


def createPng(tmp_pdf_path):
    """Render the PDF at *tmp_pdf_path* to a PNG next to it, then delete it.

    The PNG gets the same name as the PDF with the extension replaced by
    ``.png``. The PDF is treated as a temporary file and is removed
    afterwards, whether or not the conversion succeeded.

    Raises:
        Exception: if *tmp_pdf_path* does not have a ``.pdf`` extension
            (in that case nothing is deleted).
    """
    # Validate before entering the try block so a wrongly typed path is never
    # deleted by the cleanup below.
    stem = _split_pdf_path(tmp_pdf_path)
    try:
        with open(tmp_pdf_path, 'rb') as pdf_file:
            png_blob = convert_all_pdf_pages_to_png(pdf_file.read())
        # Only open the output once rendering has succeeded, so a failed
        # conversion does not leave an empty .png behind.
        with open(stem + '.png', 'wb') as png_file:
            png_file.write(png_blob)
    finally:
        # Delete only after the read handle is closed: Windows refuses to
        # remove a file that is still open.
        if os.path.exists(tmp_pdf_path):
            os.remove(tmp_pdf_path)


if __name__ == '__main__':
    tmp_pdf_path = getPagesOfPdf(path)  # cut the first page out of the PDF
    createPng(tmp_pdf_path)  # render that page to a PNG