python-對docx文檔操作demo + word批量轉pdf 及[AttributeError]解決方案

pip install python-docx

1.讀取sample.docx:

import docx
from docx import Document

from pprint import pprint

def getText(filename):
	"""Open the .docx file *filename* and return a list holding the ``text`` of each paragraph, in document order."""
	return [paragraph.text for paragraph in Document(filename).paragraphs]

# Pretty-print the list of paragraph texts that getText reads from sample.docx.
pprint(getText('sample.docx'))

2.創建demo_docx.docx:

import docx
from docx import Document

# Build demo_docx.docx: a title, a paragraph with formatted runs, bulleted and
# numbered list items, a picture, a two-column table and a trailing page break.
document = Document()

document.add_heading('This is Title', 0)	# heading level 0 -> Title

p = document.add_paragraph('A paragraph!')

# Runs appended to the same paragraph can each carry their own formatting.
p.add_run('bold text ').bold = True
p.add_run('italic text').italic = True

# Styles are referenced by their UI name ('List Bullet' / 'List Number');
# looking them up by style id ('ListBullet') is deprecated in python-docx.
document.add_paragraph(
	'unordered list 1', style='List Bullet'
)

for i in range(3):
	document.add_paragraph(
		'ordered list {}'.format(i), style='List Number'
	)

# add_picture also accepts optional width=/height= lengths
# (e.g. docx.shared.Inches(n) or docx.shared.Cm(n)) to scale the image.
document.add_picture('1.jpg')


# ---------------- add table ------------
table = document.add_table(rows=1, cols=2)
headr_cells = table.rows[0].cells	# cells of the header row (row 0)
headr_cells[0].text = 'name'
headr_cells[1].text = 'gender'	# second column; index 0 would overwrite 'name'

d = [dict(name='A', gender='male'), dict(name='B', gender='female')]

# Append one table row per record, filling the columns in header order.
for item in d:
	row_cells = table.add_row().cells
	row_cells[0].text = str(item['name'])
	row_cells[1].text = str(item['gender'])

document.add_page_break()	# start a new page

document.save('demo_docx.docx')

# ------------

word批量轉pdf 及 失敗解決方案

利用下面的 code 可批量將 dir_word 內的 .docx/.doc 文檔轉爲 dir_pdf 內的 .pdf,只是簡單利用 Word 的「另存爲 PDF」功能。
但不加 sleep 時,每次只有第一個 Word 文檔能轉換成功,之後報錯:
raise AttributeError("%s.%s" % (self._username_, attr)) AttributeError: <unknown>.Open

檢查代碼也沒發現錯誤,後來想到是不是因爲上一個操作還沒完成導致的,就在每次轉換之間加了 1s 的休眠,運行就正常了。

	
from win32com.client import Dispatch
import os
from time import sleep

# Value passed as FileFormat to the document's SaveAs call to request PDF output.
wdFormatPDF = 17

def doc2pdf(input_file, output_file):
	"""Convert one Word document to PDF through Word's COM automation interface.

	Args:
		input_file: Path of the .doc/.docx file to open.
		output_file: Path of the PDF file to write.

	Raises:
		Whatever the COM calls raise (e.g. AttributeError from ``Documents.Open``);
		the document and the Word instance are still released before it propagates.
	"""
	print(input_file)
	print(output_file)
	word = Dispatch('Word.Application')
	try:
		doc = word.Documents.Open(input_file)
		try:
			doc.SaveAs(output_file, FileFormat=wdFormatPDF)
		finally:
			# Close the document even if the export failed.
			doc.Close()
	finally:
		# Always shut Word down so a failed conversion does not leave a process behind.
		word.Quit()


if __name__ == "__main__":
	# Convert every .doc/.docx found under dir_word into a PDF placed in dir_pdf.

	dir_word = "F:\\python\\word2pdf\\word"	# directory containing the Word files
	dir_pdf = "F:\\python\\word2pdf\\pdf"	# directory receiving the PDF files

	for root, dirs, filenames in os.walk(dir_word):
		for file in filenames:
			# Split off the real extension so only the suffix is swapped for
			# ".pdf" (str.replace would rewrite every ".doc" in the name).
			stem, ext = os.path.splitext(file)
			if ext.lower() not in (".doc", ".docx"):
				continue

			# os.walk descends into subfolders, so the source path must be built
			# from root; all PDFs are written flat into dir_pdf.
			doc2pdf(os.path.join(root, file), os.path.join(dir_pdf, stem + ".pdf"))

			# Pause 1s between conversions: without it only the first document
			# converted and the next Documents.Open raised AttributeError.
			sleep(1)
	
	
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章