先上一段代碼
import re

import docx
from docx import Document
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml.ns import qn
from docx.shared import Pt
from docx.shared import RGBColor
# Build "result.docx" from a '|'-separated word list: a title page, then one
# paragraph (followed by a page break) per word with its frequency, meanings
# and example sentences.
#
# Each input line is split on '|' after removing '>>' and is read as
#   word | frequency | text with "double-quoted" meanings | text with
#   'single-quoted' example sentences
#
# NOTE(review): the indentation of this script was lost, so the nesting below
# is reconstructed. In particular the per-word page break is assumed to sit
# inside the loop -- confirm against the author's copy.
# NOTE(review): the path literal appears to contain an extraction artifact
# ("(jié)"); it is kept verbatim here -- confirm the real location.
_SOURCE_PATH = 'F:\\vscode\\.vscode\\python\\python課\\文件操作\\結(jié)果匯總\\newans.txt'

# Quoted fragments must be strictly longer than this to be written out.
_MIN_ITEM_CHARS = 20

# Compiled once because both patterns are applied to every input line.
_DOUBLE_QUOTED = re.compile(r'"(.*?)"')
_SINGLE_QUOTED = re.compile(r"'(.*?)'")


def _add_label(paragraph, text, rgb):
    """Append a bold, 17pt run with the given RGB color and return it."""
    label = paragraph.add_run(text)
    label.bold = True
    label.font.size = Pt(17)
    label.font.color.rgb = RGBColor(*rgb)
    return label


def _add_numbered(paragraph, items):
    """Append every item longer than _MIN_ITEM_CHARS as a numbered line.

    Numbering counts only the items that are actually written. Each written
    item is also echoed to stdout.
    """
    number = 0
    for item in items:
        if len(item) <= _MIN_ITEM_CHARS:
            continue
        number += 1
        paragraph.add_run(str(number) + '. ' + item + '\n')
        print(item)


# Read everything up front so the file handle is closed before any document
# work starts.
with open(_SOURCE_PATH, encoding='utf-8') as source_file:
    all_data = source_file.readlines()

document = Document()
# Level 0 produces the document title.
document.add_heading('四六級單詞', 0)
document.add_paragraph('test paragraphs')

# Default font for body text. font.name only fills the Latin font slots; the
# East Asian slot has to be set on the underlying rFonts element as well,
# otherwise CJK characters keep the fallback font.
normal_style = document.styles['Normal']
normal_style.font.name = u'黑體'
normal_style.element.rPr.rFonts.set(qn('w:eastAsia'), u'黑體')

# Cover paragraph: a large, colored, left-aligned "START".
p = document.add_paragraph()
p.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.LEFT
run = p.add_run(u'START')
run.font.color.rgb = RGBColor(54, 95, 145)
run.font.size = Pt(26)
document.add_page_break()

for line in all_data:
    fields = line.replace('>>', '').split('|')
    if len(fields) < 4:
        # Blank or truncated line: there is nothing to index into.
        print("skip")
        continue
    word, frequency, raw_meanings, raw_sentences = fields[:4]

    meanings = _DOUBLE_QUOTED.findall(raw_meanings)
    if not meanings:
        # Entries without any quoted meaning are left out of the document.
        print("skip")
        continue

    p = document.add_paragraph()
    p.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.LEFT

    # Word
    _add_label(p, '單詞:', (54, 95, 145))
    p.add_run(word + '\n')

    # Frequency
    _add_label(p, '詞頻:', (54, 100, 100))
    p.add_run(frequency + '\n')

    # Meanings (known to be non-empty because of the check above)
    _add_label(p, '詞義:\n', (15, 15, 145))
    _add_numbered(p, meanings)

    # Example sentences
    sentences = _SINGLE_QUOTED.findall(raw_sentences)
    _add_label(p, '例句:\n', (54, 95, 45))
    if sentences:
        _add_numbered(p, sentences)
    else:
        print(word + ' has no sentence')

    document.add_page_break()

# Written relative to the current working directory.
document.save('result.docx')
這段代碼是我把一個分詞結果寫入docx文件所使用的，總結一下docx怎么用
概述
個人理解，為自己而作
docx這個模塊還是很隨主流操作方式的，我認為在此模塊中以段和字為單位，當然了不包括圖片和表格，僅僅說文字。圖片和表格操作大同小異。
除了title和head，其他的文字一般都是寫入一個paragraph中，然后再在這個段落中插入文字，add_run('text')，如果我們給這次插入存入一個變量，如
p = document.add_paragraph()
content = p.add_run("words")
我們就可以對content操作，然后改變'words'的顯示形式
如果我們只需要設置一個屬性，比如粗體顯示
content.bold = True
這樣就會粗體顯示了
其他操作也是大同小異，當然了，基本操作，具體可以看文檔