此问题已标记为删除
答案 0 :(得分:0)
这段代码可能看起来不太漂亮(或不够专业),但它能完成任务——在每个 'Hello' 之前(文档开头除外)插入分页符:
from docx import Document
from docx.enum.text import WD_BREAK
import re
# Split the text of temp.docx at every occurrence of `word` and write the
# pieces to hello.docx so that each occurrence (other than one at the very
# start of the output) begins on a new page.
doc = Document('temp.docx')
new_doc = Document()
word = 'Hello'

# Collect the text of all paragraphs, separated by newlines. Paragraphs are
# skipped until the first non-empty one, so the combined text never starts
# with a separator. Joining a list avoids repeated string concatenation.
paragraph_texts = []
for p in doc.paragraphs:
    if paragraph_texts or p.text != '':
        paragraph_texts.append(p.text)
whole_text = '\n'.join(paragraph_texts)

# Wrapping the pattern in a capturing group makes re.split keep the matched
# word in the resulting list. re.escape makes sure `word` is matched literally
# even if it contains regex metacharacters such as '.', '+' or '('.
split_p = re.split('(' + re.escape(word) + ')', whole_text)

# If the text starts with `word` (or is empty), re.split yields a leading ''.
# Drop it so the output does not begin with an empty paragraph followed by a
# page break.
if split_p[0] == '':
    del split_p[0]

i = 0
while i < len(split_p):
    if split_p[i] == word:
        # No page break before the very first paragraph of the new document.
        if new_doc.paragraphs:
            # Attach the break to the last existing paragraph instead of
            # calling new_doc.add_page_break(), which would add a separate
            # paragraph just for the break and leave an extra empty line.
            new_doc.paragraphs[-1].add_run().add_break(WD_BREAK.PAGE)
        # A matched word is always followed by the text up to the next match
        # (possibly ''), so index i + 1 is in range here.
        new_doc.add_paragraph(split_p[i] + split_p[i + 1])
        i += 2
    else:
        # Only the text preceding the first occurrence of `word` lands here.
        new_doc.add_paragraph(split_p[i])
        i += 1

new_doc.save('hello.docx')