我想请教一个问题:
我的程序在只有一个输入文件时可以正常运行,但当我添加其他输入文件时就不起作用了。我该如何确认程序对每个输入都有效,并以多个 HTML 文件作为输入、为每个文件生成对应的 PDF?
def get_static_report_information():
    """Return the static report information of the first HTML report found.

    The function looks for ``*.html`` files in ``file_input_dir`` (a
    module-level name defined outside this block) and parses the first one
    in sorted order.  To process *every* file -- e.g. to produce one PDF
    per HTML input -- loop over ``iter_static_report_information()``
    instead.

    Returns:
        A 7-tuple ``(raw_chapters, nom_rapport, reference_rapport,
        page_soup, sections_seperator, new_grouped, version)``; see
        ``_parse_report_file`` for the meaning of each item.

    Raises:
        FileNotFoundError: if no ``*.html`` file exists in
            ``file_input_dir``.
        ValueError: if the file lacks one of the required elements.
    """
    for _filename, report in iter_static_report_information():
        # Only the first report is returned so the return shape stays a
        # single 7-tuple.
        return report
    raise FileNotFoundError(f"no .html files found in {file_input_dir!r}")


def iter_static_report_information():
    """Yield ``(filename, report)`` for every ``*.html`` file in the input dir.

    ``report`` is the 7-tuple produced by ``_parse_report_file``.  Files are
    visited in sorted order so that runs are reproducible.
    """
    pattern = os.path.join(file_input_dir, "*.html")
    for filename in sorted(glob.glob(pattern)):
        yield filename, _parse_report_file(filename)


def _require_text(tag, description, filename):
    """Return the text of *tag*, or raise a descriptive error if it is None."""
    if tag is None:
        # A lookup that matched nothing returns None; fail with a message
        # that says which file and which element is the problem.
        raise ValueError(f"{filename}: could not find {description}")
    return tag.get_text()


def _group_chapters(headings):
    """Pair each run of ``h1`` headings with the run of ``h2`` that follows.

    *headings* is a list of ``[tag_name, text]`` pairs in document order.
    The result is a list of ``[title, [subtitle, ...]]`` where ``title`` is
    the text of the first heading of each even-numbered run and the
    subtitles are the texts of the run right after it.  A final run with no
    successor gets an empty subtitle list instead of raising IndexError.
    """
    runs = [list(items)
            for _, items in groupby(headings, key=lambda h: h[0] == 'h1')]
    title_runs = runs[0::2]
    sub_runs = runs[1::2]
    if len(sub_runs) < len(title_runs):
        # Odd number of runs: the last title run has no following run.
        sub_runs.append([])
    return [[title_run[0][-1], [text for _, text in sub_run]]
            for title_run, sub_run in zip(title_runs, sub_runs)]


def _parse_report_file(filename):
    """Parse one HTML report file and extract its static information.

    Returns:
        A 7-tuple:
        ``raw_chapters``       -- right-stripped texts of the matched ``h2`` tags;
        ``nom_rapport``        -- stripped text of the ``<a href="#1">`` tag;
        ``reference_rapport``  -- text of the first ``h1`` without an ``id``,
                                  with ``"Volume"`` removed and stripped;
        ``page_soup``          -- the parsed document object;
        ``sections_seperator`` -- text of the ``td.row_cell`` cell whose
                                  string contains ``.pactext``;
        ``new_grouped``        -- ``[title, [subtitles]]`` pairs built by
                                  ``_group_chapters``;
        ``version``            -- stripped text of the first ``td.row_cell``
                                  cell whose string contains a digit.

    Raises:
        ValueError: if one of the single-element lookups finds nothing.
    """
    with codecs.open(filename, "rb", "utf8") as f:
        page = f.read()  # the whole document as one decoded string

    # `soup` comes from the enclosing file -- presumably BeautifulSoup
    # imported under that name; confirm against the file's imports.
    page_soup = soup(page, "html.parser")
    print("------------- before output1 \n")

    separator_cell = page_soup.find(
        "td", {'class': 'row_cell'}, string=re.compile(r'\.pactext'))
    sections_seperator = _require_text(
        separator_cell, "the td.row_cell cell containing '.pactext'", filename)

    # Report reference: take the tag's text first.  Calling .replace() on
    # the tag object itself fails, because unknown attributes on a tag are
    # treated as child-tag lookups.
    ref = page_soup.find("h1", {"id": False})
    reference_rapport = _require_text(
        ref, "an <h1> without an id (report reference)", filename
    ).replace("Volume", "").strip()

    vers = page_soup.find("td", {'class': 'row_cell'}, string=re.compile(r'\d'))
    version = _require_text(
        vers, "a td.row_cell cell containing a digit (version)", filename
    ).strip()

    nom = page_soup.find("a", {"href": "#1"})
    nom_rapport = _require_text(
        nom, "the <a href=\"#1\"> link (report name)", filename
    ).strip()

    # Headings whose id ends in digits, in document order.
    chapters = page_soup.find_all(
        re.compile('h1|h2'), {'id': re.compile(r'\d+$')})
    raw_chapters = [re.sub(r'\s+$', '', i.text)
                    for i in chapters if i.name == 'h2']
    data = [[i.name, re.sub(r'\s+$', '', i.text)] for i in chapters]
    new_grouped = _group_chapters(data)

    return (raw_chapters, nom_rapport, reference_rapport, page_soup,
            sections_seperator, new_grouped, version)