将 PDF 文件导出为 CSV 时报错:TypeError: writeheader() takes 1 positional argument but 2 were given(writeheader() 只接受 1 个位置参数,但传入了 2 个)
from tabula import read_pdf
from tabulate import tabulate
import csv
# Export the table(s) extracted from asd.pdf to ddd.csv under a fixed header.
#
# NOTE(review): depending on the tabula-py version, read_pdf presumably returns
# either one DataFrame or a list of DataFrames -- confirm against the installed
# version. Both shapes are normalized to a list below.
df = read_pdf("asd.pdf")
print(df)
tables = df if isinstance(df, list) else [df]

# The comma used to sit inside the 'Total_seats,' literal, which made Python
# concatenate it with "document_type" into a single bogus field name.
columns = ['specialty ', "name", 'number_of_seats', 'Total_seats', "document_type", "concent"]

with open('ddd.csv', "w", newline="") as file:
    writer = csv.DictWriter(file, fieldnames=columns)
    # writeheader() takes no arguments: it emits the fieldnames given above.
    # Passing the DataFrame raised the "takes 1 positional argument" TypeError.
    writer.writeheader()
    for table in tables:
        # Map each row onto the header by position. zip() drops surplus cells,
        # and DictWriter fills any missing trailing fields with its restval.
        for row in table.itertuples(index=False, name=None):
            writer.writerow(dict(zip(columns, row)))
答案 0 :(得分:0)
以下代码复制自 http://theautomatic.net/2019/05/24/3-ways-to-scrape-tables-from-pdfs-with-python/ ,该页面中还有更多细节:
import tabula
# Remote PDF whose tables are converted to CSV.
file = "http://lab.fs.uni-lj.si/lasin/wp/IMIT_files/neural/doc/seminar8.pdf"
# Both conversions below are written to this same path.
destination = "output.csv"

# In-memory alternative kept from the original snippet (disabled):
#   tables = tabula.read_pdf(file, pages="all", multiple_tables=True)

# Conversion without an explicit page selection; the original note describes
# this as outputting just the first table in the PDF.
tabula.convert_into(file, destination, output_format="csv")

# Conversion of all pages, targeting the same destination as the call above.
tabula.convert_into(file, destination, pages='all', output_format="csv")