#!/usr/bin/env python3
# Script to convert HTML files provided by The Online Plain Text English
# Dictionary (http://www.mso.anu.edu.au/~ralph/OPTED/) into SQLite database
import sys
import sqlite3
from argparse import ArgumentParser, FileType
from bs4 import BeautifulSoup
def parse_args(argv=None):
    """Parse the command-line arguments of the converter.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads the process arguments.

    Returns:
        The argparse namespace with two attributes: ``files``, a non-empty
        list of the input files already opened in binary read mode, and
        ``out``, the value given to the required ``--out``/``-o`` option.
    """
    # The sentence has to be passed as ``description``. The first positional
    # parameter of ArgumentParser is ``prog``, so passing it positionally
    # replaces the program name in the usage and error messages.
    parser = ArgumentParser(
        description="Create database from HTML dictionary pages"
    )
    parser.add_argument("files", metavar="file", nargs="+", type=FileType("rb"))
    parser.add_argument("--out", "-o", required=True)
    return parser.parse_args(argv)
def create_tables(conn):
    """Create an empty ``words`` table on *conn*, replacing any existing one.

    An existing ``words`` table is dropped first, so rows stored by an
    earlier run are discarded. The change is committed before returning.

    Args:
        conn: Database connection providing ``execute`` and ``commit``
            (the script passes a ``sqlite3`` connection).
    """
    conn.execute("DROP TABLE IF EXISTS words")
    conn.execute(
        "CREATE TABLE words (id integer primary key, word text, description text)"
    )
    conn.commit()
def words(handle):
    """Yield ``(word, definition)`` pairs parsed from one HTML page.

    Only ``<p>`` elements with exactly four child nodes are used. The text of
    the first child, lower-cased, is the word. The fourth child, with leading
    ``)`` and space characters stripped and newlines replaced by spaces, is
    the definition.
    NOTE(review): presumably entries look like
    ``<P><B>word</B> (<I>pos</I>) definition</P>`` -- confirm against the
    actual dictionary pages.

    Args:
        handle: Markup, or an open file containing it, as accepted by
            ``BeautifulSoup``.

    Yields:
        Tuples ``(word, definition)`` of strings.
    """
    # Name the parser explicitly. Without it BeautifulSoup picks whichever
    # parser happens to be installed and emits a warning, so the parse tree
    # could differ from one machine to another.
    doc = BeautifulSoup(handle, "html.parser")
    for p in doc.find_all("p"):
        if len(p.contents) != 4:
            continue
        headword = p.contents[0].string
        tail = p.contents[3]
        # Skip paragraphs that have four children but not the expected shape:
        # a first child without a single text string, or a trailing child
        # that is a tag rather than text. Either would raise below.
        if headword is None or not isinstance(tail, str):
            continue
        word = headword.lower()
        definition = tail.lstrip(") ").replace("\n", " ")
        yield word, definition
def insert_words(conn, iter):
    """Insert ``(word, description)`` pairs into the ``words`` table.

    The ``id`` column is passed as NULL so the database assigns it. Nothing
    is committed here; the caller decides when to commit.

    Args:
        conn: Database connection providing ``executemany``.
        iter: Iterable of two-item sequences ``(word, description)``. The
            name shadows the built-in ``iter`` but is kept so that keyword
            callers keep working.
    """
    conn.executemany("INSERT INTO words VALUES (NULL, ?, ?)", iter)
def main():
    """Convert the HTML pages named on the command line into a database.

    Opens (or creates) the database given by ``--out``, recreates the
    ``words`` table, then loads every input file in turn, committing after
    each one. Progress messages go to standard error.
    """
    args = parse_args()
    db = sqlite3.connect(args.out)
    try:
        create_tables(db)
        for handle in args.files:
            # The files were opened during argument parsing; close each one
            # as soon as it has been loaded.
            with handle:
                print("Processing \"{}\"".format(handle.name), file=sys.stderr)
                insert_words(db, words(handle))
            db.commit()
    finally:
        # Release everything even when a page fails to load. Closing a file
        # that is already closed is a no-op.
        for handle in args.files:
            handle.close()
        db.close()
# Run the conversion only when the file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
我尝试运行 python my_script.py,
但它输出了以下内容:
usage: Create database from HTML dictionary pages [-h] --out OUT file [file ...]
Create database from HTML dictionary pages: error: the following arguments are required: file, --out/-o
我平时不使用 Python,只是想运行这个脚本。我是新手,代码排版不好,请见谅。
答案 0 :(得分:0)
您必须使用 -o 选项为输出指定一个文件,并且至少提供一个要转换的 HTML 输入文件。
例如:python script.py -o MYOUTFILE page1.html