我需要你的建议和帮助。
我正在编写一个代码来解析区域名称以及某个网站上相应区域的链接。之后我想存储区域的名称和数据库中的链接(sqlite3)。创建了数据库,创建了表,但是无法将数据插入到表中。我尝试了一些试验和错误但没有奏效。因此,我做了这个帖子。
这是我的代码:
'''
usage python capstonePy.py http://www.liverpoolfc.com/fans/lfc-official-supporters-clubs
URL: http://www.liverpoolfc.com/fans/lfc-official-supporters-clubs
Official supporters URL pattern:
http://www.liverpoolfc.com/fans/lfc-official-supporters-clubs/[region]
'''
from sys import argv
from os.path import exists
from BeautifulSoup import *
import urllib
import re
import sqlite3
class FindSupporters:
def __init__(self, *args, **kwargs):
#parsing the url from the command line
url = argv[1]
#make a new database
cur = new_db('liverpudlian.sqlite3')
#open and read the url
fhand = open_and_read(url)
#print how many characters have been retrieved
suc_ret(len(fhand))
#make a list of links (href)
linklst = find_link(fhand)
#make a list of supporters regions
offsuplinklst = fans_link(linklst)
#make a new table and insert the data
officialsup_table(cur, offsuplinklst, 'liverpudlian.sqlite3')
sqlite3.connect('liverpudlian.sqlite3').close()
def new_db(name):
conn = sqlite3.connect(name)
cur = conn.cursor()
return cur
def open_and_read(url):
try:
fhand = urllib.urlopen(url).read()
except:
print '\n'
print "+------------------------------------------------------------------------------+"
print "|\t\t\t\tError: URL not found.\t\t\t\t|"
print "+------------------------------------------------------------------------------+"
print '\n'
quit()
return fhand
def suc_ret(length):
print '\n'
print "+------------------------------------------------------------------------------+"
print "|\t\t", length, "characters have been successfully retrieved\t\t|"
print "+------------------------------------------------------------------------------+"
print '\n'
def find_link(fhand):
links = []
tags = []
soup = BeautifulSoup(fhand)
tags = soup('a')
for tag in tags:
tag = tag.get('href',None)
if tag is not None :
links.append(tag)
return links
def fans_link(linklst):
offsuplinklst = []
for link in linklst:
link = str(link)
link = link.rstrip()
fans = re.findall('.*fans/.+clubs/(.+)', link)
if len(fans) > 0:
offsuplinklst.append(fans[0])
return offsuplinklst
def officialsup_table(cur, offsuplinklst, name):
cur.execute('''
create table if not exists OfficialSup
(ID integer primary key,
Region text unique,
Link text unique,
Retrieved integer)''')
cur.execute('select Region from OfficialSup where Retrieved = 1 limit 1')
try :
cur.fetchone()[0]'
except :
for i in range(len(offsuplinklst)):
reg = offsuplinklst[i]
link = 'http://www.liverpoolfc.com/fans/lfc-official-supporters-clubs/'+offsuplinklst[i]
cur.execute('insert into OfficialSup (Region, Link, Retrieved) values (?, ?, 1)', (reg, link))
sqlite3.connect(name).commit()
FindSupporters()
可能是officialup_table方法中的错误。尽管如此,我的尝试并没有带来任何好结果。
非常感谢!
此致 阿诺德A.
答案 0 :(得分:1)
您需要使用创建光标的同一连接实例提交。改进new_db
以返回conn
和cur
:
def new_db(name):
conn = sqlite3.connect(name)
cur = conn.cursor()
return conn, cur
您现在需要以不同的方式阅读函数的结果:
class FindSupporters:
def __init__(self, *args, **kwargs):
#parsing the url from the command line
url = argv[1]
#make a new database
conn, cur = new_db('liverpudlian.sqlite3')
# ...
也将连接对象传递给officialsup_table
函数并调用commit()
:
def officialsup_table(conn, cur, offsuplinklst, name):
cur.execute('''
create table if not exists OfficialSup
(ID integer primary key,
Region text unique,
Link text unique,
Retrieved integer)''')
conn.commit()
cur.execute('select Region from OfficialSup where Retrieved = 1 limit 1')
try :
cur.fetchone()[0]
except :
for i in range(len(offsuplinklst)):
reg = offsuplinklst[i]
link = 'http://www.liverpoolfc.com/fans/lfc-official-supporters-clubs/'+offsuplinklst[i]
cur.execute('insert into OfficialSup (Region, Link, Retrieved) values (?, ?, 1)', (reg, link))
conn.commit()