我正在写一些我想编码成JSON的python类。当我尝试将我的对象jsonify时,我收到一个错误,提到了一个循环引用'。我想我理解循环引用意味着什么,但我在代码中找不到任何示例。
class Address:
def __init__(self, address1, address2, city, state, zip):
self.address1 = address1
self.address2 = address2
self.city = city
self.state = state
self.zip = zip
class Signup:
def __init__(self, registrant, classId, date, time, paid, seatCost, notes, className, seats, groupId, agentName, agentCompany):
self.registrant = registrant
self.classId = classId
self.date = date
self.time = time
self.paid = paid
self.seatCost = seatCost
self.notes = notes
self.className = className
self.seats = seats
self.groupId = groupId
self.agentName = agentName
self.agentCompany = agentCompany
class Registrant:
def __init__(self, firstName, lastName, address, phone, email):
self.firstName = firstName
self.lastName = lastName
self.address = address
self.phone = phone
self.email = email
def scrape(br):
signups = []
soup = libStuff.getSoup(br, 'http://thepaintmixer.com/admin/viewdailysignups.php')
table = soup.find(id='Calendar')
rows = table.find_all('tr')
rowNumber = 0
for row in rows:
if rowNumber == 0:
rowNumber = rowNumber + 1
continue
cells = row.find_all('td')
cellNumber = 0
for cell in cells:
if cellNumber == 0:
try:
firstName = cell.contents[0]
except IndexError:
firstName = None
elif cellNumber == 1:
try:
lastName = cell.contents[0]
except IndexError:
lastName = None
elif cellNumber == 2:
try:
address1 = cell.contents[0]
except IndexError:
address1 = None
elif cellNumber == 3:
try:
address2 = cell.contents[0]
except IndexError:
address2 = None
elif cellNumber == 4:
try:
city = cell.contents[0]
except IndexError:
city = None
elif cellNumber == 5:
try:
state = cell.contents[0]
except IndexError:
state = None
elif cellNumber == 6:
try:
zip = cell.contents[0]
except IndexError:
zip = None
elif cellNumber == 7:
try:
phone = cell.contents[0]
except IndexError:
phone = None
elif cellNumber == 8:
try:
email = cell.contents[0]
except IndexError:
email = None
elif cellNumber == 9:
try:
classId = cell.contents[0]
except IndexError:
classId = None
elif cellNumber == 10:
try:
date = cell.contents[0]
except IndexError:
date = None
elif cellNumber == 11:
try:
time = cell.contents[0]
except IndexError:
time = None
elif cellNumber == 12:
try:
paid = cell.contents[0]
except IndexError:
paid = None
elif cellNumber == 13:
try:
seatCost = cell.contents[0]
except IndexError:
seatCost = None
elif cellNumber == 14:
try:
notes = cell.contents[0]
except IndexError:
notes = None
elif cellNumber == 15:
try:
className = cell.contents[0]
except IndexError:
className = None
elif cellNumber == 16:
try:
seats = cell.contents[0]
except IndexError:
seats = None
elif cellNumber == 17:
try:
groupId = cell.contents[0]
except IndexError:
groupId = None
elif cellNumber == 18:
try:
agentName = cell.contents[0]
except IndexError:
agentName = None
elif cellNumber == 19:
try:
agentCompany = cell.contents[0]
except IndexError:
agentCompany = None
cellNumber = cellNumber + 1
address = Address(address1, address2, city, state, zip)
registrant = Registrant(firstName, lastName, address, phone, email)
signup = Signup(registrant, classId, date, time, paid, seatCost, notes, className, seats, groupId, agentName, agentCompany)
signups.append(signup)
return signups
#I then call json.dumps() on that returned list
json.dumps(scrape(br), default=lambda o: o.__dict__)
我的构造函数搞砸了吗?我传递了一些我不应该做的事情吗?
答案 0 :(得分:2)
可能的原因是cell.contents[0]
返回一个复杂的BeautifulSoup对象而不是直接文本。 BeautifulSoup对象知道他们的父母,兄弟姐妹,解析器类,属性以及可能是共享或循环的其他对象。
当<td>
元素包含内部html时会出现这种情况。这在表中很常见(例如,表条目可以用粗体或斜体表示)。
您问题的一个可能解决方案是确保使用BeautifulSoup的.text
来确保您只获取文字而不是内部的BeautifulSoup元素:
columns = [col.text for col in row.findAll('td')]
FWIW,这是一个简单的诊断技术,可以看到实际发生的情况。只需修改 json.dumps()中的默认函数,使其输出可见:
def view_dict(obj):
print '--------------'
print 'Type:', obj.__class__
d = obj.__dict__
pprint.pprint(d)
return d
json.dumps(scrape(br), default=view_dict)
循环引用应该弹出。希望这可以解开这个谜团(因为否则你的代码看起来很好并且没有明确地创建循环引用)。
答案 1 :(得分:0)
我无法找到错误,因此我重构了使用命名元组(Credit @metatoaster)。重新分解修复了问题。
def scrape(br):
signups = []
soup = libStuff.getSoup(br, 'http://thepaintmixer.com/admin/viewdailysignups.php')
table = soup.find(id='Calendar')
rows = table.find_all('tr')
rowNumber = 0
for row in rows:
if rowNumber == 0:
rowNumber = rowNumber + 1
continue
cells = row.find_all('td')
cells = [cell.string if cell.string != None else '' for cell in cells]
signup = Signup(*cells)
signups.append(signup)
return signups