我不知道我是否在这里做正确的事情,基本上我希望我的两个类都是json-serializable。
import json
class gpagelet(json.JSONEncoder):
"""
Holds 1) the pagelet xpath, which is a string
2) the list of pagelet shingles, list
"""
def __init__(self, parent):
if not isinstance( parent, gwebpage):
raise Exception("Parent must be an instance of gwebpage")
self.parent = parent # This must be a gwebpage instance
self.xpath = None # This is just an id for the pagelet (not unique across page), historically called xpath
self.visibleShingles = []
self.invisibleShingles = []
self.urls = []
def __str__(self):
"""String representation of this object"""
ret = ""
ret += "xpath: %s\n" % self.xpath
def appendShingles():
ret += "shingles: \n"
for each in self.shingles:
ret += "%s\n" % str(each)
ret += "urls:\n"
for each in self.urls:
ret += "%s\n" % str( each)
return ret
class gwebpage(json.JSONEncoder):
"""
Holds all the datastructure after the results have been parsed
holds: 1) lists of gpagelets
2) loc, string, location of the file that represents it
"""
def __init__(self, url):
self.url = url # This will be http://
self.netloc = False # This will be http:// too
self.gpagelets = []
# Appended by functions
self.page_key = ""
def __str__(self):
ret = ""
ret += "url: %s\n" % self.url
ret += "netloc: %s\n" % self.netloc
ret += "page_key: %s\n" % self.page_key
ret += "pagelets:\n"
for each in self.gpagelets:
ret += "%s\n" % each.__str__()
return ret
class GpageletEncoder( json.JSONEncoder):
def default(self, gp):
gwebpageEncoder = GwebpageEncoder()
if not isinstance( gp, gpagelet):
raise Exception( "Cannot use GpageletEncoder on a non gpagelet instance")
u = { }
u['parent'] = gwebpageEncoder.default( gp.parent)
u['xpath'] = gp.xpath
u['visibleShingles'] = gp.visibleShingles
u['invisibleShingles'] = gp.invisibleShingles
u['urls'] = gp.urls
return u
class GwebpageEncoder( json.JSONEncoder):
def default(self, gw):
gpageletEncoder = GpageletEncoder()
if not isinstance( gw, gwebpage):
raise Exception( "Cannot use gwebpageEncoder on a non gwebpage instance")
u = { }
u['url'] = gw.url
u['netloc'] = gw.netloc
u['gpagelets'] = [ gpageletEncoder.default( each) for each in gw.gpagelets ]
u['page_key'] = gw.page_key
return u
if __name__ == "__main__":
import simplejson
mom = gwebpage('http://www.google.com')
son = gpagelet( mom)
mom.gpagelets.append( son)
print simplejson.dumps( mom, cls=GwebpageEncoder)
其中一个麻烦就是 1)我不知道默认是做什么的 2)我不知道GWebpage的默认值是否要返回默认或编码的gwebpage
现在我得到了无限的递归。
有人可以帮忙吗?
答案 0 :(得分:0)
FYI 我看到很多麻烦: u ['gpagelets'] = [gpage.Efcoder.default(每个)gw.gpagelets中的每一个]