Json编码器python递归引用

时间:2009-09-22 07:46:25

标签: python json

我不知道我是否在这里做正确的事情,基本上我希望我的两个类都是json-serializable。

import json

class gpagelet(json.JSONEncoder):
    """
    Holds   1) the pagelet xpath, which is a string
            2) the list of pagelet shingles, list
    """
    def __init__(self, parent):
        if not isinstance( parent, gwebpage):
            raise Exception("Parent must be an instance of gwebpage")
        self.parent = parent    # This must be a gwebpage instance
        self.xpath = None       # This is just an id for the pagelet (not unique across page), historically called xpath
        self.visibleShingles = []
        self.invisibleShingles = []
        self.urls = []


    def __str__(self):
        """String representation of this object"""
        ret = ""
        ret += "xpath: %s\n"  % self.xpath

        def appendShingles():
            ret += "shingles: \n"
            for each in self.shingles:
                ret += "%s\n" % str(each)

        ret += "urls:\n"
        for each in self.urls:
            ret += "%s\n" % str( each)
        return ret





class gwebpage(json.JSONEncoder):
    """
    Holds all the datastructure after the results have been parsed
    holds:  1) lists of gpagelets
            2) loc, string, location of the file that represents it
    """
    def __init__(self, url):
        self.url = url              # This will be http://
        self.netloc = False         # This will be http:// too
        self.gpagelets = []
        # Appended by functions
        self.page_key = ""


    def __str__(self):
        ret = ""
        ret += "url: %s\n" % self.url
        ret += "netloc: %s\n" % self.netloc
        ret += "page_key: %s\n" % self.page_key
        ret += "pagelets:\n"
        for each in self.gpagelets:
            ret += "%s\n" % each.__str__()
        return ret


class GpageletEncoder( json.JSONEncoder):

    def default(self, gp):
        gwebpageEncoder = GwebpageEncoder()
        if not isinstance( gp, gpagelet):
            raise Exception( "Cannot use GpageletEncoder on a non gpagelet instance")
        u = { }
        u['parent'] = gwebpageEncoder.default( gp.parent)
        u['xpath'] = gp.xpath
        u['visibleShingles'] = gp.visibleShingles
        u['invisibleShingles'] = gp.invisibleShingles
        u['urls'] = gp.urls
        return u




class GwebpageEncoder( json.JSONEncoder):


    def default(self, gw):
        gpageletEncoder = GpageletEncoder()
        if not isinstance( gw, gwebpage):
            raise Exception( "Cannot use gwebpageEncoder on a non gwebpage instance")
        u = { }
        u['url'] = gw.url
        u['netloc'] = gw.netloc
        u['gpagelets'] = [ gpageletEncoder.default( each) for each in gw.gpagelets ]
        u['page_key'] = gw.page_key
        return u





if __name__ == "__main__":

    import simplejson
    mom = gwebpage('http://www.google.com')
    son = gpagelet( mom)
    mom.gpagelets.append( son)
    print simplejson.dumps( mom, cls=GwebpageEncoder)

其中一个麻烦就是 1)我不知道默认是做什么的 2)我不知道GWebpage的默认值是否要返回默认或编码的gwebpage

现在我得到了无限的递归。

有人可以帮忙吗?

1 个答案:

答案 0 :(得分:0)

FYI 我看到很多麻烦: u ['gpagelets'] = [gpage.Efcoder.default(每个)gw.gpagelets中的每一个]