所以我正在处理这个将CSV转换为JSON的文件,但是我不断收到此错误消息,但似乎无法弄明白。缩进似乎是正确的,所以我有点迷失在哪里。代码如下:
追踪(最近一次通话):
File "/home/uwp/widgets/contentFreshness/freshmap.py", line 308, in <module>
main()
File "/home/uwp/widgets/contentFreshness/freshmap.py", line 303, in main
mySite.writeJSONFile(options)
File "/home/uwp/widgets/contentFreshness/freshmap.py", line 247, in writeJSONFile
outputFile.write('"' + str(dateOfCrawl) + '"' )
NameError: global name 'dateOfCrawl' is not defined
代码
class Site:
dateOfCrawl = 0;
def __init__(self,csvFilePath):
self.pageList = [] # ordered list of page IDs
self.pageData={} # dictionary of individual page dictionaries, indexed on page ID
self.titleDict = { } # dictionary of unique titles
self.buildPageData(csvFilePath)
self.homePageId=self.pageList[0] # only use of site.pageList
self.depth=0
def buildPageData(self,csvFilePath):
global dateOfCrawl
# read data from CSV file, build a dictionary of page data, including list of children, in order
lines = csv.reader(open(csvFilePath, "rb"))
for line in lines:
pageURL=line[0]
pageURL=re.sub('\/\Z', '',pageURL) # remove any trailing slash
self.pageData[pageURL]={}
self.pageData[pageURL]["URL"]=pageURL
self.pageData[pageURL]["Title"]=self.cleanTitle(line[1],pageURL)
# when taking the home page and chop its url the parent will be http:/
# which should be avoided by setting it to ''
parent = chopPath(pageURL)
if(parent == 'http:/'):
parent=''
dateOfCrawl = line[2]
self.pageData[pageURL]["Parent"]= parent
self.pageData[pageURL]["Modified"]=line[2]
self.pageData[pageURL]["Children"]=[]
list = self.pageData.keys()
# sort IDs before attempting to match children
self.pageList = self.pageData.keys()
self.pageList.sort()
lineCount = 0
for pageURL in self.pageList:
# record page as child of its parent (parents must already be in the list!)
parentURL=self.pageData[pageURL]["Parent"]
if (lineCount > 0):
while( self.pageData.has_key(parentURL)== False):
if(parentURL == ''):
sys.exit(pageURL + " has no parent at " + parentURL)
parentURL = chopPath(parentURL)
self.pageData[parentURL]["Children"].append(pageURL)
lineCount+=1
self.pageCount=lineCount
def writeJSONFile(self,options):
global dateOfCrawl
outputFile = options ["outputFile"]
#see http://code.google.com/intl/en/apis/visualization/documentation/reference.html#DataTable
outputFile.write('[')
outputFile.write('"' + str(dateOfCrawl) + '"' )
self.homePage.toJSON(options)
outputFile.write(']')
outputFile.close()
答案 0 :(得分:0)
您将dateOfCrawl = 0;
(请阅读PEP-8)指定为类属性。出于某种原因,您也混合了global
,这是完全独立的。不清楚为什么dateOfCrawl
应该是一个类(而不是实例)属性,或者为什么你也有global
。你应该只是:
dateOfCrawl = 0
内移动__init__
);那么self.dateOfCrawl
)中访问 。答案 1 :(得分:0)
答案 2 :(得分:-1)
您需要定义global dateOfCrawl
。在dateOfCrawl = 0;
类的__init__
内移动Site
,如下所示:
class Site:
def __init__(self,csvFilePath):
global dateOfCrawl
dateOfCrawl = 0
....