我正在编写一段代码:当用户在页面 "url.html" 的表单字段中输入网址时,解析该网址对应的页面。用户在表单字段中输入的网址会出现在此视图的 instance.web_url 中。但是我遇到了一个与 HttpResponse 对象有关的错误。代码如下:
def url(request):
    """Process a POSTed URL form: download the page and classify its text.

    The submitted form is saved, the page at ``instance.web_url`` is fetched,
    script/style elements are stripped, and every "."-separated fragment of
    the remaining text is passed to ``natural_language_classifier``.

    NOTE: only the POST branch returns a response; for any other request
    method the function falls off the end and returns None.
    """
    if request.method == 'POST':
        title = "Search via URL here"
        form_url = WebURLForm(request.POST or None)
        instance = form_url.save(commit=False)
        instance.save()

        # Fall back to a fixed address when nothing was entered.
        if instance.web_url == "":
            instance.web_url = "http://www.facebook.com/"
        print(instance.web_url)

        page = urllib.urlopen(instance.web_url)
        soup = BeautifulSoup(page.read(), "lxml")

        # Drop every <script> and <style> element from the tree.
        for tag in soup(["script", "style"]):
            tag.extract()

        # Strip each line, split it into pieces, and keep the non-blank ones.
        raw_text = soup.get_text()
        stripped_lines = (row.strip() for row in raw_text.splitlines())
        pieces = (
            part.strip()
            for row in stripped_lines
            for part in row.split(" ")
        )
        joined = '\n'.join(piece for piece in pieces if piece)
        words = joined.encode('utf-8').split(".")

        terrorism_level = 0
        for count, fragment in enumerate(words):
            print(count)
            if not fragment:
                fragment = words[count] = "this was empty before."
            json_result = natural_language_classifier.classify(
                '90e7b7x198-nlc-50734', fragment)
            classes = json_result['classes']
            result = json.dumps(classes, indent=2)
            best = classes[0]
            if best['confidence'] > 0.98 and best['class_name'] == "nhate.":
                print(fragment)
                print(result)
                terrorism_level += 1

        context = {"form_url": form_url, "title": title}
        return render(request, 'url.html', context)
答案 0 :(得分:1)
def url(request):
    """Render the URL search form and, on POST, classify the submitted page.

    On POST the submitted form is validated and saved, the page at the saved
    ``web_url`` is downloaded, script/style elements are removed, and every
    "."-separated fragment of the remaining text is sent to
    ``natural_language_classifier``.  For any other request method an unbound
    form is rendered, so the view returns a response on every path.

    Args:
        request: the incoming request; ``request.method`` and
            ``request.POST`` are read.

    Returns:
        The result of ``render(request, 'url.html', context)`` where
        ``context`` holds the keys ``"form_url"`` and ``"title"``.
    """
    # Local import keeps this block independent of the module's import list.
    from contextlib import closing

    if request.method != 'POST':
        context = {"form_url": WebURLForm(), "title": None}
        return render(request, 'url.html', context)

    form_url = WebURLForm(request.POST or None)
    title = "Search via URL here"
    context = {"form_url": form_url, "title": title}

    # Presumably WebURLForm is a Django ModelForm, whose save() raises
    # ValueError on data that failed validation -- TODO confirm.  Re-render
    # the bound form (with its errors) instead of crashing.
    if not form_url.is_valid():
        return render(request, 'url.html', context)

    instance = form_url.save(commit=False)
    instance.save()
    # Fall back to a fixed address when nothing was entered.  The fallback is
    # applied after save(), so it is not persisted (same as before).
    if instance.web_url == "":
        instance.web_url = "http://www.facebook.com/"
    print(instance.web_url)

    # NOTE(review): the URL comes straight from user input; consider
    # restricting it to http/https before fetching it server-side.
    # closing() guarantees the connection is released even if read() fails.
    with closing(urllib.urlopen(instance.web_url)) as response:
        html = response.read()
    soup = BeautifulSoup(html, "lxml")

    # kill all script and style elements
    for script in soup(["script", "style"]):
        script.extract()  # rip it out

    # get text
    text = soup.get_text()
    # break into lines and remove leading and trailing space on each
    lines = (line.strip() for line in text.splitlines())
    # break multi-headlines into a line each
    chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
    # drop blank lines
    text = '\n'.join(chunk for chunk in chunks if chunk)
    text = text.encode('utf-8')
    words = text.split(".")

    # Number of fragments classified as "nhate." with confidence > 0.98.
    # NOTE(review): counted but not currently passed to the template.
    terrorism_level = 0
    for count, sentence in enumerate(words):
        print(count)
        if not sentence:
            # Empty fragments are replaced by a placeholder before being
            # classified (presumably the service rejects empty text --
            # TODO confirm).
            sentence = "this was empty before."
        json_result = natural_language_classifier.classify(
            '90e7b7x198-nlc-50734', sentence)
        classes = json_result['classes']
        if not classes:
            # Nothing to inspect; avoids an IndexError on classes[0].
            continue
        top = classes[0]
        if top['confidence'] > 0.98 and top['class_name'] == "nhate.":
            print(sentence)
            # Only serialise the result when it is actually printed.
            print(json.dumps(classes, indent=2))
            terrorism_level += 1

    return render(request, 'url.html', context)
请试一下上面这段代码:它为非 POST 请求增加了 else 分支,并把 return 放在 if/else 之外,因此视图在任何情况下都会返回响应。
答案 1 :(得分:0)
因为您很可能是用 GET 方法调用了视图,而您的代码只在 POST 分支中返回响应。另外,请阅读 Python 教程和 PEP 8:您的代码格式混乱,不符合 PEP 8 规范。