我的python脚本正在搜索谷歌页面的特定内容。就像url中的路径一样。这是我的代码
import urllib2
import urllib
import json
def search(target):
num_queries = 50 * 4
for start in range(0, num_queries, 4):
dork = 'intext: hacking'
dork = urllib.urlencode({'q' : dork})
url = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&%s' % dork
for start in range(0, num_queries, 4):
request_url = '{0}&start={1}'.format(url, start)
search_results = urllib.urlopen(request_url)
#try:
j = json.loads(search_results.read())
#except ValueError:
#print "Error: "
#continue
results = j['responseData']['results']
for result in results:
title = result['title']
url = result['url']
if target in url:
print ( '[*]' + url )
def main():
target = raw_input("Enter target >> ")
search(target)
if __name__ == "__main__":
main()
在第3个结果之后我得到No JSON对象可以被解码。这是整个错误:
Enter target >> .com
[*]some site
[*]some site
[*]some site
Traceback (most recent call last):
File "gs.py", line 32, in <module>
main()
File "gs.py", line 28, in main
search(target)
File "gs.py", line 15, in search
j = json.loads(search_results.read())
File "/usr/lib/python2.7/json/__init__.py", line 326, in loads
return _default_decoder.decode(s)
File "/usr/lib/python2.7/json/decoder.py", line 365, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/usr/lib/python2.7/json/decoder.py", line 383, in raw_decode
raise ValueError("No JSON object could be decoded")
ValueError: No JSON object could be decoded
从错误发生的地方以及如何解决它的想法?
答案 0 :(得分:0)
您使用返回的值覆盖url
。
根据结果:
url = result['url']
然后它循环回到顶部:
request_url = '{0}&start={1}'.format(url, start)
这可能是由于额外的循环。试试这个,没有额外的循环:
def search(target):
num_queries = 50 * 4
for start in range(0, num_queries, 4):
dork = 'intext: hacking'
dork = urllib.urlencode({'q' : dork})
url = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&%s' % dork
request_url = '{0}&start={1}'.format(url, start)
search_results = urllib.urlopen(request_url)
#try:
j = json.loads(search_results.read())
#except ValueError:
#print "Error: "
#continue
results = j['responseData']['results']
for result in results:
title = result['title']
url = result['url']
if target in url:
print ( '[*]' + url )