我有一个脚本,可以从文件中收集给定网址的response header
信息。我现在经过的Argument/Input File
来自外面。 Execution Method: python collect.py <Input.txt>
。
输入文件:
1,http://www.example.com
2,http://www.blahblah.com
3,......
现在,我希望将带有URL的ID作为单个参数传递,如
python collect.py 1,http://www.example.com
并执行结果并将其写入outfile。
#!/usr/bin/python
import subprocess
import json
import sys
import httplib
import urlparse
import pickle
import sys
class HeaderFetcher:
def __init__(self,url,headers={'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3'}):
self.report = {}
self.initial_url = url
self.request_headers = headers
def fetch(self):
self.fetchheaders(self.initial_url,self.request_headers)
def fetchheaders(self,url,req_headers):
try:
u = urlparse.urlparse(url)
scheme = u.scheme
ServerConnection = None
con = None
if 'http' == scheme:
ServerConnection = httplib.HTTPConnection
elif 'https' == scheme:
ServerConnection = httplib.HTTPSConnection
if None != ServerConnection:
con = ServerConnection(u.hostname,u.port,timeout=10)
con.request("GET",url,None,req_headers)
res = con.getresponse()
#print res
self.report[url] = res.getheaders()
if 301 == res.status or 302 == res.status:
redirect_url = res.getheader('Location')
if not redirect_url in self.report:
if len(self.report.keys())<40:
self.fetchheaders(redirect_url,req_headers)
except:
test="Error In Loading"
#print test
def process(infile='Input.txt'):
#f = open('Input.txt','r')
f = open(sys.argv[1],"r")
agents= {'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3'}
finalJson = {}
for line in f.readlines():
App=line.strip().split(',')
#print "Processing " + App[1]
App_URL=App[1]
ua=agents
fetcher = HeaderFetcher(App_URL,{'User-Agent':ua})
fetcher.fetch()
url=fetcher.report
keys = list(url)
headerJson = {}
for k in keys:
headers=url[k]
for header in headers:
headerJson[header[0]] = header[1]
finalJson[App[0]] = headerJson
header_final=json.dumps(finalJson,indent=4)
#server_details = json.loads(response.read(header_final))
#print header_final
#json_data=open(header_final)
#server_details = json.loads(header_final)
server_details=json.loads(unicode(header_final),"ISO-8859-1")
with open("Headers_Final_List.txt",'wb') as f :
for appid, headers in server_details.iteritems():
htypes = [h for h in headers if h in (
'x-powered-by','server','x-aspnet-version','x-aspnetmvc-version')]
headers='{},{}'.format(appid, ','.join(htypes))
f.write(headers+'\n')
f.close()
if __name__ == '__main__':
process(sys.argv[-1])
有关如何从命令行解析单个参数的任何建议。
答案 0 :(得分:1)
sys.argv不是一个文件,但你试图像一个文件一样阅读。
这样:
@Singleton
@Lock(LockType.READ)
public class SomeSingleton {}
应该更像:
def process(infile='Input.txt'):
#f = open('Input.txt','r')
f = open(sys.argv[1],"r")
agents= {'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3'}
finalJson = {}
for line in f.readlines():
然后您可以在其他方法中使用id和url。
答案 1 :(得分:0)
#!/usr/bin/python
import sys
print 'Number of arguments:', len(sys.argv), 'arguments.'
print 'Argument List:', str(sys.argv)
n = len(sys.argv) -1
args = []
for i in sys.argv[n].split(','):
args.append(i)
for i in args:
print i
python arg.py 1,abc.txt
Number of arguments: 2 arguments.
Argument List: ['arg.py', '1,abc.txt']
1
abc.txt
如果在没有参数的情况下调用程序或者参数中没有逗号
,则此方法不会因IndexError:list index超出范围而失效答案 2 :(得分:0)
id, url = sys.argv[1].split(',')