我试图使用我的代码来分析Apache日志行。 日志行是这样的:
124.150.110.226 - - [26 / Jun / 2008:06:48:29 +0000]" GET / HTTP / 1.1" 200 99" - " " Mozilla / 5.0(X11; U; Linux i686; en-US; rv:1.8.1.14)Gecko / 20080419 Ubuntu / 8.04(hardy)Firefox / 2.0.0.14"
但我收到错误消息:
198 199 def parse_apache(line): --> 200 log_split = log_regexp.match(line) 201 if not log_split: 202 print "Line didn't match!", line TypeError: expected string or buffer
我的代码如下:
apache_log_headers = ['host', 'client_id', 'user_id',
'datetime', 'method', 'request', 'http_proto',
'status', "size", 'referrer', 'user_agent']
log_format = (r'(\S+) (\S+) (\S+) \[(.*?)\] '
r'"(\S+) (\S+) (\S+)" (\S+) (\S+) '
r'"(.+)" "(.+)"')
log_regexp = re.compile(log_format)
def parse_apache(line):
log_split = log_regexp.match(line)
if not log_split:
print "Line didn't match!", line
return {}
log_split = log_split.groups()
result = dict(zip(apache_log_headers,log_split))
result['status'] = int(result['status'])
if result['size'].isdigit():
result['size'] = int(result['size'])
else:
result['size'] = 0
return result
def apache_lines(dir_name,file_type):
return (parse_apache(line) for line in log_lines(dir_name,file_type))
if __name__ == '__main__':
for each_line in log_lines('/Users/SJX/Desktop/python project/python_learning','.log'):
print each_line
print parse_apache(each_line)
print sum((each_line['size'] for each_line in apache_lines('/Users/SJX/Desktop/python project/python_learning','.log')
if each_line.get('status',0) == 200))