我正在读取访问日志文件,并将其转换为JSON格式。 我想在Hbase中存储数据 只插入一行是如何迭代的? 如何在python中解决这个问题 下面是我的代码:
import happybase
import time
import datetime
import socket
import json
import random
from thriftpy2.transport import TTransportException
connection = happybase.Connection('localhost')
connection.open()
print(connection.tables())
table = connection.table('testlog')
print(table.scan())
def parse_log_line(line):
strptime = datetime.datetime.strptime
hostname = socket.gethostname()
time = line.split(' ')[3][1::]
entry = {}
entry['datetime'] = strptime(
time, "%d/%b/%Y:%H:%M:%S").strftime("%Y-%m-%d %H:%M")
entry['source'] = "{}".format(hostname)
entry['type'] = "www_access"
entry['log'] = "'{}'".format(line.rstrip())
#seed(1)
# print(entry)
return entry
# return entry
def show_entry(entry):
temp = ",".join([
entry['datetime'],
entry['source'],
entry['type'],
entry['log']
])
log_entry = {'log': entry}
temp = json.dumps(log_entry)
# print(temp)
# print("{}".format(temp))
print(entry['datetime'])
b = table
b.put(entry,{'datetime:col1':entry['datetime'],
'source:col2':entry['source'],
'type:col3':entry['type'],
'log:col4':entry['log']
})
b.send()
print(entry['datetime']) //shows the all data
return temp
def follow(syslog_file):
# pubsub=happybase.Connection('localhost')
while True:
line = syslog_file.readline()
# print(line)
if not line:
time.sleep(0.1)
continue
else:
entry = parse_log_line(line)
# print(entry)
if not entry:
continue
json_entry = show_entry(entry)
f = open("/Users/evioxtech/Downloads/access.log","r")
follow(f)
在hbase中仅完成一项 如何一一插入数据 我正在使用happybase print(entry ['datetime'])显示所有数据