我已启用ELB访问日志,并将其存储到S3存储桶中。我正在尝试在Lambda中使用以下脚本将这些S3日志发送到Elasticsearch。
日志以*.log.gz格式存储在S3存储桶中。如何将这些gzip压缩文件的内容以JSON格式发送到Elasticsearch?
如果有更好的方法,请告诉我。
import gzip
import re
from urllib.parse import unquote_plus

import boto3
import requests
from requests_aws4auth import AWS4Auth
# --- Elasticsearch target ---------------------------------------------------
host = ''  # the Amazon ES domain, including https://
index = 'lambda-s3-index'
type = 'lambda-type'
# Endpoint the handler POSTs each document to: <host>/<index>/<type>.
url = '/'.join((host, index, type))
headers = {"Content-Type": "application/json"}

# --- Request authentication -------------------------------------------------
region = ''  # e.g. us-west-1
service = 'es'
credentials = boto3.Session().get_credentials()
# Auth object handed to `requests` via `auth=`; built from the session's
# access key, secret key and session token for the region/service above.
awsauth = AWS4Auth(
    credentials.access_key,
    credentials.secret_key,
    region,
    service,
    session_token=credentials.token,
)

# S3 client used by the handler to download the log objects.
s3 = boto3.client('s3')
# Regular expressions used to parse some simple log lines.
# Raw strings keep the backslashes for the regex engine instead of relying on
# Python passing unknown escape sequences (\d, \[, \s, ...) through unchanged.
# First dotted-quad IPv4-looking token on the line.
ip_pattern = re.compile(r'(\d+\.\d+\.\d+\.\d+)')
# Bracketed timestamp such as [13/Sep/2019:10:00:00 -0700]; group 1 excludes
# the brackets.
time_pattern = re.compile(r'\[(\d+\/\w\w\w\/\d\d\d\d:\d\d:\d\d:\d\d\s-\d\d\d\d)\]')
# Everything between the first and the last double quote on the line (greedy).
message_pattern = re.compile(r'"(.+)"')
# Lambda execution starts here
def handler(event, context):
    """Index the lines of each S3 object named in *event* into Elasticsearch.

    For every record in ``event['Records']`` the object is downloaded,
    gunzipped when it is gzip data (ELB writes ``*.log.gz``), decoded to text
    and split into lines. Each line that matches all three module-level
    patterns is POSTed to ``url`` as a JSON document with the keys ``ip``,
    ``timestamp`` and ``message``.

    Args:
        event: S3 event notification payload; only ``Records[*].s3.bucket.name``
            and ``Records[*].s3.object.key`` are read.
        context: Lambda context object (unused).

    Returns:
        None.
    """
    for record in event['Records']:
        # Get the bucket name and key for the new file.
        bucket = record['s3']['bucket']['name']
        # S3 event notifications URL-encode the object key (e.g. space -> '+'),
        # so decode it before asking S3 for the object.
        key = unquote_plus(record['s3']['object']['key'])

        # Get and read the file.
        obj = s3.get_object(Bucket=bucket, Key=key)
        body = obj['Body'].read()

        # Detect gzip by its magic number rather than by file name, so both
        # compressed and plain-text objects are handled.
        if body[:2] == b'\x1f\x8b':
            body = gzip.decompress(body)

        # The patterns are str patterns, so decode once at the I/O boundary;
        # undecodable bytes are replaced instead of aborting the whole file.
        lines = body.decode('utf-8', errors='replace').splitlines()

        # Match the regular expressions to each line and index the JSON.
        for line in lines:
            ip_match = ip_pattern.search(line)
            time_match = time_pattern.search(line)
            message_match = message_pattern.search(line)
            # Skip blank or differently formatted lines rather than crashing
            # on `None.group(1)` and losing the rest of the file.
            if not (ip_match and time_match and message_match):
                continue

            document = {
                "ip": ip_match.group(1),
                "timestamp": time_match.group(1),
                "message": message_match.group(1),
            }
            response = requests.post(url, auth=awsauth, json=document, headers=headers)
            if not response.ok:
                # Surface rejected documents in the function's log output
                # instead of dropping them silently; keep processing.
                print('Indexing failed for {}/{}: HTTP {} {}'.format(
                    bucket, key, response.status_code, response.text))
答案 0 :(得分:0)
更好的方法是使用Logstash(已安装s3输入插件)来解析ELB访问日志,并将其发送到Elasticsearch。
logstash.conf:
# Pull log objects from an S3 bucket and tag every event with type "elb".
input {
  s3 {
    # Where to read from.
    bucket => "..."
    region => "eu-central-1"
    prefix => "dxlb/AWSLogs/.../elasticloadbalancing/eu-central-1/2019/09/"

    # Credentials used to access the bucket.
    access_key_id => "..."
    secret_access_key => "..."

    # Tag checked by the conditionals in the filter and output sections.
    type => "elb"
  }
}
# Parse events of type "elb" into structured fields.
filter {
  if [type] == "elb" {
    grok {
      # The pattern must stay on a single line: a line break inside the quoted
      # string becomes part of the pattern and splits %{NUMBER:...} in two.
      match => [ "message", "%{WORD:connection} %{TIMESTAMP_ISO8601:timestamp} %{NOTSPACE:elb} %{IP:clientip}:%{INT:clientport:float} (?:(%{IP:backendip}:?:%{INT:backendport:int})|-) %{NUMBER:request_processing_time:float} %{NUMBER:backend_processing_time:float} %{NUMBER:response_processing_time:float} (?:-|%{INT:elb_status_code:int}) (?:-|%{INT:backend_status_code:int}) %{INT:received_bytes:int} %{INT:sent_bytes:int} \"%{ELB_REQUEST_LINE}\" \"(?:-|%{DATA:user_agent})\" (?:-|%{NOTSPACE:ssl_cipher}) (?:-|%{NOTSPACE:ssl_protocol})" ]
      #match => ["message", "%{ELB_ACCESS_LOG} \"%{DATA:userAgent}\"( %{NOTSPACE:ssl_cipher} %{NOTSPACE:ssl_protocol})?"]
    }
    # Parse the ISO 8601 "timestamp" field extracted by grok as the event time.
    date {
      match => [ "timestamp", "ISO8601" ]
    }
    # Add geographic information looked up from the client IP address.
    geoip {
      source => "clientip"
    }
  }
}
# Ship events of type "elb" to Elasticsearch.
output {
  if [type] == "elb" {
    elasticsearch {
      hosts => ["http://node:9200"]
      # Index name carries the year and month (YYYY.MM).
      index => "logstash-%{+YYYY.MM}"

      # Credentials for the cluster.
      user => "..."
      password => "..."
    }
  }
}