这是 access.log 文件中的一条记录:
{"time":"2016-01-13T06:40:40+00:00", "remote_addr":"109.100.96.13",
"remote_user":"-", "host":"myhost", "request":"GET /mypath/myurl/ HTTP/1.1",
"status":"502", "body_bytes_sent":"568",
"referer":"https://search.yahoo.com/", "user_agent":"Mozilla/5.0 (Windows NT
6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106
Safari/537.36", "request_time":"120.231", "handler":"hhvm", "country":"RO",
"android_dupe_bug":"", "ssl_cipher":"ECDHE-RSA-AES128-GCM-SHA256",
"ssl_protocol":"TLSv1.2"}
这是 error.log 文件中的一条记录:
2016/01/13 07:09:35 [error] 9134#0: *677679 limiting requests, excess: 0.729
by zone "bots", client: 5.189.177.31, server: , request: "GET /cgi-bin
/index.cgi HTTP/1.1", host: "54.93.166.80"
我怀疑我的网站被大量机器人访问,但我不能 100% 确定。我使用了以下命令:
pnl --yesterday --php --bots --fields ua | sort | uniq -c | sort -n
结果显示,有超过 40,000 个机器人(谷歌、苹果、雅虎……)访问过我的网站。我该如何解决这个问题?谢谢!
更新
这是nginx.conf文件:
## Ansible managed: /srv/hypernode/ansible-playbooks/roles/magweb/templates/nginx/nginx.conf modified on 2015-12-10 11:51:53 by jenkins on hn-control3
# Main (global) context: process identity, worker count and PID file.
user              app;                  # account the worker processes run as
worker_processes  4;                    # fixed number of worker processes
pid               /var/run/nginx.pid;   # file where the master process PID is written
# Connection-processing settings shared by every worker process.
events {
    # Upper bound on simultaneous connections a single worker may hold open.
    worker_connections  768;

    # Left disabled: each worker accepts one new connection at a time.
    # multi_accept on;
}
# HTTP context: transport tuning, JSON access logging, gzip, and a shared
# rate-limit zone for user agents that look like bots. Site-specific server
# blocks are pulled in by the include directives at the bottom.
http {
    # --- Transport and connection handling ---
    sendfile on;
    tcp_nopush on;
    tcp_nodelay on;
    keepalive_timeout 65;
    types_hash_max_size 2048;
    server_tokens off;
    server_names_hash_bucket_size 64;
    # server_name_in_redirect off;

    # default: 1m, so required to allow photo uploads
    # see ticket #1315276 --WdG
    client_max_body_size 120m;

    include /etc/nginx/mime.types;
    default_type application/octet-stream;

    # --- GeoIP ---
    # GeoIP support is included in the 12.04 nginx
    # This enables logging, and the following:
    # if ($geoip_country_code ~ (BR|CN|KR|RU) ) {
    #     return 403;
    # }
    geoip_country /usr/share/GeoIP/GeoIP.dat;

    # --- Access / error logging ---
    # One JSON object per request. $log_handler and $android_log_dupe are not
    # defined in this file; presumably they are set in one of the included
    # files -- verify before editing.
    # NOTE(review): values are not JSON-escaped here. On nginx >= 1.11.8 the
    # "escape=json" parameter of log_format would fix that; confirm the
    # installed version before enabling it.
    log_format hypernode '{'
        '"time":"$time_iso8601", '
        '"remote_addr":"$remote_addr", '
        '"remote_user":"$remote_user", '
        '"host":"$http_host", '
        '"request":"$request", '
        '"status":"$status", '
        '"body_bytes_sent":"$body_bytes_sent", '
        '"referer":"$http_referer", '
        '"user_agent":"$http_user_agent", '
        '"request_time":"$request_time", '
        '"handler":"$log_handler", '
        '"country":"$geoip_country_code", '
        '"android_dupe_bug":"$android_log_dupe", '
        '"ssl_cipher":"$ssl_cipher", '
        '"ssl_protocol":"$ssl_protocol"'
        '}';

    access_log /var/log/nginx/access.log hypernode;
    error_log /var/log/nginx/error.log;

    # --- Compression ---
    gzip on;
    gzip_disable "msie6";
    gzip_min_length 1000;
    gzip_proxied any;
    # text/html is always compressed by nginx; listing it again only triggers
    # a "duplicate MIME type" warning at config load, so it is omitted here.
    gzip_types text/xml text/plain text/css text/js application/xml application/x-javascript application/json;

    # website has been administratively disabled for security reasons
    # NOTE(review): no directive follows the comment above in this file;
    # presumably a template placeholder -- confirm against the Ansible template.

    # CDN compatibility, so that the proxied IP turns up in ACL and logs
    include /etc/nginx/cdn.conf;

    # --- Bot rate limiting ---
    # Rate limit all non-google/bing bots
    # https://gist.github.com/supairish/2951524
    # (Alternatively, use key $binary_remote_addr to rate limit per ip.)
    # This config shares the same rate for all bots.
    # These regexes are case-insensitive. --WdG
    # Regexes are tried in the order written, so the whitelist line must stay
    # above the generic bot pattern.
    map $http_user_agent $limit_bots {
        default '';
        # Whitelisted agents: empty key, so never rate limited.
        # The dot in "monitis\.com" is escaped so it matches a literal dot only.
        ~*(google|bing|pingdom|monitis\.com|Zend_Http_Client) '';
        # Everything else that looks like a crawler shares the single key 'bot'.
        ~*(http|crawler|spider|bot|search|ForusP|Wget/|Python-urllib|PHPCrawl|bGenius) 'bot';
    }

    # An empty '$limit_bots' disables rate limiting for those requests.
    # The zone is only declared here; the matching limit_req directive is not
    # in this file (presumably in one of the includes below).
    limit_req_zone $limit_bots zone=bots:1m rate=1r/s;
    limit_req_log_level error;

    index index.html index.php;

    # --- Site-specific configuration ---
    include /etc/nginx/app/http.*;
    include /etc/nginx/conf.d/*.conf;
    include /etc/nginx/ssl/*.conf;
}