我的端口80上的服务器上安装了清漆缓存,端口8080上的Apache作为内容服务器。如果我运行wget -mirror example.com,它应该遍历我的整个网站并创建清漆缓存,对吧?它不是。例如,通过运行wget -mirror example.com,我可以在输出中看到它已经通过example.com/abc.html运行。但是当我从浏览器访问example.com/abc.html时,从我的响应标题中我可以看到它正在返回Varnish MISS(这也需要很长时间)。但是,如果我通过浏览器再次访问相同的URL,这次生成了缓存,因为我可以在响应头中看到Varnish HIT
这是另一个有趣的事实:如果我只运行wget example.com/abc.html,它将创建清漆缓存!另一个有趣的事实:如果我运行wget --mirror example.com/abc.html它将为abc.html而不是更多页面创建清漆缓存
因此,出于某种原因使用--mirror example.com为第一页创建了清漆缓存,但没有为其他页创建。
如果它有任何区别,我正在使用Magento
我试过了: wget --mirror --no-http-keep-alive example.com 但它不起作用
这是我的清漆vcl
# This is a basic VCL configuration file for PageCache powered by Varnish for Magento module.
# default backend definition. Set this to point to your content server.
backend default {
.host = "127.0.0.1";
.port = "8080";
}
# admin backend with longer timeout values. Set this to the same IP & port as your default server.
backend admin {
.host = "127.0.0.1";
.port = "8080";
.first_byte_timeout = 18000s;
.between_bytes_timeout = 18000s;
}
# add your Magento server IP to allow purges from the backend
acl purge {
"localhost";
"127.0.0.1";
}
sub vcl_recv {
if (client.ip ~ purge) {
set req.hash_always_miss = true;
}
if (req.restarts == 0) {
if (req.http.x-forwarded-for) {
set req.http.X-Forwarded-For =
req.http.X-Forwarded-For + ", " + client.ip;
} else {
set req.http.X-Forwarded-For = client.ip;
}
}
if (req.request != "GET" &&
req.request != "HEAD" &&
req.request != "PUT" &&
req.request != "POST" &&
req.request != "TRACE" &&
req.request != "OPTIONS" &&
req.request != "DELETE" &&
req.request != "PURGE") {
/* Non-RFC2616 or CONNECT which is weird. */
return (pipe);
}
# purge request
if (req.request == "PURGE") {
if (!client.ip ~ purge) {
error 405 "Not allowed.";
}
ban("obj.http.X-Purge-Host ~ " + req.http.X-Purge-Host + " && obj.http.X-Purge-URL ~ " + req.http.X-Purge-Regex + " && obj.http.Content-Type ~ " + req.http.X-Purge-Content-Type);
error 200 "Purged.";
}
# switch to admin backend configuration
if (req.http.cookie ~ "adminhtml=") {
set req.backend = admin;
}
# we only deal with GET and HEAD by default
if (req.request != "GET" && req.request != "HEAD") {
return (pass);
}
# normalize url in case of leading HTTP scheme and domain
set req.url = regsub(req.url, "^http[s]?://[^/]+", "");
# static files are always cacheable. remove SSL flag and cookie
if (req.url ~ "^/(media|js|skin)/.*\.(png|jpg|jpeg|gif|css|js|swf|ico)$") {
unset req.http.Https;
unset req.http.Cookie;
}
# not cacheable by default
if (req.http.Authorization || req.http.Https) {
return (pass);
}
# do not cache any page from
# - index files
# - ...
if (req.url ~ "^/(index)") {
return (pass);
}
# as soon as we have a NO_CACHE cookie pass request
if (req.http.cookie ~ "NO_CACHE=") {
return (pass);
}
# normalize Aceept-Encoding header
# http://varnish.projects.linpro.no/wiki/FAQ/Compression
if (req.http.Accept-Encoding) {
if (req.url ~ "\.(jpg|png|gif|gz|tgz|bz2|tbz|mp3|ogg|swf|flv)$") {
# No point in compressing these
remove req.http.Accept-Encoding;
} elsif (req.http.Accept-Encoding ~ "gzip") {
set req.http.Accept-Encoding = "gzip";
} elsif (req.http.Accept-Encoding ~ "deflate" && req.http.user-agent !~ "MSIE") {
set req.http.Accept-Encoding = "deflate";
} else {
# unkown algorithm
remove req.http.Accept-Encoding;
}
}
# remove Google gclid parameters
set req.url = regsuball(req.url,"\?gclid=[^&]+$",""); # strips when QS = "?gclid=AAA"
set req.url = regsuball(req.url,"\?gclid=[^&]+&","?"); # strips when QS = "?gclid=AAA&foo=bar"
set req.url = regsuball(req.url,"&gclid=[^&]+",""); # strips when QS = "?foo=bar&gclid=AAA" or QS = "?foo=bar&gclid=AAA&bar=baz"
return (lookup);
}
# sub vcl_pipe {
# # Note that only the first request to the backend will have
# # X-Forwarded-For set. If you use X-Forwarded-For and want to
# # have it set for all requests, make sure to have:
# # set bereq.http.connection = "close";
# # here. It is not set by default as it might break some broken web
# # applications, like IIS with NTLM authentication.
# return (pipe);
# }
#
# sub vcl_pass {
# return (pass);
# }
#
sub vcl_hash {
hash_data(req.url);
if (req.http.host) {
hash_data(req.http.host);
} else {
hash_data(server.ip);
}
if (!(req.url ~ "^/(media|js|skin)/.*\.(png|jpg|jpeg|gif|css|js|swf|ico)$")) {
call design_exception;
}
return (hash);
}
#
# sub vcl_hit {
# return (deliver);
# }
#
# sub vcl_miss {
# return (fetch);
# }
sub vcl_fetch {
if (beresp.status == 500) {
set beresp.saintmode = 10s;
return (restart);
}
set beresp.grace = 5m;
# add ban-lurker tags to object
set beresp.http.X-Purge-URL = req.url;
set beresp.http.X-Purge-Host = req.http.host;
if (beresp.status == 200 || beresp.status == 301 || beresp.status == 404) {
if (beresp.http.Content-Type ~ "text/html" || beresp.http.Content-Type ~ "text/xml") {
if ((beresp.http.Set-Cookie ~ "NO_CACHE=") || (beresp.ttl < 1s)) {
set beresp.ttl = 0s;
return (hit_for_pass);
}
# marker for vcl_deliver to reset Age:
set beresp.http.magicmarker = "1";
# Don't cache cookies
unset beresp.http.set-cookie;
} else {
# set default TTL value for static content
set beresp.ttl = 4h;
}
return (deliver);
}
return (hit_for_pass);
}
sub vcl_deliver {
# debug info
if (resp.http.X-Cache-Debug) {
if (obj.hits > 0) {
set resp.http.X-Cache = "HIT";
set resp.http.X-Cache-Hits = obj.hits;
} else {
set resp.http.X-Cache = "MISS";
}
set resp.http.X-Cache-Expires = resp.http.Expires;
} else {
# remove Varnish/proxy header
remove resp.http.X-Varnish;
remove resp.http.Via;
remove resp.http.Age;
remove resp.http.X-Purge-URL;
remove resp.http.X-Purge-Host;
}
if (resp.http.magicmarker) {
# Remove the magic marker
unset resp.http.magicmarker;
set resp.http.Cache-Control = "no-store, no-cache, must-revalidate, post-check=0, pre-check=0";
set resp.http.Pragma = "no-cache";
set resp.http.Expires = "Mon, 31 Mar 2008 10:00:00 GMT";
set resp.http.Age = "0";
}
}
# sub vcl_error {
# set obj.http.Content-Type = "text/html; charset=utf-8";
# set obj.http.Retry-After = "5";
# synthetic {"
# <?xml version="1.0" encoding="utf-8"?>
# <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
# "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
# <html>
# <head>
# <title>"} + obj.status + " " + obj.response + {"</title>
# </head>
# <body>
# <h1>Error "} + obj.status + " " + obj.response + {"</h1>
# <p>"} + obj.response + {"</p>
# <h3>Guru Meditation:</h3>
# <p>XID: "} + req.xid + {"</p>
# <hr>
# <p>Varnish cache server</p>
# </body>
# </html>
# "};
# return (deliver);
# }
#
# sub vcl_init {
# return (ok);
# }
#
# sub vcl_fini {
# return (ok);
# }
sub design_exception {
}
编辑,答案:
我不知道是否添加了--no-cookies修复了它(不知道wget -mirror是否存储了cookie,如果确实存在,那么它会修复它)或者是否添加标题修复它,但是这可以工作并创建我可以通过浏览器看到的清漆缓存:
wget --spider --recursive --no-cookies --header "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" --header "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.3" --header "Accept-Language: en-US,en;q=0.8" --header "Cache-Control: max-age=0" --header "Connection: keep-alive" --header "Host: www.example.com" --header "User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.19 (KHTML, like Gecko) Ubuntu/10.04 Chromium/18.0.1025.168 Chrome/18.0.1025.168 Safari/535.19" www.example.com
第二次编辑,与上一个答案相关。重要
任何使用Magento的人都不要使用我的上述解决方案。由于--no-cookies,Magento最终会在每个请求的var / session文件夹下创建一个新的会话文件。这导致我的会话文件夹用每个wget运行的上述命令填充250,000个文件!这导致文件夹已满,因为Magento无法为他们创建任何更多的会话文件,所以我的客户都无法向他们的购物篮添加任何内容。我正在寻找更多我的问题的替代方案
答案 0 :(得分:1)
你应该看两点:
Accept-Encoding ~ 'gzip'
标头,因此如果您的蜘蛛不使用此标头,则不会使用缓存条目(一个体面的后端生成gzipped响应会添加一个vary:Accept-Encoding
标头)。