清漆缓存 - 无法处理4000个并发用户

时间:2016-08-08 12:17:23

标签: varnish varnish-vcl

在WP站点上加载~4000个并发用户时遇到问题。 这是我的配置:

F5负载均衡器--->清漆4,8芯,32 Gb RAM ---> 9个后端,4个内核,每个16个RAM,运行WP站点。

虽然负载大约为2500-3000个用户,但一切正常,没有任何错误,但当用户达到4k时,varnish会停止响应,直到它计算所有排队的请求,加上我们看到很多502错误。

有2个池,每个5000个线程;的malloc = 30G

Additionaly将SOMAXCONN和TCP_MAX_SYN_Backlog添加到sysctl

这是VCL:

  vcl 4.0;
import directors;
import std;
backend qa2 { .host = "xxx"; .port = "80"; }
backend qa3 { .host = "xxx"; .port = "80"; }
backend qa4 { .host = "xxx"; .port = "80"; }
backend qa5 { .host = "xxx"; .port = "80"; }
backend qa6 { .host = "xxx"; .port = "80"; }
backend qa7 { .host = "xxx"; .port = "80"; }
backend qa8 { .host = "xxx"; .port = "80"; }
backend qa9 { .host = "xxx"; .port = "80"; }
backend qa10 { .host = "xxx"; .port = "80"; }

# .connect_timeout = 2s; .first_byte_timeout = 10m; .between_bytes_timeout = 10m;

acl purge_list {
    "xxx";
    "xxx";
    "xxx";
    "xxx";
    "xxx";
    "xxx";
    "xxx";
    "xxx";
    "xxx";
    "xxx";
}
sub vcl_init {
    new rr = directors.round_robin();
    rr.add_backend(qa2);
    rr.add_backend(qa3);
    rr.add_backend(qa4);
    rr.add_backend(qa5);
    rr.add_backend(qa6);
    rr.add_backend(qa7);
    rr.add_backend(qa8);
    rr.add_backend(qa9);
    rr.add_backend(qa10);
}

sub vcl_recv {
  set req.backend_hint = rr.backend();
 if (req.method == "PURGE") {
        if (!client.ip ~ purge_list) {
            return(synth(405, "not allowed."));
        }
        ban("req.url ~ .css");
        return(synth(200, "CSS Files Cleared from Cache!"));
  }
  # Don't check cache for POSTs and various other HTTP request types
  if (req.method != "GET" && req.method != "HEAD") {
    #ban("req.http.host == " + req.http.host);
    return(pass);

  }
  # Don't check cache for POSTs and various other HTTP request types
   if (req.http.Cookie ~ "SESS[a-f|0-9]+" || 
    req.http.Authorization || 
    req.url ~ "login" ||
    req.method == "POST" ||
    req.http.Cookie || 
    req.url ~ "/wp-(login|admin)") {
    return (pass);
   }
  if (req.http.Accept-Encoding) {
    if (req.url ~ "\.(jpg|png|gif|gz|tgz|bz2|tbz|mp3|ogg)$") {
          unset req.http.Accept-Encoding;
      } elsif (req.http.Accept-Encoding ~ "gzip") {
          set req.http.Accept-Encoding = "gzip";
      } elsif (req.http.Accept-Encoding ~ "deflate") {
          set req.http.Accept-Encoding = "deflate";
      } else {
          unset req.http.Accept-Encoding;
      }
  }

  if (req.url ~ "\.(aif|aiff|au|avi|bin|bmp|cab|carb|cct|cdf|class|css)$"  ||
      req.url ~ "\.(dcr|doc|dtd|eps|exe|flv|gcf|gff|gif|grv|hdml|hqx)$"    ||
      req.url ~ "\.(ico|ini|jpeg|jpg|js|mov|mp3|nc|pct|pdf|png|ppc|pws)$"  ||
      req.url ~ "\.(swa|swf|tif|txt|vbs|w32|wav|wbmp|wml|wmlc|wmls|wmlsc)$"||
      req.url ~ "\.(xml|xsd|xsl|zip|woff)($|\?)") {
      unset req.http.Cookie;
      #unset req.http.Authorization; 
      #unset req.http.Authenticate; 
      return (hash);
  }

  return(hash);
}

# Cache hit: the object was found in cache
sub vcl_hit {
    if (req.method == "PURGE") {
        return (synth(200, "Purged!"));
    }
}
# Cache miss: request is about to be sent to the backend
sub vcl_miss {
    if (req.method == "PURGE") {
        return (synth(200, "Purged (Not in cache)"));
    }
}
sub vcl_backend_response {
  if (bereq.url ~ "\.(aif|aiff|au|avi|bin|bmp|cab|carb|cct|cdf|class|css)$"  ||
      bereq.url ~ "\.(dcr|doc|dtd|eps|exe|flv|gcf|gff|gif|grv|hdml|hqx)$"    ||
      bereq.url ~ "\.(ico|ini|jpeg|jpg|js|mov|mp3|nc|pct|pdf|png|ppc|pws)$"  ||
      bereq.url ~ "\.(swa|swf|tif|txt|vbs|w32|wav|wbmp|wml|wmlc|wmls|wmlsc)$"||
      bereq.url ~ "\.(xml|xsd|xsl|zip|woff)($|\?)") {
    set beresp.grace = 30s;
    set beresp.ttl = 1d;
    set beresp.http.Cache-Control = "public, max-age=600";
    set beresp.http.expires = beresp.ttl;  
    return (deliver);
  }
}
# Deliver the response to the client
sub vcl_deliver {
  # Add an X-Cache diagnostic header
  if (obj.hits > 0) {
    set resp.http.X-Cache = "HIT";
    set resp.http.X-Cache-Hits = obj.hits;
    # Don't echo cached Set-Cookie headers
    unset resp.http.Set-Cookie;
  } else {
    set resp.http.X-Cache = "MISS";
  }
  # Remove some headers not needed on production systems
  #  unset resp.http.Via;
  #  unset resp.http.X-Generator;
  #  return(deliver);
}* 

以下是上次测试的结果:

enter image description here

enter image description here

实际上响应时间很好,但是吞吐量很差,正如我已经写过的那样,Varnish会冻结,直到它完成解决所有先前的请求。

所以问题是 - 清漆并发用户是否存在理论上的限制?如何调整它以使用超过4k的并发连接?

PS。还在每个Apache服务器上扩展了MaxClients。

2 个答案:

答案 0 :(得分:0)

Varnish永远不会产生502返回代码,这意味着你很可能不会缓存响应。

您很可能会改为对后端进行基准测试。

并发用户数没有内置限制。你的线程数看起来很好。对于仅4000个会话,您不需要进行任何内核/操作系统调整,默认值应该没问题。如果你点击了somaxconn,这很可能是你的基准测试工具的一个神器,而且可能不是实际流量的情况。

总结:检查命中率并查看varnishlog以找出未缓存内容的原因。

答案 1 :(得分:0)

502 Bad Gateway意味着你的后端以某种方式失败,而且Varnish无法连接,这可能就是为什么它会被一个巨大的队列冻结。

我注意到你在vcl_recv中传递了任何带有设置cookie的东西:

browserHistory.push('/bands/<newbandid>/songs')

特别是使用Wordpress,这会破坏你的命中率,并导致大量的后端连接,因为清漆不会缓存cookie。

以下是我们使用vcl_recv处理Cookie的一些示例代码:

  # Don't check cache for POSTs and various other HTTP request types
   if (req.http.Cookie ~ "SESS[a-f|0-9]+" || 
    req.http.Authorization || 
    req.url ~ "login" ||
    req.method == "POST" ||
*** req.http.Cookie || 
    req.url ~ "/wp-(login|admin)") {
    return (pass);
   }

这应该足以添加到vcl_backend_response:

# Pass if the page is login, admin, preview, search or xmlrpc
if (req.url ~ "wp-(login|admin|json)" || req.url ~ "preview=true" || req.url ~ "\?s=" || req.url ~ "xmlrpc.php" || req.http.Cookie ~ "wordpress_logged_in_") {
    return (pass);
}

# Some wordpress URL manipulation
if (req.url ~ "\?(utm_(campaign|medium|source|term)|adParams|client|cx|eid|fbid|feed|ref(id|src)?|v(er|iew))=") {
    set req.url = regsub(req.url, "\?.*$", "");
}

# Some generic URL manipulation, useful for all templates that follow
# First remove the Google Analytics added parameters, useless for our backend
if (req.url ~ "(\?|&)(utm_source|utm_medium|utm_campaign|utm_content|gclid|cx|ie|cof|siteurl)=") {
    set req.url = regsuball(req.url, "&(utm_source|utm_medium|utm_campaign|utm_content|gclid|cx|ie|cof|siteurl)=([A-z0-9_\-\.%25]+)", "");
    set req.url = regsuball(req.url, "\?(utm_source|utm_medium|utm_campaign|utm_content|gclid|cx|ie|cof|siteurl)=([A-z0-9_\-\.%25]+)", "?");
    set req.url = regsub(req.url, "\?&", "?");
    set req.url = regsub(req.url, "\?$", "");
}

# Strip hash, server doesn't need it.
if (req.url ~ "\#") {
    set req.url = regsub(req.url, "\#.*$", "");
}

# Strip a trailing ? if it exists
if (req.url ~ "\?$") {
    set req.url = regsub(req.url, "\?$", "");
}

# Remove the wp-settings-1 cookie
set req.http.Cookie = regsuball(req.http.Cookie, "wp-settings-1=[^;]+(; )?", "");

# Remove the wp-settings-time-1 cookie
set req.http.Cookie = regsuball(req.http.Cookie, "wp-settings-time-1=[^;]+(; )?", "");

# Remove the wp test cookie
set req.http.Cookie = regsuball(req.http.Cookie, "wordpress_test_cookie=[^;]+(; )?", "");

# Remove the cloudflare cookie
set req.http.Cookie = regsuball(req.http.Cookie, "__cfduid=[^;]+(; )?", "");

# Remove the PHPSESSID in members area cookie
set req.http.Cookie = regsuball(req.http.Cookie, "PHPSESSID=[^;]+(; )?", "");

# Remove the Quant Capital cookies (added by some plugin, all __qca)
set req.http.Cookie = regsuball(req.http.Cookie, "__qc.=[^;]+(; )?", "");

# Are there cookies left with only spaces or that are empty?
if (req.http.cookie ~ "^\s*$") {
    unset req.http.cookie;
}

# Unset Cookies except for WordPress admin and WooCommerce pages 
if (!(req.url ~ "(wp-login|wp-admin|cart|my-account/*|checkout|addons|wordpress-social-login|wp-login\.php|forumPM|members|logout|lost-password|product/*)")) {
    unset req.http.cookie; 
}