下面我的nginx配置为嵌套页面返回404,例如/博客经过一些更改后,将静态页面提供给抓取工具。问题是由这一行引起的:
if ($request_uri ~* "([^/]*$)")
似乎这个测试实际上是在重写request_uri - 但我该如何解决?
完整配置
# HTTPS virtual host for the single-page app. Requests that look like they come
# from crawlers are rewritten to /snapshot-proxy*.html pages; everything else is
# served from the client "dist" directory.
server {
root /home/user/code/app-client/dist;
index index.html index.htm;
listen 443 ssl;
server_name www.example.com;
# NOTE(review): this also binds IPv6 port 80 (as default server, without ssl) to this server — confirm that is intended.
listen [::]:80 default_server ipv6only=on;
access_log /home/user/code/app-server/log/nginx_access.log;
error_log /home/user/code/app-server/log/nginx_error.log info;
location / {
# $snapshot selects which rewrite at the bottom of this block is applied
# (0 = none). Later "set" lines override earlier ones, so the order of the
# checks below matters.
set $snapshot 0;
# Extract all text after last / from url
# NOTE(review): "[^/]*$" can match the empty string, so this condition holds
# for every request. Per the nginx "If Is Evil" notes, try_files may not run
# once an "if" inside a location has matched — presumably the cause of the
# 404 on nested pages; verify.
if ($request_uri ~* "([^/]*$)") {
set $last_path_component $1;
}
# Nothing after the last "/" (URL ends in "/"): use the name "home".
if ($last_path_component = "") {
set $last_path_component "home";
}
# Google crawler
if ($args ~* "(^|.*&)_escaped_fragment_=.*") {
set $snapshot 1;
}
# Social sharing bots
if ($http_user_agent ~* "Facebot|FacebookExternalHit|LinkedInBot|TwitterBot|Baiduspider") {
set $snapshot 2;
}
# Google crawler for pages other than homepage
if ($arg__escaped_fragment_) {
set $snapshot 3;
}
# Google crawl for nested pages e.g. article/the-title
if ($arg__escaped_fragment_ ~* "(/)") {
set $snapshot 2;
}
#excluded suffixes (assumed static). Broken into two conditionals for readability
# NOTE(review): these patterns are not anchored with "$", so they match the
# extension text anywhere in $uri (e.g. ".js" also matches ".json").
if ($uri ~ "\.(jpe?g|png|svg|gif|ico|tiff?|css|less|js|doc|zip|rar|exe|iso|dmg)") {
set $snapshot 0;
}
if ($uri ~ "\.(dat|ppt|psd|pdf|xls|mp3|mp4|m4a|wav|avi|mpe?g|swf|flv|mkv|torrent)") {
set $snapshot 0;
}
#prevent loops
# Any request carrying an X-AJS-Calltype header is never rewritten.
if ($http_x_ajs_calltype) {
set $snapshot 0;
}
# Google crawl for the homepage
if ($snapshot = 1) {
rewrite ^(.*)$ /snapshot-proxyhome.html last;
}
# Social media crawlers and crawler nested urls
if ($snapshot = 2) {
rewrite ^(.*)$ /snapshot-proxy$last_path_component.html last;
}
# Google crawl for pages (non-nested) other than homepage
if ($snapshot = 3) {
rewrite ^(.*)$ /snapshot-proxy$arg__escaped_fragment_.html last;
}
# Not rewritten: serve the requested file if it exists, else the app's index.html.
try_files $uri /index.html;
}
# Files under /static are served from the app-server directory.
location /static {
root /home/user/code/app-server/;
}
location /assets {
root /home/user/code/app-client/dist/; # do nothing and let nginx handle this as usual
}
}
# Redirect plain-HTTP requests on port 80 to HTTPS with a permanent (301) redirect.
# NOTE(review): server_name here is www.app.com while the HTTPS server earlier in
# this config uses www.example.com — confirm which host name is intended.
server {
listen 80;
server_name www.app.com;
# The redirect keeps the original request URI and uses this block's server_name as the host.
return 301 https://$server_name$request_uri;
}
答案 0 :(得分:2)
# Serve the file matching $uri if it exists on disk; otherwise fall back to /index.html.
try_files $uri /index.html;
这应该对你有用..它应该尝试从磁盘获取文件,如果它找不到它,它将获得index.html
..我不知道为什么你有{ {1}}在您的配置中,但删除它应该可以解决您的问题。
答案 1 :(得分:0)
好的,最后用地图解决了。一些说明:
_
# Derive $last_path_component from the original request URI: the text after
# the final "/" (query string included, because $request_uri carries it).
# A URI that ends in "/" has no such text; it falls through to the default
# "home", so the result is never empty and does not need to be patched later
# with an "if" + "set" on this variable.
# The pattern contains no letters, so a case-insensitive match ("~*") is not needed.
map $request_uri $last_path_component {
    default               home;
    ~(?P<last>[^/]+)$     $last;
}
server {
# your setup here; the "location /" block shown after this placeholder belongs inside this server block
}
# Catch-all location: rewrite crawler requests to pre-rendered snapshot pages
# and serve the single-page app to everyone else.
#
# $snapshot selects the rewrite applied at the bottom of the block:
#   0 = no rewrite
#   1 = homepage snapshot
#   2 = snapshot named after $last_path_component
#   3 = snapshot named after the _escaped_fragment_ value
# Later "set" lines override earlier ones, so the order of the checks matters.
# $last_path_component is expected to come from a map on $request_uri.
location / {
    set $snapshot 0;

    # Nothing after the last "/" (URL ends in "/"): use the name "home".
    if ($last_path_component = "") {
        set $last_path_component "home";
    }

    # Google crawler
    if ($args ~* "(^|.*&)_escaped_fragment_=.*") {
        set $snapshot 1;
    }

    # Social sharing bots
    if ($http_user_agent ~* "Facebot|FacebookExternalHit|LinkedInBot|TwitterBot|Baiduspider") {
        set $snapshot 2;
    }

    # Google crawler for pages other than homepage
    if ($arg__escaped_fragment_) {
        set $snapshot 3;
    }

    # Google crawl for nested pages e.g. article/the-title
    if ($arg__escaped_fragment_ ~* "(/)") {
        set $snapshot 2;
    }

    # Excluded suffixes (assumed static). Broken into two conditionals for
    # readability. Both patterns are anchored with "$" so that only a real
    # trailing extension counts: unanchored, ".js" would also match ".json"
    # and a dot followed by one of these strings in the middle of a path.
    if ($uri ~ "\.(jpe?g|png|svg|gif|ico|tiff?|css|less|js|doc|zip|rar|exe|iso|dmg)$") {
        set $snapshot 0;
    }
    if ($uri ~ "\.(dat|ppt|psd|pdf|xls|mp3|mp4|m4a|wav|avi|mpe?g|swf|flv|mkv|torrent)$") {
        set $snapshot 0;
    }

    # Prevent loops: a request carrying an X-AJS-Calltype header is never rewritten.
    if ($http_x_ajs_calltype) {
        set $snapshot 0;
    }

    # NOTE(review): "last" makes nginx match the rewritten /snapshot-proxy*.html
    # URI against the locations again — confirm that something (a dedicated
    # location, or the X-AJS-Calltype header) keeps it from re-entering these checks.

    # Google crawl for the homepage
    if ($snapshot = 1) {
        rewrite ^(.*)$ /snapshot-proxyhome.html last;
    }

    # Social media crawlers and crawler nested urls
    if ($snapshot = 2) {
        rewrite ^(.*)$ /snapshot-proxy$last_path_component.html last;
    }

    # Google crawl for pages (non-nested) other than homepage
    if ($snapshot = 3) {
        rewrite ^(.*)$ /snapshot-proxy$arg__escaped_fragment_.html last;
    }

    # Not rewritten: serve the requested file or directory if present, then the
    # app's index.html, and answer 404 only if none of them exists.
    try_files $uri $uri/ /index.html =404;
}