识别asp.net httphandler中的机器人?

时间:2014-11-18 10:22:48

标签: asp.net web-crawler httpmodule robot

我正在使用在 web.config 中注册为 HTTP 模块的 UrlRewriter。我配置了一些 301 重定向:通过网络浏览器请求页面时它们有效,但是当请求来自谷歌机器人(Googlebot)时,返回的却是 404 错误。

这是否意味着当通过抓取工具发出请求时,httpmodules不会被激活?即使请求来自爬虫,我如何确保我的301重定向被强制?

Imports System.Web

Imports System.IO

''' <summary>
''' HTTP module that rewrites or redirects incoming request URLs during the
''' application's AuthorizeRequest event, using the per-host lookup tables
''' exposed by the current <c>Eteach.Entity.Host</c>.
''' </summary>
Public Class UrlRewriter
    Implements System.Web.IHttpModule

    ''' <summary>
    ''' Subscribes the module to the application's AuthorizeRequest event.
    ''' </summary>
    ''' <param name="app">The application instance the module is attached to.</param>
    Public Sub Init(ByVal app As System.Web.HttpApplication) Implements System.Web.IHttpModule.Init
        AddHandler app.AuthorizeRequest, AddressOf AuthorizeRequest
    End Sub

    ''' <summary>
    ''' Nothing to release; the module holds no resources of its own.
    ''' </summary>
    Public Sub Dispose() Implements System.Web.IHttpModule.Dispose
    End Sub

    ''' <summary>
    ''' Decides, for the current request, whether to rewrite the path, issue a
    ''' redirect, or leave the request untouched. Branches are evaluated in order
    ''' and only the first match is applied:
    ''' 1. host rewrite table      - internal rewrite (original URL stored in Items("Page_RewriteUrl"))
    ''' 2. host 301 table          - permanent redirect
    ''' 3. host redirect table     - Response.Redirect
    ''' 4. directory URL ending in "/" with an existing default.aspx - internal rewrite to it
    ''' 5. URL ending in "/default.aspx" - permanent redirect to the directory URL
    ''' 6. "/cms/{host.ID}/..."    - internal rewrite into "/DataFiles/CMS/{host.ID}/pages/..."
    ''' </summary>
    ''' <param name="sender">The <see cref="HttpApplication"/> raising the event.</param>
    ''' <param name="e">Unused event data.</param>
    Protected Sub AuthorizeRequest(ByVal sender As Object, ByVal e As EventArgs)

        Dim app As HttpApplication = CType(sender, HttpApplication)
        Dim context As HttpContext = app.Context
        Dim request As HttpRequest = context.Request
        Dim host As Eteach.Entity.Host = Eteach.UI.Web.Host.Current(context)
        Dim hasHost As Boolean = host IsNot Nothing

        ' rewriteUrl keeps the original casing (it is handed to the page);
        ' reqPath / reqUrl are lower-cased and used only for matching.
        Dim rewriteUrl As String = request.Path
        Dim reqPath As String = request.Path.ToLower
        Dim reqUrl As String = reqPath

        ' Serialise the query string once and reuse it in every branch below.
        Dim hasQuery As Boolean = request.QueryString.Count > 0
        Dim query As String = String.Empty
        If hasQuery Then
            query = request.QueryString.ToString
            rewriteUrl &= "?" & query
            reqUrl &= "?" & query.ToLower
        End If

        If hasHost AndAlso host.UrlRewrites.ContainsKey(reqUrl) Then
            Dim url As String = host.UrlRewrites(reqUrl)

            ' The rewrite target may carry its own query string; RewritePath
            ' takes path and query separately, so split on the first "?".
            Dim qs As String = String.Empty
            Dim queryStart As Integer = url.IndexOf("?")
            If queryStart >= 0 Then
                qs = url.Substring(queryStart + 1)
                url = url.Substring(0, queryStart)
            End If

            context.Items("Page_RewriteUrl") = rewriteUrl
            context.RewritePath(url, String.Empty, qs)

        ElseIf hasHost AndAlso host.UrlRedirect301s.ContainsKey(reqUrl) Then
            SendPermanentRedirect(context.Response, host.UrlRedirect301s(reqUrl))

        ElseIf hasHost AndAlso host.UrlRedirects.ContainsKey(reqUrl) Then
            context.Response.Clear()
            context.Response.Redirect(host.UrlRedirects(reqUrl))
            ' Kept from the original: Redirect normally ends the response
            ' itself, so this call is only reached if it returns.
            context.Response.End()

        ElseIf reqPath.EndsWith("/") AndAlso File.Exists(context.Server.MapPath(reqPath & "default.aspx")) Then
            context.Items("Page_RewriteUrl") = rewriteUrl
            context.RewritePath(reqPath & "default.aspx", String.Empty, query)

        ElseIf reqPath.EndsWith("/default.aspx") Then
            ' Strip the 12 characters of "default.aspx", keeping the trailing "/".
            Dim url As String = request.Path
            url = url.Substring(0, url.Length - 12)
            If hasQuery Then
                url &= "?" & query
            End If

            SendPermanentRedirect(context.Response, url)

        ElseIf hasHost AndAlso reqPath.StartsWith("/cms/" & host.ID.ToString & "/") Then
            ' Fix: the original evaluated host.ID here without checking host for
            ' Nothing, so a request with no resolved host that reached this branch
            ' threw a NullReferenceException instead of passing through.
            Dim hostId As String = host.ID.ToString

            ' Replace the leading "/cms/{id}/" (6 + id length characters) with
            ' the physical CMS pages folder for this host.
            Dim url As String = request.Path
            url = url.Remove(0, 6 + hostId.Length)
            url = url.Insert(0, "/DataFiles/CMS/" & hostId & "/pages/")

            context.Items("Page_RewriteUrl") = rewriteUrl
            context.RewritePath(url, String.Empty, query)
        End If
    End Sub

    ''' <summary>
    ''' Clears any buffered output, sends a "301 Moved Permanently" response
    ''' with the given Location header, and ends the response.
    ''' </summary>
    ''' <param name="response">The response of the current request.</param>
    ''' <param name="location">Value written to the Location header.</param>
    Private Shared Sub SendPermanentRedirect(ByVal response As HttpResponse, ByVal location As String)
        response.Clear()
        response.StatusCode = 301
        response.Status = "301 Moved Permanently"
        response.AddHeader("Location", location)
        response.End()
    End Sub

End Class

0 个答案:

没有答案