从网页上阅读源代码并从中提取一些数据

时间:2017-10-16 10:15:53

标签: regex vbscript

我想从网页上读取源代码并从中提取一些数据。 我在我的例子中使用RegEx来提取数据,但我没有得到 任何数据,也许这是由于unicode或模式不匹配? 当我用RegExBuddy测试这个模式时它匹配,但是在vbscript中没有? 也许,我会错过代码中的某些内容,或者我必须通过其他方式重写?

这是我的尝试:

Option Explicit
Dim URL,fso,ws,LogFile,sSrcUrl,oHTTP,bGetAsAsync,Data
Set fso = CreateObject("Scripting.FileSystemObject")
Set ws = CreateObject("Wscript.Shell")
LogFile = Left(Wscript.ScriptFullName,InstrRev(Wscript.ScriptFullName, ".")) & "txt"
if fso.FileExists(LogFile) Then 
    fso.DeleteFile LogFile
end If

sSrcUrl = "https://fr.giveawayoftheday.com/"
Set oHTTP = CreateObject("MSXML2.ServerXMLHTTP.6.0")
bGetAsAsync = False
oHTTP.open "GET", sSrcUrl, bGetAsAsync
oHTTP.send
If oHTTP.status <> 200 Then
WScript.Echo "unexpected status = " & oHTTP.status & vbCrLf & oHTTP.statusText
WScript.Quit
End If
Data = oHTTP.responseText
WriteLog Data,LogFile
wscript.echo Extract(Data)
'****************************************************************
Function Extract(Data)
    Dim oRE,oMatches,Match,Line
    set oRE = New RegExp
    oRE.IgnoreCase = True
    oRE.Global = True
    oRE.MultiLine = True
    oRE.Pattern = "<div class=""giveaway_wrap cf"">(\r.*\n.*){17}</div>"
    set oMatches = oRE.Execute(Data)
    If not isEmpty(oMatches) then
        For Each Match in oMatches   
            Line = Match.Value
            Extract = Line
        Next
    End if 
End Function
'*****************************************************************
Sub WriteLog(strText,LogFile)
    Dim fs,ts 
    Const ForWriting = 2
    Set fs = CreateObject("Scripting.FileSystemObject")
    Set ts = fs.OpenTextFile(LogFile,ForWriting,True,-1)
    ts.WriteLine strText
    ts.Close
End Sub
'*****************************************************************

所以我期望的结果是:

<div class="giveaway_wrap cf">
                <div class="giveaway_img">
                    <img src="https://giveawayoftheday.com/wp-content/uploads/2017/10/82810932353ab590bf475ea3980f3038.png" alt="Excel Url Validator 1.0 Giveaway" />
                    <div class="giveaway_label">
                        <a href="https://fr.giveawayoftheday.com/excel-url-validator-1-0/" class="label_link"></a>
                        <div class="old_price">$40.00</div>
                        <div class="free">
                            <span class="big">GRATUIT</span> aujourd’hui
                        </div>
                    </div>
                </div>
                <div class="over">
                    <div class="giveaway_title">
                        <a href="https://fr.giveawayoftheday.com/excel-url-validator-1-0/">Excel Url Validator 1.0</a>
                        <div class="giveaway_date">16 octobre 2017</div>
                    </div>
                    <div class="giveaway_descr">Excel Url Validator trouve des liens rompus dans les feuilles de calcul Excel.</div>
                </div>

1 个答案:

答案 0 :(得分:0)

我得到了这样的解决方案:

Option Explicit
Dim URL,fso,ws,LogFile,sSrcUrl,oHTTP,bGetAsAsync,Data
Set fso = CreateObject("Scripting.FileSystemObject")
Set ws = CreateObject("Wscript.Shell")
LogFile = Left(Wscript.ScriptFullName,InstrRev(Wscript.ScriptFullName, ".")) & "htm"
if fso.FileExists(LogFile) Then 
    fso.DeleteFile LogFile
end If

sSrcUrl = "https://fr.giveawayoftheday.com/"
Set oHTTP = CreateObject("MSXML2.ServerXMLHTTP.6.0")
bGetAsAsync = False
oHTTP.open "GET", sSrcUrl, bGetAsAsync
oHTTP.send
If oHTTP.status <> 200 Then
WScript.Echo "unexpected status = " & oHTTP.status & vbCrLf & oHTTP.statusText
WScript.Quit
End If
Data = oHTTP.responseText
WriteLog Extract(Data),LogFile
wscript.echo Extract(Data)
'****************************************************************
Function Extract(Data)
    Dim oRE,oMatches,Match,Line
    set oRE = New RegExp
    oRE.IgnoreCase = True
    oRE.Global = True
    oRE.MultiLine = True
    oRE.Pattern = "<div class=""giveaway_wrap cf"">(?:(?!""giveaway_counter first"">)[\s\S])*</div>"
    set oMatches = oRE.Execute(Data)
    If not isEmpty(oMatches) then
        For Each Match in oMatches   
            Line = Match.Value
            Extract = Line
        Next
    End if 
End Function
'*****************************************************************
Sub WriteLog(strText,LogFile)
    Dim fs,ts 
    Const ForWriting = 2
    Set fs = CreateObject("Scripting.FileSystemObject")
    Set ts = fs.OpenTextFile(LogFile,ForWriting,True,-1)
    ts.WriteLine strText
    ts.Close
End Sub
'*****************************************************************

编辑于2017年10月29日

更新代码以弹出显示当天赠品的HTA文件

Option Explicit
Dim URL,fso,ws,LogFile,sSrcUrl,oHTTP,bGetAsAsync,HTA,Data
Set fso = CreateObject("Scripting.FileSystemObject")
Set ws = CreateObject("Wscript.Shell")
LogFile = Left(Wscript.ScriptFullName,InstrRev(Wscript.ScriptFullName, ".")) & "hta"
if fso.FileExists(LogFile) Then 
    fso.DeleteFile LogFile
end If

If IsInternetConnected = True Then
    If Lang = True Then
        sSrcUrl = "https://fr.giveawayoftheday.com/"
    Else
        sSrcUrl = "https://www.giveawayoftheday.com/"
    End if
End If

Set oHTTP = CreateObject("MSXML2.ServerXMLHTTP.6.0")
bGetAsAsync = False
oHTTP.open "GET", sSrcUrl, bGetAsAsync
oHTTP.send
If oHTTP.status <> 200 Then
WScript.Echo "unexpected status = " & oHTTP.status & vbCrLf & oHTTP.statusText
WScript.Quit
End If
Data = oHTTP.responseText
HTA = "<html>" & vbCrLf &_
"<title>Giveaway of the day by Hackoo</title>" & vbCrLf &_
"<head>" & vbCrLf &_
"<HTA:APPLICATION" & vbCrLf &_
  "APPLICATIONNAME=""GiveAway of the Day""" & vbCrLf &_
  "Icon=DxDiag.exe" & vbCrLf &_
  "BORDER=""thin""" & vbCrLf &_
  "MAXIMIZEBUTTON=""no""" & vbCrLf &_
  "MINIMIZEBUTTON=""no""" & vbCrLf &_
  "SCROLL=""no""" & vbCrLf &_
  "SINGLEINSTANCE=""yes""" & vbCrLf &_
  "CONTEXTMENU=""no""" & vbCrLf &_
  "SELECTION=""no""/>" & vbCrLf &_
"<SCRIPT language=""VBScript"">" & vbCrLf &_
"Sub Window_OnLoad" & vbCrLf &_
    "window.resizeTo 450,380" & vbCrLf &_
    "WindowLeft = (window.screen.availWidth - 450)" & vbCrLf &_  
    "WindowTop  = (window.screen.availHeight - 380)" & vbCrLf &_
    "window.moveTo WindowLeft, WindowTop" & vbCrLf &_
"end sub" & vbCrLf &_
"</script>" & vbCrLf &_
"</head>" & vbCrLf &_
"<center>" & vbCrLf &_
"<meta http-equiv=""Content-Type"" content=""text/html; charset=UTF-8"" />" & vbCrLf &_
"<meta http-equiv=""X-UA-Compatible"" content=""IE=edge"">" & vbCrLf &_
"<link rel=""stylesheet"" href=""https://www.giveawayoftheday.com/css/main.css"" />"
WriteLog HTA,LogFile
WriteLog Extract(Data),LogFile
WriteLog "</html>",LogFile
ws.run LogFile
'****************************************************************
Function Extract(Data)
    Dim oRE,oMatches,Match,Line
    set oRE = New RegExp
    oRE.IgnoreCase = True
    oRE.Global = True
    oRE.MultiLine = True
    oRE.Pattern = "<div class=""giveaway_wrap cf"">(?:(?!""giveaway_counter first"">)[\s\S])*</div>"
    set oMatches = oRE.Execute(Data)
    If not isEmpty(oMatches) then
        For Each Match in oMatches   
            Line = Match.Value
            Extract = Line
        Next
    End if 
End Function
'*****************************************************************
Sub WriteLog(strText,LogFile)
    Dim fs,ts 
    Const ForAppending = 8
    Set fs = CreateObject("Scripting.FileSystemObject")
    Set ts = fs.OpenTextFile(LogFile,ForAppending,True,-1)
    ts.WriteLine strText
    ts.Close
End Sub
'*****************************************************************
Function Lang()
Dim sComputer,oWMI,colOperatingSystems,oOS,iOSLang
    sComputer = "."
    Set oWMI = GetObject("winmgmts:" _
        & "{impersonationLevel=impersonate}!\\" _
        & sComputer _
        & "\root\cimv2")
Set colOperatingSystems = oWMI.ExecQuery _
        ("Select * from Win32_OperatingSystem")
For Each oOS in colOperatingSystems
    iOSLang = oOS.OSLanguage
Next
If (iOSLang = 1036) Then
    Lang = True
Else
    Lang = False
End If
End Function
'*****************************************************************
Function IsInternetConnected()
Dim MyLoop,strComputer,objPing,objStatus
IsInternetConnected = False
MyLoop = True
While MyLoop = True
    strComputer = "smtp.gmail.com"
    Set objPing = GetObject("winmgmts:{impersonationLevel=impersonate}!\\").ExecQuery _
    ("select * from Win32_PingStatus where address = '" & strComputer & "'")
    For Each objStatus in objPing
        If objStatus.Statuscode = 0 Then
            MyLoop = False
            IsInternetConnected = True
            Exit Function
        End If
    Next
    MsgBox "Check your internet connection !",vbExclamation,"Check your internet connection !"
    Pause(10) 'To sleep for 10 secondes
Wend
End Function
'******************************************************************
 Sub Pause(NSeconds)
    Wscript.Sleep(NSeconds*1000)
 End Sub
'******************************************************************

enter image description here