使用VBS在网页上保存可见文本

时间:2015-04-13 04:35:20

标签: vbscript

Dim url: url = "http://some.url" 'set your page url here'
With WScript.CreateObject("InternetExplorer.Application", "IE_")
.Visible = False
.Navigate url
Do
    WScript.Sleep 100
Loop While .ReadyState < 4 And .Busy
Dim data: data = .Document.Body.innerText
With CreateObject("ADODB.Stream")
    .Open
    .Type     = 2 'adTypeText'
    .Position = 0
    .Charset  = "utf-8"
    .WriteText data
    .SaveToFile "output.txt", 2
    .Close
End With
.Quit
End With

我发现了这一点,但正确的代码会有所帮助。

2 个答案:

答案 0 :(得分:2)

你可以试试这样的东西!

您可以在HTML和文本模式下保存此页面;)

Const TriStateTrue = -1 ' Pour la prise en charge de l'Unicode
URL = InputBox("Entrez l'URL pour y extraire son Code Source HTML "&vbcr&vbcr&_
"Exemple ""http://www.google.fr""","Extraction du Code Source © Hackoo © 2013","http://stackoverflow.com/questions/29597909/saving-visible-text-on-web-page-using-vbs")
If URL = "" Then WScript.Quit
Titre = "Extraction du Code Source de " & URL
Set ie = CreateObject("InternetExplorer.Application")
Set objFSO = CreateObject("Scripting.FileSystemObject")
ie.Navigate(URL)
ie.Visible=false
DO WHILE ie.busy
LOOP
DataHTML = ie.document.documentElement.innerHTML
DataTxt = ie.document.documentElement.innerText
strFileHTML = "CodeSourceHTML.txt"
strFileTxt = "CodeSourceTxt.txt"
Set objHTMLFile = objFSO.OpenTextFile(strFileHTML,2,True, TriStateTrue)
objHTMLFile.WriteLine(DataHTML)
objHTMLFile.Close
Set objTxtFile = objFSO.OpenTextFile(strFileTxt,2,True, TriStateTrue)
objTxtFile.WriteLine(DataTxt)
objTxtFile.Close
ie.Quit
Set ie=Nothing
 Ouvrir(strFileHTML)
 Ouvrir(strFileTxt)
wscript.Quit
'*************************************************
Function Ouvrir(File)
    Set ws=CreateObject("wscript.shell")
    ws.run "Notepad.exe "& File,1,False
end Function
'*************************************************

答案 1 :(得分:0)

Sub HttpGet
On Error Resume Next
'   Have to use MSXML2 as Microsoft.XMLHTTP caused Access Denied errors after the page had been repeatedly gotten, go figure that one
'   Set File = WScript.CreateObject("MSXML2.ServerXMLHTTP.4.0")
    Set File = WScript.CreateObject("Microsoft.XMLHTTP")
    File.Open "GET", Arg(1), False
    File.setRequestHeader "User-Agent", "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 1.1.4322; .NET CLR 3.5.30729; .NET CLR 3.0.30618; .NET4.0C; .NET4.0E; BCD2000; BCD2000)"
    File.Send
    txt=File.ResponseText
    'Putting in line endings
    Outp.write txt
    If err.number <> 0 then 
        Outp.writeline "" 
        Outp.writeline "Error getting file" 
        Outp.writeline "==================" 
        Outp.writeline "" 
        Outp.writeline "Error " & err.number & "(0x" & hex(err.number) & ") " & err.description 
        Outp.writeline "Source " & err.source 
        Outp.writeline "" 
        Outp.writeline "HTTP Error " & File.Status & " " & File.StatusText
        Outp.writeline  File.getAllResponseHeaders
        Outp.writeline LCase(Arg(1))
    End If
End Sub

'=============================================

Sub RemoveHTMLTags
    Set ie = CreateObject("InternetExplorer.Application") 
    ie.Visible = 0
    ie.Silent = 1 
    ie.Navigate2 "file://" & FilterPath & "Filter.html"
    Do 
        wscript.sleep 50            

    Loop Until ie.document.readystate = "complete"
    ie.document.body.innerhtml = Inp.readall
    Outp.write ie.document.body.innertext
'   ie.quit
End Sub