Dim url: url = "http://some.url" 'set your page url here'
With WScript.CreateObject("InternetExplorer.Application", "IE_")
.Visible = False
.Navigate url
Do
WScript.Sleep 100
Loop While .ReadyState < 4 And .Busy
Dim data: data = .Document.Body.innerText
With CreateObject("ADODB.Stream")
.Open
.Type = 2 'adTypeText'
.Position = 0
.Charset = "utf-8"
.WriteText data
.SaveToFile "output.txt", 2
.Close
End With
.Quit
End With
我发现了这一点,但正确的代码会有所帮助。
答案 0 :(得分:2)
你可以试试这样的东西!
您可以在HTML和文本模式下保存此页面;)
Const TriStateTrue = -1 ' Pour la prise en charge de l'Unicode
URL = InputBox("Entrez l'URL pour y extraire son Code Source HTML "&vbcr&vbcr&_
"Exemple ""http://www.google.fr""","Extraction du Code Source © Hackoo © 2013","http://stackoverflow.com/questions/29597909/saving-visible-text-on-web-page-using-vbs")
If URL = "" Then WScript.Quit
Titre = "Extraction du Code Source de " & URL
Set ie = CreateObject("InternetExplorer.Application")
Set objFSO = CreateObject("Scripting.FileSystemObject")
ie.Navigate(URL)
ie.Visible=false
DO WHILE ie.busy
LOOP
DataHTML = ie.document.documentElement.innerHTML
DataTxt = ie.document.documentElement.innerText
strFileHTML = "CodeSourceHTML.txt"
strFileTxt = "CodeSourceTxt.txt"
Set objHTMLFile = objFSO.OpenTextFile(strFileHTML,2,True, TriStateTrue)
objHTMLFile.WriteLine(DataHTML)
objHTMLFile.Close
Set objTxtFile = objFSO.OpenTextFile(strFileTxt,2,True, TriStateTrue)
objTxtFile.WriteLine(DataTxt)
objTxtFile.Close
ie.Quit
Set ie=Nothing
Ouvrir(strFileHTML)
Ouvrir(strFileTxt)
wscript.Quit
'*************************************************
Function Ouvrir(File)
Set ws=CreateObject("wscript.shell")
ws.run "Notepad.exe "& File,1,False
end Function
'*************************************************
答案 1 :(得分:0)
Sub HttpGet
On Error Resume Next
' Have to use MSXML2 as Microsoft.XMLHTTP caused Access Denied errors after the page had been repeatedly gotten, go figure that one
' Set File = WScript.CreateObject("MSXML2.ServerXMLHTTP.4.0")
Set File = WScript.CreateObject("Microsoft.XMLHTTP")
File.Open "GET", Arg(1), False
File.setRequestHeader "User-Agent", "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 1.1.4322; .NET CLR 3.5.30729; .NET CLR 3.0.30618; .NET4.0C; .NET4.0E; BCD2000; BCD2000)"
File.Send
txt=File.ResponseText
'Putting in line endings
Outp.write txt
If err.number <> 0 then
Outp.writeline ""
Outp.writeline "Error getting file"
Outp.writeline "=================="
Outp.writeline ""
Outp.writeline "Error " & err.number & "(0x" & hex(err.number) & ") " & err.description
Outp.writeline "Source " & err.source
Outp.writeline ""
Outp.writeline "HTTP Error " & File.Status & " " & File.StatusText
Outp.writeline File.getAllResponseHeaders
Outp.writeline LCase(Arg(1))
End If
End Sub
'=============================================
Sub RemoveHTMLTags
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = 0
ie.Silent = 1
ie.Navigate2 "file://" & FilterPath & "Filter.html"
Do
wscript.sleep 50
Loop Until ie.document.readystate = "complete"
ie.document.body.innerhtml = Inp.readall
Outp.write ie.document.body.innertext
' ie.quit
End Sub