我想将网页内容另存为txt文件。借助以下代码,我已经成功保存了各种网站的页面(感谢S.O.贡献者):
import urllib.request
# Download a web page while presenting a browser-like User-Agent header and
# save the raw response bytes to output.txt, then echo them to stdout.
user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7'
url = "https://stackoverflow.com/questions/19285966/write-contents-of-url-request-to-file"
headers = {'User-Agent': user_agent}
request = urllib.request.Request(url, None, headers)

# Read the body first and close the HTTP response deterministically; the
# output file is only created once the download has actually succeeded.
with urllib.request.urlopen(request) as response:
    content = response.read()

# Binary mode: `content` is bytes, written exactly as received (no decoding).
# The context manager closes the file even if the write raises.
with open("output.txt", "wb") as f:
    f.write(content)

print(content)
但是,虽然这段代码对Google搜索网址也能正常运行,生成的txt文件却似乎不包含页面中所需的信息。以下是我要保存的URL链接示例: https://www.google.com/search?tbm=fin&ei=mOVMWszfDqzIgAaH54OoAg&q=JSE%3Agfi&oq=JSE%3Agfi&gs_l=finance-immersive.3..81i8k1.355270.355832.0.355925.3.3.0.0.0.0.241.241.2-1.1.0....0...1c..64.finance-immersive..2.1.240....0.7I3B1yH_BUk#scso=uid__eZMWrSaGqfUgAb6upTYDg_5:0,uid_GchNWpacMcKcgAbCyK7YBg_5:0&wptab=COMPANY
虽然页面中显示了“Revenue”,但生成的txt文件中却没有“Revenue”。这是否是谷歌有意设置的屏障,使得“google.com/search”网址无法像其他普通链接那样被保存?
答案 0 :(得分:0)
我不确定如何使用Python执行此操作,但如果您需要Excel / VBA解决方案,则可以运行下面的代码,它将执行您想要的操作。
Sub Sample()
    ' Opens a page in a visible Internet Explorer window, waits for it to
    ' finish loading, and writes the text of the page body to a text file.

    ' Value the loop below waits for; 4 is presumably IE's READYSTATE_COMPLETE.
    Const READY_STATE_DONE As Long = 4

    Dim browser As Object
    Dim pageText As String
    Dim outputPath As String
    Dim fileNum As Integer

    Set browser = CreateObject("internetexplorer.application")
    browser.Navigate "http://www.wikihow.com/Choose-an-Email-Address"
    browser.Visible = True

    ' Poll in 5-second steps until the browser reports the target ready state.
    Do Until browser.readystate = READY_STATE_DONE
        Wait 5
    Loop
    DoEvents

    ' innerText of the document body, captured as a single string.
    pageText = browser.document.body.innerText

    '~~> Change this to the relevant path
    outputPath = "C:\your_path_here\Sample.Txt"

    ' Write the captured text to the file using the next free file number.
    fileNum = FreeFile()
    Open outputPath For Output As #fileNum
    Print #fileNum, pageText
    Close #fileNum
End Sub
Private Sub Wait(ByVal nSec As Long)
    ' Pauses for roughly nSec seconds while keeping the host responsive by
    ' pumping pending events with DoEvents.
    '
    ' nSec: number of seconds to wait.
    '
    ' Timer returns the seconds elapsed since midnight and resets to 0 at
    ' midnight. Comparing Timer against an absolute deadline (start + nSec)
    ' would therefore never finish when the wait spans midnight, so elapsed
    ' time is measured instead and corrected for the wrap-around.
    Const SECONDS_PER_DAY As Long = 86400

    Dim startTime As Double
    Dim elapsed As Double

    startTime = Timer
    Do
        DoEvents
        elapsed = Timer - startTime
        ' A negative difference means midnight passed since startTime.
        If elapsed < 0 Then elapsed = elapsed + SECONDS_PER_DAY
    Loop While elapsed < nSec
End Sub