我几个月来一直在拼命地尝试自动化一个过程,在这个过程中,csv文件被下载,编辑并保存在给定的位置。 到目前为止,我只使用excel vba来打开网页并点击底部下载csv文件,但代码停止并需要手动干预才能完成,我希望它尽可能完全自动化。 看到使用的代码(我不是作者):
Sub WebDataExtraction()
Dim URL As String
Dim IeApp As Object
Dim IeDoc As Object
Dim ieForm As Object
Dim ieObj As Object
Dim objColl As Collection
URL = "http://www.bmreports.com/bsp/BMRSSystemData.php?pT=DDAD&zT=N&dT=NRT"
Set IeApp = CreateObject("InternetExplorer.Application")
IeApp.Visible = True
IeApp.Navigate URL
Do Until IeApp.ReadyState = READYSTATE_COMPLETE
Loop
Set IeDoc = IeApp.Document
For Each ele In IeApp.Document.getElementsByTagName("span")
If ele.innerHTML = "CSV" Then
Application.Wait (Now + TimeValue("0:00:15"))
DoEvents
ele.Click
'At this point you need to Save the document manually
' or figure out for yourself how to automate this interaction.
Test_Save_As_Set_Filename
File_Download_Click_Save
End If
Next
IeApp.Quit
End Sub"
提前致谢
南兹奥
答案 0 :(得分:1)
我发布了第二个答案,因为我认为我的第一个答案对于许多类似的应用程序来说已经足够了,因此在这种情况下它不起作用。
为什么其他方法失败:
.Click
方法:这会引发一个新窗口,需要在运行时输入用户,似乎不可能使用WinAPI
来控制此窗口。或者,至少我不能确定任何方式。代码执行在.Click
行停止,直到用户手动干预,无法使用GoTo
或Wait
或任何其他方法来规避此行为。WinAPI
函数直接下载源文件不起作用,因为按钮的URL不包含文件,而是包含动态提供文件的js函数。 以下是我建议的解决方案解决方案:
您可以阅读网页的.body.InnerText
,使用FileSystemObject
将其写入纯文本/ csv文件,然后使用Regular Expressions
和字符串操作的组合,将数据解析为正确分隔的CSV文件。
Sub WebDataExtraction()
Dim url As String
Dim fName As String
Dim lnText As String
Dim varLine() As Variant
Dim vLn As Variant
Dim newText As String
Dim leftText As String
Dim breakTime As Date
'## Requires reference to Microsoft VBScript Regular Expressions 5.5
Dim REMatches As MatchCollection
Dim m As Match
'## Requires reference to Microsoft Internet Controls
Dim IeApp As InternetExplorer
'## Requires reference to Microsoft HTML object library
Dim IeDoc As HTMLDocument
Dim ele As HTMLFormElement
'## Requires reference to Microsoft Scripting Runtime
Dim fso As FileSystemObject
Dim f As TextStream
Dim ln As Long: ln = 1
breakTime = DateAdd("s", 60, Now)
url = "http://www.bmreports.com/bsp/BMRSSystemData.php?pT=DDAD&zT=N&dT=NRT"
Set IeApp = CreateObject("InternetExplorer.Application")
With IeApp
.Visible = True
.Navigate url
Do Until .ReadyState = 4
Loop
Set IeDoc = .Document
End With
'Wait for the data to display on the page
Do
If Now >= breakTime Then
If MsgBox("The website is taking longer than usual, would you like to continue waiting?", vbYesNo) = vbNo Then
GoTo EarlyExit
Else:
breakTime = DateAdd("s", 60, Now)
End If
End If
Loop While Trim(IeDoc.body.innerText) = "XML CSV Please Wait Data Loading Sorting"
'## Create the text file
fName = ActiveWorkbook.Path & "\exported-csv.csv"
Set fso = CreateObject("Scripting.FileSystemObject")
Set f = fso.OpenTextFile(fName, 2, True, -1)
f.Write IeDoc.body.innerText
f.Close
Set f = Nothing
'## Read the text file
Set f = fso.OpenTextFile(fName, 1, False, -1)
Do
lnText = f.ReadLine
'## The data starts on the 4th line in the InnerText.
If ln >= 4 Then
'## Return a collection of matching date/timestamps to which we can parse
Set REMatches = SplitLine(lnText)
newText = lnText
For Each m In REMatches
newText = Replace(newText, m.Value, ("," & m.Value & ","), , -1, vbTextCompare)
Next
'## Get rid of consecutive delimiters:
Do
newText = Replace(newText, ",,", ",")
Loop While InStr(1, newText, ",,", vbBinaryCompare) <> 0
'## Then use some string manipulation to parse out the first 2 columns which are
' not a match to the RegExp we used above.
leftText = Left(newText, InStr(1, newText, ",", vbTextCompare) - 1)
leftText = Left(leftText, 10) & "," & Right(leftText, Len(leftText) - 10)
newText = Right(newText, Len(newText) - InStr(1, newText, ",", vbTextCompare))
newText = leftText & "," & newText
'## Store these lines in an array
ReDim Preserve varLine(ln - 4)
varLine(ln - 4) = newText
End If
ln = ln + 1
Loop While Not f.AtEndOfStream
f.Close
'## Re-open the file for writing the delimited lines:
Set f = fso.OpenTextFile(fName, 2, True, -1)
'## Iterate over the array and write the data in CSV:
For Each vLn In varLine
'Omit blank lines, if any.
If Len(vLn) <> 0 Then f.WriteLine vLn
Next
f.Close
EarlyExit:
Set fso = Nothing
Set f = Nothing
IeApp.Quit
Set IeApp = Nothing
End Sub
Function SplitLine(strLine As String) As MatchCollection
'returns a RegExp MatchCollection of Date/Timestamps found in each line
'## Requires reference to Microsoft VBScript Regular Expressions 5.5
Dim RE As RegExp
Dim matches As MatchCollection
Set RE = CreateObject("vbscript.regexp")
With RE
.MultiLine = False
.Global = True
.IgnoreCase = True
'## Use this RegEx pattern to parse the date & timestamps:
.Pattern = "(19|20)\d\d[-](0[1-9]|1[012])[-](0[1-9]|[12][0-9]|3[01])[ ]\d\d?:\d\d:\d\d"
End With
Set matches = RE.Execute(strLine)
Set SplitLine = matches
End Function
答案 1 :(得分:0)
修改强>
我使用网址测试了原始答案代码:
http://www.bmreports.com/bsp/BMRSSystemData.php?pT=DDAD&zT=N&dT=NRT#saveasCSV
但是对于这个网站来说,这种方法似乎不起作用。 ele.Click
似乎没有启动下载,只是打开网页上的数据表格。要下载,您需要右键单击/另存为。如果你已经走得那么远(我怀疑,基于你正在调用的子程序,但你没有提供代码),那么你可以使用Win API来获取Save对话框的HWND并可能自动化事件。 Santosh提供了一些相关信息:
VBA - Go to website and download file from save prompt
这也是一个有助于解决问题的好资源:
原始答案
如果您能够确定CSV的URL,则可以使用此子例程打开与CSV数据的连接,并将其直接导入工作簿。您可能需要对导入的数据自动执行文本到列操作,但可以使用宏录制器轻松复制。我在下面的Test()
子例程中给出了一个例子。
您可以轻松修改此项以在新工作簿中添加QueryTables
,然后自动执行该工作簿上的SaveAs
方法以将文件另存为CSV。
此示例使用Yahoo Finance,Ford Motor Company的已知URL,并将在活动工作表的单元格QueryTables
中添加带有CSV数据的A1
。这可以很容易地修改,以将其放在另一个工作表,另一个工作簿等。
Sub Test()
Dim MyURL as String
MyURL = "http://ichart.finance.yahoo.com/table.csv?s=GM&a0&b=1&c2010&d=05&e=20&f=2013&g=d&ignore=.csv"
OpenURL MyURL
'Explode the CSV data:
Range("A:A").TextToColumns Destination:=Range("A1"), DataType:=xlDelimited, _
TextQualifier:=xlDoubleQuote, ConsecutiveDelimiter:=False, Tab:=False, _
Semicolon:=False, Comma:=True, Space:=False, Other:=False, FieldInfo _
:=Array(Array(1, 3), Array(2, 1), Array(3, 1), Array(4, 1), Array(5, 1), Array(6, 1), _
Array(7, 1)), TrailingMinusNumbers:=True
End Sub
Private Sub OpenURL(fullURL As String)
'This opens the CSV in querytables connection.
On Error GoTo ErrOpenURL
With ActiveSheet.QueryTables.Add(Connection:= _
"URL;" & fullURL, Destination:=Range("A1"))
.Name = fullURL
.FieldNames = True
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = False
.BackgroundQuery = True
.RefreshStyle = xlInsertDeleteCells
.SavePassword = True
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.WebSelectionType = xlEntirePage
.WebFormatting = xlWebFormattingAll
.WebPreFormattedTextToColumns = True
.WebConsecutiveDelimitersAsOne = True
.WebSingleBlockTextImport = False
.WebDisableDateRecognition = False
.WebDisableRedirections = False
.Refresh BackgroundQuery:=False
End With
ExitOpenURL:
Exit Sub 'if all goes well, you can exit
'Error handling...
ErrOpenURL:
Err.Clear
bCancel = True
Resume ExitOpenURL
End Sub