我正在尝试从以下网页中提取特定的文字字符串:
http://comptroller.texas.gov/taxinfo/salestax/collections1504.html
1504表示年份月份月份,我希望将此数字减少到0504(2005年4月:http://comptroller.texas.gov/taxinfo/salestax/collections0504.html)。
我不知道如何将此字符串复制/粘贴120次,我想知道如何将其输入VBA并让代码为我执行此操作。
如果您访问1504和0504之间的任何链接,我要查找的字符串紧跟在第一个“$”之后,直到$结尾(9个字符)。
提前谢谢!
以下是我在一些研究中找到的代码:
Sub Macro5()
'
' Macro5 Macro
'
'
Dim Erw, firstRow, lastRow
firstRow = 1
Last Row = Range("B" & Rows.Count).End(xlUp).Row
For Erw = firstRow To lastRow
Dim newRow
newRow = firstRow + 4
Range("B" & newRow).Select
ActiveCell.FormulaR1C1 = Range("B" & newRow)
With ActiveSheet.QueryTables.Add(Connection:= _
"URL;ActiveCell.FormulaR1C1", _
Destination:=Range("$D$5"))
.Name = "collections1504_1"
.FieldNames = True
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = False
.BackgroundQuery = True
.RefreshStyle = xlInsertDeleteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.WebSelectionType = xlEntirePage
.WebFormatting = xlWebFormattingNone
.WebPreFormattedTextToColumns = True
.WebConsecutiveDelimitersAsOne = True
.WebSingleBlockTextImport = False
.WebDisableDateRecognition = False
.WebDisableRedirections = False
.Refresh BackgroundQuery:=False
End With
nextRow = nextRow + 1
Next Erw
Range("D3").Select
Selection.Copy
Range("C5").Select
Range("D3").Select
Application.CutCopyMode = False
Selection.Copy
Range("C5").Select
Selection.PasteSpecial Paste:=xlPasteValues, Operation:=xlNone, SkipBlanks _
:=False, Transpose:=False
Range("D5:P143").Select
Application.CutCopyMode = False
Selection.QueryTable.Delete
Selection.ClearContents
End Sub
答案 0 :(得分:1)
答案 1 :(得分:0)
我也希望直接扫描网页内容。我的方法以电子表格形式将所有结果放在电子表格中。这是实现您正在寻找的目标的另一种方式:
Option Explicit
Sub GetSalesTaxData()
Dim ie As InternetExplorer
Dim taxMonth As Date
Dim url As String
Dim urlLeader As String
Dim prefix As String
Dim pos1 As Integer
Dim pos2 As Integer
Dim taxStr As String
Dim dest As Range
Dim rowOffset As Integer
taxMonth = DateValue("4/1/2015")
urlLeader = "http://comptroller.texas.gov/taxinfo/salestax/collections"
prefix = "deposited to general revenue totaled"
Set dest = Range("A1")
rowOffset = 0
Set ie = New InternetExplorer
ie.Visible = False
Do While taxMonth > DateValue("3/1/2005")
url = urlLeader & Right(year(taxMonth), 2) & Format(Int(month(taxMonth)), "00") & ".html"
ie.Navigate url
Do While ie.ReadyState <> READYSTATE_COMPLETE
DoEvents
Loop
pos1 = InStr(1, ie.Document.body.innerhtml, prefix, vbTextCompare) + Len(prefix) + 1
pos2 = InStr(pos1, ie.Document.body.innerhtml, "million", vbTextCompare)
taxStr = Mid(ie.Document.body.innerhtml, pos1, (pos2 - pos1 - 1))
'--- basic string clean up: strip the leading '$' and the comma
taxStr = Replace(taxStr, "$", "", , , vbTextCompare)
taxStr = Replace(taxStr, ",", "", , , vbTextCompare)
' on one of the answers there is a trailing '.' for some reason
If Right(taxStr, 1) = "." Then
taxStr = Left(taxStr, Len(taxStr) - 1)
End If
'--- store it in the worksheet
dest.Cells(1 + rowOffset, 1).Value = taxMonth
dest.Cells(1 + rowOffset, 2).Value = CDbl(taxStr)
rowOffset = rowOffset + 1
' decrement the date by one month
taxMonth = DateAdd("m", -1, taxMonth)
Loop
Set ie = Nothing
End Sub