<div class="game_content">
<div class="game_table">
<article class="league_name">
<span class="flags flags-category flags--uefa-europa-league"></span>
<h6 class="hide-on-small-only hide-on-med-only hide_ipadLand"><a title="International">International</a></h6>
<h3><a href="/en/league-games/1/uefa-europa-league" title="UEFA Europa League">UEFA Europa League</a></h3>
</article> <div class="game_detailsCol">
<div class="col s2 m1 l1">
<span class="red-text"><span class="green-text bold">F</span><span class="grey-text">21:45</span></span>
</div>
<div class="col s3 m4 l5">
<div>
<p class="yellow_card">3</p>
</div>
<a href="/en/teams/Marseille" title="Marseille livescore">Marseille</a>
</div>
<div class="col s1 m1 l1">
<a href="/en/game-info/467162/marseille-atletico-madrid" title="Marseille-Atletico Madrid livescore" class="">
0-3 </a>
</div>
<div class="col s3 m4 l5">
<a href="/en/teams/Atletico+Madrid" title="Atletico Madrid livescore">Atletico Madrid</a>
<div>
<span class="grey-text">0-1</span>
<p class="yellow_card">2</p> </div>
</div>
<div class="col s2 m1 l1">
<i class="icon icon-star " game_id="467162"></i>
<a class="dropdown-button hide-on-small-only hide-on-med-only hide_ipadLand" data-activates="dropdown467162">
<i class="icon icon-more_vert"></i>
</a><ul id="dropdown467162" class="dropdown-content">
<li class="close_dropMenu"><strong>x</strong></li>
<li><a href="/en/h2h/467162/marseille-atletico-madrid" title="Head To Head">Head To Head</a></li>
</ul>
</div>
</div>
伙计们,
我是通过VBA从网站抓取信息的新手,因此需要帮助。 我想循环通过div class =&#34; game_content&#34;并在每个div类=&#34; game_table&#34;废弃以下信息:
FROM title="UEFA Europa League" GET "UEFA Europa League"
FROM title="Marseille livescore">Marseille GET "Marseille"
FROM title="Marseille-Atletico Madrid livescore" class="">0-3 GET 0-3
FROM title="Atletico Madrid livescore">Atletico Madrid GET Atletico Madrid
FROM <span class="grey-text">0-1</span> GET 0-1
我的帖子包括FIRST类=&#34; game_table FROM class =&#34; game_content&#34;
我的代码:
Sub InternetConnection()
Dim IE As InternetExplorer
Dim HTMLDocument As HTMLDocument
Dim GameList As IHTMLElement
Dim Games As IHTMLElementCollection
Dim Game As IHTMLElement
'Open Internet Explorer
Set IE = New InternetExplorer
IE.Visible = True
IE.Navigate ("https://azscore.com/en/yesterday")
Do While IE.readyState <> READYSTATE_COMPLETE
Application.StatusBar = "Loading..."
Loop
Set HTMLDocument = IE.document
Set GameList = HTMLDocument.getElementsByClassName("game_content")(0)
Set Games = GameList.Children
For Each Game In Games
If Game.className = "game_table" Then
End If
Next
End Sub
任何帮助都将受到赞赏。
答案 0 :(得分:1)
通过工具提供的必需参考&gt;参考文献
注意:
现在不同的输出在不同的日子对网站运行。
Option Explicit
Public Sub PrintMatchInfo()
Application.ScreenUpdating = False
Dim IE As InternetExplorer, doc As HTMLDocument
Set IE = New InternetExplorer
With IE
.Visible = True
.Navigate2 "https://azscore.com/en/yesterday"
While .Busy Or .readyState < 4: DoEvents: Wend
Set doc = .document
End With
Dim games As Object, i As Long, j As Long, counter As Long
counter = 1
Set games = doc.getElementsByClassName("game_table")
For i = 0 To games.Length - 1
For j = 0 To games(i).getElementsByClassName("game_detailsCol").Length - 1
With ThisWorkbook.Worksheets("Sheet2")
.Cells(counter, 1) = Split(games(i).innerText, Chr$(10))(3)
Dim tempArr() As String
tempArr() = Split(GetKeyDetails(ReplacedString(games(i).getElementsByClassName("game_detailsCol")(j).innerText)), Chr$(126))
.Cells(counter, 2).Resize(1, UBound(tempArr) + 1) = tempArr
counter = counter + 1
End With
Next j
Next i
Application.ScreenUpdating = True
End Sub
Public Function ReplacedString(ByVal str As String) As String
Dim regex As Object
Set regex = CreateObject("VBScript.RegExp")
With regex
.Global = True
.MultiLine = True
.IgnoreCase = False
.Pattern = "\s{2,}"
End With
ReplacedString = regex.Replace(str, Chr$(126)) '<== or other unexpected character
End Function
Public Function GetKeyDetails(ByVal inputString As String) As String
Dim regex As Object, tempString As String
Set regex = CreateObject("VBScript.RegExp")
With regex
.Global = True
.MultiLine = True
.IgnoreCase = False
.Pattern = "(^[A-Z](?:\d|[01]\d|2[0-3]):[0-5]\d~)(\d?~?)+" '"^[A-Z](?:\d|[01]\d|2[0-3]):[0-5]\d~(\d~)?(\d~)?|x\~Head To Head"
End With
tempString = regex.Replace(inputString, vbNullString)
Dim tempArr() As String
tempArr = Split(tempString, Chr$(126))
Dim keyDetails(), outputString As String, detail As Long
keyDetails = Array(0, 1, 2, 3)
For detail = LBound(keyDetails) To UBound(keyDetails)
outputString = outputString & Chr$(126) & tempArr(keyDetails(detail))
Next detail
GetKeyDetails = Trim$(outputString)
End Function
输出:
<强>代码:强>
Option Explicit
Public Sub PrintAllMatchInfo()
Application.ScreenUpdating = False
Dim IE As InternetExplorer, doc As HTMLDocument
Set IE = New InternetExplorer
With IE
.Visible = True
.Navigate2 "https://azscore.com/en/yesterday"
While .Busy Or .readyState < 4: DoEvents: Wend
Set doc = .document
End With
Dim games As Object, i As Long, j As Long, counter As Long
counter = 1
Set games = doc.getElementsByClassName("game_table")
For i = 0 To games.Length - 1
For j = 0 To games(i).getElementsByClassName("game_detailsCol").Length - 1
With ThisWorkbook.Worksheets("Sheet1")
.Cells(counter, 1) = Split(games(i).innerText, Chr$(10))(3)
.Cells(counter, 2) = ReplacedString(games(i).getElementsByClassName("game_detailsCol")(j).innerText)
counter = counter + 1
End With
Next j
Next i
Application.ScreenUpdating = True
End Sub
Public Function ReplacedString(ByVal str As String) As String
Dim regex As Object
Set regex = CreateObject("VBScript.RegExp")
With regex
.Global = True
.MultiLine = True
.IgnoreCase = False
.Pattern = "\s{2,}"
End With
ReplacedString = regex.Replace(str, " ") '<== or other unexpected character
End Function
<强>输出:强>
后续问题的模式:"^\w+\/(\d+~)+"