Webscrape跳过无效输入

时间:2018-02-16 12:14:31

标签: excel html5 vba

我正在尝试创建一个网络爬虫(web scraper),非常需要帮助!

目前,该程序的运行方式是:主输入"模块代码"从A列第2行开始一直读取到末尾。 输出(模块标题,描述,Prereq和Preclusion)将从网站上抓取。

但是,如果给定的输入没有提供任何输出,我很难尝试让程序跳过一行。

目前,如果Cells(15, 1)输入无效,则会继续Cells(16, 1)并将Cells(16, 2 to 5)的输出设置为Cells(15, 2 to 5)。因此,如果输入中有错误,我最后会有一个缺失的行。

但是,我希望实现的是:如果Cells(15, 1)是无效输入,它会使Cells(15, 2 to 5)为空,然后继续使用Cells(16, 1)作为输入,并将输出放到Cells(16, 2 to 5)而不是前一行。

如果有人能给我一些指导,那就太好了!我是VBA新手,仍在努力学习。

Cells(16, 2 to 5)

功能如下所示:

Private Sub CommandButton4_Click()
    ' Looks up every module code found in column A (rows 2 to 200, stopping at
    ' the first blank cell) and writes the scraped title, description,
    ' prerequisite and preclusion to columns B:E of the "Output" sheet,
    ' on the SAME row number as the input code.
    '
    ' NOTE(review): the 5-argument GotoModules used here is not shown alongside
    ' this procedure; it is assumed to return its results through the ByRef
    ' string arguments and to leave them untouched when the module code is
    ' invalid - confirm against its definition.
    Dim module_title As String
    Dim module_description As String
    Dim module_prereq As String
    Dim module_preclusion As String
    Dim module_code As String
    Dim i As Long

    For i = 2 To 200
        If Cells(i, 1) = "" Then Exit For
        ModuleCode.Text = Cells(i, 1)
        module_code = ModuleCode.Text

        ' Clear the previous iteration's results first, so that a lookup which
        ' yields nothing produces blank cells instead of repeating the data of
        ' the preceding module.
        module_title = vbNullString
        module_description = vbNullString
        module_prereq = vbNullString
        module_preclusion = vbNullString

        Call GotoModules(module_code, module_title, module_description, module_prereq, module_preclusion)

        ModuleTitle.Text = module_title
        ModuleDescription.Text = module_description
        ModulePrereq.Text = module_prereq
        ModulePreclude.Text = module_preclusion
        DoEvents    ' let the form repaint between lookups

        ' Write to row i (the input row). The previous code reset its row
        ' counter to 1 on every pass, so each result overwrote row 2.
        With Sheets("Output")
            .Range("B" & i) = module_title
            .Range("C" & i) = module_description
            .Range("D" & i) = module_prereq
            .Range("E" & i) = module_preclusion
        End With
    Next i

End Sub

1 个答案:

答案 0 :(得分:0)

这是重写

Option Explicit
Private Sub CommandButton4_Click() 'Test cases were ACC1002, FST4102
    ' Reads module codes from column A of the "Output" sheet (rows 2 to 200,
    ' stopping at the first empty cell), looks each one up through GotoModules
    ' using a single shared Internet Explorer instance, and writes the four
    ' returned fields to columns B:E of the same row. Rows whose lookup
    ' returns nothing usable are blanked rather than left with stale data.
    Const FIRST_ROW As Long = 2
    Const LAST_ROW As Long = 200    ' was left at 3 while testing

    Dim module_code As String
    Dim result As Variant
    Dim wsOutput As Worksheet
    Dim objIE As Object
    Dim i As Long

    ' Make sure the browser is always closed, even if a lookup fails part-way.
    On Error GoTo CleanFail

    Set wsOutput = ThisWorkbook.Worksheets("Output")
    Set objIE = CreateObject("InternetExplorer.Application")
    objIE.Visible = True

    With wsOutput
        For i = FIRST_ROW To LAST_ROW
            If IsEmpty(.Cells(i, 1)) Then Exit For

            module_code = .Cells(i, 1).Value
            result = GotoModules(module_code, objIE)

            If IsArray(result) Then
                .Range("B" & i) = result(0)
                .Range("C" & i) = result(1)
                .Range("D" & i) = result(2)
                .Range("E" & i) = result(3)
            Else
                ' GotoModules handed back no array (e.g. a cell holding an
                ' empty string): keep the row aligned but blank.
                .Range("B" & i & ":E" & i).ClearContents
            End If
        Next i
    End With

CleanExit:
    If Not objIE Is Nothing Then
        On Error Resume Next    ' the browser may already have been closed by the user
        objIE.Quit
        On Error GoTo 0
        Set objIE = Nothing
    End If
    Exit Sub

CleanFail:
    MsgBox "Module lookup stopped at row " & i & ": " & Err.Description, vbExclamation
    Resume CleanExit
End Sub

Public Function GotoModules(ByVal ModuleCode As String, ByVal objIE As Object) As Variant
    ' Navigates objIE to the module-details page for ModuleCode and extracts
    ' four fields from the page's HTML.
    '
    ' Parameters:
    '   ModuleCode - module code appended to the query string of the URL.
    '   objIE      - an already-created InternetExplorer.Application object.
    '
    ' Returns:
    '   A 4-element array: (title, description, prerequisite, preclusion).
    '   Every element is an empty string when ModuleCode is empty or when the
    '   page does not contain the text "Module Information"; an individual
    '   element is empty when its surrounding markers are not found.

    Const CELL_OPEN As String = "<TD colSpan=2>"
    Const DESCRIPTION_CELL_OPEN As String = "<TD vAlign=top colSpan=2>"
    Const ROW_CLOSE As String = "</TD></TR>"

    Dim the_module_title As String, the_module_description As String
    Dim the_module_prereq As String, the_module_preclusion As String

    ' Always hand back an array so callers can index the result safely.
    GotoModules = Array(vbNullString, vbNullString, vbNullString, vbNullString)

    If ModuleCode = vbNullString Then Exit Function

    objIE.Navigate ("https://myaces.nus.edu.sg/cors/jsp/report/ModuleDetailedInfo.jsp?acad_y=2017/2018&sem_c=2&mod_c=" & ModuleCode)

    ' Wait until the browser reports the page as fully loaded.
    Do While objIE.Busy = True Or objIE.readyState < 4
        DoEvents
    Loop

    Dim the_HTML_ToParse As String
    the_HTML_ToParse = objIE.document.Body.InnerHTML

    If InStr(the_HTML_ToParse, "Module Information") > 0 Then

        Dim HTML_Parse_Prereq As String, HTML_Parse_Preclusion As String

        ' Keep only the HTML from "Module Information" onwards, then locate the
        ' sub-sections that start at the prerequisite / preclusion labels.
        the_HTML_ToParse = GotoModules_TextFrom(the_HTML_ToParse, "Module Information")
        HTML_Parse_Prereq = GotoModules_TextFrom(the_HTML_ToParse, "Pre-requisite")
        HTML_Parse_Preclusion = GotoModules_TextFrom(the_HTML_ToParse, "Preclusion")

        ' Module title: content of the first 2-column cell.
        the_HTML_ToParse = GotoModules_TextAfter(the_HTML_ToParse, CELL_OPEN)
        the_module_title = GotoModules_TextBefore(the_HTML_ToParse, ROW_CLOSE)

        ' Module description: first top-aligned 2-column cell after the title.
        the_HTML_ToParse = GotoModules_TextAfter(the_HTML_ToParse, DESCRIPTION_CELL_OPEN)
        the_module_description = GotoModules_TextBefore(the_HTML_ToParse, ROW_CLOSE)

        ' Module prerequisite: first 2-column cell after the "Pre-requisite" label.
        the_module_prereq = GotoModules_TextBefore(GotoModules_TextAfter(HTML_Parse_Prereq, CELL_OPEN), ROW_CLOSE)

        ' Module preclusion: first 2-column cell after the "Preclusion" label.
        the_module_preclusion = GotoModules_TextBefore(GotoModules_TextAfter(HTML_Parse_Preclusion, CELL_OPEN), ROW_CLOSE)

    End If

    GotoModules = Array(the_module_title, the_module_description, the_module_prereq, the_module_preclusion)

End Function

' Returns source starting at the first occurrence of marker (marker included),
' or an empty string when marker does not occur.
Private Function GotoModules_TextFrom(ByVal source As String, ByVal marker As String) As String
    Dim pos As Long
    pos = InStr(source, marker)
    If pos > 0 Then GotoModules_TextFrom = Mid$(source, pos)
End Function

' Returns the part of source that follows the first occurrence of marker,
' or an empty string when marker does not occur.
Private Function GotoModules_TextAfter(ByVal source As String, ByVal marker As String) As String
    Dim pos As Long
    pos = InStr(source, marker)
    If pos > 0 Then GotoModules_TextAfter = Mid$(source, pos + Len(marker))
End Function

' Returns the part of source that precedes the first occurrence of marker,
' or an empty string when marker does not occur.
Private Function GotoModules_TextBefore(ByVal source As String, ByVal marker As String) As String
    Dim pos As Long
    pos = InStr(source, marker)
    If pos > 0 Then GotoModules_TextBefore = Left$(source, pos - 1)
End Function