在visual basic中保存mht文件中的所有图像

时间:2014-02-09 21:20:47

标签: vb.net image mhtml

我正在开发一个网页抓取程序(web scraper)。由于权限限制以及我的编程水平有限,无法直接在线访问页面并提取数据,所以只能先把网页保存为 mht 文件再进行处理。

我正在使用 HtmlAgilityPack 从表格中提取文本,但是页面上的表格还包含许多图像。

我尝试了许多不同的方法,但都无法从这个文件中提取图像……有人能帮忙吗?

非常感谢

''' <summary>
''' Loads the saved page "J:\table1.mht", reads the rows of the element whose
''' id is "HomeMyStudents", and copies them into the DGV grid and ListBox1.
''' </summary>
''' <param name="sender">The control that raised the Click event (unused).</param>
''' <param name="e">Event data for the Click event (unused).</param>
''' <remarks>
''' For every table row that has at least one td cell:
'''   * one grid row is added; column 0 holds the placeholder text "test" and
'''     columns 1..10 hold the InnerText of the 2nd..11th td cells;
'''   * the InnerText of the first td cell is appended to ListBox1.
''' Rows with fewer than 11 cells leave the remaining grid columns empty.
''' If the table (or its rows) cannot be found, nothing is added.
'''
''' NOTE(review): an .mht file is a MIME container rather than plain HTML; this
''' handler feeds the raw file to HtmlAgilityPack as-is, exactly as before.
''' Embedded images are stored as separate encoded MIME parts and are not
''' touched here - confirm whether a MIME-decoding step is wanted upstream.
''' </remarks>
Private Sub Button1_Click(ByVal sender As Object, ByVal e As EventArgs) Handles Button1.Click

    ' Number of grid columns filled per row (placeholder + td[2]..td[11]).
    Const ColumnCount As Integer = 11
    ' Only rows that actually contain td cells; the former tr['RowCount']
    ' predicate was a string literal (always true) and selected every row.
    Const RowXPath As String = "//*[@id='HomeMyStudents']/tbody/tr[td]"

    Dim Doc As New HtmlAgilityPack.HtmlDocument
    Doc.Load("J:\table1.mht")

    ' SelectNodes returns Nothing (not an empty collection) when no node matches.
    Dim tableRows As HtmlAgilityPack.HtmlNodeCollection = Doc.DocumentNode.SelectNodes(RowXPath)
    If tableRows Is Nothing Then
        Return
    End If

    For Each tableRow As HtmlAgilityPack.HtmlNode In tableRows
        ' Direct td children of this row; never Nothing because of the [td] filter above.
        Dim cells As HtmlAgilityPack.HtmlNodeCollection = tableRow.SelectNodes("td")

        ' Build the whole grid row first so every value lands in the row it
        ' belongs to (the old per-column loops were shifted down by one row).
        Dim values(ColumnCount - 1) As Object
        ' NOTE(review): column 0 still receives the original "test" placeholder;
        ' the first cell's text goes to ListBox1 instead, as in the original code.
        values(0) = "test"

        Dim lastColumn As Integer = System.Math.Min(cells.Count, ColumnCount) - 1
        For columnIndex As Integer = 1 To lastColumn
            values(columnIndex) = cells(columnIndex).InnerText
        Next

        DGV.Rows.Add(values)
        ListBox1.Items.Add(cells(0).InnerText)
    Next

End Sub

0 个答案:

没有答案