假设我有一个像这样的表:
$this
/**
 * Holds two values. Any constructor argument that compares loosely equal
 * to NULL (for example null, '' or 0) is replaced by the string ' something'.
 */
class A {
    /** @var mixed First stored value. */
    private $a;

    /** @var mixed Second stored value. */
    private $b;

    /**
     * Stores both arguments (or the fallback string) and echoes the first stored value.
     *
     * @param mixed $a First value; replaced by ' something' when loosely equal to NULL.
     * @param mixed $b Second value; replaced by ' something' when loosely equal to NULL.
     */
    public function __construct($a, $b) {
        if ($a == NULL) {
            $this->a = ' something';
        } else {
            $this->a = $a;
        }

        if ($b == NULL) {
            $this->b = ' something';
        } else {
            $this->b = $b;
        }

        echo($this->a);
    }

    /** Returns the first stored value. */
    private function geta() {
        return $this->a;
    }

    /** Returns the second stored value. */
    private function getb() {
        return $this->b;
    }

    /** Echoes the first stored value followed by the second one. */
    public function something() {
        echo $this->geta();
        echo $this->getb();
    }
}
// Build an A from two non-empty strings; the constructor itself echoes the first value ("hi").
$o = new A('hi','something');
// Echoes both stored values, so the script prints "hihisomething" in total.
$o->something();
我应该编辑
<table class="MyClass" width="100%" cellspacing="0" cellpadding="0">
<thead>
<tr>
<th class="releaseCol">Name</th>
<th class="typeCol">Type</th>
</tr>
</thead>
<tbody>
<tr>
<td><a href="https://www.somescrappypage.com/x/x/x/644892" class="demo">one</a></td>
<td class="demo">Demo</td>
</tr>
<tr>
<td><a href="https://www.somescrappypage.com/x/x/x/6876" class="other">two</a></td>
<td class="other">Compilation</td>
<tr>
<td><a href="https://www.somescrappypage.com/x/x/x/8440" class="album">three</a></td>
<td class="album">Full-length</td>
<tr>
<tr>
<td><a href="https://www.somescrappypage.com/x/x/x/610225" class="single">four</a></td>
<td class="single">Single</td>
</tr>
</tbody>
</table>
所以它还包括第一个单元格的
var doc = new HtmlAgilityPack.HtmlDocument
{
OptionFixNestedTags = true,
OptionCheckSyntax = true,
OptionAutoCloseOnEnd = true
};
// Parse the markup held in `html` into the document configured above.
doc.LoadHtml(html);

// Locate the table once, then walk its rows explicitly.
var myTable = doc.DocumentNode.SelectSingleNode("//table[@class='MyClass']");

// One inner list per data row, holding the trimmed text of each <td>.
List<List<string>> parsedTbl = new List<List<string>>();
foreach (var row in myTable.Descendants("tr").Skip(1)) // Skip(1): skip the table header row
{
    var cells = row.Elements("td").ToList();

    // Only rows with at least two cells are kept.
    if (cells.Count <= 1)
    {
        continue;
    }

    parsedTbl.Add(cells.Select(cell => cell.InnerText.Trim()).ToList());
}

foreach (var r in parsedTbl)
{
    // Open question from the asker: how to include the href here as well?
    Console.WriteLine($"{r[0]} {r[1]} ");
}
我应该如何修改下面这一行,使结果还包含第一个单元格中链接的 href?
.Select(td => td.InnerText.Trim())
答案 0 :(得分:1)
已经过测试,可以正常工作。
// Parser options are kept as in the question; they are presumably there to cope with the
// unbalanced <tr> tags in the sample markup (see the HtmlAgilityPack docs for exact semantics).
var doc = new HtmlAgilityPack.HtmlDocument
{
    OptionFixNestedTags = true,
    OptionCheckSyntax = true,
    OptionAutoCloseOnEnd = true
};
doc.LoadHtml(html);

// SelectSingleNode returns null when nothing matches; fail with a clear message
// instead of a NullReferenceException somewhere down the LINQ chain.
var table = doc.DocumentNode.SelectSingleNode("//table[@class='MyClass']");
if (table == null)
{
    throw new InvalidOperationException("No table with class 'MyClass' was found.");
}

// Keep the <td> nodes themselves (not just their text) so that attributes of
// nested elements, such as the anchor's href, remain reachable.
List<List<HtmlAgilityPack.HtmlNode>> parsedTbl =
    table.Descendants("tr")
        .Skip(1) // skip the table header row
        .Where(tr => tr.Elements("td").Count() > 1)
        .Select(tr => tr.Elements("td").ToList())
        .ToList();

foreach (var r in parsedTbl)
{
    // Look the anchor up by tag name instead of relying on FirstChild: FirstChild is a
    // text node as soon as any whitespace precedes <a>, and a text node has no href.
    // A missing anchor or missing href yields an empty string rather than an exception.
    var anchor = r[0].Elements("a").FirstOrDefault();
    var href = anchor?.Attributes["href"]?.Value ?? string.Empty;

    Console.WriteLine($"{href} {r[0].InnerText} {r[1].InnerText}");
}
**输出**
https://www.somescrappypage.com/x/x/x/644892 one Demo
https://www.somescrappypage.com/x/x/x/6876 two Compilation
https://www.somescrappypage.com/x/x/x/8440 three Full-length
https://www.somescrappypage.com/x/x/x/610225 four Single
答案 1 :(得分:0)
这段代码不算优雅,但应该足以帮你入门:
/// <summary>
/// Console entry point: reads <c>index.html</c>, locates the table with class
/// <c>MyClass</c> and prints, for every data row, the link target, the link text
/// and the text of the second cell.
/// </summary>
class Program
{
    /// <summary>
    /// Parses the table and writes one line per data row to standard output.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        var html = System.IO.File.ReadAllText(@"index.html");
        var doc = new HtmlAgilityPack.HtmlDocument
        {
            OptionFixNestedTags = true,
            OptionCheckSyntax = true,
            OptionAutoCloseOnEnd = true
        };
        doc.LoadHtml(html);

        // SelectSingleNode returns null when no node matches; report that instead of
        // crashing with a NullReferenceException on the next call.
        var table = doc.DocumentNode.SelectSingleNode("//table[@class='MyClass']");
        if (table == null)
        {
            Console.Error.WriteLine("No table with class 'MyClass' was found in index.html.");
            return;
        }

        var results = table
            .Descendants("tr")
            .Skip(1) // To skip the table header row
            // Materialize the cells once per row instead of re-enumerating them for every field.
            .Select(tr => tr.Elements("td").ToList())
            .Where(cells => cells.Count > 1)
            .Select(cells =>
            {
                // The link lives in the first cell; it may be absent, so every access is null-safe.
                var anchor = cells[0].Elements("a").FirstOrDefault();
                return new Result
                {
                    link = anchor?.Attributes["href"]?.Value,
                    inner = anchor?.InnerText,
                    name = cells[1].InnerText
                };
            })
            .ToList();

        foreach (var result in results)
        {
            Console.WriteLine($"Link: {result.link} InnerText: {result.inner} Name: {result.name}");
        }
    }
}
/// <summary>
/// Plain data holder for the values extracted from one table row.
/// </summary>
class Result
{
    /// <summary>Value of the <c>href</c> attribute of the anchor in the row's first cell.</summary>
    public string link { get; set; }
    /// <summary>Inner text of the anchor in the row's first cell.</summary>
    public string inner { get; set; }
    /// <summary>Inner text of the row's second cell.</summary>
    public string name { get; set; }
}
}