我有下面这段代码,用于从网页中抓取指定的文本:
// Downloads the page at http://www.modern-railways.com, prints every capture
// of regexTitle, then every capture of regexDate, and finally waits for Enter.
static void Main(string[] args)
{
    using (var webClient = new WebClient())
    {
        var pageContent = webClient.DownloadString("http://www.modern-railways.com");
        var regexTitle = new Regex(@"<span class='articleTitle'>(.+?)</span>");
        var regexDate = new Regex(@"class='summaryText' data-ajax='false'>(.+?)</a></p><div");

        // First pass: one line per title match (capture group 1).
        MatchCollection titleMatches = regexTitle.Matches(pageContent);
        for (int i = 0; i < titleMatches.Count; i++)
        {
            var articleTitle = titleMatches[i].Groups[1].Value;
            Console.WriteLine(articleTitle);
        }

        // Second pass: one line per date match (capture group 1).
        MatchCollection dateMatches = regexDate.Matches(pageContent);
        for (int i = 0; i < dateMatches.Count; i++)
        {
            var articleDate = dateMatches[i].Groups[1].Value;
            Console.WriteLine(articleDate);
        }

        // Keep the console window open until the user presses Enter.
        Console.ReadLine();
    }
}
现在它会先打印所有的 articleTitle,然后再打印所有的 articleDate。我怎样才能让输出交替进行:第一行是 articleTitle,第二行是对应的 articleDate,依此类推?
答案 0 :(得分:1)
您可以使用 LINQ 的 Zip 方法:
// Pair each title match with the date match at the same position.
// Note: Zip stops at the end of the shorter sequence, so if the two patterns
// produce a different number of matches, the unpaired extras are not printed.
var titles = regexTitle.Matches(pageContent).Cast<Match>();
var dates = regexDate.Matches(pageContent).Cast<Match>();
var source = titles.Zip(dates, (t, d) => new { Title = t, Date = d });

foreach (var item in source)
{
    // Group 1 holds the text captured by the (.+?) part of each pattern.
    var articleTitle = item.Title.Groups[1].Value;
    var articleDate = item.Date.Groups[1].Value;
    Console.WriteLine(articleTitle);
    Console.WriteLine(articleDate);
}