您好，我正在使用 WatiN 框架收集网址。我想遍历所有分页，收集其中的链接并保存到一个文本文件中，但不知道如何实现翻页功能。这是我的代码：
using System.Text;
using System.Threading.Tasks;
using WatiN.Core;
namespace magicbricks
{
/// <summary>
/// Console program that opens a 99acres search-results page in Internet Explorer
/// through WatiN and prints the links found on that page.
/// </summary>
class Class1
{
    /// <summary>
    /// Opens the search page, writes every link whose URL contains "b" to the console,
    /// then waits for Enter before the browser is released.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    // System types are fully qualified so this snippet does not depend on a
    // "using System;" directive being present in the file.
    [System.STAThread]
    static void Main(string[] args)
    {
        // The using block disposes the browser object even if navigation or
        // link enumeration throws.
        using (IE ie = new IE())
        {
            ie.GoTo("http://www.99acres.com/property-in-chennai-ffid?search_type=QS&search_location=HP&lstAcn=HP_R&src=CLUSTER&isvoicesearch=N&keyword_suggest=chennai%20%28all%29%3B&fullSelectedSuggestions=chennai%20%28all%29&strEntityMap=W3sidHlwZSI6ImNpdHkifSx7IjEiOlsiY2hlbm5haSAoYWxsKSIsIkNJVFlfMzIsIFBSRUZFUkVOQ0VfUywgUkVTQ09NX1IiXX1d&texttypedtillsuggestion=chennai&refine_results=Y&Refine_Localities=Refine%20Localities&action=%2Fdo%2Fquicksearch%2Fsearch&suggestion=CITY_32%2C%20PREFERENCE_S%2C%20RESCOM_R");

            foreach (var currLink in ie.Links)
            {
                string url = currLink.Url;

                // Guard against links that report no URL, which would otherwise
                // fail with a NullReferenceException on Contains.
                if (url != null && url.Contains("b"))
                {
                    System.Console.WriteLine(url);
                }
            }

            // Keep the console (and the browser) open until the user presses Enter.
            System.Console.ReadLine();
        }
    }
}
}
任何帮助将不胜感激。
答案 0 :(得分:0)
下面是一个可行的解决方案，我在你的代码基础上做了修改。
using System;
using WatiN.Core;
namespace magicbricks
{
/// <summary>
/// Console program that walks the paginated 99acres search results with WatiN:
/// it scans the first page, learns the page-URL pattern and the highest page
/// number from the pagination links, then visits every following page.
/// </summary>
static class Class1
{
    // Last link seen whose name contains "nextbutton"; recorded but not used for navigation.
    private static WatiN.Core.Link _nextPageElement;

    // Part of a pagination URL that precedes the page number.
    private static string _firstPartOfAddress = "";

    // Part of a pagination URL from the "?" onwards.
    private static string _lastPartOfAddress = "";

    // Highest page number found in any pagination link so far.
    private static int _maxPageCounter = 0;

    /// <summary>
    /// Scans the first results page, then every further page up to and including
    /// the highest page number discovered, and finally waits for a key press.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    [STAThread]
    static void Main(string[] args)
    {
        // Dispose the browser object even if navigation or scanning throws.
        using (IE ie = SetUpBrowser())
        {
            EnterFirstWebpage(ie);
            ie.WaitForComplete();
            LookFoAllLinks(ie);

            // Page 1 was scanned above. The bound is inclusive so that the last
            // page (number == _maxPageCounter) is visited as well.
            for (int i = 2; i <= _maxPageCounter; i++)
            {
                string nextAddress = AssembleNextPageWebAddress(i);
                Console.WriteLine("----------------------------Next Page {0}---------------------------", i);
                Console.WriteLine(nextAddress);
                EnterNextWebpageUrl(ie, nextAddress);
                LookFoAllLinks(ie);
            }

            Console.ReadKey();
        }
    }

    /// <summary>Creates the Internet Explorer instance used for the whole run.</summary>
    /// <returns>A new WatiN <c>IE</c> browser object.</returns>
    private static IE SetUpBrowser()
    {
        return new IE();
    }

    /// <summary>Navigates the browser to the first search-results page.</summary>
    /// <param name="ie">Browser to navigate.</param>
    private static void EnterFirstWebpage(IE ie)
    {
        ie.GoTo("http://www.99acres.com/property-in-chennai-ffid?search_type=QS&search_location=HP&lstAcn=HP_R&src=CLUSTER&isvoicesearch=N&keyword_suggest=chennai%20%28all%29%3B&fullSelectedSuggestions=chennai%20%28all%29&strEntityMap=W3sidHlwZSI6ImNpdHkifSx7IjEiOlsiY2hlbm5haSAoYWxsKSIsIkNJVFlfMzIsIFBSRUZFUkVOQ0VfUywgUkVTQ09NX1IiXX1d&texttypedtillsuggestion=chennai&refine_results=Y&Refine_Localities=Refine%20Localities&action=%2Fdo%2Fquicksearch%2Fsearch&suggestion=CITY_32%2C%20PREFERENCE_S%2C%20RESCOM_R");
    }

    /// <summary>Navigates to the given URL and waits for the page to finish loading.</summary>
    /// <param name="ie">Browser to navigate.</param>
    /// <param name="url">Address of the page to open.</param>
    private static void EnterNextWebpageUrl(IE ie, string url)
    {
        ie.GoTo(url);
        ie.WaitForComplete();
    }

    /// <summary>
    /// Prints every link on the current page whose URL contains "b" and, for those
    /// links, records the "next" button and the pagination information.
    /// </summary>
    /// <param name="ie">Browser showing the page to scan.</param>
    private static void LookFoAllLinks(IE ie)
    {
        const string nextPageUrl = "http://www.99acres.com/property-in-chennai-ffid-page-";

        foreach (var currLink in ie.Links)
        {
            string url = currLink.Url;
            if (url == null || !url.Contains("b"))
            {
                continue;
            }

            Console.WriteLine(url);

            // Explicit null checks replace the former empty catch blocks, which
            // hid the NullReferenceException raised for links without a name.
            string linkName = currLink.Name;
            if (linkName != null && linkName.Contains("nextbutton"))
            {
                _nextPageElement = currLink;
            }

            string nameAttribute = currLink.GetAttributeValue("name");
            if (nameAttribute != null && nameAttribute.Contains("page"))
            {
                RememberPaginationLink(url, nextPageUrl.Length);
            }
        }
    }

    // Splits a pagination URL into prefix, page number and query suffix, and stores
    // them only when the URL has the expected shape and the page number parses.
    private static void RememberPaginationLink(string url, int prefixLength)
    {
        if (url.Length <= prefixLength)
        {
            return;
        }

        string remainder = url.Substring(prefixLength);
        int queryStart = remainder.IndexOf('?');
        if (queryStart < 0)
        {
            return;
        }

        int pageNumber;
        if (!int.TryParse(remainder.Substring(0, queryStart), out pageNumber))
        {
            return;
        }

        // Commit the address parts only after validation, so a malformed link
        // cannot leave the fields half-updated.
        _firstPartOfAddress = url.Substring(0, prefixLength);
        _lastPartOfAddress = remainder.Substring(queryStart);

        if (pageNumber > _maxPageCounter)
        {
            _maxPageCounter = pageNumber;
        }
    }

    /// <summary>
    /// Builds the address of a results page by placing the page number between the
    /// stored address prefix and query suffix.
    /// </summary>
    /// <param name="pageNumber">Number of the page to address.</param>
    /// <returns>The assembled page URL.</returns>
    private static string AssembleNextPageWebAddress(int pageNumber)
    {
        return _firstPartOfAddress + pageNumber + _lastPartOfAddress;
    }
}
}
一些解释:
变量 _maxPageCounter 保存需要抓取链接的最大页码。我们通过下面这段代码得到它：
// Only links whose "name" attribute contains "page" are treated as pagination links.
if (currLink.GetAttributeValue("name").Contains("page"))
{
// Take the first nextPageUrl.Length characters of the link URL
// (presumably everything before the page number - assumes the URL starts with nextPageUrl).
_firstPartOfAddress = currLink.Url.Substring(0, nextPageUrl.Length);
// Drop those leading characters; the remainder is expected to begin with the page number.
tmpUrl = currLink.Url.Remove(0,nextPageUrl.Length);
// Keep everything from the "?" onwards as the trailing part of the address.
_lastPartOfAddress = tmpUrl.Substring(tmpUrl.IndexOf("?"));
// Reduce tmpUrl to the text in front of the "?", i.e. the page-number text.
tmpUrl = tmpUrl.Substring(0,tmpUrl.IndexOf("?"));
// Parse the page number; currentpageCounter becomes 0 when the text is not a valid integer.
int.TryParse(tmpUrl, out currentpageCounter);
// Remember the highest page number seen so far.
if (currentpageCounter > _maxPageCounter)
{
_maxPageCounter = currentpageCounter;
currentpageCounter = 0;
}
}
之后，我们只需拼接出下一页的地址，就可以逐页遍历所有页面。
/// <summary>
/// Builds the address of a results page by joining the stored address prefix,
/// the page number and the stored address suffix.
/// </summary>
/// <param name="pageNumber">Number of the page to address.</param>
/// <returns>The assembled page URL.</returns>
private static string AssembleNextPageWebAddress(int pageNumber)
{
    string pageAddress = string.Concat(_firstPartOfAddress, pageNumber, _lastPartOfAddress);
    return pageAddress;
}
我们也可以改用“下一页”按钮，在循环中反复点击它来翻页。希望对你有所帮助。