我想单击所有带有文本“300”的链接。我的网页抓取代码单击每个链接的速度非常慢。我将链接存储在列表中,然后逐个单击它们。
我先统计链接的数量用于索引,然后使用 for (int pos = 0; pos < elements; pos++) 循环按索引逐个单击。
class Program
{
private static IWebDriver driver = null;
/// <summary>
/// Entry point: starts an Internet Explorer session, maximizes the window,
/// opens the target page and runs <c>clickAllLinks("300")</c> against it.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    driver = new InternetExplorerDriver();
    try
    {
        driver.Manage().Window.Maximize();
        driver.Navigate().GoToUrl("https://arbitrary.com/");
        clickAllLinks("300");
    }
    finally
    {
        // Quit() closes the browser windows and shuts down the driver server;
        // without it both are left running when the program ends or throws.
        driver.Quit();
    }
}
/// <summary>
/// Counts the links inside the <c>div</c> with class "data" whose text
/// contains <paramref name="tagName"/>, then clicks the link at each
/// position, looking it up again through <c>getElementWithIndex</c> every time.
/// </summary>
/// <param name="tagName">Partial link text used to select the links.</param>
public static void clickAllLinks(string tagName)
{
    IWebElement dataContainer = driver.FindElement(By.XPath("//div[@class='data']"));
    int linkCount = dataContainer.FindElements(By.PartialLinkText(tagName)).Count;

    int index = 0;
    while (index < linkCount)
    {
        IWebElement link = getElementWithIndex(By.PartialLinkText(tagName), index);
        link.Click();
        // Scraping step, currently disabled:
        //fetchdata();
        index++;
    }
}
/// <summary>
/// Looks up the <c>div</c> with class "data" again and returns the element at
/// position <paramref name="pos"/> among its descendants matched by
/// <paramref name="by"/>.
/// </summary>
/// <param name="by">Locator applied inside the "data" container.</param>
/// <param name="pos">Zero-based index into the matched elements.</param>
/// <returns>The matched element at the requested position.</returns>
/// <exception cref="System.ArgumentOutOfRangeException">
/// <paramref name="pos"/> is negative or not smaller than the number of matches.
/// </exception>
public static IWebElement getElementWithIndex(By by, int pos)
{
    // The lookup is repeated on every call rather than cached; presumably so
    // that references are fresh after a previous click changed the page - confirm.
    IWebElement container = driver.FindElement(By.XPath("//div[@class='data']"));

    // Honor the caller's locator. It used to be hard-coded to
    // By.PartialLinkText("300"), which silently ignored the 'by' argument.
    IList<IWebElement> matches = container.FindElements(by);
    return matches[pos];
}
// Scrapes table data from the page the browser is currently on: reads
// driver.Url, prints it, downloads that URL with a new HttpClient, parses the
// HTML into an HtmlDocument and collects the <td> children of every <tr> under
// the table with class 'classname'.
// NOTE(review): 'async void' gives callers nothing to await and no way to
// observe exceptions - consider returning Task.
// NOTE(review): the method body is cut off in this listing (no closing brace
// is visible), so anything after the Receipt assignment is unknown.
public static async void fetchdata()
{
string currentURL = driver.Url; // current browser URL as a string
Console.WriteLine("URL: " + currentURL);
// The page is fetched again over HTTP instead of being read from the browser.
// NOTE(review): presumably this request does not share the browser session
// (cookies/login) - confirm the downloaded HTML matches what the browser shows.
var httpclient = new HttpClient();
var html = await httpclient.GetStringAsync(currentURL);
var htmldoc = new HtmlDocument();
htmldoc.LoadHtml(html); // parse the HTML string into a document
// NOTE(review): presumably SelectSingleNode returns null when no matching
// table exists, which would make the chained calls below throw - confirm.
List<List<string>> Receipt =
htmldoc.DocumentNode.SelectSingleNode("//table[@class='classname']")
// Intended shape per the original note: TABLE -> TR -> TD -> InnerText.
// Rows without any <td> (e.g. header rows) are skipped.
// NOTE(review): the projection below collects the <td> elements themselves;
// no InnerText is taken, which looks inconsistent with List<List<string>> - confirm.
.Descendants("tr")
.Where(tr => tr.Elements("td").Count() > 0)
.Select(tr => tr.Elements("td")
.ToList())
.ToList();
答案 0 :(得分:0)
这是 clickAllLinks 方法的简化版本。它减少了当前方法的开销(不必要地获取和存储元素,这可能会影响执行速度)。
/// <summary>
/// Clicks, one after another, every <c>a</c> element inside the <c>div</c>
/// with class "data" whose text contains <paramref name="tagName"/>.
/// Each link is located by its XPath position immediately before it is clicked.
/// </summary>
/// <param name="tagName">
/// Text the link must contain. It is embedded in a single-quoted XPath string
/// literal, so it must not itself contain a single quote.
/// </param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="tagName"/> is <c>null</c>.
/// </exception>
public static void clickAllLinks(string tagName)
{
    if (tagName == null)
    {
        throw new ArgumentNullException(nameof(tagName));
    }

    // Quote the text so contains() compares against a string literal; unquoted,
    // non-numeric text would be parsed as an XPath node name.
    string linkXPath = "//div[@class='data']//a[contains(., '" + tagName + "')]";

    int elements = driver.FindElements(By.XPath(linkXPath)).Count;

    // XPath positions are 1-based, so the last link is at index 'elements'.
    for (int pos = 1; pos <= elements; pos++)
    {
        // FindElement (singular) returns the one element that can be clicked;
        // the parentheses make [pos] index the whole result set.
        driver.FindElement(By.XPath("(" + linkXPath + ")[" + pos + "]")).Click();
        //fetchdata();
    }
}