嗨,我想知道是否有人可以帮我解决困扰我的事情。
我正在尝试从页面上的菜单中提取所有子链接:
http://groceries.asda.com/asda-webstore/landing/home.shtml#/cat/1215135760597
我尝试过使用Selenium并按XPath进行搜索,但最底层子项的链接似乎只有在您位于其上一级菜单时才会出现,而把每个选项都走一遍的话,总共有2000多种可能。我想要的不仅是一种节省时间的方法,还希望它能避免在疲惫地反复右键单击、复制链接地址并粘贴到文本文件的过程中出现遗漏和错误!
最低子链接如下:http://groceries.asda.com/asda-webstore/landing/home.shtml#/shelf/910001112609/1/so_false
此链接适用于香蕉(这是我目前所处位置的恰当描述!)
链接中包含“shelf”字样。谢谢!
答案 0 :(得分:0)
嗯,这确实不是一项简单的任务,但有解决办法。下面是一个C#测试示例,它会遍历您网站菜单中的所有链接,并输出没有子节点的菜单项(即各层级中最末一级)的href文本。请自行移植到您使用的语言 =)
using System;
using NUnit.Framework;
using OpenQA.Selenium;
using OpenQA.Selenium.Chrome;
/// <summary>
/// NUnit fixture that drives a Chrome browser through the site's primary navigation menu
/// and writes the href of every menu entry without children to standard output.
/// </summary>
[TestFixture]
public class WalkLink
{
    private const string url = @"http://groceries.asda.com/asda-webstore/landing/home.shtml";

    // How many times a single menu entry is retried before it is skipped.
    private const int MaxAttempts = 10;

    private ChromeDriver driver;

    /// <summary>Starts a browser on the landing page before each test.</summary>
    [SetUp]
    public void SetUp()
    {
        driver = CreateBrowser(url);
    }

    /// <summary>Shuts the browser down after each test.</summary>
    [TearDown]
    public void TearDown()
    {
        if (driver != null)
        {
            driver.Dispose();
            driver = null;
        }
    }

    /// <summary>
    /// Clicks every top-level navigation entry in turn and walks the menu tree below it.
    /// </summary>
    [Test]
    public void WalkingThrough()
    {
        const string navItemsSelector = "#primary-nav-items > li";
        const string rootItemsSelector = "#primary-nav-items > li > div > div > div > ul > div > li";
        Console.WriteLine("We are here");

        var menusCount = driver.FindElements(By.CssSelector(navItemsSelector)).Count;
        for (int i = 0; i < menusCount; i++)
        {
            // Look the entries up again on every pass: references captured before a click
            // may no longer be valid once the page has re-rendered the menu.
            var navItems = driver.FindElements(By.CssSelector(navItemsSelector));
            if (i >= navItems.Count)
            {
                Console.Error.WriteLine("Top-level menu shrank to " + navItems.Count + " entries; stopping at index " + i);
                break;
            }

            navItems[i].Click();
            Walk(rootItemsSelector);
        }
    }

    /// <summary>
    /// Visits every element matched by <paramref name="selector"/>, printing the href of
    /// entries without children and descending into entries that have them.
    /// </summary>
    /// <param name="selector">CSS selector matching the menu entries of the current level.</param>
    void Walk(string selector)
    {
        var count = driver.FindElements(By.CssSelector(selector)).Count;
        for (int i = 0; i < count; i++)
        {
            // Only descend when the entry was actually opened and reports children;
            // a leaf or an entry that could not be clicked has nothing new below it.
            if (TryVisit(selector, i, count))
            {
                Walk(selector + " > ul > div > li");
            }
        }
    }

    /// <summary>
    /// Clicks the entry at <paramref name="index"/>, retrying up to <see cref="MaxAttempts"/>
    /// times, and prints its href when it has no children.
    /// </summary>
    /// <param name="selector">CSS selector matching the menu entries of the current level.</param>
    /// <param name="index">Position of the entry within the matched elements.</param>
    /// <param name="expectedCount">Number of entries matched when the level was first read.</param>
    /// <returns>
    /// <c>true</c> when the entry was clicked and has children; <c>false</c> when it is a leaf
    /// or could not be visited within the retry budget.
    /// </returns>
    private bool TryVisit(string selector, int index, int expectedCount)
    {
        Exception lastError = null;
        for (var attempt = 0; attempt < MaxAttempts; attempt++)
        {
            try
            {
                // Fresh lookup on every attempt so a re-rendered menu does not leave us
                // holding outdated element references.
                var items = driver.FindElements(By.CssSelector(selector));

                // A plain comparison instead of an NUnit assertion: a differing count is a
                // reason to retry, not a test verdict.
                if (items.Count != expectedCount)
                {
                    continue;
                }

                var item = items[index];
                item.Click();
                if (HasChild(item))
                {
                    return true;
                }

                //HERE you can save href to your file
                Console.WriteLine(Href(item));
                return false;
            }
            catch (WebDriverException ex)
            {
                // Browser-side failures (element gone, not found, not clickable yet) are retried.
                lastError = ex;
            }
            catch (InvalidOperationException ex)
            {
                // NOTE(review): some WebDriver versions appear to report click failures with
                // this type instead of WebDriverException - kept so those are still retried.
                lastError = ex;
            }
        }

        // Report on stderr so the list of hrefs on stdout stays clean.
        Console.Error.WriteLine(
            "Skipped entry " + index + " of '" + selector + "' after " + MaxAttempts + " attempts"
            + (lastError != null ? ": " + lastError.Message : " (entry count kept changing)"));
        return false;
    }

    /// <summary>
    /// Tells whether the first anchor inside <paramref name="element"/> carries a class
    /// name containing "hasChild".
    /// </summary>
    /// <param name="element">Menu entry to inspect.</param>
    /// <returns><c>true</c> when the anchor's class attribute contains "hasChild".</returns>
    public bool HasChild(IWebElement element)
    {
        var link = element.FindElement(By.CssSelector("a"));
        var cssClass = link.GetAttribute("class");

        // An anchor without a class attribute yields null; treat it as "no children".
        return cssClass != null && cssClass.Contains("hasChild");
    }

    /// <summary>Returns the href attribute of the first anchor inside <paramref name="element"/>.</summary>
    /// <param name="element">Menu entry to read.</param>
    /// <returns>The value of the anchor's href attribute.</returns>
    public string Href(IWebElement element)
    {
        var link = element.FindElement(By.CssSelector("a"));
        return link.GetAttribute("href");
    }

    /// <summary>
    /// Launches Chrome, maximizes the window and navigates to <paramref name="urlString"/>.
    /// </summary>
    /// <param name="urlString">Absolute URL to open.</param>
    /// <returns>The started driver; the caller is responsible for disposing it.</returns>
    internal static ChromeDriver CreateBrowser(string urlString)
    {
        // Parse first so a malformed URL fails before a browser process is started.
        var target = new Uri(urlString);
        var browser = new ChromeDriver();
        try
        {
            browser.Manage().Window.Maximize();
            browser.Navigate().GoToUrl(target);
            return browser;
        }
        catch
        {
            // Do not leave a browser process behind when start-up fails half-way.
            browser.Dispose();
            throw;
        }
    }
}