using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using OpenQA.Selenium;
using OpenQA.Selenium.Support;
using OpenQA.Selenium.Chrome;
using System.Text.RegularExpressions;
using System.IO;
using OpenQA.Selenium.Support.UI;
namespace FlippaSearch
{
public partial class Form1 : Form
{
static IWebDriver driverGC;
public Form1()
{
driverGC = new ChromeDriver(@"Z:\Justin\Documents\Visual Studio 2015\chromedriver_win32");
driverGC.Navigate().GoToUrl("https://flippa.com/websites/starter-sites?sitetype=blog&uniques_per_month_min=1000");
InitializeComponent();
}
private void button1_Click(object sender, EventArgs e)
{
List<IWebElement> starterSites = new List<IWebElement>();
List<String> myStarterSites = new List<string>();
IWait<IWebDriver> wait = new WebDriverWait(driverGC, TimeSpan.FromSeconds(30.00));
var numPages = (driverGC.FindElement(By.XPath("//*[@id='searchBody']/div[1]/div[1]/h2/span")).Text);
double numberPages = int.Parse(Regex.Match(numPages, @"\d+", RegexOptions.RightToLeft).Value);
numberPages = Math.Ceiling(numberPages / 50);
int j;
for (int i = 1; i <= numberPages; i++)
{
driverGC.Manage().Timeouts().PageLoad = TimeSpan.FromSeconds(30);
var mySites = driverGC.FindElements(By.CssSelector(".ListingResults___listingResult"));
int size = 1;
for (j = 0; j < 3; ++j)
{
driverGC.Manage().Timeouts().PageLoad = TimeSpan.FromSeconds(30);
mySites = driverGC.FindElements(By.CssSelector(".ListingResults___listingResult"));
size = mySites.Count();
driverGC.Manage().Timeouts().PageLoad = TimeSpan.FromSeconds(30);
String siteLink = " ";
siteLink = mySites[j].FindElement(By.CssSelector(".ListingResults___listingResultLink")).GetAttribute("href");
driverGC.Navigate().GoToUrl(siteLink);
driverGC.Manage().Timeouts().ImplicitWait = TimeSpan.FromSeconds(10);
//testing tables
int row_tr = 5;
int Column_td = 3;
String CellValue;
String newCellValue;
String cellValueChange;
try
{
driverGC.FindElement(By.XPath("/html/body/div[3]/div[1]/div[1]/div[5]/div[1]/div/table[1]/tbody"));
for (int k = 1; k <= row_tr; k++)
{
for (int b = 1; b <= Column_td; b++)
{
CellValue = driverGC.FindElement(By.XPath("/html/body/div[3]/div[1]/div[1]/div[5]/div[1]/div/table[1]/tbody/tr[" + k + "]/td[" + b + "]")).Text.ToString();
if (CellValue == "Organic Search")
{
String mySiteName = driverGC.FindElement(By.XPath("/html/body/div[3]/div[1]/div[1]/div[1]/div[1]/h1")).Text.ToString();
newCellValue = driverGC.FindElement(By.XPath("/html/body/div[3]/div[1]/div[1]/div[5]/div[1]/div/table[1]/tbody/tr[" + k + "]/td[3]")).Text.ToString();
cellValueChange = Regex.Replace(newCellValue, @"[%\s]", string.Empty);
float organicSearch = float.Parse(cellValueChange);
if (organicSearch >= 50)
{
myStarterSites.Add(mySiteName);
myStarterSites.Add(CellValue);
myStarterSites.Add(newCellValue);
Console.WriteLine(mySiteName);
Console.WriteLine(CellValue);
Console.WriteLine(newCellValue);
}
}
}
}
}
catch (OpenQA.Selenium.NoSuchElementException)
{
}
//testing tables
driverGC.Navigate().Back();
//write shit to file
siteLink = "";
driverGC.Manage().Timeouts().PageLoad = TimeSpan.FromSeconds(30);
}
j = 0;
//mySites = null;
try
{
driverGC.FindElement(By.XPath("//*[@id='searchBody']/div[2]/div[2]/div/a[3]")).Click();
//driverGC.Manage().Timeouts().PageLoad = TimeSpan.FromSeconds(30);
}
catch (ElementNotVisibleException)
{
Console.WriteLine("No more pages");
}
}
using (StreamWriter writer = new StreamWriter(@"C:\Users\Justin\Desktop\newFile.txt"))
{
foreach (string s in myStarterSites)
{
writer.WriteLine(s + " ");// Writes in next line
writer.WriteLine(" ");
}
}
//MessageBox.Show("End");
driverGC.Quit();
Application.Exit();
}
}
}
启动后,代码将在第一页上正常运行。它抓住元素,将它们放在一个列表中,然后我可以收集所需的信息。一旦它遍历第一页(我将它设置为3严格用于测试原因,因此它更快)它将点击下一页,然后下一页加载,循环再次开始。问题是,当第二页加载时,第一页上的第一个元素再次被检查,然后在检查之后,它将转到第二页上的第二个元素,然后从那里继续。我的问题是,如何让它扫描第二页上的第一个元素,而不是再次重做第一页上的第一个元素?我尝试过使用Waits,但似乎无法使用任何东西。
提前致谢。
答案 0 :(得分:0)
可能是计时问题,下一个循环迭代在实际加载下一页之前开始。您可以在页码上使用显式等待和ExpectedConditions
TextToBePresentInElementLocated
,以确保当前页面符合您的实际需要。您可以通过使用Pagination___activeLink
类
driverGC.FindElement(By.XPath("//*[@id='searchBody']/div[2]/div[2]/div/a[3]")).Click(); // go to next page
WebDriverWait wait = new WebDriverWait(driver, TimeSpan.FromSeconds(5));
wait.Until(ExpectedConditions.TextToBePresentInElementLocated(By.ClassName("Pagination___activeLink"), (i + 1).ToString()));