我正在尝试从网页中抓取数据,但在抓取表格中的全部数据时遇到了麻烦。网站每页只显示25个项目,所以我的DataGridTable中目前也只有25个项目;要获取全部数据就必须翻页,并且我希望把结果输出到DataGridTable中。我想让程序按页自动把信息添加到数据网格表中,而且即使网站上的页数发生变化也能正常工作,但我很难弄清楚该如何做。我的想法是从“转到最后一页”按钮元素中读取“页数”,这样我的程序就知道要从网站上抓取多少页面。不过,如果有其他方法,也请告诉我,谢谢。
这是我现在的代码。
// Scrapes the page of #gridComponent that is currently displayed into a DataTable
// and binds it to dataGridView1.
DataTable dt = new DataTable();
var header = driver.FindElement(By.CssSelector("#gridComponent > div.k-grid-header"));
// Build one column per header cell. The original kept a cell counter and a scratch
// array here, but the counter was only incremented in a branch that could never
// run, so neither had any effect; both are removed.
foreach (var headerRow in header.FindElements(By.TagName("tr")))
{
    foreach (var headerCell in headerRow.FindElements(By.TagName("th")))
    {
        // A header cell that contains a checkbox does not become a column.
        var headerCheckboxes = headerCell.FindElements(By.CssSelector("input[type='checkbox']"));
        if (headerCheckboxes.Count == 0)
        {
            dt.Columns.Add(headerCell.Text);
        }
    }
}
var table = driver.FindElement(By.CssSelector("#gridComponent"));
foreach (var row in table.FindElements(By.TagName("tr")))
{
    var cells = row.FindElements(By.TagName("td"));
    // Rows without any <td> (such as header rows made of <th> cells) hold no data.
    // Adding them would put a blank row into the table, so skip them.
    if (cells.Count == 0)
    {
        continue;
    }
    List<string> values = new List<string>();
    // Start at index 1: the first cell of every row is skipped.
    for (int cellIndex = 1; cellIndex < cells.Count; cellIndex++)
    {
        var cell = cells[cellIndex];
        var checkboxes = cell.FindElements(By.CssSelector("input[type='checkbox']"));
        if (checkboxes.Count > 0)
        {
            // Checkbox cells are stored as "True"/"False".
            values.Add(checkboxes[0].Selected.ToString());
        }
        else
        {
            values.Add(cell.Text);
        }
    }
    dt.Rows.Add(values.ToArray());
}
dataGridView1.DataSource = dt;
// Clicks the third entry of the pager's page list.
// NOTE(review): this only changes the page shown in the browser; nothing in this
// snippet reads the rows of the newly shown page.
driver.FindElement(By.CssSelector("#gridComponent > div.k-pager-wrap.k-grid-pager.k-widget.k-floatwrap > ul > li:nth-child(3)")).Click();
}
This is the table that I am trying to scrape from.
这是上图所示的以下元素的代码。
<a href="#" aria-label="Go to the last page" title="Go to the last page" class="k-link k-pager-nav k-pager-last" data-page="3" tabindex="-1"><span class="k-icon k-i-arrow-end-right"></span></a>
非常感谢您。
答案 0 :(得分:0)
您可能需要考虑索引信息“ 64个项目中的1-25”,因为它可以很好地指示总页数。
批次= 1-25,即每页25个项目 项目总数= 64
页数 = 向上取整(64/25) = 3
PS:一个更好的选择是直接读取“转到最后一页”按钮的 data-page 属性,这样无需任何计算就能得到总页数。
答案 1 :(得分:0)
我终于得到了答案。
/// <summary>
/// Reads the data rows currently rendered inside <paramref name="table"/>.
/// </summary>
/// <param name="table">Element whose descendant <c>tr</c> rows are read.</param>
/// <returns>
/// One list of cell values for each row that contains <c>td</c> cells. The first
/// cell of every row is skipped; a cell that contains a checkbox yields the
/// checkbox state ("True"/"False"), any other cell yields its text.
/// </returns>
private List<List<string>> GetRecords(IWebElement table)
{
    List<List<string>> rows = new List<List<string>>();
    foreach (var row in table.FindElements(By.TagName("tr")))
    {
        var cells = row.FindElements(By.TagName("td"));
        // Rows without any <td> (such as header rows made of <th> cells) hold no
        // data. The original returned an empty record for each of them, which
        // ended up as a blank row once the records were added to a DataTable.
        if (cells.Count == 0)
        {
            continue;
        }
        List<string> cols = new List<string>();
        // Start at index 1: the first cell of every row is skipped.
        for (int cellIndex = 1; cellIndex < cells.Count; cellIndex++)
        {
            var cell = cells[cellIndex];
            var checkboxes = cell.FindElements(By.CssSelector("input[type='checkbox']"));
            if (checkboxes.Count > 0)
            {
                // Checkbox cells are stored as "True"/"False".
                cols.Add(checkboxes[0].Selected.ToString());
            }
            else
            {
                cols.Add(cell.Text);
            }
        }
        rows.Add(cols);
    }
    return rows;
}
/// <summary>
/// Logs in, opens the customer list, reads every page of the grid and shows the
/// collected rows in <c>dataGridView1</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    //Configure to Hide CMD
    var chromeDriverService = ChromeDriverService.CreateDefaultService();
    chromeDriverService.HideCommandPromptWindow = true;
    //Configure to Hide Chrome
    ChromeOptions option = new ChromeOptions();
    option.AddArgument("--headless");
    // Chrome is shown as written. To apply the service and options configured
    // above, build the driver with the commented-out constructor instead.
    //IWebDriver driver = new ChromeDriver(chromeDriverService, option);
    IWebDriver driver = new ChromeDriver();
    try
    {
        driver.Url = "**************";
        driver.Manage().Window.Maximize();
        driver.SwitchTo().DefaultContent();
        //Log-in
        driver.FindElement(By.Id("username")).SendKeys("*****");
        driver.FindElement(By.Id("password")).SendKeys("******" + OpenQA.Selenium.Keys.Enter);
        //Entering Access Code
        driver.FindElement(By.Id("password")).SendKeys("*******");
        driver.FindElement(By.Id("accesscode")).SendKeys("********" + OpenQA.Selenium.Keys.Enter);
        //go to CustomerList
        driver.Navigate().GoToUrl("***********");
        driver.Navigate().GoToUrl("*****************");
        // Give the page time to load before reading the grid.
        waitOnPage(2);

        // Build one column per header cell. The original kept a cell counter and a
        // scratch array here, but the counter was only incremented in a branch that
        // could never run, so neither had any effect; both are removed.
        DataTable dt = new DataTable();
        var header = driver.FindElement(By.CssSelector("#gridComponent > div.k-grid-header"));
        foreach (var headerRow in header.FindElements(By.TagName("tr")))
        {
            foreach (var headerCell in headerRow.FindElements(By.TagName("th")))
            {
                // A header cell that contains a checkbox does not become a column.
                var headerCheckboxes = headerCell.FindElements(By.CssSelector("input[type='checkbox']"));
                if (headerCheckboxes.Count == 0)
                {
                    dt.Columns.Add(headerCell.Text);
                }
            }
        }

        // Rows of the page that is shown first.
        var table = driver.FindElement(By.CssSelector("#gridComponent"));
        List<List<string>> records = GetRecords(table);

        // The "go to the last page" button carries the number of the last page in
        // its data-page attribute. Fall back to a single page if it is missing or
        // not a number.
        var lastPageStr = table.FindElement(By.ClassName("k-pager-last")).GetAttribute("data-page");
        int lastPage;
        if (!int.TryParse(lastPageStr, out lastPage))
        {
            lastPage = 1;
        }

        // Walk the remaining pages with the pager's "next" button. The original
        // also clicked an entry of the page-number list before this loop; that
        // moved the grid off the first page without reading the page it landed on,
        // so the loop skipped a page and ran out of pages early.
        for (int page = 2; page <= lastPage; page++)
        {
            driver.FindElement(By.CssSelector("#gridComponent > div.k-pager-wrap.k-grid-pager.k-widget.k-floatwrap > a:nth-child(4) > span")).Click();
            waitOnPage(2);
            records.AddRange(GetRecords(table));
        }

        // Add all rows to DT
        foreach (var row in records)
        {
            // An empty record would only add a blank row to the grid.
            if (row.Count == 0)
            {
                continue;
            }
            dt.Rows.Add(row.ToArray());
        }
        dataGridView1.DataSource = dt;
    }
    finally
    {
        // All values were copied out as strings, so the browser is no longer
        // needed. Without this every click leaves a Chrome process running.
        driver.Quit();
    }
}