无法使用 Selenium 和 Java 从以下网站抓取表格内容

时间:2018-06-21 02:42:30

标签: java selenium selenium-webdriver web web-scraping

来自以下站点 https://www.tradingview.com/chart/EhIMW8kQ/ 页面加载后,点击“策略测试器->交易清单”标签。

您可以在其中看到一个表格,并且该表格的内容是动态变化的,有没有办法抓取该表格的内容?

1 个答案:

答案 0 :(得分:0)

是的,您可以不断滚动表格并读取其内容,直到到达表格的末尾:
通过在此处应用递归(recursion),您可以实现这一目标:

(我假设您已经到达“策略测试器->交易清单”标签。):
下面是代码:

package com.demo.core;

import java.util.List;
import java.util.Scanner;

import org.openqa.selenium.By;
import org.openqa.selenium.JavascriptExecutor;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;

/**
 * Prints the rows of the "Strategy Tester -> List of Trades" table of a TradingView chart.
 *
 * <p>The program repeatedly scrolls the last fetched row into view, re-reads the row elements
 * from the page and prints the ones that were not printed yet, until the scrollable table view
 * reports that its bottom has been reached or scrolling yields no new rows.
 */
public class TableParser {

    /** System property Selenium reads to locate the chromedriver executable. */
    private static final String DRIVER_PROPERTY = "webdriver.chrome.driver";

    /** Driver location used when {@link #DRIVER_PROPERTY} was not supplied on the command line. */
    private static final String DEFAULT_DRIVER_PATH =
            "J:\\STADIUM\\selenium-demo\\src\\main\\resources\\drivers\\chromedriver.exe";

    /** Chart page that hosts the table. */
    private static final String CHART_URL = "https://www.tradingview.com/chart/EhIMW8kQ/";

    /** CSS selector for the elements this program treats as table rows. */
    private static final String ROW_SELECTOR =
            "div.report-content.trades .report-data .table-wrap table tbody";

    /** Reports whether the element is scrolled to its bottom, tolerating sub-pixel rounding. */
    private static final String BOTTOM_REACHED_SCRIPT =
            "var el = arguments[0];"
            + " return Math.abs(el.scrollHeight - el.clientHeight - el.scrollTop) <= 1;";

    /**
     * Opens the chart, waits for the user to navigate to the trades tab, then prints the table.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Respect a -Dwebdriver.chrome.driver=... override; fall back to the bundled path.
        if (System.getProperty(DRIVER_PROPERTY) == null) {
            System.setProperty(DRIVER_PROPERTY, DEFAULT_DRIVER_PATH);
        }

        WebDriver driver = new ChromeDriver();
        try (Scanner sc = new Scanner(System.in)) {
            driver.navigate().to(CHART_URL);

            // The Scanner only holds the execution until the "Strategy Tester -> List of Trades"
            // tab has been opened manually in the browser; the entered value itself is not used.
            System.out.println("Enter any integer to continue : ");
            sc.nextInt();

            List<WebElement> rows = driver.findElements(By.cssSelector(ROW_SELECTOR));
            WebElement tableView = driver.findElement(By.className("report-content"));

            printTableDataRecursively(rows, 0, driver, tableView);
        } finally {
            // Always shut the browser and the chromedriver process down, even on failure.
            driver.quit();
        }
    }

    /**
     * Prints the text content of the given rows and of every further row that appears while
     * scrolling the table down.
     *
     * <p>The method name is kept for compatibility; the traversal itself is iterative, so the
     * call depth does not grow with the number of scroll steps.
     *
     * @param rows first list of rows; not modified by this method
     * @param count retained for backward compatibility only; the value is ignored
     * @param driver driver used to scroll and to re-read the rows
     * @param tableView scrollable table view element
     */
    public static void printTableDataRecursively(List<WebElement> rows, int count, WebDriver driver, WebElement tableView) {
        List<WebElement> batch = rows;
        while (!batch.isEmpty()) {
            // Evaluated before printing: when true, the current batch is the final one.
            boolean bottomOfTableReached = checkIfBottomOfViewReached(driver, tableView);

            String lastPrinted = null;
            for (WebElement row : batch) {
                lastPrinted = row.getAttribute("textContent");
                System.out.println(lastPrinted);
            }

            if (bottomOfTableReached) {
                return;
            }

            // Scroll to the last row of this batch so that the following rows get rendered.
            scrollToElement(driver, batch.get(batch.size() - 1));
            List<WebElement> refreshed = driver.findElements(By.cssSelector(ROW_SELECTOR));
            batch = dropAlreadyPrinted(refreshed, lastPrinted);
        }
    }

    /**
     * Returns the part of {@code refreshed} that follows the row which was printed last.
     *
     * <p>Near the end of the table the view cannot scroll far enough to bring the last printed
     * row to the top, so more than one leading row may already have been printed. Matching is
     * done on the row text and uses the first match; this assumes row texts are distinct
     * (TODO confirm for the target table). When no row matches, only the first row is dropped.
     *
     * @param refreshed rows read after scrolling
     * @param lastPrinted text content of the row printed last, may be {@code null}
     * @return a view of the rows that still have to be printed, possibly empty
     */
    private static List<WebElement> dropAlreadyPrinted(List<WebElement> refreshed, String lastPrinted) {
        if (refreshed.isEmpty()) {
            return refreshed;
        }
        if (lastPrinted != null) {
            for (int i = 0; i < refreshed.size(); i++) {
                if (lastPrinted.equals(refreshed.get(i).getAttribute("textContent"))) {
                    return refreshed.subList(i + 1, refreshed.size());
                }
            }
        }
        return refreshed.subList(1, refreshed.size());
    }

    /**
     * Checks whether a scrollable HTML element has been scrolled to its bottom.
     *
     * <p>The comparison uses {@code clientHeight} and allows a difference of one pixel, because
     * {@code scrollTop} may be fractional and {@code offsetHeight} includes borders.
     *
     * @param driver driver used to run the script
     * @param element scrollable element to inspect
     * @return true (if bottom reached) otherwise false
     */
    public static boolean checkIfBottomOfViewReached(WebDriver driver, WebElement element) {
        Object result = ((JavascriptExecutor) driver).executeScript(BOTTOM_REACHED_SCRIPT, element);
        // A null or non-Boolean result is treated as "not at the bottom" instead of throwing.
        return Boolean.TRUE.equals(result);
    }

    /**
     * Scrolls the page so that the given element becomes visible.
     *
     * @param driver driver used to run the script
     * @param element element to scroll to
     */
    public static void scrollToElement(WebDriver driver, WebElement element) {
        ((JavascriptExecutor) driver).executeScript("arguments[0].scrollIntoView();", element);
    }

}

我总共获得291行(最后3或4行包含重复的行),我认为您可以自己解决。

我的意思是,您必须滚动并获取行数据,直到到达表末尾为止。

只需运行此程序,然后在控制台中查看输出即可。

希望能帮助您实现目标。