如何使用Selenium WebDriver和Java查找损坏的链接

时间:2014-05-01 18:34:39

标签: java selenium selenium-webdriver

我想验证网站上已损坏的链接,我正在使用此代码:

 // Declared but never read or written by any of the code shown in this snippet.
 public static int invalidLink;
    // These two instance fields are likewise not referenced by the visible code.
    String currentLink;
    String temp;

    public static void main(String[] args) throws IOException {
        // Launch The Browser
        WebDriver driver = new FirefoxDriver();
        // Enter URL
        driver.get("http://www.applicoinc.com");

        // Get all the links URL
        List<WebElement> ele = driver.findElements(By.tagName("a"));
        System.out.println("size:" + ele.size());
        boolean isValid = false;
        for (int i = 0; i < ele.size(); i++) {

            isValid = getResponseCode(ele.get(i).getAttribute("href"));
            if (isValid) {
                System.out.println("ValidLinks:" + ele.get(i).getAttribute("href"));
                driver.get(ele.get(i).getAttribute("href"));
                List<WebElement> ele1 = driver.findElements(By.tagName("a"));
                System.out.println("InsideSize:" + ele1.size());
                for (int j=0; j<ele1.size(); j++){
                    isValid = getResponseCode(ele.get(j).getAttribute("href"));
                    if (isValid) {
                        System.out.println("ValidLinks:" + ele.get(j).getAttribute("href"));
                    }
                    else{
                        System.out.println("InvalidLinks:"+ ele.get(j).getAttribute("href"));
                    }
                }

                } else {
                    System.out.println("InvalidLinks:"
                            + ele.get(i).getAttribute("href"));
                }

            }
        }
    }


    /**
     * Issues a GET request to the given address and reports whether the link
     * counts as valid, i.e. the server answered with a status other than 404.
     *
     * @param urlString the link target to probe; may be null
     * @return true when a response with a status code other than 404 was
     *         received; false for null, for anything that cannot be opened as
     *         an HTTP connection, for I/O failures, and for a 404 response
     */
    public static boolean getResponseCode(String urlString) {
        if (urlString == null) {
            // Anchors without an href yield null; there is nothing to request.
            return false;
        }
        HttpURLConnection h = null;
        try {
            URL u = new URL(urlString);
            h = (HttpURLConnection) u.openConnection();
            h.setRequestMethod("GET");
            h.connect();
            int status = h.getResponseCode();
            System.out.println(status);
            return status != 404;
        } catch (Exception e) {
            // Deliberately broad: malformed URLs, non-HTTP schemes (the cast
            // above fails) and I/O errors all mean "not a working link".
            // Report the reason instead of discarding it silently.
            System.err.println("Could not check " + urlString + ": " + e);
            return false;
        } finally {
            if (h != null) {
                h.disconnect();
            }
        }
    }

}

5 个答案:

答案 0 :(得分:2)

我会保持它返回一个int,只是让MalformedURLException成为一个特例,返回-1。

/**
 * Issues a GET request to the given address and returns the HTTP status code.
 *
 * @param urlString the link target to probe; may be null
 * @return the HTTP status code of the response, or -1 when the argument is
 *         null, is not a well-formed URL, or does not use http/https
 * @throws java.io.IOException if connecting or reading the response fails
 */
public static int getResponseCode(String urlString) throws java.io.IOException {
    if (urlString == null) {
        return -1;
    }
    URL u;
    try {
        u = new URL(urlString);
    } catch (MalformedURLException e) {
        return -1;
    }
    // Only http/https can be cast to HttpURLConnection below; other schemes
    // that have a handler (mailto:, file:, ...) would fail the cast.
    String protocol = u.getProtocol();
    if (!"http".equalsIgnoreCase(protocol) && !"https".equalsIgnoreCase(protocol)) {
        return -1;
    }
    HttpURLConnection h = (HttpURLConnection) u.openConnection();
    try {
        h.setRequestMethod("GET");
        h.connect();
        return h.getResponseCode();
    } finally {
        h.disconnect();
    }
}

编辑:看起来你坚持使用返回布尔值的方法。正如我之前所说,这种做法有其局限性,但用于演示目的应该足够了。

没有理由第二次使用您拥有的方法找到所有元素。试试这个:

// Get all the links on the current page and check each one exactly once.
List<WebElement> ele = driver.findElements(By.tagName("a"));
System.out.println("size:" + ele.size());
boolean isValid = false;
for (WebElement link : ele) {
    // Read the attribute once and reuse it for both the check and the report.
    String nextHref = link.getAttribute("href");
    isValid = getResponseCode(nextHref);
    if (isValid) {
        System.out.println("Valid Link:" + nextHref);
    } else {
        System.out.println("INVALID Link:" + nextHref);
    }
}

这是未经测试的代码,因此如果它不起作用,请提供更多详细信息,而不仅仅是说“它不起作用”:如果可能,请提供输出以及任何堆栈跟踪/错误消息。干杯

答案 1 :(得分:0)

看来,你的某些href属性包含的表达式并不是有效的URL。首先想到的办法是使用try-catch块来识别这样的URL。请尝试以下代码。

package com.automation.test;

import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.List;

import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.firefox.FirefoxDriver;

/**
 * Loads a page in Firefox and prints, for every anchor element on it, whether
 * the link target answers with something other than HTTP 404.
 */
public class Test {
    // None of these fields is read or written by the code in this class.
    public static int invalidLink;
    String currentLink;
    String temp;

    /**
     * Opens the page, collects all anchors and reports each href as valid or
     * invalid.
     *
     * @param args unused
     * @throws IOException declared by the original signature; kept for callers
     */
    public static void main(String[] args) throws IOException {
        // Launch The Browser
        WebDriver driver = new FirefoxDriver();
        try {
            // Enter Url
            driver.get("file:///home/sighil/Desktop/file");

            // Get all the links url
            List<WebElement> ele = driver.findElements(By.tagName("a"));
            System.out.println("size:" + ele.size());
            for (WebElement link : ele) {
                // One round trip to the browser per element instead of several.
                String href = link.getAttribute("href");
                if (getResponseCode(href)) {
                    System.out.println("ValidLinks:" + href);
                } else {
                    System.out.println("InvalidLinks:" + href);
                }
            }
        } finally {
            // Close the browser even when a lookup above throws.
            driver.quit();
        }
    }

    /**
     * Issues a GET request to the given address and reports whether the link
     * counts as valid, i.e. the server answered with a status other than 404.
     *
     * @param urlString the link target to probe; may be null
     * @return true when a response with a status code other than 404 was
     *         received; false for null, for anything that cannot be opened as
     *         an HTTP connection, for I/O failures, and for a 404 response
     */
    public static boolean getResponseCode(String urlString) {
        if (urlString == null) {
            // Anchors without an href yield null; there is nothing to request.
            return false;
        }
        HttpURLConnection h = null;
        try {
            URL u = new URL(urlString);
            h = (HttpURLConnection) u.openConnection();
            h.setRequestMethod("GET");
            h.connect();
            int status = h.getResponseCode();
            System.out.println(status);
            return status != 404;
        } catch (Exception e) {
            // Deliberately broad: malformed URLs, non-HTTP schemes (the cast
            // above fails) and I/O errors all mean "not a working link".
            // Report the reason instead of discarding it silently.
            System.err.println("Could not check " + urlString + ": " + e);
            return false;
        } finally {
            if (h != null) {
                h.disconnect();
            }
        }
    }

}

我修改了getResponseCode,根据url是有效(true)还是无效(false)返回布尔值。

希望这会对你有所帮助。

答案 2 :(得分:0)

在Web应用程序中,我们需要验证是否存在损坏的链接。损坏的链接是指单击后会显示“找不到页面”的链接。下面是代码:

import java.net.HttpURLConnection;
import java.net.URL;
import java.util.List;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver; 
import org.openqa.selenium.WebElement;
import org.openqa.selenium.firefox.FirefoxDriver;

/**
 * Loads a page in Firefox and prints the HTTP status of every link on it.
 */
public class VerifyLinks {

    /**
     * Opens the page, collects all anchors and checks each href.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        WebDriver driver = new FirefoxDriver();
        try {
            driver.manage().window().maximize();
            driver.get("https://www.google.co.in");
            List<WebElement> allLink = driver.findElements(By.tagName("a"));
            System.out.println("Total links are " + allLink.size());
            for (WebElement ele : allLink) {
                verifyLinkActive(ele.getAttribute("href"));
            }
        } finally {
            // Close the browser even when a lookup above throws.
            driver.quit();
        }
    }

    /**
     * Requests the given address and prints one line describing the outcome.
     * A 200 response prints the address and the response message; any other
     * status additionally prints the numeric code, so failures other than 404
     * are visible as well. Addresses that cannot be checked at all are
     * reported instead of being skipped silently.
     *
     * @param linkurl the link target to probe; may be null
     */
    public static void verifyLinkActive(String linkurl) {
        if (linkurl == null) {
            System.out.println("null - anchor has no href attribute");
            return;
        }
        HttpURLConnection httpUrlConnect = null;
        try {
            URL url = new URL(linkurl);
            httpUrlConnect = (HttpURLConnection) url.openConnection();
            httpUrlConnect.setConnectTimeout(3000);
            httpUrlConnect.connect();
            int status = httpUrlConnect.getResponseCode();
            String message = httpUrlConnect.getResponseMessage();
            if (status == HttpURLConnection.HTTP_OK) {
                System.out.println(linkurl + " - " + message);
            } else {
                System.out.println(linkurl + " - " + message + " - " + status);
            }
        } catch (Exception e) {
            // Deliberately broad: malformed URLs, non-HTTP schemes (the cast
            // above fails), timeouts and other I/O errors are all reported.
            System.out.println(linkurl + " - could not be checked: " + e);
        } finally {
            if (httpUrlConnect != null) {
                httpUrlConnect.disconnect();
            }
        }
    }
}

有关更多教程,请访问 https://www.jbktutorials.com/selenium

答案 3 :(得分:0)

步骤:
1.打开浏览器并导航到TestURL
2.抓取整个页面中的所有链接
3.检查步骤2中抓取的所有链接的HTTP状态代码(下面的代码把400及以上的状态码视为链接损坏,其余视为正常)
Selenium WebDriver Java代码:

// Open the page under test and collect every anchor element on it.
WebDriver driver = new FirefoxDriver();
driver.get("<TestURL>");
List<WebElement> total_links = driver.findElements(By.tagName("a"));
System.out.println("Total Number of links: " + total_links.size());
for (int i = 0; i < total_links.size(); i++) {
    String url = total_links.get(i).getAttribute("href");
    // -1 means "no HTTP response was obtained" (bad URL, connection failure, ...).
    int resp_Code = -1;
    DefaultHttpClient client = new DefaultHttpClient();
    try {
        HttpResponse urlresp = client.execute(new HttpGet(url));
        resp_Code = urlresp.getStatusLine().getStatusCode();
    } catch (Exception e) {
        // Keep resp_Code at -1 so the link is reported as broken below,
        // and say why instead of discarding the failure.
        System.err.println("Could not check " + url + ": " + e);
    } finally {
        // One client is created per link, so release its connections here.
        client.getConnectionManager().shutdown();
    }
    // A link with no response at all is broken too, not just 4xx/5xx answers.
    if (resp_Code < 0 || resp_Code >= 400) {
        System.out.println(url + " is a broken link");
    } else {
        System.out.println(url + " is a valid link");
    }
}

答案 4 :(得分:-1)

  // allHref collects the anchors that passed the filter below and were probed.
  List<WebElement> allHref = new ArrayList<WebElement>();
  List<WebElement> linklist = driver.findElements(By.tagName("a"));

  for (WebElement link : linklist) {
      // Read the attribute once; every use below refers to the same value.
      String href = link.getAttribute("href");
      // The null test has to come before contains(): anchors without an href
      // yield null, and calling a method on it would throw.
      if (href == null || !href.contains("https:")) {
          continue;
      }
      System.out.println(href);

      HttpURLConnection connection = (HttpURLConnection) new URL(href).openConnection();
      String response;
      try {
          connection.connect();
          response = connection.getResponseMessage();
      } finally {
          // Release the connection even when the request fails.
          connection.disconnect();
      }
      System.out.println(href + "R=e=s=p=o=n=s=e=>" + response);
      allHref.add(link);
  }