使用jsoup获取验证码图像

时间:2015-02-19 23:39:52

标签: java jsoup captcha

我正在尝试开发一个基于 Java GUI 的小应用程序:它从我学校的教务网站获取验证码图片,要求用户输入用户名、密码和验证码,并在登录后显示页面内容。但是我卡在了登录这一步:提交表单后,服务器返回的响应是:

alert('Please enter correct code.'); window.history.go(-1);

代码

/** Cookies returned by the landing page; sent along with the captcha image request. */
public Map<String, String> cookies;

/**
 * Requests the portal landing page, stores the cookies of that response in
 * {@code cookies}, then downloads the captcha image using those cookies and
 * saves it as {@code abc.jpg} on drive F:.
 *
 * <p>Only the cookies of the first (landing page) response are stored; any
 * cookies returned by the image request are not recorded.
 *
 * @throws Exception if either HTTP request fails or the image cannot be written
 */
public void downloadCaptcha() throws Exception {
    Connection.Response response = Jsoup.connect("https://academics.ddn.upes.ac.in/upes/")
            .timeout(300000)
            .userAgent("Mozilla/5.0")
            .method(Connection.Method.GET)
            .execute();
    cookies = response.cookies();

    Connection.Response resultImageResponse = Jsoup.connect("https://academics.ddn.upes.ac.in/upes/modules/create_image.php")
            .cookies(cookies)
            // The body is saved as raw bytes below, so the content type is not restricted.
            .ignoreContentType(true)
            .method(Connection.Method.GET)
            .timeout(30000)
            .execute();

    // try-with-resources closes the stream even when the write fails.
    try (FileOutputStream out = new FileOutputStream(new java.io.File("F:\\abc.jpg"))) {
        out.write(resultImageResponse.bodyAsBytes());
    }
    System.out.println("Captcha Fetched");
}

上面的方法用于下载验证码;下面的方法用于提交登录表单:

/**
 * Posts the login form to the portal's index.php with the given captcha text,
 * replaces {@code cookies} with the cookies of the login response, prints those
 * cookies, and writes the parsed response document to {@code response.html}
 * on drive F:.
 *
 * <p>NOTE(review): this method reads {@code cookies}, {@code username} and
 * {@code password}, which are not declared in this snippet; being static, it
 * presumably expects them to be static members of the enclosing class — confirm.
 *
 * @param captacha the captcha text typed by the user
 * @throws Exception if the HTTP request fails or the response cannot be written
 */
public static void getData(String captacha) throws Exception {
    Connection.Response response = Jsoup.connect("https://academics.ddn.upes.ac.in/upes/index.php")
            .userAgent("Mozilla/5.0")
            .cookies(cookies)
            .data("username", username)
            .data("passwd", password)
            .data("txtCaptcha", captacha)
            .data("submit", "Login")
            .data("option", "login")
            .data("op2", "login")
            .data("lang", "english")
            .data("return", "https://academics.ddn.upes.ac.in/upes/index.php?option=com_content&task=view&id=53&Itemid=6420")
            .data("message", "0")
            .data("j1643f05a0c7fc7910424fb3fc4fbbb6f", "1")
            // jsoup treats a timeout of zero as "wait indefinitely".
            .timeout(0)
            .method(Connection.Method.POST)
            .execute();
    cookies = response.cookies();
    System.out.println(response.cookies());

    Document doc = response.parse();
    // try-with-resources closes both writers even when the write fails.
    try (FileWriter fr = new FileWriter("F:\\response.html");
         PrintWriter pw = new PrintWriter(fr)) {
        pw.println(doc.toString());
    }
}

response.cookies() 的输出为 {PHPSESSID=ai0r017bmb55gv0m4ikeu6jfc6, 61c78a27855d239ae8682ff6befaa989=5ae2e5baf548bc293c943d3416e7d400}

网站为https://academics.ddn.upes.ac.in/upes/index.php

请指出我的错误。

1 个答案:

答案 0 :(得分:2)

您需要进行两项更改才能使代码正常工作:

1 - 您需要获取第二次调用(图像下载)返回的cookie并将其添加到之前的cookie中。

2 - 字段 "j1643f05a0c7fc7910424fb3fc4fbbb6f" 看起来非常可疑:实际上该字段是动态变化的,您需要从登录页面的表单中读取隐藏的 input 字段,并随表单一起提交。

3(额外) - 本例中并不需要,但有些服务器会在您没有发送某些请求头(例如 Accept、Accept-Encoding、Accept-Language……)时拒绝请求。

当我使用您的代码进行这些更改时,我得到:

<script>alert('Incorrect username or password. Please try again.'); window.history.go(-1);</script> 

当然,我没有有效的用户名/密码;我想您使用正确的凭据就能得到所需的页面。

包含必要更改的代码是:

/**
 * Console demo of a captcha-protected login performed with jsoup.
 *
 * <p>Flow: load the landing page and collect its cookies and form inputs,
 * download the captcha image with those cookies, read the captcha text from
 * standard input, then post the collected form fields together with the
 * user name, password and captcha.
 */
public class SO_28619161 {

    /** Cookies accumulated over all requests made so far; sent with every later request. */
    public Map<String, String> cookies;
    // Placeholder credentials; replace with real ones before running.
    private String username = "u";
    private String password = "p";

    /**
     * Loads the landing page, collects the inputs of the forms on it, and
     * downloads the captcha image to {@code abc.jpg} in the working directory.
     *
     * <p>The cookies of both the landing page response and the image response
     * are stored in {@link #cookies}.
     *
     * @return the name/value pairs of the named {@code input} elements found
     *         inside forms on the landing page
     * @throws Exception if an HTTP request fails or the image cannot be written
     */
    public HashMap<String,String> downloadCaptcha() throws Exception {
        Connection.Response response = Jsoup.connect("https://academics.ddn.upes.ac.in/upes/")
                .timeout(300000)
                .userAgent("Mozilla/5.0")
                .method(Connection.Method.GET)
                .execute();

        // Copy the cookies into a map we own, so that merging further cookies
        // below does not modify the map held by the response object.
        cookies = new HashMap<String, String>(response.cookies());

        // Collect the form inputs, including hidden ones, so they can be posted back.
        Document doc = response.parse();
        Elements fields = doc.select("form input");
        HashMap<String,String> formFields = new HashMap<String, String>();
        for (Element field : fields) {
            String name = field.attr("name");
            // An input without a name is never submitted by a browser, and an
            // empty key is not a usable form field name.
            if (name.isEmpty()) {
                continue;
            }
            formFields.put(name, field.attr("value"));
        }

        Connection.Response resultImageResponse = Jsoup.connect("https://academics.ddn.upes.ac.in/upes/modules/create_image.php")
                .cookies(cookies)
                // The body is saved as raw bytes below, so the content type is not restricted.
                .ignoreContentType(true)
                .method(Connection.Method.GET)
                .timeout(30000)
                .execute();

        // The cookies set by the image request are needed for the login as well.
        cookies.putAll(resultImageResponse.cookies());

        // try-with-resources closes the stream even when the write fails.
        try (FileOutputStream out = new FileOutputStream(new java.io.File("abc.jpg"))) {
            out.write(resultImageResponse.bodyAsBytes());
        }

        System.out.println("Captcha Fetched");

        return formFields;
    }

    /**
     * Posts the given form fields to the login URL, merges the cookies of the
     * response into {@link #cookies}, prints the response cookies and the
     * parsed document, and writes the document to {@code response.html} in the
     * working directory.
     *
     * <p>{@link #cookies} must already be populated, e.g. by
     * {@link #downloadCaptcha()}.
     *
     * @param formFields the complete set of fields to submit
     * @throws Exception if the HTTP request fails or the response cannot be written
     */
    public void getData(HashMap<String, String> formFields) throws Exception {
        Connection conn = Jsoup.connect("https://academics.ddn.upes.ac.in/upes/index.php")
                .userAgent("Mozilla/5.0 (Windows NT 6.1; WOW64; rv:35.0) Gecko/20100101 Firefox/35.0")
                // Not necessary for this site, but these extra headers won't hurt.
                .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
                .header("Accept-Encoding", "gzip, deflate")
                .header("Accept-Language", "es-ES,es;q=0.8,en-US;q=0.5,en;q=0.3")
                .header("Host", "academics.ddn.upes.ac.in")
                .header("Referer", "https://academics.ddn.upes.ac.in/upes/index.php")
                .cookies(cookies)
                // jsoup treats a timeout of zero as "wait indefinitely".
                .timeout(0)
                .method(Connection.Method.POST);

        // Send every collected form field.
        conn.data(formFields);

        Connection.Response response = conn.execute();
        // Merge instead of replace: the response only lists cookies it sets,
        // and the cookies collected earlier must not be lost.
        cookies.putAll(response.cookies());
        System.out.println(response.cookies());

        Document doc = response.parse();
        String html = doc.toString();
        // try-with-resources closes both writers even when the write fails.
        try (FileWriter fr = new FileWriter("response.html");
             PrintWriter pw = new PrintWriter(fr)) {
            pw.println(html);
        }
        System.out.println(html);
    }

    /**
     * Runs the whole flow: fetches the captcha, reads the captcha text from
     * standard input, and submits the login form.
     *
     * @throws IllegalStateException if standard input ends before a line is read
     * @throws Exception if any request or file operation fails
     */
    private void run() throws Exception {
        HashMap<String, String> formFields = downloadCaptcha();

        BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
        String captcha = br.readLine();
        if (captcha == null) {
            // readLine() returns null at end of input; a null form value is not usable.
            throw new IllegalStateException("No captcha text was provided on standard input");
        }

        // Add the user name, password and captcha to the fields taken from the page.
        formFields.put("username", username);
        formFields.put("passwd", password);
        formFields.put("txtCaptcha", captcha.trim());

        getData(formFields);
    }

    /**
     * Program entry point.
     *
     * @param args ignored
     * @throws Exception if the login flow fails
     */
    public static void main(String[] args) throws Exception {
        SO_28619161 main = new SO_28619161();
        main.run();
    }

}