以编程方式登录网站返回登录HTML

时间:2017-03-25 06:57:31

标签: java html

  

我有以下Java程序,它应该以编程方式登录我的学生网站并返回成绩簿的HTML。但是,当我运行它时,我返回的是实际登录站点的HTML:

/**
 * First attempt from the question: sends the login form fields straight to the
 * gradebook URL and prints whatever body the server returns.
 */
public class Scraper {

// Form body sent with the request; the bracketed parts are placeholders for real credentials.
static String formData = "j_username=[username here]&j_password=[password here]";
// Gradebook page the program wants to read; it is also the address the form data is sent to.
static String link = "https://parents.mtsd.k12.nj.us/genesis/parents?tab1=studentdata&tab2=gradebook&tab3=weeklysummary&studentid=100916&action=form";


/** Sends {@code formData} to {@code link} and prints the returned document. */
public static void main (String[] args){

   String display = postData(link,formData);
   System.out.print(display);


}

/**
 * Writes {@code data} as the request body for {@code url} and returns the response body.
 * When the response status is 301, 302 or 303, a second connection is opened to the
 * {@code Location} header (carrying the {@code Set-Cookie} value as its {@code Cookie}
 * header) and that connection's body is returned instead.
 *
 * Every exception is only printed to standard output, so a failed step leaves a
 * null reference behind that a later line dereferences.
 *
 * @param url  address the request is sent to
 * @param data request body, written as UTF-8
 * @return the response body decoded as UTF-8, or {@code null} if reading it failed
 */
public static String postData (String url, String data){

    URL link = null;
    HttpURLConnection connection = null;
    StringBuffer stringBuffer = new StringBuffer();
    DataOutputStream dataOutputStream = null;
    String document = null;

    try {
        link = new URL(url);
    }catch (Exception e){System.out.print(e);}

    try {
        connection = (HttpURLConnection) link.openConnection();
    }catch (Exception e){System.out.print(e);}

    // Browser-like request headers. No request method is set explicitly in this version.
    connection.setRequestProperty("Accept","text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
    connection.setRequestProperty("User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36");
    // NOTE(review): this advertises compressed responses, but nothing below decompresses the body - confirm the server does not compress.
    connection.setRequestProperty("Accept-Encoding","gzip, deflate, br");
    connection.setRequestProperty("Accept-Language","en-US,en;q=0.8");
    // The first request carries an empty Cookie header, i.e. no session.
    connection.setRequestProperty("Cookie","");
    connection.setDoInput(true);
    connection.setDoOutput(true);
    // NOTE(review): with automatic redirects enabled, the manual 3xx branch below is presumably rarely reached - confirm.
    connection.setInstanceFollowRedirects(true);

    //setCookie(connection);

    //post data
    String postString = data;
    stringBuffer.append(postString);

    try {
        connection.connect();
    }catch (Exception e){System.out.print(e);}

    try {
        dataOutputStream = new DataOutputStream(connection.getOutputStream());
    }catch (Exception e){System.out.print(e);}

    // The form data is written here; the stream is never closed in this method.
    try {
        IOUtils.write(stringBuffer.toString(),dataOutputStream,"UTF-8");
    }catch (Exception e){System.out.print(e);}

    //handle redirects
    try {
        if(connection.getResponseCode() == HttpURLConnection.HTTP_MOVED_TEMP || connection.getResponseCode() == HttpURLConnection.HTTP_MOVED_PERM || connection.getResponseCode() == HttpURLConnection.HTTP_SEE_OTHER){
            String redirectURL = connection.getHeaderField("Location");
            // NOTE(review): getHeaderField yields a single value; if the server sends several
            // Set-Cookie headers, the others (e.g. the session id) are presumably lost here - confirm.
            String cookie = connection.getHeaderField("Set-Cookie");

            URL redURL = null;

            try {
                redURL = new URL(redirectURL);
            }catch (Exception e){System.out.print(e);}

            // Replace the original connection with one to the redirect target, repeating the same headers.
            connection = (HttpURLConnection)redURL.openConnection();
            connection.setRequestProperty("Accept","text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
            connection.setRequestProperty("User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36");
            connection.setRequestProperty("Accept-Encoding","gzip, deflate, br");
            connection.setRequestProperty("Accept-Language","en-US,en;q=0.8");
            // The raw Set-Cookie value (attributes included) is passed back as the Cookie header.
            connection.setRequestProperty("Cookie",cookie);
            connection.setDoInput(true);
            connection.setDoOutput(true);
            connection.setInstanceFollowRedirects(true);

        }
    }catch (Exception e){System.out.print(e);}

    InputStream inputStream = null;

    // Read the body of whichever connection is current (the original or the redirect target).
    try {

        inputStream = connection.getInputStream();

        document = IOUtils.toString(inputStream,"UTF-8");

    } catch (Exception e){System.out.print(e);}

    return document;
}

/** Empty placeholder; its only call site in this listing is commented out. */
public static void setCookie(HttpURLConnection httpURLConnection){

}


}
  

我最初尝试把首个POST请求直接发送到浏览器“检查元素”的网络选项卡中显示的请求URL(https://parents.mtsd.k12.nj.us/genesis/j_security_check),但这只会返回错误,而不会返回HTML数据。任何帮助我都非常感谢,因为这是我第一次尝试这类事情。

     

更新:在尝试登录过程后,我注意到我获得的cookie只包含"lastvisit=95FD925038EF488AA22719B64FB5C4A3",而缺少"JSESSION_ID"。我不确定这是否就是导致问题的原因。

     

更新#2:我按照提供的建议修改了代码,但得到的仍然是登录页面的HTML,而不是成绩数据页面。另外,我添加了print语句来显示获得的cookie,并注意到"JsessionID" cookie缺失,我只得到了"lastvisited" cookie。

/**
 * Second attempt from the question ("update #2"): a GET to the gradebook page to obtain a
 * cookie, a POST of the form data to j_security_check, then a final GET whose body is returned.
 */
public class Scraper {

// Form body sent to the login endpoint; "user&pass" is a placeholder for the real form data.
static String formData = "user&pass";
// Gradebook page the program wants to read.
static String link = "https://parents.mtsd.k12.nj.us/genesis/parents?tab1=studentdata&tab2=gradebook&tab3=weeklysummary&studentid=100916&action=form";


/** Runs the three-request sequence against {@code link} and prints the returned document. */
public static void main (String[] args){

   String display = postData(link,formData);
   System.out.print(display);


}

/**
 * Performs three requests in sequence and returns the body of the last one:
 * a GET to {@code url}, a POST of {@code data} to the hard-coded j_security_check
 * address, and a GET to the hard-coded gradebook address with {@code data} appended.
 *
 * Every exception is only printed to standard output, so a failed step leaves a
 * null reference behind that a later line dereferences.
 *
 * @param url  address of the first GET request
 * @param data form data written as the POST body (UTF-8) and also appended to the final URL
 * @return body of the final GET decoded as UTF-8, or {@code null} if reading it failed
 */
public static String postData (String url, String data){

    URL link = null;
    HttpURLConnection connection = null;
    StringBuffer stringBuffer = new StringBuffer();
    DataOutputStream dataOutputStream = null;
    String document = null;

    try {
        link = new URL(url);
    }catch (Exception e){System.out.print(e);}

    try {
        connection = (HttpURLConnection) link.openConnection();
        connection.setRequestMethod("GET");
    }catch (Exception e){System.out.print(e);}

    connection.setRequestProperty("Accept","text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
    connection.setRequestProperty("User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36");
    // NOTE(review): this advertises compressed responses, but nothing below decompresses the body - confirm the server does not compress.
    connection.setRequestProperty("Accept-Encoding","gzip, deflate, br");
    connection.setRequestProperty("Accept-Language","en-US,en;q=0.8");
    connection.setRequestProperty("Cookie","");
    connection.setDoInput(true);
    connection.setDoOutput(true);
    //connection.setInstanceFollowRedirects(true);

    //setCookie(connection);

    // There is no explicit connect(); reading this response header is presumably what sends the GET.
    // NOTE(review): getHeaderField yields a single value; if the server sends several Set-Cookie
    // headers, the others (e.g. the session id) are not captured here - confirm against the output below.
    String cookie = connection.getHeaderField("Set-Cookie"); //get cookies for session


    //try {
    //    connection.connect();
    //}catch (Exception e){System.out.print(e);}

    //try {
    //    dataOutputStream = new DataOutputStream(connection.getOutputStream());
    //}catch (Exception e){System.out.print(e);}

    //try {
    //    IOUtils.write(stringBuffer.toString(),dataOutputStream,"UTF-8");
    //}catch (Exception e){System.out.print(e);}

    //handle redirects
    try {

            //post data
            String postString = data;
            stringBuffer.append(postString);

            URL redURL = null;

            try {
                redURL = new URL("https://parents.mtsd.k12.nj.us/genesis/j_security_check");
            }catch (Exception e){System.out.print(e);}

            // Second request: POST to the login endpoint, reusing the variable that held the first connection.
            connection = (HttpURLConnection)redURL.openConnection();
            connection.setRequestMethod("POST");
            connection.setRequestProperty("Accept","text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
            connection.setRequestProperty("User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36");
            connection.setRequestProperty("Accept-Encoding","gzip, deflate, br");
            connection.setRequestProperty("Accept-Language","en-US,en;q=0.8");
            // The raw Set-Cookie value from the first response (attributes included) is sent back as the Cookie header.
            connection.setRequestProperty("Cookie",cookie);
            connection.setDoInput(true);
            connection.setDoOutput(true);
            connection.setInstanceFollowRedirects(true);

            //connection.connect();

    }catch (Exception e){System.out.print(e);}

    InputStream inputStream = null;

    String cookie2 = null;

    try {
            dataOutputStream = new DataOutputStream(connection.getOutputStream());
        }catch (Exception e){System.out.print(e);}

        try {
            // Write the form data as the POST body; the stream is never closed in this method.
            IOUtils.write(stringBuffer.toString(),dataOutputStream,"UTF-8");

            // NOTE(review): this echoes the submitted form data, credentials included, to standard output.
            System.out.println(stringBuffer.toString());

            // cookie2 is only printed at the end of the method; it is never sent with a later request.
            cookie2 = connection.getHeaderField("Set-Cookie"); //get cookies for session

        }catch (Exception e){System.out.print(e);}

    URL fLink = null;

    try {
        // NOTE(review): the form data is appended directly after "action=form" with no '&' separator,
        // so it ends up fused into the last query parameter of the URL - confirm this is intended.
        fLink = new URL("https://parents.mtsd.k12.nj.us/genesis/parents?tab1=studentdata&tab2=gradebook&tab3=weeklysummary&studentid=100916&action=form" + stringBuffer.toString());
    }catch (Exception e){System.out.print(e);}

    HttpURLConnection conn = null;

    try {
         conn = (HttpURLConnection) fLink.openConnection();
        conn.setRequestMethod("GET");
    }catch (Exception e){System.out.print(e);}

    conn.setRequestProperty("Accept","text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
    conn.setRequestProperty("User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36");
    conn.setRequestProperty("Accept-Encoding","gzip, deflate, br");
    conn.setRequestProperty("Accept-Language","en-US,en;q=0.8");
    // The final request sends the cookie from the first GET, not cookie2 from the login response.
    conn.setRequestProperty("Cookie",cookie);
    conn.setDoInput(true);
    conn.setDoOutput(true);



        // Third request: fetch the page and decode its body as UTF-8.
        try {

            conn.connect();
            inputStream = conn.getInputStream();

        document = IOUtils.toString(inputStream,"UTF-8");

    } catch (Exception e){System.out.print(e);}

    // Diagnostic output of the two captured Set-Cookie values.
    System.out.println("cookie -- " + cookie);
    System.out.println("cookie2 -- " + cookie2);


    return document;
}
}
  

我的cookie打印语句的输出如下(用于实验目的)

cookie -- lastvisit=A1753DA7F2454A03B58DF8CBD39C22C4; Expires=Tue,   27-Mar-2018 18:27:55 GMT
cookie2 -- null

enter image description here

6 个答案:

答案 0 :(得分:0)

这个GET URL返回的确实就是您看到的那段HTML。如果您想要特定的HTML,可以改用另一个真正返回所需HTML的端点。

答案 1 :(得分:0)

一些建议:

  1. 登录请求必须是POST请求,而不是GET请求。
  2. 您需要提供一个包含登录POST请求的cookie。因此,您可能希望首先向服务器发出get请求,并在后续POST中使用Set-Cookie响应值。

答案 2 :(得分:0)

此站点使用Java Servlet身份验证机制。当您尝试获取受密码保护的任何页面时,服务器会在内部发出返回登录页面内容的转发。在您发布有效的用户名/密码后,它会将您转发到请求的URL。

请注意,这不是重定向,它由应用程序服务器在内部管理,客户端无法看到服务器端发生的情况。

要使其正常工作,您需要向要检索的页面发出GET。服务器以登录页面内容和会话ID作为cookie进行响应。您必须保存会话ID并在发布到表单操作时发送它。

由于我没有有效的凭据来测试,我不能保证这会起作用。使用无效凭据时,服务器会发出若干重定向,并最终更换会话,可能是因为原会话已失效并被重新创建。

请使用有效凭据尝试此操作,并告诉我它是否有效。如果成功登录时存在重定向,则可能需要禁用自动重定向并以编程方式执行,检查会话ID更改。根据应用程序服务器的不同,出于安全考虑,它可能会更改会话ID。

也就是说,当您发布有效凭证时,这可能会起作用。

import java.io.BufferedReader;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;

/**
 * Logs in to a site protected by servlet form authentication in two steps:
 * a GET to the protected page to collect the cookies the server hands out,
 * followed by a POST of the credentials to the login action with those cookies.
 * The body of each response is printed to standard output.
 */
public class Scraper {

    private static final String USERNAME = "user@test.com";
    private static final String PASSWORD = "secret";
    private static final String GET_URL  = "https://parents.mtsd.k12.nj.us/genesis/parents?tab1=studentdata&tab2=gradebook&tab3=weeklysummary&studentid=100916&action=form";
    private static final String POST_URL = "https://parents.mtsd.k12.nj.us/genesis/j_security_check";

    /** Charset used for form encoding, the request body and response decoding. */
    private static final String CHARSET = "UTF-8";

    public static void main(String[] args) {
        String cookies = doGet(GET_URL);
        doPost(POST_URL, cookies);
    }

    /**
     * Sends the initial GET request, prints the returned page and collects the
     * cookies sent by the server.
     *
     * Only the {@code name=value} part of each {@code Set-Cookie} header is kept
     * (attributes such as {@code Path} or {@code Expires} are dropped), and the
     * pairs are joined with {@code "; "} so the result can be used directly as
     * the value of a {@code Cookie} request header.
     *
     * @param getURL URL to get
     * @return the formatted cookies, or an empty string when the server sent
     *         none or the request failed
     */
    public static String doGet(String getURL) {
        StringBuilder formattedCookies = new StringBuilder();
        try {
            URL url = new URL(getURL);
            HttpURLConnection conn = (HttpURLConnection) url.openConnection();
            try (InputStream is = conn.getInputStream()) {
                // The header map has no entry at all when the server sets no cookie.
                List<String> cookies = conn.getHeaderFields().get("Set-Cookie");
                if (cookies != null) {
                    for (String cookie : cookies) {
                        if (formattedCookies.length() > 0) {
                            formattedCookies.append("; ");
                        }
                        // Split on ';' alone: the space after it is optional on the wire.
                        formattedCookies.append(cookie.split(";", 2)[0].trim());
                    }
                }
                System.out.println("\n\nGET OUTPUT");
                printContent(is);
            }
        } catch (Exception e) {
            System.out.println("GET failed: " + e);
        }
        return formattedCookies.toString();
    }

    /**
     * Posts the login form parameters and prints the page the server answers with.
     * Both the user name and the password are URL-encoded, so characters such as
     * {@code &}, {@code =} or {@code +} in either value cannot corrupt the form body.
     *
     * @param postURL URL to post to
     * @param cookies value for the {@code Cookie} request header, as returned by
     *                {@link #doGet(String)}; may be empty
     */
    public static void doPost(String postURL, String cookies) {
        try {
            String postData = String.format("j_username=%s&j_password=%s",
                URLEncoder.encode(USERNAME, CHARSET), URLEncoder.encode(PASSWORD, CHARSET));
            URL url = new URL(postURL);
            HttpURLConnection conn = (HttpURLConnection) url.openConnection();
            conn.setRequestMethod("POST");
            conn.setDoOutput(true);
            conn.setInstanceFollowRedirects(true);
            conn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
            if (cookies != null && !cookies.isEmpty()) {
                conn.setRequestProperty("Cookie", cookies);
            }
            try (OutputStreamWriter out = new OutputStreamWriter(conn.getOutputStream(), CHARSET)) {
                out.write(postData);
            }
            try (InputStream is = conn.getInputStream()) {
                System.out.println("\n\nPOST OUTPUT");
                printContent(is);
            }
        } catch (Exception e) {
            System.out.println("POST failed: " + e);
        }
    }

    /**
     * Prints the stream line by line to standard output, decoding it as UTF-8.
     * The caller remains responsible for closing {@code is}.
     *
     * @param is stream to print
     * @throws IOException if reading from the stream fails
     */
    public static void printContent(InputStream is) throws IOException {
        BufferedReader reader = new BufferedReader(new InputStreamReader(is, CHARSET));
        String line;
        while ((line = reader.readLine()) != null) {
            System.out.println(line);
        }
    }

}

答案 3 :(得分:0)

您正在尝试从连接中检索 Set-Cookie 标头。如果查看响应头就会发现,该标头有两个条目:一个是 lastvisit(上次访问)条目,另一个是 JSessionID 条目。

在您的Cookie检索中, JSessionID 条目将被忽略。将条目检索为List并正确设置。

    // Read every Set-Cookie response header as a list instead of a single value.
    List<String> cookies = connection.getHeaderFields().get("Set-Cookie");
    for (String cookie : cookies) {
        // Keep only the text before the first ';' (the name=value pair) and add it as a Cookie request header.
        // NOTE(review): the same `connection` is read from and written to here; presumably the
        // request property is meant for the follow-up connection, as in the full listing - confirm.
        connection.addRequestProperty("Cookie", cookie.split(";", 2)[0]);
    }

编辑:尝试以下代码。它给了我未知的用户名和密码。

import java.io.DataOutputStream;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.List;

import org.apache.commons.io.IOUtils;

/**
 * Fetches a page that sits behind servlet form authentication.
 *
 * The sequence is: GET the protected page to obtain the session cookies, POST the
 * credentials to the login action with those cookies, then GET the protected page
 * again with every cookie collected so far.
 */
public class Scraper {

    static String formData = "j_username=user&j_password=pass";
    static String link = "https://parents.mtsd.k12.nj.us/genesis/parents?tab1=studentdata&tab2=gradebook&tab3=weeklysummary&studentid=100916&action=form";

    /** Login action the credentials are posted to. */
    private static final String LOGIN_URL = "https://parents.mtsd.k12.nj.us/genesis/j_security_check";

    public static void main(String[] args) {
        String display = postData(link, formData);
        System.out.print(display);
    }

    /**
     * Logs in with {@code data} and returns the HTML of {@code url}.
     *
     * The credentials are sent only in the POST body; they are neither appended to
     * the page URL nor printed.
     *
     * @param url  protected page to retrieve
     * @param data form body for the login request, already URL-encoded
     *             (for example {@code j_username=...&j_password=...})
     * @return the body of the final response decoded as UTF-8, or {@code null}
     *         when any step fails (the exception is printed to standard output)
     */
    public static String postData(String url, String data) {
        List<String> cookieJar = new java.util.ArrayList<String>();
        try {
            // Step 1: ask for the protected page so the server hands out its cookies.
            HttpURLConnection probe = open(url, "GET", cookieJar);
            rememberCookies(probe, cookieJar);
            probe.disconnect();

            // Step 2: post the credentials. Redirects are not followed automatically,
            // because cookies set on a redirect response would otherwise be skipped.
            HttpURLConnection login = open(LOGIN_URL, "POST", cookieJar);
            login.setInstanceFollowRedirects(false);
            login.setDoOutput(true);
            login.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
            try (DataOutputStream out = new DataOutputStream(login.getOutputStream())) {
                out.write(data.getBytes("UTF-8"));
            }
            rememberCookies(login, cookieJar);
            login.disconnect();

            // Step 3: request the page again, now with the cookies gathered in steps 1 and 2.
            HttpURLConnection page = open(url, "GET", cookieJar);
            return readBody(page);
        } catch (Exception e) {
            System.out.print(e);
            return null;
        }
    }

    /**
     * Opens a connection with browser-like headers and a single Cookie header built from the jar.
     * No Accept-Encoding header is sent, so the body can be read without decompression.
     */
    private static HttpURLConnection open(String address, String method, List<String> cookieJar)
            throws java.io.IOException {
        HttpURLConnection conn = (HttpURLConnection) new URL(address).openConnection();
        conn.setRequestMethod(method);
        conn.setRequestProperty("Accept",
                "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
        conn.setRequestProperty("User-Agent",
                "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36");
        conn.setRequestProperty("Accept-Language", "en-US,en;q=0.8");
        if (!cookieJar.isEmpty()) {
            StringBuilder header = new StringBuilder();
            for (String pair : cookieJar) {
                if (header.length() > 0) {
                    header.append("; ");
                }
                header.append(pair);
            }
            conn.setRequestProperty("Cookie", header.toString());
        }
        return conn;
    }

    /** Stores the name=value part of every Set-Cookie header; a newer value replaces an older one of the same name. */
    private static void rememberCookies(HttpURLConnection conn, List<String> cookieJar)
            throws java.io.IOException {
        // Forces the exchange and surfaces I/O errors; getHeaderFields() alone would hide them.
        conn.getResponseCode();
        List<String> headers = conn.getHeaderFields().get("Set-Cookie");
        if (headers == null) {
            return;
        }
        for (String header : headers) {
            String pair = header.split(";", 2)[0].trim();
            int equalsAt = pair.indexOf('=');
            String prefix = (equalsAt < 0 ? pair : pair.substring(0, equalsAt)) + "=";
            for (int i = cookieJar.size() - 1; i >= 0; i--) {
                if (cookieJar.get(i).startsWith(prefix)) {
                    cookieJar.remove(i);
                }
            }
            cookieJar.add(pair);
        }
    }

    /** Reads the whole response body as UTF-8 text and closes the stream. */
    private static String readBody(HttpURLConnection conn) throws java.io.IOException {
        try (InputStream in = conn.getInputStream()) {
            java.io.ByteArrayOutputStream buffer = new java.io.ByteArrayOutputStream();
            byte[] chunk = new byte[8192];
            int read;
            while ((read = in.read(chunk)) != -1) {
                buffer.write(chunk, 0, read);
            }
            return buffer.toString("UTF-8");
        }
    }
}

答案 4 :(得分:0)

所以您要做的是:先在不带登录凭据的情况下向服务器发出GET请求(请求的是您要检索数据的页面,而不是实际的登录页面)——这基本上可以让您获取并保存会话cookie。然后,您可以带上登录凭据和该cookie向实际的登录地址发出POST请求。它应该会将您重定向到最初请求的页面(即您想要抓取数据的页面)。请使用Postman或类似工具来确认您以正确的方式发送了正确的数据。祝您好运!

答案 5 :(得分:-1)

尝试使用 OkHttp库

  

https://github.com/square/okhttp

然后

 public static String postData(String url, String data){               
       try { 

OkHttpClient client = new OkHttpClient().newBuilder()
       .connectTimeout(180, TimeUnit.SECONDS)
       .readTimeout(300, TimeUnit.SECONDS)
       .writeTimeout(300, TimeUnit.SECONDS).build();






MediaType mediaType = MediaType.parse("application/x-www-form-urlencoded"); 
          RequestBody body = RequestBody.create(mediaType, data);
 Request request = new Request.Builder()
.url(url)
.post(body)
 .addHeader("Accept","text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
 .addHeader("User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36")
 .addHeader("Accept-Encoding","gzip, deflate, br")
.addHeader("Accept-Language","en-US,en;q=0.8")
 .build();
String body = client.newCall(request).execute().body().string();
  return  body;


         }catch(Exception e){ e.printStackTrace();  return “An error occurred”; }                                        
    }