I am trying to scrape my company's LinkedIn page with jsoup. When I run the code below, I can't get at the HTML I need: the returned HTML contains only the top header and the footer, and the information in between is missing.
How can I fix this?
Here is my code:
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

// Step 1: load the login page to pick up the CSRF token and the session cookies.
String url = "https://www.linkedin.com/uas/login?goback=&trk=hb_signin";
Connection.Response response = Jsoup
        .connect(url)
        .method(Connection.Method.GET)
        .execute();
Document responseDocument = response.parse();
Element loginCsrfParam = responseDocument
        .select("input[name=loginCsrfParam]")
        .first();

// Step 2: submit the login form with the token, the credentials, and the cookies from step 1.
response = Jsoup.connect("https://www.linkedin.com/uas/login-submit")
        .cookies(response.cookies())
        .data("loginCsrfParam", loginCsrfParam.attr("value"))
        .data("session_key", "MY_EMAIL")
        .data("session_password", "MY_PASSW")
        .method(Connection.Method.POST)
        .followRedirects(true)
        .execute();
Document document = response.parse();
System.out.println("Welcome "
        + document.select(".act-set-name-split-link").html());

// Step 3: request the search page, reusing the authenticated cookies.
String url2 = "https://www.linkedin.com/vsearch/f?type=all&keywords=BARCO+NV+kortrijk";
Connection.Response response2 = Jsoup.connect(url2)
        .cookies(response.cookies())
        .userAgent("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.21 (KHTML, like Gecko) Chrome/19.0.1042.0 Safari/535.21")
        .timeout(100 * 1000)
        .ignoreContentType(true)
        .execute();
Document docc = response2.parse();
String html = docc.html();

// Same search page fetched again with get() instead of execute(); both return the truncated page.
Document doc = Jsoup.connect(url2)
        .cookies(response.cookies())
        .userAgent("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.21 (KHTML, like Gecko) Chrome/19.0.1042.0 Safari/535.21")
        .timeout(100 * 1000)
        .get();
String htmlstring = doc.html();
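For completeness, this is a small helper I use to inspect what actually comes back (the dumpResponse name and the dump.html file name are my own, not part of jsoup): it prints the status code and the final URL after redirects, and writes the raw body to disk. Opening the saved file in a browser shows only the header and footer described above.

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import org.jsoup.Connection;

// Dump a jsoup response to disk so it can be opened in a browser for inspection.
static void dumpResponse(Connection.Response res, String fileName) throws IOException {
    System.out.println("Status: " + res.statusCode());
    System.out.println("Final URL after redirects: " + res.url());
    Files.write(Paths.get(fileName), res.body().getBytes(StandardCharsets.UTF_8));
}

// Usage, right after the search request above:
// dumpResponse(response2, "dump.html");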