我拥有的程序是一个网络爬虫,可访问多个视频游戏统计信息跟踪网站,并收集某些姓名输入到Google电子表格中的玩家的统计信息。该程序仅供个人使用,因此我对禁用证书验证并使自己承受与之相关的风险感到满意。目前,在运行代码时出现以下错误:
Exception in thread "main" org.jsoup.HttpStatusException: HTTP error
fetching URL. Status=403,
URL=https://overwatchtracker.com/profile/pc/global/ClaySymp-1875
at
org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:776)
at
org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:722)
at org.jsoup.helper.HttpConnection.execute(HttpConnection.java:306)
at org.jsoup.helper.HttpConnection.get(HttpConnection.java:295)
at HtmlParser.main(HtmlParser.java:134)
我当前的代码是:
`
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.security.cert.CertificateException;
import java.util.ArrayList;
import javax.net.ssl.*;
import java.security.SecureRandom;
import java.security.cert.X509Certificate;
public class HtmlParser {
public static void main(String[] args) throws IOException {
// Create a trust manager that does not validate certificate chains
TrustManager[] trustAllCerts = new TrustManager[] {
new X509TrustManager() {
public X509Certificate[] getAcceptedIssuers() {
return new X509Certificate[0];
}
@Override
public void checkClientTrusted(X509Certificate[] arg0, String arg1) throws CertificateException
{
// TODO Auto-generated method stub
}
@Override
public void checkServerTrusted(X509Certificate[] arg0, String arg1) throws CertificateException
{
// TODO Auto-generated method stub
}
}};
// Ignore differences between hostnames
HostnameVerifier hv = new HostnameVerifier() {
@Override
public boolean verify(String arg0, SSLSession arg1)
{
// TODO Auto-generated method stub
return true;
}
};
// Install the all-trusting trust manager
try {
SSLContext sc = SSLContext.getInstance("SSL");
sc.init(null, trustAllCerts, new SecureRandom());
HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory());
HttpsURLConnection.setDefaultHostnameVerifier(hv);
} catch (Exception e) {}
// ROCKET LEAGUE PLAYER NAME SETUP
ArrayList<String> rlList = new ArrayList<String>();
Document rlDoc = Jsoup.connect(
"https://docs.google.com/spreadsheets/d/e/2PACX-1vRGC6L6f9jAdqE3YsVLQRxlH3pHWj451XcxOaluQoWSJsZBwUiGeFmzzrogINy81Ng_Ic-y-wM5sPoC/pubhtml")
.get();
for (Element table : rlDoc.select("table.waffle")) {
for (Element row : table.select("tr")) {
Elements tds = row.select("td");
if (tds.size() > 0) {
rlList.add(tds.get(0).text());
}
}
}
// ROCKET LEAGUE STAT COLLECTION
for (int i = 0; i < rlList.size(); i++) {
String full = "";
String url = "https://rocketleague.tracker.network/profile/xbox/" + rlList.get(i);
Document rl = Jsoup.connect(url).get();
System.out.println(rlList.get(i));
for (Element table : rl.select("table.card-table.items")) {
int j = 0;
for (Element row : table.select("tr")) {
Elements tds = row.select("td");
if (tds.size() > 5) {
if (tds.get(1).text().substring(0, 6).equalsIgnoreCase("ranked")) {
String td2 = tds.get(2).text();
String td4 = tds.get(4).text();
if (td2.equals(""))
tds.get(2).html("N/A");
if (td4.equals(""))
tds.get(4).html("N/A");
if (j == 0) {
full = tds.get(1).text().substring(16) + " " + tds.get(2).text() + " "
+ tds.get(3).text() + " " + tds.get(4).text() + " " + tds.get(5).text();
} else if (j == 1) {
full = tds.get(1).text().substring(19) + " " + tds.get(2).text() + " "
+ tds.get(3).text() + " " + tds.get(4).text() + " " + tds.get(5).text();
} else if (j == 2) {
full = tds.get(1).text().substring(25) + " " + tds.get(2).text() + " "
+ tds.get(3).text() + " " + tds.get(4).text() + " " + tds.get(5).text();
} else if (j == 3) {
full = tds.get(1).text().substring(20) + " " + tds.get(2).text() + " "
+ tds.get(3).text() + " " + tds.get(4).text() + " " + tds.get(5).text();
}
System.out.println(full);
j++;
}
}
}
}
System.out.println("\n");
}
// END ROCKET LEAGUE
// OVERWATCH PLAYER NAME SETUP
ArrayList<String> owList = new ArrayList<String>();
Document owDoc = Jsoup.connect(
"https://docs.google.com/spreadsheets/d/e/2PACX-1vSZ7qiTCaOniDQzn4nq0tLdT_pATKa7Y1k30CebIgoxxEd04PsRRc-K2LjfzD_rSrsREluLS4oKtAPn/pubhtml")
.get();
for (Element table : owDoc.select("table.waffle")) {
for (Element row : table.select("tr")) {
Elements tds = row.select("td");
if (tds.size() > 0) {
owList.add(tds.get(0).text());
}
}
}
// OVERWATCH STAT COLLECTION
for (int i = 0; i < owList.size(); i++) {
String url = "https://overwatchtracker.com/profile/pc/global/" + owList.get(i);
System.out.println(owList.get(i));
Document ow = Jsoup.connect(url).userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36").get();
Elements elements = ow.select("div.infobox-container");
for (Element e : elements) {
System.out.println(e.text());
}
System.out.println("\n");
}
// END OVERWATCH
// FORTNITE PLAYER NAME SETUP
ArrayList<String> fList = new ArrayList<String>();
Document fDoc = Jsoup.connect(
"https://docs.google.com/spreadsheets/d/e/2PACX-1vQuZI5-dcO1vAzB_Yl-FWURT_tp2Wog7G2Xg8_2BDZRgPyGJFUDFh0w_ryIkgrcJ_U1ogFdez-R4ngj/pubhtml")
.get();
for (Element table : fDoc.select("table.waffle")) {
for (Element row : table.select("tr")) {
Elements tds = row.select("td");
if (tds.size() > 0) {
fList.add(tds.get(0).text());
}
}
}
// FORTNITE STAT COLLECTION
for (int i = 0; i < fList.size(); i++) {
String full = "";
System.out.println(fList.get(i));
Document f = Jsoup.connect("https://fortnitetracker.com/profile/pc/" + fList.get(i)).userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36").get();
Elements elements = f.select("div.trn-defstats.trn-defstats--width4");
for (Element e : elements) {
System.out.println(e.text());
}
}
System.out.println("\n");
// END FORTNITE
}
}
我收到的当前完整输出是:
EZJ7
Platinum I Division II N/A 657 (Top 33%) N/A 0 Win Streak: 6
Unranked Division I N/A 1,090 (Top 21%) N/A 0
Unranked Division I N/A 568 (Top 60%) N/A 0 Losing Streak: 5
Diamond II Division III N/A 1,062 (Top 25%) N/A 0
WombatWarfare11
Diamond III Division II N/A 957 (Top 4.2%) N/A 0
Champion II Division II ~3 1,311 (Top 6%) ~27 74
Diamond II Division II N/A 912 (Top 11%) N/A 0
Champion II Division I ~12 1,283 (Top 7%) ~25 17 Losing Streak: 3
BlueFlash19
Gold III Division IV N/A 621 (Top 42%) N/A 0 Losing Streak: 5
Diamond II Division II N/A 1,030 (Top 25%) N/A 0 Losing Streak: 6
Platinum II Division I N/A 696 (Top 35%) N/A 0 Losing Streak: 2
Diamond III Division III N/A 1,161 (Top 15%) N/A 0 Losing Streak: 3
ClaySymp-1875
Exception in thread "main" org.jsoup.HttpStatusException: HTTP error
fetching URL. Status=403,
URL=https://overwatchtracker.com/profile/pc/global/ClaySymp-1875
at
org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:776)
at
org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:722)
at org.jsoup.helper.HttpConnection.execute(HttpConnection.java:306)
at org.jsoup.helper.HttpConnection.get(HttpConnection.java:295)
at HtmlParser.main(HtmlParser.java:134)
请随时对我做错的任何事情打电话给我,因为我是刚接触网络的人,但是我正在寻找一种解决此类错误的方法,该解决方案不如我的贫民窟,因为我的自签名证书解决方案