我想获取某个频道订阅者数量随时间变化的数据,以便用它拟合出一些很酷的曲线。这个程序写得很简陋:它只是从 https://socialblade.com/youtube/user/pewdiepie/realtime 抓取HTML,然后在其中找到实时订阅数所在的位置。出于某种原因,我拿到的HTML大约每小时才变化一次,因此得不到我想要的高频数据(是否与缓存有关?)。我不太了解Java中网络相关的工作方式,只是想拼凑出一个简单的办法来获取数据,以便之后对数据做一些机器学习,或用LoggerPro进行曲线拟合。我不确定问题究竟出在哪里,所以也很难在Google上搜索到答案。另外,如果我每隔10秒左右自动连接一次他们的网站,这会不会被视为DoS攻击?
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
/**
 * Polls a Social Blade "realtime" page and logs the subscriber count each time it changes.
 *
 * <p>Every poll downloads the page, finds the line holding the hidden {@code rawCount}
 * paragraph and extracts the number from it. When the number differs from the previous
 * sample, a line of the form {@code <epochMillis> <count>} is appended to a text file and
 * echoed to standard output together with the time elapsed since the previous change.
 */
public class Main {

    /** Page that embeds the raw subscriber count. */
    private static final String PAGE_URL =
            "https://socialblade.com/youtube/user/pewdiepie/realtime";

    /** Opening tag that immediately precedes the count in the page's HTML. */
    private static final String COUNT_MARKER = "<p id=\"rawCount\" style=\"display: none;\">";

    /** Pause between two polls, in milliseconds. */
    private static final long POLL_INTERVAL_MILLIS = 10000L;

    /**
     * Runs the polling loop until a fetched page no longer contains the count marker.
     *
     * @param args unused
     * @throws Exception if the page cannot be downloaded, the output file cannot be created,
     *                   or the thread is interrupted while sleeping
     */
    public static void main(String[] args) throws Exception {
        System.setProperty("http.agent", "Chrome");
        URL url = new URL(PAGE_URL);

        long lastTime = System.currentTimeMillis();
        String lastCount = "";

        // try-with-resources guarantees the output file is closed even if a poll fails.
        try (PrintWriter out =
                new PrintWriter("pewDieSubs" + System.currentTimeMillis() + ".txt")) {
            while (true) {
                // Fetch right before reading so the sample is not ten seconds stale.
                String countLine = fetchCountLine(url);
                if (countLine == null) {
                    // No marker in the page: stop, as the line-by-line loop used to do.
                    break;
                }

                String count = extractRawCount(countLine);
                if (!count.equals(lastCount)) {
                    lastCount = count;
                    long now = System.currentTimeMillis();
                    long deltaTime = now - lastTime;
                    lastTime = now;

                    System.out.println(countLine);
                    System.out.println(count + " --- deltaTime = " + deltaTime);
                    out.println(lastTime + " " + count);
                    // Flush per sample so data survives an abrupt termination.
                    out.flush();
                }

                Thread.sleep(POLL_INTERVAL_MILLIS);
            }
        }
    }

    /**
     * Downloads the page once and returns the first line containing the count marker.
     *
     * <p>The request disables the JVM-level URL cache and asks intermediaries for an
     * uncached response. NOTE(review): this cannot help if the server itself only
     * regenerates the page periodically.
     *
     * @param url page to download
     * @return the line containing the marker, or {@code null} if no line contains it
     * @throws IOException if the connection fails or the response cannot be read
     */
    private static String fetchCountLine(URL url) throws IOException {
        URLConnection connection = url.openConnection();
        connection.setUseCaches(false);
        connection.setRequestProperty("Cache-Control", "no-cache");
        connection.setRequestProperty("Pragma", "no-cache");

        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = in.readLine()) != null) {
                if (line.contains(COUNT_MARKER)) {
                    return line;
                }
            }
        }
        return null;
    }

    /**
     * Extracts the text between the count marker and the next tag on the same line.
     *
     * <p>Unlike a fixed character offset, this works when the line is indented and when
     * the count has any number of digits.
     *
     * @param line a line of HTML
     * @return the trimmed text following the marker up to the next {@code '<'} (or the end
     *         of the line if there is none), or {@code null} if {@code line} is
     *         {@code null} or does not contain the marker
     */
    static String extractRawCount(String line) {
        if (line == null) {
            return null;
        }
        int markerAt = line.indexOf(COUNT_MARKER);
        if (markerAt < 0) {
            return null;
        }
        int start = markerAt + COUNT_MARKER.length();
        int end = line.indexOf('<', start);
        if (end < 0) {
            end = line.length();
        }
        return line.substring(start, end).trim();
    }
}