我已经写过一个演示程序,可以抓取静态网站,但是对于数据不断变化的网站该怎么办呢? 例如 https://www.oanda.com/currency/live-exchange-rates/
我应该如何解决我的问题?
我唯一能想到的方法是每秒发送200个请求,但这样做难道不会被封禁吗?
浏览器是如何处理此类网站的?我该如何用Java代码实现同样的行为?
答案 0 :(得分:0)
浏览器每隔一段时间就会向 https://www.oanda.com/lfr/rates_lrrr 发起一次数据请求。与其解析 HTML 页面,不如尝试找出该网站使用的数据 API 并直接调用它。
答案 1 :(得分:0)
Selenium Web Driver非常棒。
下载Selenium jar,并将其包含在您的构建路径中:https://www.seleniumhq.org/download/ 从此处下载ChromeDriver:http://chromedriver.chromium.org/downloads
import org.openqa.selenium.By;
import org.openqa.selenium.JavascriptExecutor;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.ie.InternetExplorerDriver;
/**
 * Demo that opens the OANDA live exchange-rate page in a real browser through
 * Selenium WebDriver and repeatedly prints the rates currently shown on the page.
 *
 * <p>The rates are read by running a JavaScript snippet inside the page, so the
 * values printed are whatever the page has rendered at the moment of the call.
 */
public class ExchangeRates {

    /** Page whose rate table is read. */
    private static final String RATES_URL = "https://www.oanda.com/currency/live-exchange-rates/";

    /** Number of times the rates are read before the browser is shut down. */
    private static final int POLL_COUNT = 10;

    /** Pause between two consecutive reads, in milliseconds. */
    private static final long POLL_INTERVAL_MS = 1000;

    /**
     * Script executed in the page. It relies on the page's own jQuery ({@code $}),
     * walks every {@code .rate_row} element, and emits two entries per row (the
     * pair and its inverse) as a JSON string.
     */
    private static final String EXTRACT_RATES_SCRIPT =
            "var rateRows = $('.rate_row'); " +
            "var exchangeRates = []; " +
            "for(var i = 0; i < rateRows.length; i++) { " +
            " var row = $(rateRows[i]); " +
            " var lbls = row.find(\".inline.title.left a\"); " +
            " var vals = row.find(\".inline.value.right\"); " +
            " var lbl1 = $(lbls[0]).text(); " +
            " var lbl1Parts = lbl1.split(\"/\"); " +
            " exchangeRates.push({ " +
            " fromCur:lbl1Parts[0], " +
            " toCur:lbl1Parts[1], " +
            " bid:parseFloat($(vals[1]).text()), " +
            " ask:parseFloat($(vals[0]).text()) " +
            " }); " +
            " var lbl2 = $(lbls[1]).text(); " +
            " var lbl2Parts = lbl2.split(\"/\"); " +
            " exchangeRates.push({ " +
            " fromCur:lbl2Parts[0], " +
            " toCur:lbl2Parts[1], " +
            " bid:parseFloat($(vals[3]).text()), " +
            " ask:parseFloat($(vals[2]).text()) " +
            " }); " +
            "} " +
            "return JSON.stringify(exchangeRates);";

    /**
     * Starts Chrome, loads the rates page, prints the rates {@link #POLL_COUNT}
     * times with a pause of {@link #POLL_INTERVAL_MS} ms after each read, and then
     * shuts the browser down.
     *
     * @param args unused
     * @throws InterruptedException if the thread is interrupted while sleeping
     *     between reads; the browser is still shut down in that case
     */
    public static void main(String[] args) throws InterruptedException {
        // Chrome driver
        System.setProperty("webdriver.chrome.driver", "C:\\chromedriver.exe");
        WebDriver driver = new ChromeDriver();
        // Internet Explorer alternative:
        //   System.setProperty("webdriver.ie.driver", "C:\\MicrosoftWebDriver.exe");
        //   WebDriver driver = new InternetExplorerDriver();
        try {
            driver.get(RATES_URL);
            for (int i = 0; i < POLL_COUNT; i++) {
                getExchangeRates(driver);
                Thread.sleep(POLL_INTERVAL_MS);
            }
        } finally {
            // quit() ends the whole session (all windows plus the driver process),
            // and running it in finally avoids leaking a browser when anything
            // above throws.
            driver.quit();
        }
    }

    /**
     * Runs the extraction script in the current page and prints the resulting
     * JSON string to standard output.
     *
     * @param driver driver whose current page is the rates page
     */
    private static void getExchangeRates(WebDriver driver) {
        String exchangeRateJSON = runJavascript(driver, EXTRACT_RATES_SCRIPT);
        System.out.println(exchangeRateJSON);
    }

    /**
     * Executes {@code script} in the page currently loaded by {@code driver}.
     *
     * <p>The page's {@code <body>} element is passed to the script as its first
     * argument ({@code arguments[0]}).
     *
     * @param driver driver to run the script with; must also implement
     *     {@link JavascriptExecutor}
     * @param script JavaScript source whose {@code return} value is a string
     * @return the value returned by the script, cast to {@code String}
     */
    public static String runJavascript(WebDriver driver, String script) {
        WebElement body = driver.findElement(By.cssSelector("body"));
        JavascriptExecutor executor = (JavascriptExecutor) driver;
        return (String) executor.executeScript(script, body);
    }
}
Selenium WebDriver 的基本思路是:打开一个真实的网页,然后在该页面的上下文中执行诸如运行 JavaScript 之类的操作。由于请求来自真实的浏览器,这样可以降低被识别为机器人的风险。
javascript返回的JSON如下所示:
[
{
"fromCur": "EUR",
"toCur": "USD",
"bid": 1.13949,
"ask": 1.13962
},
{
"fromCur": "USD",
"toCur": "EUR",
"bid": 0.87749,
"ask": 0.87759
},
{
"fromCur": "GBP",
"toCur": "USD",
"bid": 1.27602,
"ask": 1.27619
},
{
"fromCur": "USD",
"toCur": "GBP",
"bid": 0.78358,
"ask": 0.78369
},
{
"fromCur": "USD",
"toCur": "CAD",
"bid": 1.31391,
"ask": 1.31408
},
{
"fromCur": "CAD",
"toCur": "USD",
"bid": 0.76099,
"ask": 0.76109
},
{
"fromCur": "USD",
"toCur": "CHF",
"bid": 0.9936,
"ask": 0.99379
},
{
"fromCur": "CHF",
"toCur": "USD",
"bid": 1.00625,
"ask": 1.00644
},
{
"fromCur": "USD",
"toCur": "JPY",
"bid": 110.752,
"ask": 110.765
},
{
"fromCur": "JPY",
"toCur": "USD",
"bid": 0.00903,
"ask": 0.00903
},
{
"fromCur": "EUR",
"toCur": "GBP",
"bid": 0.89291,
"ask": 0.89308
},
{
"fromCur": "GBP",
"toCur": "EUR",
"bid": 1.11972,
"ask": 1.11993
},
{
"fromCur": "EUR",
"toCur": "CHF",
"bid": 1.13236,
"ask": 1.13252
},
{
"fromCur": "CHF",
"toCur": "EUR",
"bid": 0.88299,
"ask": 0.88311
},
{
"fromCur": "AUD",
"toCur": "USD",
"bid": 0.72642,
"ask": 0.72654
},
{
"fromCur": "USD",
"toCur": "AUD",
"bid": 1.37639,
"ask": 1.37661
},
{
"fromCur": "EUR",
"toCur": "JPY",
"bid": 126.207,
"ask": 126.226
},
{
"fromCur": "JPY",
"toCur": "EUR",
"bid": 0.00792,
"ask": 0.00792
},
{
"fromCur": "GBP",
"toCur": "JPY",
"bid": 141.328,
"ask": 141.355
},
{
"fromCur": "JPY",
"toCur": "GBP",
"bid": 0.00707,
"ask": 0.00708
},
{
"fromCur": "EUR",
"toCur": "AUD",
"bid": 1.56854,
"ask": 1.56877
},
{
"fromCur": "AUD",
"toCur": "EUR",
"bid": 0.63744,
"ask": 0.63754
},
{
"fromCur": "EUR",
"toCur": "CZK",
"bid": 25.75355,
"ask": 25.78107
},
{
"fromCur": "CZK",
"toCur": "EUR",
"bid": 0.03879,
"ask": 0.03883
},
{
"fromCur": "EUR",
"toCur": "HUF",
"bid": 323.69,
"ask": 324.089
},
{
"fromCur": "HUF",
"toCur": "EUR",
"bid": 0.00309,
"ask": 0.00309
},
{
"fromCur": "EUR",
"toCur": "NZD",
"bid": 1.73338,
"ask": 1.73374
},
{
"fromCur": "NZD",
"toCur": "EUR",
"bid": 0.57679,
"ask": 0.57691
},
{
"fromCur": "EUR",
"toCur": "SEK",
"bid": 10.39725,
"ask": 10.39991
},
{
"fromCur": "SEK",
"toCur": "EUR",
"bid": 0.09615,
"ask": 0.09618
},
{
"fromCur": "EUR",
"toCur": "SGD",
"bid": 1.56877,
"ask": 1.5692
},
{
"fromCur": "SGD",
"toCur": "EUR",
"bid": 0.63727,
"ask": 0.63744
},
{
"fromCur": "EUR",
"toCur": "CAD",
"bid": 1.49727,
"ask": 1.49748
},
{
"fromCur": "CAD",
"toCur": "EUR",
"bid": 0.66779,
"ask": 0.66788
},
{
"fromCur": "EUR",
"toCur": "DKK",
"bid": 7.45298,
"ask": 7.45446
},
{
"fromCur": "DKK",
"toCur": "EUR",
"bid": 0.13415,
"ask": 0.13417
},
{
"fromCur": "EUR",
"toCur": "NOK",
"bid": 9.53927,
"ask": 9.54229
},
{
"fromCur": "NOK",
"toCur": "EUR",
"bid": 0.1048,
"ask": 0.10483
},
{
"fromCur": "EUR",
"toCur": "PLN",
"bid": 4.31208,
"ask": 4.31726
},
{
"fromCur": "PLN",
"toCur": "EUR",
"bid": 0.23163,
"ask": 0.23191
},
{
"fromCur": "EUR",
"toCur": "TRY",
"bid": 7.95697,
"ask": 7.97624
},
{
"fromCur": "TRY",
"toCur": "EUR",
"bid": 0.12537,
"ask": 0.12568
},
{
"fromCur": "EUR",
"toCur": "ZAR",
"bid": 16.43966,
"ask": 16.4571
},
{
"fromCur": "ZAR",
"toCur": "EUR",
"bid": 0.06076,
"ask": 0.06083
},
{
"fromCur": "USD",
"toCur": "CNH",
"bid": 6.90501,
"ask": 6.90585
},
{
"fromCur": "CNH",
"toCur": "USD",
"bid": 0.1448,
"ask": 0.14482
},
{
"fromCur": "USD",
"toCur": "DKK",
"bid": 6.53998,
"ask": 6.5415
},
{
"fromCur": "DKK",
"toCur": "USD",
"bid": 0.15287,
"ask": 0.15291
},
{
"fromCur": "USD",
"toCur": "HUF",
"bid": 284.084,
"ask": 284.37
},
{
"fromCur": "HUF",
"toCur": "USD",
"bid": 0.00352,
"ask": 0.00352
},
{
"fromCur": "USD",
"toCur": "MXN",
"bid": 19.1822,
"ask": 19.18824
},
{
"fromCur": "MXN",
"toCur": "USD",
"bid": 0.05212,
"ask": 0.05213
},
{
"fromCur": "USD",
"toCur": "PLN",
"bid": 3.78415,
"ask": 3.78769
},
{
"fromCur": "PLN",
"toCur": "USD",
"bid": 0.26401,
"ask": 0.26426
},
{
"fromCur": "USD",
"toCur": "SEK",
"bid": 9.12374,
"ask": 9.12617
},
{
"fromCur": "SEK",
"toCur": "USD",
"bid": 0.10958,
"ask": 0.1096
},
{
"fromCur": "USD",
"toCur": "THB",
"bid": 33.316,
"ask": 33.434
},
{
"fromCur": "THB",
"toCur": "USD",
"bid": 0.02991,
"ask": 0.03002
},
{
"fromCur": "USD",
"toCur": "ZAR",
"bid": 14.42721,
"ask": 14.44086
},
{
"fromCur": "ZAR",
"toCur": "USD",
"bid": 0.06925,
"ask": 0.06931
},
{
"fromCur": "USD",
"toCur": "CZK",
"bid": 22.60223,
"ask": 22.61947
},
{
"fromCur": "CZK",
"toCur": "USD",
"bid": 0.04421,
"ask": 0.04424
},
{
"fromCur": "USD",
"toCur": "HKD",
"bid": 7.84967,
"ask": 7.85
},
{
"fromCur": "HKD",
"toCur": "USD",
"bid": 0.12739,
"ask": 0.12739
},
{
"fromCur": "USD",
"toCur": "INR",
"bid": 69.997,
"ask": 70.102
},
{
"fromCur": "INR",
"toCur": "USD",
"bid": 0.01426,
"ask": 0.01429
},
{
"fromCur": "USD",
"toCur": "NOK",
"bid": 8.3705,
"ask": 8.37378
},
{
"fromCur": "NOK",
"toCur": "USD",
"bid": 0.11942,
"ask": 0.11947
},
{
"fromCur": "USD",
"toCur": "SAR",
"bid": 3.7495,
"ask": 3.75125
},
{
"fromCur": "SAR",
"toCur": "USD",
"bid": 0.26658,
"ask": 0.2667
},
{
"fromCur": "USD",
"toCur": "SGD",
"bid": 1.37673,
"ask": 1.37695
},
{
"fromCur": "SGD",
"toCur": "USD",
"bid": 0.72624,
"ask": 0.72636
},
{
"fromCur": "USD",
"toCur": "TRY",
"bid": 6.97585,
"ask": 7.00085
},
{
"fromCur": "TRY",
"toCur": "USD",
"bid": 0.14284,
"ask": 0.14335
},
{
"fromCur": "GBP",
"toCur": "AUD",
"bid": 1.75644,
"ask": 1.75679
},
{
"fromCur": "AUD",
"toCur": "GBP",
"bid": 0.56922,
"ask": 0.56933
},
{
"fromCur": "GBP",
"toCur": "CHF",
"bid": 1.26798,
"ask": 1.26827
},
{
"fromCur": "CHF",
"toCur": "GBP",
"bid": 0.78848,
"ask": 0.78866
},
{
"fromCur": "GBP",
"toCur": "ZAR",
"bid": 18.4094,
"ask": 18.42928
},
{
"fromCur": "ZAR",
"toCur": "GBP",
"bid": 0.05426,
"ask": 0.05432
},
{
"fromCur": "GBP",
"toCur": "SGD",
"bid": 1.75673,
"ask": 1.75725
},
{
"fromCur": "SGD",
"toCur": "GBP",
"bid": 0.56907,
"ask": 0.56924
},
{
"fromCur": "AUD",
"toCur": "JPY",
"bid": 80.454,
"ask": 80.47
},
{
"fromCur": "JPY",
"toCur": "AUD",
"bid": 0.01243,
"ask": 0.01243
},
{
"fromCur": "AUD",
"toCur": "SGD",
"bid": 1.00008,
"ask": 1.00041
},
{
"fromCur": "SGD",
"toCur": "AUD",
"bid": 0.99959,
"ask": 0.99992
},
{
"fromCur": "CAD",
"toCur": "JPY",
"bid": 84.284,
"ask": 84.301
},
{
"fromCur": "JPY",
"toCur": "CAD",
"bid": 0.01186,
"ask": 0.01186
},
{
"fromCur": "CHF",
"toCur": "JPY",
"bid": 111.446,
"ask": 111.472
},
{
"fromCur": "JPY",
"toCur": "CHF",
"bid": 0.00897,
"ask": 0.00897
},
{
"fromCur": "NZD",
"toCur": "CAD",
"bid": 0.86359,
"ask": 0.86388
},
{
"fromCur": "CAD",
"toCur": "NZD",
"bid": 1.15757,
"ask": 1.15796
},
{
"fromCur": "NZD",
"toCur": "USD",
"bid": 0.65731,
"ask": 0.65746
},
{
"fromCur": "USD",
"toCur": "NZD",
"bid": 1.52101,
"ask": 1.52135
},
{
"fromCur": "SGD",
"toCur": "JPY",
"bid": 80.432,
"ask": 80.455
},
{
"fromCur": "JPY",
"toCur": "SGD",
"bid": 0.01243,
"ask": 0.01243
},
{
"fromCur": "ZAR",
"toCur": "JPY",
"bid": 7.666,
"ask": 7.681
},
{
"fromCur": "JPY",
"toCur": "ZAR",
"bid": 0.13019,
"ask": 0.13045
},
{
"fromCur": "GBP",
"toCur": "CAD",
"bid": 1.67665,
"ask": 1.67698
},
{
"fromCur": "CAD",
"toCur": "GBP",
"bid": 0.59631,
"ask": 0.59643
},
{
"fromCur": "GBP",
"toCur": "NZD",
"bid": 1.94104,
"ask": 1.94156
},
{
"fromCur": "NZD",
"toCur": "GBP",
"bid": 0.51505,
"ask": 0.51519
},
{
"fromCur": "GBP",
"toCur": "PLN",
"bid": 4.82832,
"ask": 4.83505
},
{
"fromCur": "PLN",
"toCur": "GBP",
"bid": 0.20682,
"ask": 0.20711
},
{
"fromCur": "AUD",
"toCur": "CAD",
"bid": 0.95447,
"ask": 0.95468
},
{
"fromCur": "CAD",
"toCur": "AUD",
"bid": 1.04747,
"ask": 1.0477
},
{
"fromCur": "AUD",
"toCur": "NZD",
"bid": 1.10499,
"ask": 1.10529
},
{
"fromCur": "NZD",
"toCur": "AUD",
"bid": 0.90474,
"ask": 0.90499
},
{
"fromCur": "CAD",
"toCur": "CHF",
"bid": 0.75611,
"ask": 0.75636
},
{
"fromCur": "CHF",
"toCur": "CAD",
"bid": 1.32212,
"ask": 1.32256
},
{
"fromCur": "CAD",
"toCur": "SGD",
"bid": 1.0477,
"ask": 1.04801
},
{
"fromCur": "SGD",
"toCur": "CAD",
"bid": 0.95419,
"ask": 0.95447
},
{
"fromCur": "CHF",
"toCur": "ZAR",
"bid": 14.51736,
"ask": 14.53388
},
{
"fromCur": "ZAR",
"toCur": "CHF",
"bid": 0.0688,
"ask": 0.06888
},
{
"fromCur": "NZD",
"toCur": "JPY",
"bid": 72.797,
"ask": 72.82
},
{
"fromCur": "JPY",
"toCur": "NZD",
"bid": 0.01373,
"ask": 0.01374
},
{
"fromCur": "NZD",
"toCur": "SGD",
"bid": 0.90493,
"ask": 0.90529
},
{
"fromCur": "SGD",
"toCur": "NZD",
"bid": 1.10462,
"ask": 1.10506
},
{
"fromCur": "TRY",
"toCur": "JPY",
"bid": 15.823,
"ask": 15.864
},
{
"fromCur": "JPY",
"toCur": "TRY",
"bid": 0.06304,
"ask": 0.0632
},
{
"fromCur": "XAG",
"toCur": "JPY",
"bid": 1663.5,
"ask": 1665.5
},
{
"fromCur": "JPY",
"toCur": "XAG",
"bid": 0.0006,
"ask": 0.0006
},
{
"fromCur": "XAU",
"toCur": "JPY",
"bid": 132176,
"ask": 132220
},
{
"fromCur": "JPY",
"toCur": "XAU",
"bid": 0.00001,
"ask": 0.00001
},
{
"fromCur": "XAG",
"toCur": "USD",
"bid": 15.01958,
"ask": 15.03569
},
{
"fromCur": "USD",
"toCur": "XAG",
"bid": 0.06651,
"ask": 0.06658
},
{
"fromCur": "XAU",
"toCur": "USD",
"bid": 1193.441,
"ask": 1193.691
},
{
"fromCur": "USD",
"toCur": "XAU",
"bid": 0.00084,
"ask": 0.00084
}
]
答案 2 :(得分:0)
简短的回答是:使用无头浏览器。这类站点大多在页面加载后通过 WebSocket / Ajax / 异步请求来提供新数据。因此,要抓取动态网站,最简单的方法是让爬虫的行为更像浏览器而不是脚本。可以使用 Selenium 或 PhantomJS 等工具来实现。通常,人们会使用诸如 Apache Nutch 之类的工具来大规模控制抓取流量。您可能还需要了解一下代理服务器群(proxy farm)。