我正在尝试将以下顺序代码转换为多线程代码,但结果对我来说听起来不合理。
package com.net;
import jdk.incubator.http.HttpClient;
import jdk.incubator.http.HttpRequest;
import jdk.incubator.http.HttpResponse;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
public class Req {
HttpClient client = HttpClient.newHttpClient();
private String getResource(String someUrl) {
String body = "";
try {
URI url = new URI(someUrl);
HttpRequest request = HttpRequest.newBuilder()
.uri(url).GET().build();
HttpResponse<String> response = client.send(request, HttpResponse.BodyHandler.asString());
body = response.body();
} catch (URISyntaxException e) {
System.out.println("URL " + someUrl + "is not valid");
} catch (IOException | InterruptedException e) {
System.out.println(e.getMessage());
}
return body;
}
public static void main(String[] args){
String[] topIranianSites = {
"https://www.aparat.com/",
"http://www.varzesh3.com/",
"http://namnak.com/",
"http://www.telewebion.com/",
"https://divar.ir/",
"https://www.ninisite.com/",
"https://www.blogfa.com/",
"http://www.namasha.com/",
"http://www.yjc.ir/"
};
Req singleThreadReq = new Req();
float totalElapsedTime = 0F;
for (String site : topIranianSites){
long fetchStartTime = System.currentTimeMillis();
String html = singleThreadReq.getResource(site);
float elapsed = (float) (System.currentTimeMillis() - fetchStartTime) / 1000;
Document doc = Jsoup.parse(html);
System.out.println("It took " + elapsed + " seconds to fetch " + site + " with title " + doc.title());
totalElapsedTime += elapsed;
}
System.out.println("Total Elapsed Time: " + totalElapsedTime + "\nTotal Number of sites: " + topIranianSites.length);
}
}
这是输出
WARNING: Using incubator modules: jdk.incubator.httpclient
It took 2.622 seconds to fetch https://www.aparat.com/ with title آپارات - سرویس اشتراک ویدیو
It took 0.455 seconds to fetch http://www.varzesh3.com/ with title
It took 0.521 seconds to fetch http://namnak.com/ with title نمناک
It took 2.172 seconds to fetch http://www.telewebion.com/ with title تلوبیون | مرجع پخش زنده و دانلود فیلم ، سریال و سایر برنامه های تلویزیون
General SSLEngine problem
It took 0.229 seconds to fetch https://divar.ir/ with title
It took 1.769 seconds to fetch https://www.ninisite.com/ with title نی نی سایت | راهنمای بارداری و بچه داری
Received fatal alert: handshake_failure
It took 0.382 seconds to fetch https://www.blogfa.com/ with title
It took 2.641 seconds to fetch http://www.namasha.com/ with title نماشا - سرویس رایگان اشتراک ویدیو
It took 0.503 seconds to fetch http://www.yjc.ir/ with title
Total Elapsed Time: 11.294001
Total Number of sites: 9
从顺序输出我想正确的多线程代码应该花费大约2.8秒来获取所有9个站点。但我的多线程代码实现需要更多
package com.net;
import jdk.incubator.http.HttpClient;
import jdk.incubator.http.HttpRequest;
import jdk.incubator.http.HttpResponse;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
public class ReqThreaded implements Runnable {
class Site {
String url;
String title;
float fetchTime;
}
private HttpClient client = HttpClient.newHttpClient();
private Thread[] threadPool;
private String[] rawSites;
private Site[] sitesArr;
private int sitesDone = 0;
long startTime = System.currentTimeMillis();
float totalElapsed = 0F;
public ReqThreaded(String[] sites) {
threadPool = new Thread[sites.length];
sitesArr = new Site[sites.length];
rawSites = sites;
for (int i = 0; i < sites.length; i++) {
startThread(i);
}
while (sitesDone < sites.length) {
try {
Thread.sleep(1000);
totalElapsed = (float) (System.currentTimeMillis() - startTime) / 1000;
System.out.print("\rElapsed time: " + totalElapsed + "Sites Done: " + sitesDone);
} catch (InterruptedException e) {
e.printStackTrace();
}
}
System.out.println("\n\nStatistics:\n\n");
for (Site someSite : sitesArr) {
System.out.println("URL " + someSite.url + "\nTitle: " + someSite.title + "\nFetch Time: " + someSite.fetchTime + "\n\n");
}
}
private void startThread(int i) {
if (threadPool[i] == null) {
threadPool[i] = new Thread(this);
threadPool[i].start();
}
}
private String getResource(String someUrl) {
String body = "";
try {
URI url = new URI(someUrl);
HttpRequest request = HttpRequest.newBuilder()
.uri(url).GET().build();
HttpResponse<String> response = client.send(request, HttpResponse.BodyHandler.asString());
body = response.body();
} catch (URISyntaxException e) {
System.out.println("URL " + someUrl + "is not valid");
} catch (IOException | InterruptedException e) {
System.out.println(e.getMessage());
}
return body;
}
@Override
public void run() {
Thread thisThread = Thread.currentThread();
int sitesIndex = 0;
for (int j = 0; j < threadPool.length; j++) {
if (thisThread == threadPool[j]) {
sitesIndex = j;
}
}
long fetchStartTime = System.currentTimeMillis();
String html = getResource(rawSites[sitesIndex]);
float elapsed = (float) (System.currentTimeMillis() - fetchStartTime) / 1000;
sitesDone++;
Document doc = Jsoup.parse(html);
sitesArr[sitesIndex] = new Site();
sitesArr[sitesIndex].url = rawSites[sitesIndex];
sitesArr[sitesIndex].title = doc.title();
sitesArr[sitesIndex].fetchTime = elapsed;
}
public static void main(String[] args) {
String[] topIranianSites = {
"https://www.aparat.com/",
"http://www.varzesh3.com/",
"http://namnak.com/",
"http://www.telewebion.com/",
"https://divar.ir/",
"https://www.ninisite.com/",
"https://www.blogfa.com/",
"http://www.namasha.com/",
"http://www.yjc.ir/"
};
new ReqThreaded(topIranianSites);
}
}
这是多线程代码的输出。每个网址的总时间和获取时间似乎都不正确。我觉得有些东西在这里阻塞或某种竞争条件。这有什么不对?
WARNING: Using incubator modules: jdk.incubator.httpclient
General SSLEngine problem
Received fatal alert: handshake_failure
Elapsed time: 7.068Sites Done: 9
Statistics:
URL https://www.aparat.com/
Title: آپارات - سرویس اشتراک ویدیو
Fetch Time: 4.808
URL http://www.varzesh3.com/
Title:
Fetch Time: 5.904
URL http://namnak.com/
Title: نمناک
Fetch Time: 1.056
URL http://www.telewebion.com/
Title: تلوبیون | مرجع پخش زنده و دانلود فیلم ، سریال و سایر برنامه های تلویزیون
Fetch Time: 6.569
URL https://divar.ir/
Title:
Fetch Time: 0.53
URL https://www.ninisite.com/
Title: نی نی سایت | راهنمای بارداری و بچه داری
Fetch Time: 4.287
URL https://www.blogfa.com/
Title:
Fetch Time: 0.767
URL http://www.namasha.com/
Title: نماشا - سرویس رایگان اشتراک ویدیو
Fetch Time: 4.539
URL http://www.yjc.ir/
Title:
Fetch Time: 0.836
答案 0 :(得分:0)
我修复了类ReqThreaded
中的一些明显错误,例如不正确的同步,将方法run()
移到类Site
中,并使结果打印与单线程变体中的结果打印相同。未使用线程池,因为每个请求都创建了一个单独的线程。
结果如下:
Received fatal alert: handshake_failure
It took 0.263 seconds to fetch https://www.blogfa.com/ with title
General SSLEngine problem
It took 0.491 seconds to fetch https://divar.ir/ with title
It took 1.02 seconds to fetch http://www.yjc.ir/ with title
It took 1.056 seconds to fetch http://www.telewebion.com/ with title تلوبیون | مرجع پخش زنده و دانلود فیلم ، سریال و سایر برنامه های تلویزیون
It took 1.262 seconds to fetch https://www.ninisite.com/ with title نی نی سایت | راهنمای بارداری و بچه داری
It took 1.411 seconds to fetch http://namnak.com/ with title نمناک
It took 1.608 seconds to fetch http://www.varzesh3.com/ with title ورزش سه :: صفحه اصلی
It took 2.221 seconds to fetch http://www.namasha.com/ with title نماشا - سرویس رایگان اشتراک ویدیو
It took 2.247 seconds to fetch https://www.aparat.com/ with title آپارات - سرویس اشتراک ویدیو
Elapsed time: 2.253
Sites Done: 9
Process finished with exit code 0
也就是说,总时间仅略大于从网站获得结果的最长时间。
多线程工作!
修改后的代码如下:
import jdk.incubator.http.HttpClient;
import jdk.incubator.http.HttpRequest;
import jdk.incubator.http.HttpResponse;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.concurrent.CountDownLatch;
public class ReqThreaded {
class Site implements Runnable {
final String url;
String title;
float fetchTime;
Site(String url) {
this.url = url;
}
@Override
public void run() {
long fetchStartTime = System.currentTimeMillis();
String html = getResource(url);
float elapsed = (float) (System.currentTimeMillis() - fetchStartTime) / 1000;
Document doc = Jsoup.parse(html);
title = doc.title();
fetchTime = elapsed;
System.out.println("It took " + fetchTime + " seconds to fetch " + url + " with title " + title);
sitesDone.countDown();
}
}
private HttpClient client = HttpClient.newHttpClient();
private CountDownLatch sitesDone;
public ReqThreaded(String[] sites) throws InterruptedException {
int siteNumber = sites.length;
sitesDone = new CountDownLatch(siteNumber);
long startTime = System.currentTimeMillis();
for (int i = 0; i < siteNumber; i++) {
Runnable site = new Site(sites[i]);
Thread thread = new Thread(site);
thread.start();
}
sitesDone.await();
float totalElapsed = (float) (System.currentTimeMillis() - startTime) / 1000;
System.out.print("\rElapsed time: " + totalElapsed + "\nSites Done: " + siteNumber);
}
private String getResource(String someUrl) {
String body = "";
try {
URI url = new URI(someUrl);
HttpRequest request = HttpRequest.newBuilder()
.uri(url).GET().build();
HttpResponse<String> response = client.send(request, HttpResponse.BodyHandler.asString());
body = response.body();
} catch (URISyntaxException e) {
System.out.println("URL " + someUrl + "is not valid");
} catch (IOException | InterruptedException e) {
System.out.println(e.getMessage());
}
return body;
}
public static void main(String[] args) throws InterruptedException {
String[] topIranianSites = {
"https://www.aparat.com/",
"http://www.varzesh3.com/",
"http://namnak.com/",
"http://www.telewebion.com/",
"https://divar.ir/",
"https://www.ninisite.com/",
"https://www.blogfa.com/",
"http://www.namasha.com/",
"http://www.yjc.ir/"
};
new ReqThreaded(topIranianSites);
}
}
那是:
永远不会同时在不同的线程上运行相同对象的相同方法
永远不会通过普通变量在线程之间交换信息。仅使用专门的设施。在这里,我使用CountDownLatch
来表示每个线程的结束。如果我想将一些信息返回给主线程,我会改用BlockingQueue
。