我正试图更快地调用多个API。
在下面的代码中,
// Import required modules
const axios = require('axios');
const cheerio = require('cheerio');
const fs = require('fs');
// Listing-page URL prefix; a page number goes directly after "?p=".
const url = "http://johndevisser.marktplaza.nl/?p=";
/**
 * Collects the inner HTML of every `div.item` element on a listing page.
 *
 * The function stays `async` so that existing callers which `await` it keep
 * working, but the parsing itself is synchronous.
 *
 * @param {string} data - HTML of a listing page; passed straight to `cheerio.load`.
 * @returns {Promise<Array<string|null>>} Inner HTML of each `div.item`, in
 *   document order (an entry is whatever cheerio's `.html()` returns for it).
 */
async function getProductsHtml(data) {
  // cheerio.load is synchronous, so there is nothing to await here.
  const $ = cheerio.load(data);
  // toArray() + Array#map keeps one entry per element, including null results.
  return $("div.item")
    .toArray()
    .map((prod) => $(prod).html());
}
/**
 * Builds one product record from each product-tile HTML fragment.
 *
 * @param {string[]} html - HTML fragments, one per product tile.
 * @returns {Promise<Array<{title: string, mpUrl: string, imgUrl: string, price: number}>>}
 *   One record per fragment, in input order. `price` is `NaN` when the
 *   tile's `span.subtext` text holds no parsable amount.
 */
async function parseProducts(html) {
  const products = [];
  // A declared for...of binding: the former `for (item in html)` wrote to an
  // undeclared `item`, which is a ReferenceError in strict mode / ES modules
  // and a leaked global otherwise.
  for (const fragment of html) {
    const $ = cheerio.load(fragment);
    const link = $("a");

    // The amount is taken from after the first non-breaking space of the
    // label, with a decimal comma. A label without that separator yields NaN
    // instead of throwing on `undefined.replace`.
    const labelParts = $("span.subtext").text().split("\xa0");
    const priceText = labelParts.length > 1 ? labelParts[1].replace(",", ".") : "";

    products.push({
      title: link.attr("title"),
      mpUrl: link.attr("href"),
      imgUrl: $("img").attr("src"),
      price: parseFloat(priceText),
    });
  }
  return products;
}
/**
 * Fetches each product's detail page and stores the description text on the
 * product under `descr`.
 *
 * Requests are issued one at a time, in input order. The product objects are
 * mutated in place and the same array is returned. A failed request rejects
 * the returned promise.
 *
 * @param {Array<Object>} prods - Product records; each needs an `mpUrl` to fetch.
 * @returns {Promise<Array<Object>>} The same `prods` array, with `descr` set on every entry.
 */
async function addDescriptionToProducts(prods) {
  // Declared bindings only: the former `for (i in prods)` and bare
  // `description = ...` both assigned to undeclared (global) names.
  for (const product of prods) {
    const response = await axios.get(product["mpUrl"]);
    const $ = cheerio.load(response.data);
    product["descr"] = $("div.description p").text();
  }
  return prods;
}
/**
 * Downloads one listing page and returns its products, descriptions included.
 *
 * Steps: GET the page, split it into per-product HTML, parse the metadata,
 * then fetch each product's description.
 *
 * @param {number} i - Page number, appended to the module-level `url` prefix.
 * @returns {Promise<Array<Object>>} The page's products. If any step fails the
 *   error is logged and an empty array is returned, so callers can always
 *   iterate or spread the result.
 */
async function getProductsFromPage(i) {
  try {
    // `url` already ends in "?p=", so this is the same address as before.
    const page = await axios.get(`${url}${i}`);
    console.log("GET request succeeded!");

    // Get the Array with HTML of each product
    const productsHTML = await getProductsHtml(page.data);
    console.log("HTML array obtained!");

    // Get the Array of objects with meta info
    const productsParsed = await parseProducts(productsHTML);
    console.log("Products parsed!");

    // Add description to each product
    const productsMeta = await addDescriptionToProducts(productsParsed);
    console.log("Descriptions added!");

    // Return the Array with all product information
    return productsMeta;
  } catch (e) {
    console.log(e);
    // Previously this fell through and returned undefined, which made the
    // caller's `...productsFromPage` throw and abort the whole crawl.
    return [];
  }
}
/**
 * Crawls all products and writes them to `products.json` as JSON.
 *
 * The returned promise settles only after the file has been written (or the
 * attempt has failed). Errors are logged, never rethrown.
 *
 * @returns {Promise<void>}
 */
async function saveAllProducts() {
  try {
    const allProducts = await getAllProducts();
    if (allProducts === undefined) {
      // JSON.stringify(undefined) is undefined, which writeFile rejects with
      // an opaque argument-type error; fail with a readable message instead.
      throw new Error("getAllProducts returned no data; products.json was not written");
    }
    // JSON.stringify is synchronous; no await needed.
    const jsonProducts = JSON.stringify(allProducts);
    // Promise-based write so completion and failure are observed here rather
    // than in a detached callback.
    await fs.promises.writeFile("products.json", jsonProducts, "utf8");
  } catch (e) {
    console.log(e);
  }
}
/**
 * Collects the products of every listing page in a page range, one page at a
 * time.
 *
 * A page that fails or yields no array is logged and skipped, so the products
 * gathered from other pages are still returned.
 *
 * @param {number} [firstPage=1] - First page number to fetch (inclusive).
 * @param {number} [lastPage=854] - Last page number to fetch (inclusive).
 * @returns {Promise<Array<Object>>} All products, in page order.
 */
async function getAllProducts(firstPage = 1, lastPage = 854) {
  const allProducts = [];
  for (let page = firstPage; page <= lastPage; page++) {
    let productsFromPage;
    try {
      productsFromPage = await getProductsFromPage(page);
    } catch (e) {
      console.log(e);
      continue;
    }
    if (!Array.isArray(productsFromPage)) {
      // Spreading a non-iterable would throw and discard everything so far.
      console.log("No products returned for page " + page);
      continue;
    }
    // Append in place instead of re-spreading the whole accumulator each page.
    allProducts.push(...productsFromPage);
    console.log("Saved products from page " + page);
  }
  return allProducts;
}
// Script entry point: start the crawl-and-save run. The returned promise is
// not awaited here; saveAllProducts logs its own errors in its try/catch.
saveAllProducts();
getFilteredEvents 是当前同步的版本。我有一种感觉,map(x -> x.getFilteredEvents(eventResearch)) 操作会先等待每个API的响应(其内部使用 RestTemplate.exchange()),然后才转到下一个API,以构建要返回的 List<Event>。一种解决方案可能是在单独的线程上启动 map 调用,但是我想尝试 CompletableFuture API。
因此 getFilteredEventsFaster 是我努力改善响应时间的结果。
我的理解是,我希望将
@Service
// Service that gathers events from the injected list of UniformEventsResearchApi instances.
public class EventsResearchService {
@Autowired
private List<UniformEventsResearchApi> eventsResearchApis;
// this works, but I'm trying to improve it
// Synchronous version: the stream calls getFilteredEvents on each API in turn,
// flattens the per-API lists into a single list and passes it to extractResponse
// (extractResponse is not shown in this snippet).
public EventResearchResponse getFilteredEvents(EventResearch eventResearch) {
List<Event> eventsList = eventsResearchApis
.stream()
.map(x -> x.getFilteredEvents(eventResearch))
.flatMap(List::stream)
.collect(Collectors.toList());
return extractResponse(eventResearch, eventsList);
}
// this doesn't work yet: what is wrong?
// Asynchronous attempt: wraps each API call in CompletableFuture.supplyAsync and
// tries to combine the resulting futures with CompletableFuture.allOf.
// NOTE(review): two things keep this method from compiling:
//   1. `new CompletableFuture<List<Event>>[0]` is a generic array creation, which
//      Java rejects; the unparameterized `new CompletableFuture[0]` is accepted.
//   2. CompletableFuture.allOf returns CompletableFuture<Void>, not the declared
//      CompletableFuture<List<Event>>; the per-API results still have to be
//      collected afterwards (e.g. in a thenApply that joins each future).
public CompletableFuture<List<Event>> getFilteredEventsFaster(EventResearch eventResearch) {
List<CompletableFuture<List<Event>>> futureEventsList = eventsResearchApis
.parallelStream()
.map(x -> CompletableFuture.supplyAsync(() -> x.getFilteredEvents(eventResearch)))
.collect(Collectors.toList());
return CompletableFuture.allOf(futureEventsList.toArray(new CompletableFuture<List<Event>>[0]));
}
}
CompletableFuture<List<Event>>(而不是 List<CompletableFuture<List<Event>>>)发送回我的前端,因此才有 CompletableFuture.allOf() 调用(如果我理解正确,它类似于 flatmap 操作,从多个 CompletableFuture 创建一个 CompletableFuture)。
不幸的是,使用 new CompletableFuture<List<Event>>[0] 时出现 Generic array creation 编译错误。
我哪里做错了?
我觉得使用 join 方法确实可以让我收集所有答案,但是那将是对Service线程的阻塞操作,不是吗?(如果我理解正确的话,这样做会挫败试图将 CompletableFuture 返回到我的前端的目的。)
答案 0 :(得分:1)
以下代码段显示了如何使用 listOfFutures.stream().map(CompletableFuture::join) 来收集 allOf 的结果。我从这个页面中选取了这个示例,该页面指出它不会等待每个 Future 完成。
/**
 * Demo: start several simulated downloads asynchronously, combine them with
 * {@code CompletableFuture.allOf}, and print the collected results together
 * with the elapsed wall-clock time.
 */
class Test {

    /** Simulated duration of one download in milliseconds (4 s, as in the original demo). */
    private static final long DEFAULT_DELAY_MILLIS = 4_000L;

    public static void main(String[] args) throws Exception {
        long millisBefore = System.currentTimeMillis();
        List<String> strings = Arrays.asList("1", "2", "3", "4", "5", "6", "7", "8");

        // Every download is started here; nothing waits for a result yet.
        List<CompletableFuture<String>> listOfFutures =
                strings.stream().map(Test::downloadWebPage).collect(toList());

        // allOf only signals completion (CompletableFuture<Void>). The results
        // are read with join() inside thenApply, i.e. after all futures are done.
        CompletableFuture<List<String>> futureOfList = CompletableFuture
                .allOf(listOfFutures.toArray(new CompletableFuture[0]))
                .thenApply(v -> listOfFutures.stream().map(CompletableFuture::join).collect(toList()));

        System.out.println(futureOfList.get()); // blocks here
        System.out.printf("time taken : %.4fs\n", (System.currentTimeMillis() - millisBefore) / 1000d);
    }

    /** Starts a simulated download that takes the default delay. */
    private static CompletableFuture<String> downloadWebPage(String webPageLink) {
        return simulateDownload(webPageLink, DEFAULT_DELAY_MILLIS);
    }

    /**
     * Starts a simulated download on the default async executor.
     *
     * @param webPageLink label echoed back in the result
     * @param delayMillis how long the simulated download sleeps
     * @return a future completing with {@code "downloaded : " + webPageLink}, or
     *         completing exceptionally if the sleeping thread is interrupted
     */
    static CompletableFuture<String> simulateDownload(String webPageLink, long delayMillis) {
        return CompletableFuture.supplyAsync(() -> {
            try {
                TimeUnit.MILLISECONDS.sleep(delayMillis);
            } catch (InterruptedException interrupted) {
                // Keep the interrupt visible to the executor and surface the failure.
                // The former `return` inside `finally` silently discarded this exception.
                Thread.currentThread().interrupt();
                throw new RuntimeException(interrupted);
            }
            return "downloaded : " + webPageLink;
        });
    }
}
由于效率似乎是这里的问题,因此我添加了一个虚拟基准测试,以证明它不需要32秒即可执行。
输出:
[downloaded : 1, downloaded : 2, downloaded : 3, downloaded : 4, downloaded : 5, downloaded : 6, downloaded : 7, downloaded : 8]
time taken : 8.0630s
借助此答案,并参考这个网站(其中讨论了与 allOf 相关的异常处理),我想到了这个完整的版本:
/**
 * Queries every events API asynchronously and merges the answers.
 *
 * @param eventResearch the search criteria forwarded to each API
 * @return a future that completes with the events of all APIs in one list
 */
public CompletableFuture<List<Event>> getFilteredEventsFaster(EventResearch eventResearch) {
    // Start one asynchronous lookup per API; building this list does not wait for results.
    List<CompletableFuture<List<Event>>> pendingLookups = eventsResearchApis.stream()
            .map(api -> getFilteredEventsAsync(api, eventResearch))
            .collect(Collectors.toList());

    // allOf yields a CompletableFuture<Void> that completes once every lookup has.
    // The thenApply stage runs only after that, so each join() reads a finished
    // future; the per-API lists are then flattened into a single List<Event>.
    return CompletableFuture
            .allOf(pendingLookups.toArray(new CompletableFuture[eventsResearchApis.size()]))
            .thenApply(ignored -> pendingLookups.stream()
                    .map(CompletableFuture::join)
                    .flatMap(List::stream)
                    .collect(Collectors.toList()));
}
/**
 * Runs one API lookup asynchronously, turning a failure into an empty result.
 *
 * @param api the events API to query
 * @param eventResearch the search criteria passed to the API
 * @return a future with the API's events, or an empty list if the call threw
 */
private CompletableFuture<List<Event>> getFilteredEventsAsync(UniformEventsResearchApi api,
        EventResearch eventResearch) {
    CompletableFuture<List<Event>> lookup =
            CompletableFuture.supplyAsync(() -> api.getFilteredEvents(eventResearch));

    // A failing API is logged and replaced by an empty list here, so the
    // combined future built by the caller still receives the other APIs' events.
    return lookup.exceptionally(failure -> {
        LOGGER.error("Extraction of events from API went wrong: ", failure);
        return Collections.emptyList();
    });
}