我最近在做网页抓取,基本上已经可以工作了,但是当我尝试输出数组元素时,它们全都是 undefined(请参见底部的输出)。我怀疑这与时序有关,因为各个请求返回价格所花的时间从来都不一样。如果问题确实出在这里,该如何让它们同步执行?谢谢!
/// Creates a repeating dispatch timer on the main queue and starts it.
///
/// The timer is scheduled to fire first at `DispatchTime.now()` and then every
/// 16 ms with a 5 ms leeway; each firing calls `update()`. The created source is
/// stored in `timer`.
///
/// NOTE(review): assumes the enclosing type is a class (required for `[weak self]`)
/// and that `timer` is a stored property of it — confirm against the declaration.
func startUpdating() {
    timer = DispatchSource.makeTimerSource(flags: [], queue: DispatchQueue.main)
    timer?.schedule(deadline: DispatchTime.now(),
                    repeating: DispatchTimeInterval.milliseconds(16),
                    leeway: .milliseconds(5))
    // Capture `self` weakly. Passing `self.update` directly makes the handler hold
    // a strong reference to `self`; if `self` also owns `timer`, the two keep each
    // other alive and neither is ever released.
    timer?.setEventHandler(qos: .userInitiated, flags: [], handler: { [weak self] in
        self?.update()
    })
    timer?.resume()
}
/// Copies the `x` and `y` components of `planeNode.position` into the `title`
/// of `coordinateX` and `coordinateY` respectively, converted with `String(_:)`.
func update() {
    // Read the position once so both titles come from the same value.
    let position = planeNode.position
    coordinateX.title = String(position.x)
    coordinateY.title = String(position.y)
}
输出
const request = require('request');
const cheerio = require('cheerio');
const fs = require('fs');
// Output CSV file; one row is appended per successfully scraped part number.
const writeStream = fs.createWriteStream('post.csv');

// Part numbers to look up. Each one is appended to `url` to form the request URL.
const numbers = [
  '2202917',
  '2205112',
  '3514318',
  '3514561',
  '3585503',
  '3585704',
  '3610075',
  '5132753',
  '5247359',
  '5247360',
];

writeStream.write('PartNumber,Price \n');

// NOTE(review): `url` is not declared anywhere in this snippet; it has to be
// defined before this point.
//
// Each iteration gets its own `partNumber` binding. The previous version read
// `numbers[y]` inside the callback, but `y` was a single shared `var` that had
// already reached `numbers.length` by the time any response arrived, so every
// part number came out as `undefined`.
//
// The requests still run concurrently, so rows are written in the order the
// responses arrive, not in the order of `numbers`.
for (const partNumber of numbers) {
  request(url + partNumber, (error, response, html) => {
    if (error) {
      // Report the failure instead of silently skipping the part.
      console.error(`Request for part ${partNumber} failed:`, error);
      return;
    }
    if (response.statusCode !== 200) {
      console.error(`Request for part ${partNumber} returned HTTP ${response.statusCode}`);
      return;
    }

    const $ = cheerio.load(html);
    // Remove every run of two or more whitespace characters from the price text.
    const price = $('.price').text().replace(/\s\s+/g, '');

    console.log(partNumber, ' ', price);
    writeStream.write(`${partNumber}, ${price} \n`);
  });
}
答案 0 :(得分:0)
您调用的 request() 是异步的,因此各个回调完成的先后顺序无法保证,输出也就不会按顺序出现。下面用一个更简单的示例来说明几种替代方案。
使用不同的时间间隔来模拟请求调用。
// Sample part numbers used as input by the snippets that follow.
const numbers = [
  '2202917',
  '2205112',
  '3514318',
  '3514561',
  '3585503',
  '3585704',
  '3610075',
  '5132753',
  '5247359',
  '5247360',
];
/*
for (var y=0; y < numbers.length; y++) {
setTimeout(function(error, response, html) {
// The problem: the callback closes over the single `var y` shared by every iteration, so it sees the value y has when the callback finally runs, not the value y had when setTimeout was called.
// By then the loop has finished and y === numbers.length, so numbers[y] is undefined.
console.log(numbers[y]);
}, y % 100);
}
*/
// Alternative 1
// A better way: hand each element to a helper function, so the value travels as
// that call's own argument instead of being read later through the loop variable `y`.
for (var y=0; y < numbers.length; y++) {
// The call below is commented out, so this loop body currently does nothing.
// doRequest(numbers[y]);
}
/**
 * Logs `partNumber` from a timer callback, standing in for an asynchronous request.
 *
 * The timer delay is `partNumber % 100` milliseconds. Because `partNumber` is
 * this call's own argument, the callback always logs the value it was called with.
 *
 * @param {string|number} partNumber - Value to log; also used to derive the delay.
 * @returns {void}
 */
function doRequest(partNumber) {
  const delayMs = partNumber % 100;
  const logPartNumber = () => {
    console.log(partNumber);
  };
  setTimeout(logPartNumber, delayMs);
}
// end: Alternative 1
// Alternative 2
// in case you want it to be in sync
/**
 * Runs `doPromiseRequest` for every entry of `numbers`, one at a time.
 *
 * Each call is awaited before the next one starts, so the entries are handled
 * strictly in array order.
 *
 * @returns {Promise<void>} Resolves once the last entry has been processed.
 */
async function doSomething() {
  for (const partNumber of numbers) {
    await doPromiseRequest(partNumber);
  }
}
/**
 * Promise-returning stand-in for an asynchronous request.
 *
 * Starts a timer of `partNumber % 100` milliseconds; when it fires, `partNumber`
 * is logged and the returned promise is resolved with no value. The promise is
 * never rejected.
 *
 * @param {string|number} partNumber - Value to log; also used to derive the delay.
 * @returns {Promise<void>} Resolves after the value has been logged.
 */
function doPromiseRequest(partNumber) {
  const delayMs = partNumber % 100;
  return new Promise((resolve) => {
    setTimeout(() => {
      console.log(partNumber);
      resolve();
    }, delayMs);
  });
}
// Start the sequential run. The promise returned by doSomething() is not
// awaited or otherwise handled at this call site.
doSomething();
// end Alternative 2
/*
Another alternative is to skip the separate function and wrap the loop body in an immediately invoked anonymous function instead. It is harder to read and not recommended.
for (var y = 0; y < numbers.length; y++) {
(function (y) {
setTimeout(function(error, response, html) {
// no closure problem here: `y` is the immediately invoked function's own parameter, so each callback sees the value that was passed in for its iteration
console.log(numbers[y]);
}, y % 100);
})(y);
}
*/