我有一个像这样的大型JSON文件:
[
{
"id": 2000,
"city": "New York",
"lat": "",
"lon": "",
},
...
]
我需要为每个对象查找lat和lon。我使用JSONStream模块,通过管道(pipe)以流的方式处理数据:
// Destination file for the processed records.
var writeStream = fs.createWriteStream('data/stream.json');

// Source JSON file, read as UTF-8 text.
var readStream = fs.createReadStream(
  'data/dummy/institucion_1_2000.json',
  { encoding: 'utf8' }
);
// Search for lat and lon.
//
// Synchronous mapper: the value returned from this function is what goes
// downstream. For a record whose lat is "", a geocoding request is started and
// the record is returned right away; the `.then` handler that assigns lat/lon
// only runs later, once the geocoder promise settles. Records whose lat is not
// "" return undefined.
var search = es.mapSync(function (data) {
  if (data.lat != "") {
    return undefined;
  }

  // Start the lookup; nothing below waits for it to finish.
  var lookup = geocoder.geocode({address: data.city, country: "US"});

  lookup
    .then(function (results) {
      var best = results[0];
      console.log("Searched for " + best.formattedAddress);
      data.lat = best.latitude;
      data.lon = best.longitude;
    })
    .catch(function (error) {
      console.log("There was an error with element with id = " + data.id);
      console.log("Here is the error: " + error);
      // Stop the whole process once the geocoding quota is exhausted.
      if (error == 'Error: Status is OVER_QUERY_LIMIT. You have exceeded your rate-limit for this API.') {
        process.exit();
      }
    });

  return data;
});
// Pipeline: source file -> each top-level array element -> search -> JSON text -> output file.
var parsedRecords = readStream.pipe(JSONStream.parse('*'));
var searchedRecords = parsedRecords.pipe(search);
// The stringify stage does not wait until the lookups started in `search` have finished.
var serializedRecords = searchedRecords.pipe(JSONStream.stringify());
serializedRecords.pipe(writeStream);
地理编码部分有效。
我的问题是:在search函数完成之前,JSONStream.stringify就已经读取数据并把它通过管道传了下去。所以我得到的还是原来的JSON文件,没有我需要的修改。如果我改用下面这段代码:
// Synchronous stand-in used in place of the geocoding call.
// NOTE(review): this assigns to a bare `lat`, not `data.lat` — confirm which was intended.
if(data.lat == ""){
lat = 1;
}
来代替耗时更长的地理编码,它就能正常工作。我猜问题出在修改流数据所花费的时间上。那么,有没有办法等数据修改完成之后再把它通过管道传下去呢?
编辑:我把同步和异步搞混了。感谢djones。
/**
 * Asynchronous mapper that fills in missing coordinates before a record is
 * passed downstream.
 *
 * es.map requires the callback to be called exactly once per record;
 * `callback(null, data)` forwards the record. Every path below therefore ends
 * in a single callback call, so no record is dropped from the output.
 *
 * @param {Object} data - One parsed record; reads `id`, `city`, `lat`, and
 *   writes `lat` / `lon` when a lookup succeeds.
 * @param {Function} callback - es.map completion callback `(err, data)`.
 */
var search = es.map(function (data, callback) {
  // Only the empty string marks a missing latitude (strict check, so a
  // numeric latitude of 0 is not treated as missing). Records that already
  // have a value are forwarded unchanged.
  if (data.lat !== "") {
    callback(null, data);
    return;
  }

  geocoder.geocode({address: data.city, country: "USA"})
    .then(function (res) {
      if (!res || res.length === 0) {
        throw new Error("No geocoding result for city " + data.city);
      }
      console.log("Searched for " + res[0].formattedAddress);
      data.lat = res[0].latitude;
      data.lon = res[0].longitude;
    })
    .catch(function (err) {
      // Best effort: log the failure and keep the record unchanged.
      console.log("There was an error with element with id = " + data.id);
      console.log("Here is the error: " + err);
    })
    .then(function () {
      // Runs after either outcome, so the stream only receives the record
      // once the lookup has settled.
      callback(null, data);
    });
});