我每秒都从Kafka接收数据行。对于每一批数据,我都将插入数据库中。
我的应用程序不断读取每个批次的最后message
和id
。这里的问题是,承诺不是按顺序运行,而是在一批完成后并发运行,并且它们继续读取相同的message
和id
。我希望每个诺言都有自己的message
和id
,这由它们从第一个函数的for循环中传入的顺序定义。
我认为我需要使用闭包,但是我不确定如何在这里应用它们。 我不想使用计时器!
谢谢!
// This is live data, coming in concurrently, forever. Promises from previous batch must be resolved before the next batch is received.
batchOfRows.on('message', function (data) {
for (var i = 0; i < batchOfRows.rows.length; i++) {
validate(batchOfRows.rows[i])
.then(result => console.log(result))
.catch(error => console.log(error));
}
});
// For each row received, give it an ID and then insert into the DB
function validate(data) {
return new Promise((resolve, reject) => {
message = data;
id = message.date + message.location
DB.execute('select * from table1 where id = ?', id) // This is a promise function provided by the database driver (Cassandra)
.then(result => {
// Insert into the table at this ID
insertIntoDB(message, id)
.then(result => resolve(result))
.catch(error => reject(error));
})
.catch(error => {
reject(error);
});
});
}
// Inserting into DB
function insertIntoDB(message, id) {
return new Promise((resolve, reject) => {
query = "insert into table2 where id = ? and messageBody = ?";
DB.execute(query, [id, JSON.Stringify(message)])
.then(result => resolve("Successfully inserted message ID " + id))
.catch(error => reject("Error inserting!"));
});
}
编辑(danh的解决方案):
var kafka = require('kafka-node');
client = new kafka.Client("localhost:2181"), Consumer = kafka.Consumer;
// This is like an event listener.
batchOfRows = new Consumer(
client, [{
topic: 'my_topic',
partition: 0,
offset: 0
}], {
fromOffset: false
}
);
let results = [];
let promises = Promise.resolve();
function processQueue() {
queue.forEach(element => {
promises = promises.then(element.map(processElement)).then(elementResult => {
// results.push(elementResult); // Don't want result to increase in size! I have put this inside insertDB then I clear it below
console.log(results.length); // First received batch prints: 0. Second received batch prints 72. Third received batch prints 75
results = [];
queue.shift();
});
});
}
batchOfRows.on('message', function (data) {
console.log(batchOfRows.value.length); // First received batch prints: 72. Second received batch prints 75. Third received batch prints 76
queue.push(batchOfRows.rows);
processQueue();
});
function processElement(data) {
const id = data.date + data.location
return DB.execute('select * from table1 where id = ?', id)
.then(result => insertIntoDB(data, id).then(() => result));
}
function insertIntoDB(message, id) {
const query = "insert into table2 where id = ? and messageBody = ?";
return DB.execute(query, [id, JSON.Stringify(message)])
.then(result => {
// Pushing the result here
results.push(result); // Seems like it does not push the results from the first batch from batchOfRows until it receives the second batch
console.log("Test") // On the first batch prints "Test" 72 times right away
});
}
编辑 我通过添加element.map(processUpdate)稍微修改了processQueue函数,因为从batchOfRows接收的批处理实际上是数组,并且我需要对该数组中的每个项目执行该数据库查询。
我也删除了result.push(elementResult),因为由于某种原因elementResult实际上是未定义的。我已经将results.push(elementResult)移到insertIntoDB中,并将其命名为results.push(result)。这可能是错误的根源(我不知道如何将insertIntoDB的结果返回给调用诺言函数processQueue)。
如果您看一下insertIntoDB,如果我console.log(“ test”),它将打印测试次数与batchOfRows数组中的元素相同,表明它已经解决了该批次中的所有诺言。因此,在第一个批次/消息上,如果有72行,它将打印72次“测试”。但是,如果我将console.log(“ Test”)更改为results.push(result),甚至results.push(“ test”),然后打印results.length,它仍然会给我0,直到第二批完成即使我希望长度为72。
答案 0 :(得分:3)
稍微抽象一下这些想法,并在数据中明确表示它们(而不是在诺言中隐式保留的数据)可能会有所帮助。从队列开始:
let queue = [];
使用queue.push(element)
将内容添加到队列中,使用element = queue.shift()
按到达顺序获取和删除
我们的目标是按顺序处理队列中的所有内容,按顺序保存结果。处理本身是异步的,我们想在开始下一个队列项目之前先完成它,因此我们需要一连串的诺言(称为promises
)来处理队列:
let results = [];
let promises = Promise.resolve();
function processQueue() {
queue.forEach(element => {
promises = promises.then(processElement(element)).then(elementResult => {
results.push(elementResult);
queue.shift();
});
});
}
我们可以说服自己这是对的,甚至不考虑processElement()
的作用,只要它返回了承诺即可。 (在OP情况下,该诺言就是处理一系列“行”的诺言)。 processElement()
就可以了,结果(OP情况下的结果数组)将被推送到results
。
确信操作的顺序是有意义的,当新批次到达时,将其添加到队列中,然后处理队列中的所有内容:
batchOfRows.on('message', function (data) {
queue.push(batchOfRows.rows);
processQueue();
});
我们只需要定义processElement()
。为此,请使用@YuryTarabanko的有用建议(并将其答案标记为IMO)
function processElement(data) {
const id = data.date + data.location
return DB.execute('select * from table1 where id = ?', id)
.then(result => insertIntoDB(data, id).then(() => result));
}
function insertIntoDB(message, id) {
const query = "insert into table2 where id = ? and messageBody = ?";
return DB.execute(query, [id, JSON.Stringify(message)])
}
一个不错的副作用是您可以衡量进度。如果输入的到达速度太快,则表达式:
queue.length - results.length
...将随着时间的流逝而增长。
编辑在查看较新的代码时,我为为什么对每一行(batchOfRows.rows
中的每个元素)进行查询感到困惑。由于该查询的结果将被忽略,请不要这样做...
function processElement(data) {
const id = data.date + data.location
// we know everything we need to know to call insert (data and id)
// just call it and return what it returns :-)
return insertIntoDB(data, id);
}
我现在知道这将是一个长期运行的任务,它不应该累积结果(甚至线性地)。为此,更清洁的解决方案是删除我建议的对results
数组的所有引用。最小版本的insert只是插入并返回插入结果...
function insertIntoDB(message, id) {
const query = "insert into table2 where id = ? and messageBody = ?";
return DB.execute(query, [id, JSON.Stringify(message)]);
}
我认为您添加了一些代码来记录结果(一个更好的测试方法是通过某些外部过程检查数据库,但是如果您想记录日志,请记住要 pass-through 记录后的结果值。
anyPromise.then(result => {
console.log(result);
return result; // IMPORTANT
})
答案 1 :(得分:2)
您的代码中有各种反模式。首先,您不需要手动创建承诺,您可能不需要致电new Promise
。其次,通过不从onFulfill
处理程序中返回嵌套的诺言来破坏诺言链。最后,当您不声明变量id = message.date + message.location
// This is live data, coming in concurrently, forever. Promises from previous batch must be resolved before the next batch is received.
let pending = Promise.resolve([]); // previous batch starting w/ resolved promise
batchOfRows.on('message', function (data) {
// not sure where was batchRows comming from in your code
const nextBatch = () => Promise.all(
data.batchOfRows.rows.map(validate)
);
// reassign pending to a new promise
// whatever happend to previous promise we keep running
pending = pending
.then(nextBatch)
.catch(e => console.error(e))
});
// For each row received, give it an ID and then insert into the DB
function validate(data) {
const id = data.date + data.location
return DB.execute('select * from table1 where id = ?', id)
.then(result => insertIntoDB(data, id).then(() => result));
}
// Inserting into DB
function insertIntoDB(message, id) {
const query = "insert into table2 where id = ? and messageBody = ?";
return DB.execute(query, [id, JSON.Stringify(message)])
}