有两个文件都按ID排序,parent_data.json和child_data.json。孩子是父母的一部分。
我正在尝试将parent.id与child.id匹配。我正在尝试对每个.json文件进行一次通过。而不是像嵌套的forEach / map这样循环遍历每个孩子的事情。
假设两个.json文件都包含少量演出/数百万行,重新读取children.json将花费很长时间,需要多次传递,并且将所有内容加载到内存中也是不可能的。
每个文件可以是JSON格式,也可以是NDJSON。当前示例是NDJSON。
parent_data.json:
{"id":1, "name": "parent_1"}
{"id":2, "name": "parent_2"}
{"id":3, "name": "parent_3"}
{"id":4, "name": "parent_4"}
{"id":5, "name": "parent_5"}
{"id":6, "name": "parent_6"}
{"id":7, "name": "parent_7"}
child_data.json:
{"id":4, "name":"belongs_to_parent_4", "guid": "${unique_id}"}
{"id":4, "name":"belongs_to_parent_4", "guid": "${unique_id}"}
{"id":5, "name":"belongs_to_parent_5", "guid": "${unique_id}"}
{"id":7, "name":"belongs_to_parent_7", "guid": "${unique_id}"}
{"id":7, "name":"belongs_to_parent_7", "guid": "${unique_id}"}
预期结果:
[
{
"id": 4,
"name": "parent_4",
"children": [
{
"id": 4,
"name": "belongs_to_parent_4",
"guid": "${unique_id}"
},
{
"id": 4,
"name": "belongs_to_parent_4",
"guid": "${unique_id}"
}
]
},
{
"id": 5,
"name": "parent_5",
"children": [
{
"id": 5,
"name": "belongs_to_parent_5",
"guid": "${unique_id}"
}
]
},
{
"id": 7,
"name": "parent_7",
"children": [
{
"id": 7,
"name": "belongs_to_parent_7",
"guid": "${unique_id}"
}
]
}
]
我找不到一种合适的方法来遍历任何一条流,而不会在最终结果中丢失一些孩子。
此代码错过了每个父母的两个第一个孩子:
const fs = require('fs');
const es = require('event-stream');
const main = () => {
let currentParent = null;
let currentChild = null;
let prevChild = null;
let parentLines = 0;
let childLines = 0;
let tmpChildren = [];
const parentStream = fs
.createReadStream('parent_data.json') // Parent Stream
.pipe(es.split('\n')) // Delimit by \n
.pipe(
es.mapSync(line => { // Current line, without the delimiter \n
parentStream.pause(); // Pause stream until done processing this line
++parentLines; // Debug
currentParent = JSON.parse(line); // Now it's valid JSON object
console.log('currentParent', currentParent.id); // Debug
}),
)
.on('error', e => {
console.error(e);
})
.on('close', () => {
console.log('close reading parent stream');
})
.on('end', () => {
console.log('end reading parent stream');
});
const childStream = fs
.createReadStream('child_data.json') // Child Stream
.pipe(es.split('\n')) // Split by delimiter
.pipe(
es.mapSync(line => { // Current child line, without delimiter
++childLines; // Debug
childStream.pause(); // Pause child stream
currentChild = JSON.parse(line); // Valid JSON child now
if (prevChild && (currentParent.id === prevChild.id)) { // Check prevChild and currentParent
tmpChildren.push(prevChild);
prevChild = null;
}
if (currentChild && (currentParent.id == currentChild.id)) { // Check currentChild and currentParent
console.log('child', currentChild.id);
tmpChildren.push(currentChild);
// childStream.resume(); // Having this here will cause the stream to stop processing entirely
} else {
// We're here because currentParent does not match currentChild, move to next parent
prevChild = currentChild;
if (tmpChildren.length > 0) {
currentParent['children'] = tmpChildren;
tmpChildren = [];
}
parentStream.resume();
}
console.log('currentChild', currentChild.id);
childStream.resume(); // Having this here will cause the stream to skip children on new parent
}),
)
.on('error', e => {
console.error(e);
})
.on('close', () => {
console.log('close reading child stream');
})
.on('end', () => {
console.log('end reading child stream');
});
};
main();
TLDR:在child.id上匹配parent.id,如果不匹配,则保存child。循环到下一个父对象,检查parent.id上的prevChild.id和/或child.id。如果匹配,则将prevChild / child附加到parent [children],然后迭代child。如果不匹配,则迭代到下一个父对象...等