我编写了一个小型 Node 脚本:从 Oracle 数据库表中读取记录,调用外部 Web 服务对数据进行转换,然后将结果插入另一个表。由于需要对 7000 多万条记录重复同一过程,我每次选取 1 万行,并使用 node-oracledb 的 executeMany 进行批量插入。
代码如下:
// INSERT statement targeting `${start_table}_TOKEN_MASK`. The five column
// values are supplied at execution time through the bind placeholders
// :id1, :id2, :token, :mask and :cardNumber.
let insertQuery = [
  `INSERT INTO ${start_table}_TOKEN_MASK`,
  `(${id_field1}, ${id_field2}, TOKEN, CARD_NUMBER_MASK, CARD_NUMBER)`,
  'VALUES (:id1, :id2, :token, :mask, :cardNumber)',
].join(' ');
/**
 * Tokenizes the source table in batches until no untokenized rows remain.
 *
 * Each pass:
 *   1. increments the shared `counter`;
 *   2. selects up to `max_per_run` rows of `start_table` for which the
 *      LEFT JOINed `${start_table}_TOKEN_MASK` row has a NULL TOKEN;
 *   3. builds a mask for every card number and calls `Tokenize` on it, with
 *      at most 150 calls in flight;
 *   4. inserts the results through `insertQuery` with `executeMany`, in
 *      slices of 5000 rows, committing after each slice.
 *
 * When the SELECT returns no rows the connection is closed and the process
 * exits with code 0. Any error (SELECT, tokenization or INSERT) is logged and
 * the process exits with code 1.
 *
 * @returns {Promise<void>} Completion is signalled by process exit rather
 *   than by this promise.
 */
async function process_chunk() {
  // NOTE(review): the LEFT JOIN matches on id_field1 only, while inserted rows
  // carry both id_field1 and id_field2 - confirm id_field1 alone is unique.
  const selectQuery = [
    'SELECT',
    `${start_table}.${id_field1},`,
    `${start_table}.${id_field2},`,
    `${start_table}.${card_field}`,
    `FROM ${start_table}`,
    `LEFT JOIN ${start_table}_TOKEN_MASK ON ${start_table}.${id_field1} = ${start_table}_TOKEN_MASK.${id_field1}`,
    `WHERE TOKEN IS NULL AND ROWNUM <= ${max_per_run}`,
  ].join('\n');

  try {
    for (;;) {
      counter++;

      // SELECT CARDS
      const res = await connection.execute(selectQuery);

      if (res.rows.length <= 0) {
        console.log(`All done.`);
        console.timeEnd('mapping');
        // Wait for the close to finish before exiting. A close failure is
        // logged but does not turn a completed run into a failed one.
        try {
          await connection.close();
        } catch (closeErr) {
          console.error(closeErr);
        }
        process.exit(0);
      }
      console.log(`Fetched ${res.rows.length} rows.`);

      console.log(`Starting tokenization...`);
      // Each mapper call returns its own bind record instead of pushing into a
      // shared array, so a record can only ever hold values from a single row.
      const binds = await Promise.map(res.rows, async (row) => {
        const cardText = row[2].toString();
        // First 6 digits and last 4 digits stay visible; the rest is masked.
        const mask = `${cardText.substring(0, 4)}.${cardText.substring(4, 6)}XX.XXXX.${cardText.substring(cardText.length - 4)}`;
        const token = await Tokenize(row[2]);
        // Bind by name so every value is tied explicitly to its placeholder
        // in insertQuery rather than to its position in an array.
        return { id1: row[0], id2: row[1], token, mask, cardNumber: row[2] };
      }, { concurrency: 150 });

      // Send chunk to DB
      console.log(`Sending to DB...`);
      // Prevent failure on executeMany by dividing the inserts into multiple
      // transactions, executed one after another on the single connection.
      for (const chunk of splitArray(binds, 5000)) {
        const insertResult = await connection.executeMany(insertQuery, chunk, { autoCommit: true });
        console.log(`Inserted ${insertResult.rowsAffected} rows. Commit.`);
      }

      console.log(`Chunk ${counter} tokenized.`);
    }
  } catch (err) {
    console.error(err);
    process.exit(1);
  }
}
处理结束后,我看到新表(即插入的目标表)中已经有了记录,但每条记录各列的值是错位、混在一起的,例如:
ID_FIELD1, ID_FIELD2, TOKEN, CARD_NUMBER_MASK, CARD_NUMBER
1 2 1234 456789 458755
2 3 1235 456790 458789
这说明绑定值在插入过程中,或者在 Oracle 驱动程序内部,以某种方式被“混”在了一起。
有什么想法吗?