Hi, I'm trying to migrate a legacy database to a new database with Node.js.
Some of the legacy tables have close to a million rows, so this is a memory-heavy task.
With my current script I run out of memory very quickly. I made some changes that I hoped would fix this, but the memory used still keeps growing with every iteration.
The code below basically queries the legacy table, maps some fields and inserts the result into the new database. I moved the variable declarations from inside the loop to the outside, hoping the old values would simply be overwritten instead of new space being allocated. I also use .pop on the result array, hoping this would steadily shrink the space needed for the remaining rows.
However, as I said, the space used keeps growing with every iteration. Does anyone know why?
function migrate_user_table(callback) {
// Migrate user table
logger.log('info', "Starting migration of user table...");
let row = null;
let userid = null;
let kikname = null;
let fullname = null;
let active = null;
let imagepath = null;
let statusbase64 = null;
let gender = null;
let orientation = null;
let reports = null;
let reviewStatus = null;
let region = null;
let newReviewStatus = null;
let newgender = null;
let neworientation = null;
let newregion = null;
let banned = null;
let lastActive = null;
let numberOfRequests = null;
let requestsSend = null;
let moji = null;
let created = null;
let minAgeS = null;
let maxAgeS = null;
let minAgeC = null;
let maxAgeC = null;
let genderS = null;
let orientS = null;
let genderC = null;
let newgenderS = null;
let neworientS = null;
let newgenderC = null;
let user = null;
let user_has_social = null;
let user_has_data_username = null;
let user_has_data_status = null;
let user_has_data_report = null;
let user_has_data_sent = null;
let user_has_data_recv = null;
let user_has_moji = null;
let user_has_filter_searchage = null;
let user_has_filter_chatage = null;
let user_has_filter_searchgender = null;
let user_has_filter_chatgender = null;
let user_has_filter_searchorient = null;
legacy.query('SELECT * FROM user u LEFT JOIN behavior b ON (u.userid = b.userid) LEFT JOIN filter f ON (u.username = f.username)', (error, results) => {
if( error ) throw error;
while (results.length > 0 ) {
row = results.pop();
userid = row["userid"];
kikname = row["username"];
fullname = row["fullname"];
active = row["active"];
imagepath = row["img"];
statusbase64 = row["status"];
gender = parseInt(row["gender"]);
orientation = row["orientation"];
reports = row["reports"];
reviewStatus = parseInt(row["reviewStatus"]);
region = row["region"];
// map to new reviewstatus
newReviewStatus = 1;
switch (reviewStatus) {
case 0 :
newReviewStatus = 1;
break;
case 1 :
newReviewStatus = 3;
break;
case 2 :
newReviewStatus = 4;
break;
case -1 :
newReviewStatus = 2;
break;
}
// map to new gender, orientation and region
newgender = gender +1;
neworientation = orientation +1;
newregion = 7;
if( region >= 0 ) {
newregion = region +1;
}
banned = row["banned"];
lastActive = row["pendingSince"];
numberOfRequests = row["numberOfRequests"];
requestsSend = row["requestsSend"];
moji = row["moji_idmoji"];
created = row["created"];
minAgeS = row["minAgeS"];
maxAgeS = row["maxAgeS"];
minAgeC = row["minAgeC"];
maxAgeC = row["maxAgeC"];
genderS = row["genderS"];
orientS = row["orientS"];
genderC = row["genderC"];
newgenderS = genderS + 1;
if( newgenderS === 0 ) {
newgenderS = null;
}
neworientS = orientS + 1;
if( neworientS === 0 ) {
neworientS = null;
}
newgenderC = genderC + 1;
if( newgenderC === 0 ) {
newgenderC = null;
}
user = {iduser : userid, imageurl : imagepath, birthdate : null, active : active, banned : banned, reviewstatus_idreviewstatus : newReviewStatus, last_active : lastActive,
created : created, gender_idgender : newgender, orientation_idorientation : neworientation, region_idregion : newregion};
connection.query('INSERT INTO user SET ?', user, (error) => {
if( error ) throw error;
logger.log('debug', "User Insert successfull");
});
user_has_social = {user_iduser : userid, socialtype_idsocialtype : 1, value : kikname};
connection.query('INSERT INTO user_has_social SET ?', user_has_social, (error) => {
if( error ) throw error;
logger.log('debug', "User_has_social Insert successfull");
});
user_has_data_username = {user_iduser : userid, datatype_iddatatype : 5, value : fullname};
user_has_data_status = {user_iduser : userid, datatype_iddatatype : 1, value : statusbase64};
user_has_data_report = {user_iduser : userid, datatype_iddatatype : 7, value : reports};
user_has_data_sent = {user_iduser : userid, datatype_iddatatype : 4, value : requestsSend};
user_has_data_recv = {user_iduser : userid, datatype_iddatatype : 3, value : numberOfRequests};
datainsert(connection, user_has_data_username);
datainsert(connection, user_has_data_status);
datainsert(connection, user_has_data_report);
datainsert(connection, user_has_data_sent);
datainsert(connection, user_has_data_recv);
user_has_moji = {user_iduser : userid, moji_idmoji : moji};
connection.query('INSERT INTO user_has_moji SET ?', user_has_moji, (error) => {
if( error ) throw error;
logger.log('debug', "User_has_moji" +
" Insert successfull");
});
user_has_filter_searchage = { user_iduser : userid, filtertype_idfiltertype : 1, value : minAgeS, add_value : maxAgeS};
user_has_filter_chatage = { user_iduser : userid, filtertype_idfiltertype : 2, value : minAgeC, add_value : maxAgeC};
user_has_filter_searchgender = { user_iduser : userid, filtertype_idfiltertype : 3, value : newgenderS, add_value : null};
user_has_filter_chatgender = { user_iduser : userid, filtertype_idfiltertype : 4, value : newgenderC, add_value : null};
user_has_filter_searchorient = { user_iduser : userid, filtertype_idfiltertype : 5, value : neworientS, add_value : null};
filterinsert(connection, user_has_filter_searchage);
filterinsert(connection, user_has_filter_chatage);
filterinsert(connection, user_has_filter_searchgender);
filterinsert(connection, user_has_filter_chatgender);
filterinsert(connection, user_has_filter_searchorient);
logger.log('debug', results.length + " rows to go");
}
callback();
});
}
Answer (score: 2)
The way you use query(stmt, function(error, results) {...}) loads the entire result set of the legacy table into RAM. You then loop over the contents of that result set row by row (fetching the rows with pop).
Keep in mind that the whole point of SQL is to handle amounts of data that are too large for RAM; loading the whole table at once throws that advantage away.
You are also doing one thing that wastes RAM: SELECT *. If you enumerate only the columns you actually need, e.g. SELECT userid, username, ..., each row gets shorter and more rows fit into RAM.
But that will not solve your problem, only postpone it.
To fix it, you have two options. One is to process the legacy table in chunks.
For example, you can retrieve the data chunk by chunk. You get the first chunk with this query:
SELECT whatever ORDER BY user_id LIMIT 1000 OFFSET 0
and the following chunks with these queries:
SELECT whatever ORDER BY user_id LIMIT 1000 OFFSET 1000
SELECT whatever ORDER BY user_id LIMIT 1000 OFFSET 2000
That gives you chunks of a thousand rows each. Keep going until a query returns no more rows.
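A rough sketch of the chunked approach, assuming legacy is your source connection and processRow() is a placeholder for the mapping and inserts you already do inside your loop:

const CHUNK_SIZE = 1000;

function migrateChunk(offset, done) {
    // Select only the columns you actually need; ORDER BY keeps the chunks stable.
    legacy.query('SELECT userid, username, fullname FROM user ORDER BY userid LIMIT ? OFFSET ?',
        [CHUNK_SIZE, offset], (error, results) => {
            if (error) return done(error);
            if (results.length === 0) return done();    // no rows left, we are finished
            results.forEach((row) => processRow(row));  // map the fields and insert into the new DB
            migrateChunk(offset + CHUNK_SIZE, done);    // fetch the next chunk
        });
}

migrateChunk(0, (error) => {
    if (error) throw error;
    logger.log('info', 'User table migration finished');
});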
The second option is to stream the rows of the result set and handle them one by one. That requires a slightly different way of using query(). It is documented here: https://github.com/mysqljs/mysql#streaming-query-rows
Basically it goes like this:
var stream = legacy.query('SELECT whatever');
stream
.on('result', function(row) {
// Pausing the connection is useful if your processing involves I/O
legacy.pause();
// handle your row of data here...
legacy.resume();
})
.on('end', function() {
// all rows have been received
});
This way you process your data one row at a time.
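Plugged into your migration, the row handler could look roughly like this (just a sketch: connection is your target database connection, and the field mapping from your question is abbreviated):

legacy.query('SELECT userid, username, fullname FROM user')    // only the columns you need
    .on('result', (row) => {
        legacy.pause();                                         // stop reading while the insert runs
        const user = { iduser : row.userid /* ...mapped fields from your loop... */ };
        connection.query('INSERT INTO user SET ?', user, (error) => {
            if (error) throw error;
            legacy.resume();                                    // ask the stream for the next row
        });
    })
    .on('end', () => {
        logger.log('info', 'User table migration finished');
    });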