我正在努力从页面收集错误并将其存储在数据库中,然后使用图形API以可视方式显示信息。
有8个站点,每个站点有100个条目 - 每次800个交易。 我循环遍历每个站点,然后子循环遍历错误表并收集它们。
如果我对所有800个条目的每个子循环进行插入查询,但是我从这么多事务中得到某种内存泄漏并且在几分钟之后 - 由于内存超出而导致节点中断,我得到了它。
所以我尝试将所有800个条目排队到数组,然后在每次迭代结束时执行多插入,但我得到的是ER_PARSE_ERROR。
var tabletojson = require('tabletojson');
var mysql = require("mysql");
var striptag = require("striptags");
var fs = require("fs");
var path = require('path');
var startCollector;
var iterations = 0;
var insertions = 0;
var duplicated = 0;
var datas = [];
var clients = ["ClientA", "ClientB", "ClientC", "ClientD", "ClientE", "ClientF", "ClientG", "ClientH"];
var appDir = path.dirname(require.main.filename);
var errorList = ["err1", "err2", "err3", "err4", "err5", "err6"];
var con = mysql.createPool({
host: "localhost",
user: "User",
password: "Password",
database: "errors"
});
function CollectErrors() {
startCollector = new Date();
for(var a = 0; a < clients.length; a++) {
(function(a) {
tabletojson.convertUrl("http://example.com" + clients[a] + "/page.php?limit=100", { stripHtmlFromCells: false }, function(response) {
var rs = response[0];
for(var l = rs.length-1; l > -1; l--) {
var newDate = formatDate(striptag(rs[l]["Date"]), striptag(rs[l]["Time"]));
var user = getUser(striptag(rs[l]["User"]));
var msg = striptag(rs[l]["Error"]);
var splitError = rs[l]["Error"].split("<a href=\"");
var link = getUrl(splitError[1]);
var id = getId(link);
var type = getType(striptag(splitError[0]));
var temp = [newDate, link, type, user, clients[a], id, msg];
datas.push(temp);
}
});
})(a);
}
con.getConnection(function(err, connection) {
connection.query("INSERT IGNORE INTO entries (time, url, type, author, client, uid, message) VALUES ?", [datas], function(err, rows) {
console.log(err);
});
connection.release();
datas = [];
});
setTimeout(CollectErrors, 10000);
}
function formatDate(date, time) {
var newdate = date.split("/").reverse().join("-");
var newtime = time+":00";
return newdate + " " + newtime;
}
function getUrl(uri) {
return "http://example.com/"+uri.split("\">Details")[0];
}
function getId(url) {
return decodeURIComponent((new RegExp('[?|&]' + "id" + '=' + '([^&;]+?)(&|#|;|$)').exec(url) || [null, ''])[1].replace(/\+/g, '%20')) || null;
}
function getType(error) {
for(var a = 0; a < errorList.length; a++) {
if(error.indexOf(errorList[a]) !== -1) {
return errorList[a];
}
}
return "Other";
}
function getUser(user) {
if(user == "" || user == " " || user == null) {
return "System";
}
return user;
}
CollectErrors();
我也尝试了mysql.createConnection,但这也给了我同样的问题。
我已经被困了12个小时而且我看不出有什么问题,我甚至尝试用字符串填充Datas表但是出现了同样的错误。
答案 0 :(得分:2)
我已将您的代码更改为使用ES6并更正了模块功能。
有用的链接:correct pooling with mysql,更正insert query,async/await,IIFE,enhanced object
const tabletojson = require('tabletojson'),
mysql = require("mysql"),
striptag = require("striptags"),
fs = require("fs"),
path = require('path');
const startCollector,
iterations = 0,
insertions = 0,
duplicated = 0;
let datas = [];
const clients = ["ClientA", "ClientB", "ClientC", "ClientD", "ClientE", "ClientF", "ClientG", "ClientH"];
const appDir = path.dirname(require.main.filename);
const errorList = ["err1", "err2", "err3", "err4", "err5", "err6"];
const con = mysql.createPool({
host: "localhost",
user: "User",
password: "Password",
database: "errors"
});
// We'll use async/await from ES6
const collectErrors = async() => {
// Up to here I've only changed syntax to ES6
let startCollector = new Date();
// We'll try to iterate through each client. And we use here for..of syntax to allow us using await
for (let client of clients) {
// Please, check that client value return correct data. If not, change for..of to your for..each and client variable to clients[a]
const tbj = await tabletojson.convertUrl("http://example.com" + client + "/page.php?limit=100", {
stripHtmlFromCells: false
});
const result = tgj[0];
for (rs of result) {
// I can't check this part, but I hope your example was with correct values.
let newDate = formatDate(striptag(rs[l]["Date"]), striptag(rs[l]["Time"]));
let user = getUser(striptag(rs[l]["User"]));
let link = getUrl(splitError[1]);
let msg = striptag(rs[l]["Error"]);
let id = getId(link);
let splitError = rs[l]["Error"].split("<a href=\"");
let getType = getType(striptag(splitError[0]));
// ES6 enhanced object syntax
datas.push({
newDate,
user,
msg,
id,
splitError,
link,
getType,
temp: [newDate, link, type, user, client, id, msg]
});
}
}
// OK, here we have fulfilled datas array. And we want to save it.
con.getConnection((err, connection) => {
// Please, notice, here I've changed your insert query to prepared statement.
connection.query("INSERT IGNORE INTO entries SET ?", datas, (err, rows) => {
console.log(err);
connection.release();
datas = [];
});
});
// I don't see why do you need timeout here, so I've left it commented.
// setTimeout(CollectErrors, 10000);
};
// Here your other methods go....
// And to call your async function we'll use IIFE
(async() => {
await collectErrors();
})();
&#13;
可能有mysql插入错误,但这不是肯定的。如果发生,请写下评论,我会帮助您。