我想进行网页抓取。我写了代码
var connection = require('./mysqlConnection');
var c = new Crawler({
maxConnections : 10,
callback : function (error, result, $) {
if(error){
console.log(error);
}else{
const data = $(".test");
for(var i = 0; i < data.length; i ++){
const item = $(data[i]).text();
var query = 'INSERT INTO crawling (item) VALUES ("' + item + '")';
connection.query(query, function(err, rows) {
console.log(err);
});
}
}
}
});
c.queue('https://xxxxxxxxxx.com');
我运行这段代码时,const item
可能会到达一半,但是
{ Error: ER_PARSE_ERROR: You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near 'June18th
")' at line 1
at Query.Sequence._packetToError (/Users/xxx/xxx/node_modules/mysql/lib/protocol/sequences/Sequence.js:52:14)
at Query.ErrorPacket (/Users/xxx/xxx/node_modules/mysql/lib/protocol/sequences/Query.js:77:18)
at Protocol._parsePacket (/Users/xxx/xxx/node_modules/mysql/lib/protocol/Protocol.js:279:23)
at Parser.write (/Users/xxx/xxx/node_modules/mysql/lib/protocol/Parser.js:76:12)
at Protocol.write (/Users/xxx/xxx/node_modules/mysql/lib/protocol/Protocol.js:39:16)
at Socket.<anonymous> (/Users/xxx/xxx/node_modules/mysql/lib/Connection.js:103:28)
at emitOne (events.js:116:13)
at Socket.emit (events.js:211:7)
at addChunk (_stream_readable.js:263:12)
at readableAddChunk (_stream_readable.js:250:11)
--------------------
at Protocol._enqueue (/Users/xxx/xxx/node_modules/mysql/lib/protocol/Protocol.js:145:48)
at Connection.query (/Users/xxx/xxx/node_modules/mysql/lib/Connection.js:208:25)
at Object.callback (/Users/xxx/xxx/app.js:135:24)
at Crawler._onInject (/Users/xxx/node_modules/node-webcrawler/lib/crawler.js:428:13)
at /Users/xxx/node_modules/node-webcrawler/lib/crawler.js:395:11
at Crawler._inject (/Users/xxx/node_modules/node-webcrawler/lib/crawler.js:154:9)
at Crawler._onContent (/Users/xxx/node_modules/node-webcrawler/lib/crawler.js:394:7)
at Request._callback (/Users/xxx/node_modules/node-webcrawler/lib/crawler.js:332:14)
at Request.self.callback (/Users/xxx/node_modules/node-webcrawler/node_modules/request/request.js:187:22)
at emitTwo (events.js:126:13)
code: 'ER_PARSE_ERROR',
errno: 1064,
sqlMessage: 'You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near \'June18th"\n \n \n ")\' at line 1',
sqlState: '42000',
index: 0,
sql: 'INSERT INTO crawling (title) VALUES (" \n I went to Paris at"June18th"because\n \n \n ")' }
我真的不明白为什么会发生这样的错误。我认为\ n会导致此错误,所以我重写了
const item = $(data[i]).text().replace("\n", '');
但发生相同的错误。我的代码有什么问题?应如何解决?
答案 0 :(得分:0)
尝试更改:connection.query(query, function(err, rows) {
到connection.query([query], function(err, rows) {
它可能需要[]
答案 1 :(得分:0)
您要在查询中添加双引号以分隔插入语句中的文本,但是您不知道传入的文本是否也将双引号引起,因此该语句格式错误:
c=session.query(a).order_by(a.id.desc()).limit(2)
c=c[::-1]
因此,通过'INSERT INTO crawling (title) VALUES (" .. "June18th".. ")'
,sql出错了。
转义传入的文本是一个好习惯,因为您避免了sql注入并且也避免了这些问题;通常mysql(或任何其他数据库)模块具有内置的转义功能,可以在查询中使用某些语法,或者直接调用转义功能。就像这个模块https://github.com/mysqljs/mysql#escaping-query-values