我有以下基本代码:
const cookie = require('cookie')
const https = require('https')
const http = require('http')
const proto = { https, http }
// Use connect method to connect to the server
MongoClient.connect(url, function(err, client) {
console.log("Connected successfully to server")
const db = client.db(dbName)
const foos = db.collection('foos')
perform(foos, function(){
console.log('done')
})
})
function perform(a, done) {
const stream = a.find({ url: null })
// emits each line as a buffer or as a string representing an array of fields
stream.on('data', function(doc){
stream.pause()
request(doc.url, function(){
stream.resume()
})
function request(url, fn, redirect, cookies) {
cookies = cookies || {}
console.log(redirect ? 'redirect' : 'start', url)
const val = url.match(/^https/) ? 'https' : 'http'
var headers = {}
if (Object.keys(cookies).length) {
var ck = []
Object.keys(cookies).forEach(key => {
ck.push(cookie.serialize(key, cookies[key]))
})
headers.Cookie = ck.join('; ')
}
proto[val].get(url, { headers }, function(response) {
// console.log(response.headers)
console.log(response.statusCode, url)
if (response.statusCode == 302 || response.statusCode == 301 || response.statusCode == 307 || response.statusCode == 303) {
if (response.headers['set-cookie']) {
response.headers['set-cookie'].forEach(function(str){
var cks = cookie.parse(str)
for (var key in cks) {
switch (key) {
case 'expires':
case 'path':
case 'domain':
break
default:
cookies[key] = cks[key]
}
}
})
}
var newUrl = response.headers.location
if (!newUrl.match(/^https?:\/\//)) {
if (newUrl.match(/\/\//)) {
newUrl = 'http:' + newUrl
} else if (newUrl.match(/\//)) {
newUrl = domain + newUrl
} else {
newUrl = domain + '/' + newUrl
}
}
request(newUrl, fn, true, cookies)
} else {
// do something
fn()
}
}).on('error', function(err) { // Handle errors
console.log(err.message)
fn()
})
}
})
// now pipe some data into it
stream.on('end', function(){
done()
})
}
它实际上只是从数据库中加载一堆URL,然后为每个URL进行调用。它使用MongoDB集合的流功能,因此一次仅执行一个请求,当请求完成时,它将启动下一个请求。但是,运行此脚本大约3-5分钟后,该过程将挂起。不仅如此,我使用的浏览器也挂了!我将其作为Node.js脚本运行,但由于某种原因,它似乎在3-5分钟后阻止了我计算机的所有流量。
问题是,当我重新启动进程(仅花费一秒钟/秒)时,一切都很好,请求也通过了。另一个方面是,例如,我尝试转到浏览器中的URL(任何URL,例如stackoverflow.com),并且由于脚本的原因而挂起。如果我重新启动该过程,则浏览器窗口将完成其请求!我不知道为什么会这样。
想知道为什么会发生这种情况以及如何解决这个问题。