我的 Google Cloud Functions 从数据库获取 URL,然后通过 GET 请求获取相应网页的源代码。我分别尝试了 Axios、Request 和 Node.js 原生的 HTTP(S) 模块。
大多数网站都能正常抓取,没有任何问题。然而,对于 https://www.healthline.com/health/food-nutrition/cricket-flour-nutrition 这个 URL,
Axios 和 Request 都会一直挂起,直到函数超时;而用原生 HTTPS 模块测试时,它能下载到若干数据块,但响应始终不结束,随后同样挂起直到函数超时。
我的代码非常简单......
const https = require('https');

// Maximum socket inactivity (in ms) tolerated before the fetch is abandoned.
// Without an explicit limit a stalled server keeps the request pending
// indefinitely.
const FETCH_TIMEOUT_MS = 30000;

const newAgent = new https.Agent({ keepAlive: true });
console.log('-> GET ', document.location.href);

// NOTE(review): WHATWG `URL` objects expose `pathname`/`search`, not `path`;
// confirm the shape of `document.location` (legacy `url.parse()` output does
// provide `path`).
const requestOptions = {
  agent: newAgent,
  hostname: document.location.hostname,
  path: document.location.path,
};

// `resolve` / `reject` come from the enclosing scope. These wrappers make sure
// the outcome is reported exactly once, even when several failure events
// (timeout, 'error', 'close') fire for the same request.
let settled = false;
const succeed = () => {
  if (!settled) {
    settled = true;
    resolve();
  }
};
const fail = (err) => {
  if (!settled) {
    settled = true;
    reject(err);
  }
};

/*
 * Fetch the page source over HTTPS, buffer it as UTF-8 text and, once the
 * response has ended, store the pre-cleaned text in the raw GCS bucket.
 * Every failure path (connection error, inactivity timeout, truncated
 * response, processing or storage error) rejects instead of only logging.
 */
const fetchReq = https.request(requestOptions, (res) => {
  let source = '';
  console.log('STATUS: ' + res.statusCode);
  for (const [name, value] of Object.entries(res.headers)) {
    console.log('-> ' + name + ' -> ', JSON.stringify(value));
  }
  res.setEncoding('utf8');

  res.on('data', (chunk) => {
    // Log the first and last 40 characters of each chunk for diagnostics.
    console.log(`-----> CHUNK:`, chunk.substring(0, 40) + ' ... ', chunk.slice(-40));
    source += chunk;
  });

  res.on('close', () => {
    console.log(`-----> CLOSED STREAM`);
    // `res.complete` is false when the connection went away before the whole
    // message was received; in that case 'end' never fires.
    if (!res.complete) {
      fail(new AppError(`Connection closed before the full response was received (${document.location.href}).`, 500));
    }
  });

  res.on('end', () => {
    console.log(`-----> STREAM ENDED`);
    try {
      console.log(`-> Fetched`, source.length);
      console.log(`-> Saving to GCS`);
      const bucket = storage.bucket(process.env.STORAGE_BUCKET_RAW);
      const fileName = document.organization + '/' + document.key + '.raw';
      const file = bucket.file(fileName);
      const response = {
        uri: 'gs://' + bucket.name + '/' + fileName,
        data: preCleaning(source),
      };
      const options = {
        gzip: true,
        metadata: {
          metadata: {
            kind: document.kind,
            organization: document.organization,
            username: document.username,
            key: document.key,
            url: document.location.href,
          },
        },
      };
      file.save(response.data, options)
        .then(() => {
          console.log(`-> Saved to GCS`);
          succeed();
        })
        .catch((err) => {
          // Report the target URI from `response`; no bare `uri` binding is
          // declared in this block.
          fail(new AppError(`Error saving raw document to storage (${response.uri}).`, 500, err));
        });
    } catch (e) {
      console.log('HTTP message: ', e.message);
      // A synchronous failure here must still settle the pending operation.
      fail(new AppError(`Error processing fetched document (${document.location.href}).`, 500, e));
    }
  });

  res.on('error', (e) => {
    console.log(`Got error: ${e.message}`);
    fail(new AppError(`Error reading response (${document.location.href}).`, 500, e));
  });
});

// Detach the socket from the agent as soon as it is assigned.
// NOTE(review): this sits oddly with `keepAlive: true` on the agent; confirm
// which of the two behaviors is actually wanted.
fetchReq.on('socket', (socket) => {
  socket.emit('agentRemove');
});

// Abort when the socket stays idle for too long; destroying the request with
// an error surfaces it through the 'error' handler below.
fetchReq.setTimeout(FETCH_TIMEOUT_MS, () => {
  fetchReq.destroy(new Error(`No data received for ${FETCH_TIMEOUT_MS} ms`));
});

// Connection-level failures (DNS, TLS, reset, timeout) are emitted on the
// request object, not on the response.
fetchReq.on('error', (e) => {
  console.log(`Got error: ${e.message}`);
  fail(new AppError(`Error fetching document (${document.location.href}).`, 500, e));
});

fetchReq.end();
我尝试过不同的库,而同样的代码在本地可以正常运行。这让我十分困惑,我已经无计可施了……