概述:
我的任务是在我们失去对旧工具的访问权限之前,把其中的部分内容迁移出来。具体来说,该系统为营销部门生成了电子邮件渲染图,我需要把这些电子邮件预览图像从系统中下载下来。产品方面没有任何内部 API 或支持可以帮助获取这些图像。
方法:
图像预览 URL 接受 3 个参数:图像的 ID,以及 `height` 和 `width` 属性。该网站需要身份验证,因此我在登录后把它生成的 cookie/标头放进了代码,以便通过 `fetch` 一并发送这些标头。为此,我使用 NodeJS 和 `node-fetch` 向该 URL 发起请求,然后将图像写入磁盘,并依次遍历列表。
问题:
我们有数千个此类电子邮件预览/图像需要下载。目前,我的代码一次只处理一个,扩展性很差。有没有办法让 `fetch` 之类的东西支持并发,这样我就可以同时下载多个图像,并设置一个并发上限?对于解决这个问题的更好方法,有什么想法吗?这只是一次性的迁移,所以我只想找到最高效的做法。
代码:
// Includes
const fs = require('fs');
const csv = require('csv-parser');
const fetch = require('node-fetch');
// Shared script state and configuration.

// Rows parsed from the CSV file; each row is expected to expose a `JobID` field.
const jobIDs = [];
// Requested preview size, sent as the `w` and `h` query parameters.
const imageWidth = 1024;
const imageHeight = 2000;
// CSV file that lists the jobs to download.
const jobFileName = '500.csv';
// Session cookie copied from a logged-in browser session; sent as the `cookie` request header.
const cookie = "xt_0d95e=!O34QVojQ....redacted";
// Index into `jobIDs` of the row currently being processed.
let currentIteration = 0;
// Job ID of the row currently being processed.
let currentJobId = '';
/**
 * Read the list of jobs to process.
 *
 * Streams the CSV file row by row into `jobIDs`, then starts the download
 * loop once the whole file has been parsed.
 */
fs.createReadStream(jobFileName)
  // `pipe()` does not forward errors, so the file stream needs its own handler
  // (for example when the CSV file is missing or unreadable).
  .on('error', (err) => {
    console.error(`Unable to read ${jobFileName}: ${err.message}`);
    process.exitCode = 1;
  })
  // csv-parser names this option `separator`; an unknown `delimiter` key is ignored.
  .pipe(csv({
    separator: ','
  }))
  .on('error', (err) => {
    console.error(`Unable to parse ${jobFileName}: ${err.message}`);
    process.exitCode = 1;
  })
  .on('data', (data) => jobIDs.push(data))
  .on('end', () => {
    console.log(`${jobIDs.length} Job IDs loaded from list.`);
    checkNext();
  });
/**
 * Move on to the next row that has a Job ID and start its download, or report
 * completion when the list is exhausted.
 *
 * Reads and updates the shared `currentIteration` / `currentJobId` state and
 * hands the actual download off to `scrape()`.
 */
function checkNext() {
  // Step past rows without a JobID. Without this, the run would stop silently
  // at the first such row because nothing advances the index.
  while (currentIteration < jobIDs.length && jobIDs[currentIteration].JobID == null) {
    console.warn(`Skipping row ${currentIteration}: no JobID value`);
    currentIteration++;
  }

  if (currentIteration >= jobIDs.length) {
    // Debug
    console.log('Complete - Downloaded [' + jobIDs.length + ' Emails]');
    return;
  }

  // Publish the ID for scrape(), which reads it from the shared variable.
  currentJobId = jobIDs[currentIteration].JobID;
  scrape();
}
/**
 * Download the preview image for the current Job ID and save it as
 * `./images/<JobID>.png`, then move on to the next job.
 *
 * A failed request, a non-2xx response or a write error is logged and the run
 * continues with the next job; the ID is appended to `./debug.txt` only after
 * the file has been fully written. A failed write may leave a partial file.
 *
 * @returns {Promise<void>} Settles once this job has been handled and the next
 *   one has been requested via `checkNext()`.
 */
function scrape() {
  // Snapshot the ID so the log lines and the file name stay consistent even if
  // the shared variable changes while the request is in flight.
  const jobId = currentJobId;

  // Debug
  console.log('Fetching Job ID: ' + jobId);

  // Fetch
  return fetch(`https://website.com/Content/Email/EmailThumbnail.aspx?jid=${jobId}&w=${imageWidth}&h=${imageHeight}`, {
    "headers": {
      "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
      "accept-language": "en-US,en;q=0.9",
      "cache-control": "max-age=0",
      "sec-fetch-dest": "document",
      "sec-fetch-mode": "navigate",
      "sec-fetch-site": "none",
      "sec-fetch-user": "?1",
      "upgrade-insecure-requests": "1",
      "cookie": cookie
    },
    "referrerPolicy": "strict-origin-when-cross-origin",
    "body": null,
    "method": "GET",
    "mode": "cors"
  })
    .then((res) => {
      // fetch only rejects on network failures; an HTTP error still resolves,
      // and its body would otherwise be saved as if it were the image.
      if (!res.ok) {
        throw new Error(`Unexpected HTTP status ${res.status} ${res.statusText}`);
      }

      // Store the image and name it after the Job ID. Wait for 'finish' so the
      // job is only recorded once the data has actually been written.
      return new Promise((resolve, reject) => {
        const dest = fs.createWriteStream(`./images/${jobId}.png`);
        dest.on('error', reject);
        dest.on('finish', resolve);
        res.body.on('error', (err) => {
          // pipe() does not close the destination when the source fails.
          dest.destroy();
          reject(err);
        });
        res.body.pipe(dest);
      });
    })
    .then(() => {
      // Debug
      fs.appendFileSync('./debug.txt', '\n' + jobId);
    })
    .catch((err) => {
      // One bad job must not end the whole run; report it and carry on.
      console.error(`Failed to download Job ID ${jobId}: ${err.message}`);
    })
    .then(() => {
      // Check for more
      currentIteration++;
      checkNext();
    });
}