我正在使用puppeteer从站点中提取数据。我的问题是在获取所有内容后关闭浏览器。
请帮助我:)这是GraphQL的解析器功能。
const Somesite = async ({ ticker }) => {
const browser = await puppeteer.launch({ headless: false});
const page = await browser.newPage();
await page.goto('https://Somesite.com/quote.ashx?t=' + ticker);
let result = {
ticker,
market_cap: async () => {
return await fetch_data_with_xpath(page, { ticker, xpath: "/html/body/table[3]/tbody/tr[1]/td/table/tbody/tr[7]/td/table/tbody/tr[2]/td[2]/b" })
},
float: async () => {
return await fetch_data_with_xpath(page, { ticker, xpath: "/html/body/table[3]/tbody/tr[1]/td/table/tbody/tr[7]/td/table/tbody/tr[2]/td[10]/b" })
},
insider_own: async () => {
return await fetch_data_with_xpath(page, { ticker, xpath: "/html/body/table[3]/tbody/tr[1]/td/table/tbody/tr[7]/td/table/tbody/tr[1]/td[8]/b" })
},
short_float: async () => {
return await fetch_data_with_xpath(page, { ticker, xpath: "/html/body/table[3]/tbody/tr[1]/td/table/tbody/tr[7]/td/table/tbody/tr[3]/td[10]/b" })
},
date: async () => {
return moment().format("MM/DD/YYYY");
},
cash_per_share: async () => {
return await fetch_data_with_xpath(page, { ticker, xpath: "/html/body/table[3]/tbody/tr[1]/td/table/tbody/tr[7]/td/table/tbody/tr[6]/td[2]/b" })
},
dept_equity: async () => {
return await fetch_data_with_xpath(page, { ticker, xpath: "/html/body/table[3]/tbody/tr[1]/td/table/tbody/tr[7]/td/table/tbody/tr[10]/td[4]/b/span" })
},
browser : async () => {
return browser.close()
}
};
return result;
};
const fetch_data_with_xpath = async (page, { ticker, xpath }) => {
const element = await page.$x(xpath);
let text = "";
if (!!element) {
text = await page.evaluate(element => element.textContent, element[0]);
}
return text;
}
我试图为每个数据元素打开单独的浏览器,但是效率不是很高,并且很快就达到了极限。
我无法让浏览器保持打开状态,这不是一种选择。
当我在定义browser.close
后触发result
时,浏览器过早关闭并且没有数据被获取。
答案 0 :(得分:1)
首先从对象内部删除browser.close()
,因为对象中的属性是无序的,所以它将无法正常工作。
假设对象上的所有元素都是一个函数,我们可以将for..of
与async..await
一起使用。
async function getResult() {
// hold all of our related functions
let resultFunctions = {
ticker,
market_cap: async () => {
return fetch_data_with_xpath(page, {
ticker,
xpath: "/html/body/table[3]/tbody/tr[1]/td/table/tbody/tr[7]/td/table/tbody/tr[2]/td[2]/b"
})
}
};
let result = {}
// run all functions inside the object
for (let [key, fn] of Object.entries(resultFunctions)) {
if (typeof fn === 'function') {
result[key] = await fn()
}
// if it's not a function,
// then it's "Probably" a string according to our schema above
if (typeof fn === 'string') result[key] = fn;
}
return result;
}
const result = await getResult()
await browser.close()