我正在按照这个示例(https://medium.com/@e_mad_ehsan/getting-started-with-puppeteer-and-chrome-headless-for-web-scrapping-6bf5979dee3e)学习如何将 Puppeteer 与 Mongoose 配合使用。当我执行 "node index.js" 时,会收到如下与 findOneAndUpdate 有关的错误消息:
(node:53135) DeprecationWarning: current URL string parser is deprecated,
and will be removed in a future version. To use the new parser, pass option
{ useNewUrlParser: true } to MongoClient.connect.
(node:53135) UnhandledPromiseRejectionWarning: TypeError:
User.findOneAndUpdate is not a function
at upsertUser (/home/oceanm/thal/index.js:111:14)
at run (/home/oceanm/thal/index.js:69:4)
at <anonymous>
at process._tickCallback (internal/process/next_tick.js:188:7)
(node:53135) UnhandledPromiseRejectionWarning: Unhandled promise rejection.
This error originated either by throwing inside of an async function without
a catch block, or by rejecting a promise which was not handled with
.catch(). (rejection id: 1)
(node:53135) [DEP0018] DeprecationWarning: Unhandled promise rejections are
deprecated. In the future, promise rejections that are not handled will
terminate the Node.js process with a non-zero exit code.
....... ................. ...... ................. ......
我不知道为什么会收到此错误消息。我已经卡在这里好几个小时了,请帮帮我。
<models/user.js>
const mongoose = require('mongoose');

// Fields stored for each crawled user.
// NOTE: this file only builds the schema; nothing is assigned to
// module.exports here.
const userSchema = new mongoose.Schema({
  username: String,
  email: String,
  dateCrawled: Date,
});
<creds.js>
// Login credentials, exposed as `username` and `password`.
const credentials = {
  username: 'myid',
  password: 'mypassword',
};

module.exports = credentials;
<index.js>
const puppeteer = require('puppeteer');
const CREDS = require('./creds');
const mongoose = require('mongoose');
const User = require('./models/user');
/**
 * Logs in to GitHub with the credentials from `CREDS`, searches for users
 * matching a fixed query, walks every result page and upserts each user whose
 * e-mail address is visible.
 *
 * The browser is always closed, even when a step fails, and every database
 * write is awaited before moving on to the next user.
 *
 * @returns {Promise<void>} resolves once all pages have been processed;
 *   rejects with the first error raised by navigation, scraping or saving.
 */
async function run() {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto('https://github.com/login');

    const USERNAME_SELECTOR = '#login_field';
    const PASSWORD_SELECTOR = '#password';
    const BUTTON_SELECTOR = '#login > form > div.auth-form-body.mt-3 > input.btn.btn-primary.btn-block';

    await page.click(USERNAME_SELECTOR);
    await page.keyboard.type(CREDS.username);
    await page.click(PASSWORD_SELECTOR);
    await page.keyboard.type(CREDS.password);
    // Start waiting for the navigation before clicking, so a navigation that
    // completes quickly cannot be missed.
    await Promise.all([
      page.waitForNavigation(),
      page.click(BUTTON_SELECTOR),
    ]);

    const userToSearch = 'miyamoto';
    const searchUrl = `https://github.com/search?q=${userToSearch}&type=Users`;
    await page.goto(searchUrl);
    await page.waitFor(2 * 1000);

    // "INDEX" is a placeholder replaced with the 1-based position of each
    // entry in the result list.
    const LIST_USERNAME_SELECTOR = '#user_search_results > div.user-list > div:nth-child(INDEX) > div.d-flex.flex-auto > div > a';
    const LIST_EMAIL_SELECTOR = '#user_search_results > div.user-list > div:nth-child(INDEX) > div.d-flex.flex-auto > div > ul > li:nth-child(2) > a';
    const LENGTH_SELECTOR_CLASS = 'user-list-item';

    const numPages = await getNumPages(page);
    console.log('Numpages: ', numPages);

    for (let h = 1; h <= numPages; h++) {
      const pageUrl = searchUrl + '&p=' + h;
      await page.goto(pageUrl);

      const listLength = await page.evaluate((sel) => {
        return document.getElementsByClassName(sel).length;
      }, LENGTH_SELECTOR_CLASS);

      for (let i = 1; i <= listLength; i++) {
        // Point both selectors at the i-th entry of the list.
        const usernameSelector = LIST_USERNAME_SELECTOR.replace('INDEX', i);
        const emailSelector = LIST_EMAIL_SELECTOR.replace('INDEX', i);

        const username = await page.evaluate((sel) => {
          return document.querySelector(sel).getAttribute('href').replace('/', '');
        }, usernameSelector);

        const email = await page.evaluate((sel) => {
          const element = document.querySelector(sel);
          return element ? element.innerHTML : null;
        }, emailSelector);

        // Not all users have emails visible.
        if (!email) {
          continue;
        }

        console.log(username, ' -> ', email);

        // Await the write so failures propagate to the caller and the
        // browser is not closed while saves are still pending.
        await upsertUser({
          username: username,
          email: email,
          dateCrawled: new Date(),
        });
      }
    }
  } finally {
    await browser.close();
  }
}
async function getNumPages(page) {
const NUM_USER_SELECTOR = '#js-pjax-container > div > div.col-12.col-md-9.float-left.px-2.pt-3.pt-md-0.codesearch-results > div > div.d-flex.flex-column.flex-md-row.flex-justify-between.border-bottom.pb-3.$
let inner = await page.evaluate((sel) => {
let html = document.querySelector(sel).innerHTML;
return html.replace(',', '').replace('users', '').trim();
}, NUM_USER_SELECTOR);
let numUsers = parseInt(inner);
console.log('numUsers: ', numUsers);
let numPages = Math.ceil(numUsers / 10);
return numPages;
}
/**
 * Inserts a crawled user, or updates the existing record that has the same
 * e-mail address.
 *
 * @param {{username: string, email: string, dateCrawled: Date}} userObj -
 *   fields to store; `email` is used as the lookup key.
 * @returns {Promise<Object>} resolves with the document returned by
 *   `findOneAndUpdate` (the post-update document, because `new: true` is set).
 *   Rejects if connecting or the update fails, so callers can handle the
 *   error with `await`/`.catch()`.
 */
async function upsertUser(userObj) {
  const DB_URL = 'mongodb://localhost/thal';

  // readyState 0 means "disconnected": connect on first use only.
  if (mongoose.connection.readyState === 0) {
    // useNewUrlParser is the option the driver's deprecation warning asks for.
    await mongoose.connect(DB_URL, { useNewUrlParser: true });
  }

  // If this email exists, update the entry, don't insert.
  const conditions = { email: userObj.email };
  const options = { upsert: true, new: true, setDefaultsOnInsert: true };

  // Return the query's promise instead of throwing from a callback, where the
  // error could not be caught by the caller.
  return User.findOneAndUpdate(conditions, userObj, options).exec();
}
// Entry point: report a failed run and set a non-zero exit code instead of
// leaving the promise rejection unhandled.
run().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
答案 0 :(得分:1)
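我认为您没有在 <models/user.js> 中导出 User 模型。
由于该文件没有导出任何内容,require('./models/user') 得到的只是一个空对象,因此在 index.js 中使用 User 变量时,User.findOneAndUpdate 并不是一个函数,这很可能就是导致该问题的原因。
请在 <models/user.js> 的末尾添加 module.exports = mongoose.model('User', userSchema);
完整的文件如下: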
const mongoose = require('mongoose');

// Fields stored for each crawled user.
const userSchema = new mongoose.Schema({
  username: String,
  email: String,
  dateCrawled: Date,
});

// Compile the schema into the 'User' model and export it, so that
// require('./models/user') yields a model exposing findOneAndUpdate.
const User = mongoose.model('User', userSchema);

module.exports = User;