该项目使用的堆栈是:ReactJS,ExpressJS和MongoDB。
我正在寻找一种改善搜索引擎的搜索查询的方法。该查询可解析 70万册图书记录,28万个人记录和2k标签记录。
有适当的索引,但是查询最多可以持续6秒钟。
期待您的帮助!
模型
// Book model
// Book model
// Stores one record per edition; `main`/`replicas` link duplicate editions
// of the same work, and `weight`/`status` drive search-result ordering.
const BookSchema = Schema ({
  codes: {
    // NOTE(review): `unique: true` on optional nested fields builds unique
    // indexes where every document missing the field indexes `null`, so only
    // one code-less book can exist unless the indexes are sparse/partial —
    // verify against the actual deployment indexes.
    isbn: { type: String, minlength: 10, maxlength: 10, unique: true },
    isbn13: { type: String, minlength: 13, maxlength: 14, unique: true },
    asin: { type: String, minlength: 10, maxlength: 10, unique: true }
  },
  title: { type: String, minlength: 3 },
  summary: {
    content: { type: String, minlength: 60, maxlength: 140 },
    user_id: { type: Schema.Types.ObjectId, ref: 'User' },
    status: { type: Number, min: 0 }
  },
  tags: [ { type: Schema.Types.ObjectId, ref: 'Tag' } ],
  // Fixed: `minlenth` (typo — Mongoose silently ignores unknown options, so
  // the minimum length was never enforced) and the duplicated `a-za-z` range
  // in the pattern (equivalent to `a-z` written twice).
  slug: { type: String, match: /^[a-z0-9]+(?:-[a-z0-9]+)*$/, minlength: 3, maxlength: 50, unique: true },
  cover: {
    public_id: { type: String },
    version: { type: String }
  },
  contributors: [new Schema({
    contributor_id: { type: Schema.Types.ObjectId, ref: 'Person' },
    role: { type: String, enum: ['writer', 'translator', 'illustrator', 'other'] }
  }, { _id: false })],
  publication: {
    publisher: { type: String },
    collection: { type: String },
    publicationDate: { type: Date }
  },
  rating: { type: Number, min: 0, max: 5, default: 0 },
  reviews: [ { type: Schema.Types.ObjectId, ref: 'Review' } ],
  alias: { type: String },
  // `main` points at the canonical edition; `replicas` are its duplicates.
  main: { type: Schema.Types.ObjectId, ref: 'Book' },
  replicas: [ { type: Schema.Types.ObjectId, ref: 'Book' } ],
  weight: { type: Number, min: 0, default: 0 },
  // status 2 means deactivated (filtered out by the search controller).
  status: { type: Number, min: 0, default: 0 }
}, {
  timestamps: true
});
// Person model
// Person model
// An author/translator/illustrator record; `contributions` back-references
// every Book this person worked on.
const personDefinition = {
  firstName: { type: String },
  middleName: { type: String },
  lastName: { type: String },
  fullName: { type: String },
  pseudonym: { type: String },
  contributions: [{ type: Schema.Types.ObjectId, ref: 'Book' }]
};

const PersonSchema = Schema(personDefinition, { timestamps: true });
// Tag model
// Tag model
// A browsable category with SEO metadata; `books` back-references every
// Book carrying the tag.
const TagSchema = Schema ({
  // Fixed: `minlenth` (typo — Mongoose silently ignores unknown options, so
  // the minimum length was never enforced) on both `label` and `slug`, and
  // the duplicated `a-za-z` range in the slug pattern (same as `a-z`).
  label: { type: String, minlength: 3, unique: true },
  slug: { type: String, match: /^[a-z0-9]+(?:-[a-z0-9]+)*$/, minlength: 3, maxlength: 50, unique: true },
  seo: {
    title: { type: String },
    description: { type: String },
    content: { type: String }
  },
  books: [ { type: Schema.Types.ObjectId, ref: 'Book' } ],
  user_id: { type: Schema.Types.ObjectId, ref: 'User' },
  alias: { type: String },
  weight: { type: Number, min: 0, default: 0 },
  // status 0 means inactive — the search controller filters for status > 0.
  status: { type: Number, min: 0, default: 0 }
}, {
  timestamps: true
});
搜索功能
// search controllers
const Book = require('../../models/book.js');
const Person = require('../../models/person.js');
const Tag = require('../../models/tag.js');
const to = require('await-to-js').default;
// _
// | |
// ___ ___ __ _ _ __ ___| |__
// / __|/ _ \/ _` | '__/ __| '_ \
// \__ \ __/ (_| | | | (__| | | |
// |___/\___|\__,_|_| \___|_| |_|
//
exports.search = (req, res, next) => {
const query = new RegExp(req.params.query, 'i');
if(query.length < 3) {
let er = new Error('Query length must be minimum 3 characters');
er.status = 422;
er.error = err;
return next(er);
}
const books = new Promise(async (resolve, reject) => {
let [err, books] = await to(Book.find({
$or: [
{ 'title': query },
{ 'codes.isbn': query },
{ 'codes.isbn13': query },
{ 'codes.asin': query }
],
'status': { $ne: 2 }
}).populate('contributors.contributor_id').limit(30));
if(err) return reject(err);
resolve(books)
});
const contributors = new Promise(async (resolve, reject) => {
let [err, contributors] = await to(
Person.find({ fullName: query })
.populate({
path : 'contributions',
populate : {
path :'contributors.contributor_id'
}
})
.limit(20)
);
if(err) return reject(err);
resolve(contributors)
});
const tags = new Promise(async (resolve, reject) => {
let [err, tags] = await to(Tag.find({ label: query }).limit(10));
if(err) return reject(err);
tags = tags.filter(tag => tag.status > 0);
resolve(tags)
});
Promise.all([books, contributors, tags])
.then(values => {
const booksFromContributors = values[1].map(contributor => contributor.contributions).flat() || [];
// merge books with removing duplicates
let mergedBooks = values[0];
booksFromContributors.forEach(contributorsBook => {
let duplicateBook = mergedBooks.find(book => book._id === contributorsBook._id);
(!duplicateBook) &&
mergedBooks.push(contributorsBook)
})
// remove desactivated book, status = 2
mergedBooks = mergedBooks.filter(mergedBook => mergedBook.status !== 2);
// reorder based on status and weight
mergedBooks = mergedBooks
.sort((book1, book2) => (book2.status - book1.status))
.sort((book1, book2) => (book2.weight - book1.weight));
res.status(200).json({ books: mergedBooks, tags: values[2] });
})
}
查询说明
{
"queryPlanner" : {
"plannerVersion" : 1.0,
"namespace" : "blablalivre.books",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"$or" : [
{
"codes.asin" : /Line Papin/i
},
{
"codes.isbn" : /Line Papin/i
},
{
"codes.isbn13" : /Line Papin/i
},
{
"title" : /Line Papin/i
}
]
},
{
"status" : {
"$not" : {
"$eq" : 2.0
}
}
}
]
},
"queryHash" : "7A66E97D",
"planCacheKey" : "31A167BF",
"winningPlan" : {
"stage" : "LIMIT",
"limitAmount" : 30.0,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"status" : {
"$not" : {
"$eq" : 2.0
}
}
},
"inputStage" : {
"stage" : "OR",
"inputStages" : [
{
"stage" : "IXSCAN",
"filter" : {
"$or" : [
{
"title" : /Line Papin/i
}
]
},
"keyPattern" : {
"title" : 1.0
},
"indexName" : "title",
"isMultiKey" : false,
"multiKeyPaths" : {
"title" : [
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2.0,
"direction" : "forward",
"indexBounds" : {
"title" : [
"[\"\", {})",
"[/Line Papin/i, /Line Papin/i]"
]
}
},
{
"stage" : "IXSCAN",
"filter" : {
"$or" : [
{
"codes.asin" : /Line Papin/i
}
]
},
"keyPattern" : {
"codes.asin" : 1.0
},
"indexName" : "asin",
"isMultiKey" : false,
"multiKeyPaths" : {
"codes.asin" : [
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2.0,
"direction" : "forward",
"indexBounds" : {
"codes.asin" : [
"[\"\", {})",
"[/Line Papin/i, /Line Papin/i]"
]
}
},
{
"stage" : "IXSCAN",
"filter" : {
"$or" : [
{
"codes.isbn" : /Line Papin/i
}
]
},
"keyPattern" : {
"codes.isbn" : 1.0
},
"indexName" : "isbn",
"isMultiKey" : false,
"multiKeyPaths" : {
"codes.isbn" : [
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2.0,
"direction" : "forward",
"indexBounds" : {
"codes.isbn" : [
"[\"\", {})",
"[/Line Papin/i, /Line Papin/i]"
]
}
},
{
"stage" : "IXSCAN",
"filter" : {
"$or" : [
{
"codes.isbn13" : /Line Papin/i
}
]
},
"keyPattern" : {
"codes.isbn13" : 1.0
},
"indexName" : "isbn13",
"isMultiKey" : false,
"multiKeyPaths" : {
"codes.isbn13" : [
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2.0,
"direction" : "forward",
"indexBounds" : {
"codes.isbn13" : [
"[\"\", {})",
"[/Line Papin/i, /Line Papin/i]"
]
}
}
]
}
}
},
"rejectedPlans" : [
{
"stage" : "LIMIT",
"limitAmount" : 30.0,
"inputStage" : {
"stage" : "FETCH",
"filter" : {
"status" : {
"$not" : {
"$eq" : 2.0
}
}
},
"inputStage" : {
"stage" : "OR",
"inputStages" : [
{
"stage" : "IXSCAN",
"filter" : {
"$or" : [
{
"codes.asin" : /Line Papin/i
}
]
},
"keyPattern" : {
"codes.asin" : 1.0
},
"indexName" : "asin",
"isMultiKey" : false,
"multiKeyPaths" : {
"codes.asin" : [
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2.0,
"direction" : "forward",
"indexBounds" : {
"codes.asin" : [
"[\"\", {})",
"[/Line Papin/i, /Line Papin/i]"
]
}
},
{
"stage" : "IXSCAN",
"filter" : {
"$or" : [
{
"codes.isbn" : /Line Papin/i
}
]
},
"keyPattern" : {
"codes.isbn" : 1.0
},
"indexName" : "isbn",
"isMultiKey" : false,
"multiKeyPaths" : {
"codes.isbn" : [
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2.0,
"direction" : "forward",
"indexBounds" : {
"codes.isbn" : [
"[\"\", {})",
"[/Line Papin/i, /Line Papin/i]"
]
}
},
{
"stage" : "IXSCAN",
"filter" : {
"title" : /Line Papin/i
},
"keyPattern" : {
"title" : 1.0,
"codes.isbn" : 1.0,
"codes.isbn13" : 1.0,
"codes.asin" : 1.0,
"status" : -1.0
},
"indexName" : "search",
"isMultiKey" : false,
"multiKeyPaths" : {
"title" : [
],
"codes.isbn" : [
],
"codes.isbn13" : [
],
"codes.asin" : [
],
"status" : [
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2.0,
"direction" : "forward",
"indexBounds" : {
"title" : [
"[\"\", {})",
"[/Line Papin/i, /Line Papin/i]"
],
"codes.isbn" : [
"[MinKey, MaxKey]"
],
"codes.isbn13" : [
"[MinKey, MaxKey]"
],
"codes.asin" : [
"[MinKey, MaxKey]"
],
"status" : [
"[MaxKey, 2.0)",
"(2.0, MinKey]"
]
}
},
{
"stage" : "IXSCAN",
"filter" : {
"$or" : [
{
"codes.isbn13" : /Line Papin/i
}
]
},
"keyPattern" : {
"codes.isbn13" : 1.0
},
"indexName" : "isbn13",
"isMultiKey" : false,
"multiKeyPaths" : {
"codes.isbn13" : [
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2.0,
"direction" : "forward",
"indexBounds" : {
"codes.isbn13" : [
"[\"\", {})",
"[/Line Papin/i, /Line Papin/i]"
]
}
}
]
}
}
}
]
},
"ok" : 1.0
}
答案 0(得分:0)
刚刚找到了一种可以大大改善搜索查询执行效率的解决方案:text indexes(文本索引)!
执行时间结果:从6秒到1秒!
更新mongoDB索引后,我用$text query operator和...voilà重写了查询。
// search controller
const Book = require('../../models/book.js');
const Person = require('../../models/person.js');
const Tag = require('../../models/tag.js');
const to = require('await-to-js').default;
const helpers = require('../../helpers/index.js');
// _
// | |
// ___ ___ __ _ _ __ ___| |__
// / __|/ _ \/ _` | '__/ __| '_ \
// \__ \ __/ (_| | | | (__| | | |
// |___/\___|\__,_|_| \___|_| |_|
//
exports.search = (req, res, next) => {
const query = `\"${helpers.cleanString(req.params.query)}\"`;
if(query.length < 3) {
let er = new Error('Query length must be minimum 3 characters');
er.status = 422;
er.error = err;
return next(er);
}
const books = new Promise(async (resolve, reject) => {
let [err, books] = await to(
Book.find({
$text: { $search: query, $caseSensitive: false },
'status': { $ne: 2 }
})
.sort({'weight': -1, 'status': -1})
.populate('contributors.contributor_id').limit(30));
if(err) return reject(err);
resolve(books)
});
const contributors = new Promise(async (resolve, reject) => {
let [err, contributors] = await to(
Person.find({
$text: { $search: query, $caseSensitive: false }
})
.populate({
path : 'contributions',
populate : {
path :'contributors.contributor_id'
}
})
.limit(20)
);
if(err) return reject(err);
resolve(contributors)
});
const tags = new Promise(async (resolve, reject) => {
let [err, tags] = await to(
Tag.find({
$text: { $search: query, $caseSensitive: false, $language: 'fr' }
}).limit(10));
if(err) return reject(err);
tags = tags.filter(tag => tag.status > 0);
resolve(tags)
});
Promise.all([books, contributors, tags])
.then(values => {
const booksFromContributors = values[1].map(contributor => contributor.contributions).flat() || [];
// merge books with removing duplicates
let mergedBooks = values[0];
booksFromContributors.forEach(contributorsBook => {
let duplicateBook = mergedBooks.find(book => book._id === contributorsBook._id);
(!duplicateBook) &&
mergedBooks.push(contributorsBook)
})
// remove desactivated book, status = 2
mergedBooks = mergedBooks.filter(mergedBook => mergedBook.status !== 2);
// reorder based on status and weight
mergedBooks = mergedBooks
.sort((book1, book2) => (book2.status - book1.status))
.sort((book1, book2) => (book2.weight - book1.weight));
res.status(200).json({ books: mergedBooks, tags: values[2] });
})
}
下一步将是重写双重排序(也许使用Lodash)。