如何改善此搜索查询?

时间:2020-09-28 16:13:12

标签: mongodb express

该项目使用的堆栈是:ReactJS,ExpressJS和MongoDB。

我正在寻找一种方法来优化搜索引擎所使用的搜索查询。该查询需要检索约 70 万条图书记录、28 万条人物记录和 2 千条标签记录。

已经建立了相应的索引,但查询耗时仍可能长达 6 秒。

期待您的帮助!


模型

// Book model
// Mongoose schema for one book record: identifying codes, title, summary,
// tags, contributors, publication data and ranking fields.
const BookSchema = new Schema({
  // External identifiers; each one is declared unique.
  // NOTE(review): unique indexes on optional fields are not sparse here --
  // confirm every book always carries all three codes.
  codes: {
    isbn: { type: String, minlength: 10, maxlength: 10, unique: true },
    // NOTE(review): maxlength 14 presumably allows a hyphenated ISBN-13 -- confirm.
    isbn13: { type: String, minlength: 13, maxlength: 14, unique: true },
    asin: { type: String, minlength: 10, maxlength: 10, unique: true }
  },
  title: { type: String, minlength: 3 },
  summary: { 
    content: { type: String, minlength: 60, maxlength: 140 },
    user_id: { type: Schema.Types.ObjectId, ref: 'User' },
    status: { type: Number, min: 0 }
  },
  tags: [ { type: Schema.Types.ObjectId, ref: 'Tag' } ],
  // Fixed option name: it was spelled `minlenth`, which Mongoose does not
  // recognise as a validator, so the 3-character minimum was never enforced.
  // NOTE(review): the character class repeats `a-z`; possibly `A-Z` was meant -- confirm.
  slug: { type: String, match: /^[a-za-z0-9]+(?:-[a-za-z0-9]+)*$/, minlength: 3, maxlength: 50, unique: true },
  cover: { 
    public_id: { type: String },
    version: { type: String }
  },
  // Embedded sub-documents without their own _id: a Person reference plus a role.
  contributors: [new Schema({
    contributor_id: { type: Schema.Types.ObjectId, ref: 'Person' },
    role: { type: String, enum: ['writer', 'translator', 'illustrator', 'other'] }
  }, { _id: false })],
  publication: {
    publisher: { type: String },
    collection: { type: String },
    publicationDate: { type: Date }
  },
  rating: { type: Number, min: 0, max: 5, default: 0 },
  reviews: [ { type: Schema.Types.ObjectId, ref: 'Review' } ],
  alias: { type: String },
  // Self-references to other Book documents.
  main: { type: Schema.Types.ObjectId, ref: 'Book' },
  replicas: [ { type: Schema.Types.ObjectId, ref: 'Book' } ],
  // weight and status are the keys the search controller orders results by;
  // that controller excludes books whose status is 2.
  weight: { type: Number, min: 0, default: 0 },
  status: { type: Number, min: 0, default: 0 }
}, {
  // Adds createdAt / updatedAt fields maintained by Mongoose.
  timestamps: true
});


// Person model
// Plain string fields holding the different forms of a person's name.
const personNameFields = {
  firstName: { type: String },
  middleName: { type: String },
  lastName: { type: String },
  fullName: { type: String },
  pseudonym: { type: String }
};

// A person is the name fields plus references to the Book documents they
// contributed to; timestamps are enabled on the schema.
const PersonSchema = Schema(
  {
    ...personNameFields,
    contributions: [{ type: Schema.Types.ObjectId, ref: 'Book' }]
  },
  { timestamps: true }
);


// Tag model
// Mongoose schema for a tag: a unique label and slug, SEO texts, the books
// carrying the tag, and ranking fields.
const TagSchema = new Schema({
  // Fixed option name on label and slug: it was spelled `minlenth`, which
  // Mongoose does not recognise as a validator, so the 3-character minimum
  // was never enforced.
  label: { type: String, minlength: 3, unique: true  },
  // NOTE(review): the character class repeats `a-z`; possibly `A-Z` was meant -- confirm.
  slug: { type: String, match: /^[a-za-z0-9]+(?:-[a-za-z0-9]+)*$/, minlength: 3, maxlength: 50, unique: true },
  seo: {
    title: { type: String },
    description: { type: String },
    content: { type: String }
  },
  books: [ { type: Schema.Types.ObjectId, ref: 'Book' } ],
  user_id: { type: Schema.Types.ObjectId, ref: 'User' },
  alias: { type: String },
  weight: { type: Number, min: 0, default: 0 },
  // The search controller only returns tags whose status is greater than 0.
  status: { type: Number, min: 0, default: 0 }
}, {
  // Adds createdAt / updatedAt fields maintained by Mongoose.
  timestamps: true
});

搜索功能

// search controllers
const Book   = require('../../models/book.js');
const Person = require('../../models/person.js');
const Tag    = require('../../models/tag.js');
const to     = require('await-to-js').default;

//                            _     
//                           | |    
//    ___  ___  __ _ _ __ ___| |__  
//   / __|/ _ \/ _` | '__/ __| '_ \ 
//   \__ \  __/ (_| | | | (__| | | |
//   |___/\___|\__,_|_|  \___|_| |_|
//                                                           

exports.search = (req, res, next) => {
  const query = new RegExp(req.params.query, 'i');

  if(query.length < 3) {
    let er = new Error('Query length must be minimum 3 characters');
    er.status = 422;
    er.error = err;
    
    return next(er);
  }

  const books = new Promise(async (resolve, reject) => {
    let [err, books] = await to(Book.find({ 
      $or: [ 
        { 'title': query },
        { 'codes.isbn': query }, 
        { 'codes.isbn13': query }, 
        { 'codes.asin': query }
      ],
      'status': { $ne: 2 }
    }).populate('contributors.contributor_id').limit(30));
    if(err) return reject(err);

    resolve(books)
  });

  const contributors = new Promise(async (resolve, reject) => {
    let [err, contributors] = await to(
      Person.find({ fullName: query })
        .populate({ 
          path : 'contributions',
          populate : {
            path :'contributors.contributor_id'
          }
        })
        .limit(20)
    );
    if(err) return reject(err);

    resolve(contributors)
  });

  const tags = new Promise(async (resolve, reject) => {
    let [err, tags] = await to(Tag.find({ label: query }).limit(10));
    if(err) return reject(err);

    tags = tags.filter(tag => tag.status > 0);

    resolve(tags)
  });

  Promise.all([books, contributors, tags])
    .then(values => {
      const booksFromContributors = values[1].map(contributor => contributor.contributions).flat() || [];

      // merge books with removing duplicates
      let mergedBooks = values[0];
      booksFromContributors.forEach(contributorsBook => {
        let duplicateBook = mergedBooks.find(book => book._id === contributorsBook._id);
        
        (!duplicateBook) &&
          mergedBooks.push(contributorsBook)
      })

      // remove desactivated book, status = 2
      mergedBooks = mergedBooks.filter(mergedBook => mergedBook.status !== 2);

      // reorder based on status and weight
      mergedBooks = mergedBooks
        .sort((book1, book2) => (book2.status - book1.status))
        .sort((book1, book2) => (book2.weight - book1.weight));

      res.status(200).json({ books: mergedBooks, tags: values[2] });
    })
}

查询说明

{ 
    "queryPlanner" : {
        "plannerVersion" : 1.0, 
        "namespace" : "blablalivre.books", 
        "indexFilterSet" : false, 
        "parsedQuery" : {
            "$and" : [
                {
                    "$or" : [
                        {
                            "codes.asin" : /Line Papin/i
                        }, 
                        {
                            "codes.isbn" : /Line Papin/i
                        }, 
                        {
                            "codes.isbn13" : /Line Papin/i
                        }, 
                        {
                            "title" : /Line Papin/i
                        }
                    ]
                }, 
                {
                    "status" : {
                        "$not" : {
                            "$eq" : 2.0
                        }
                    }
                }
            ]
        }, 
        "queryHash" : "7A66E97D", 
        "planCacheKey" : "31A167BF", 
        "winningPlan" : {
            "stage" : "LIMIT", 
            "limitAmount" : 30.0, 
            "inputStage" : {
                "stage" : "FETCH", 
                "filter" : {
                    "status" : {
                        "$not" : {
                            "$eq" : 2.0
                        }
                    }
                }, 
                "inputStage" : {
                    "stage" : "OR", 
                    "inputStages" : [
                        {
                            "stage" : "IXSCAN", 
                            "filter" : {
                                "$or" : [
                                    {
                                        "title" : /Line Papin/i
                                    }
                                ]
                            }, 
                            "keyPattern" : {
                                "title" : 1.0
                            }, 
                            "indexName" : "title", 
                            "isMultiKey" : false, 
                            "multiKeyPaths" : {
                                "title" : [

                                ]
                            }, 
                            "isUnique" : false, 
                            "isSparse" : false, 
                            "isPartial" : false, 
                            "indexVersion" : 2.0, 
                            "direction" : "forward", 
                            "indexBounds" : {
                                "title" : [
                                    "[\"\", {})", 
                                    "[/Line Papin/i, /Line Papin/i]"
                                ]
                            }
                        }, 
                        {
                            "stage" : "IXSCAN", 
                            "filter" : {
                                "$or" : [
                                    {
                                        "codes.asin" : /Line Papin/i
                                    }
                                ]
                            }, 
                            "keyPattern" : {
                                "codes.asin" : 1.0
                            }, 
                            "indexName" : "asin", 
                            "isMultiKey" : false, 
                            "multiKeyPaths" : {
                                "codes.asin" : [

                                ]
                            }, 
                            "isUnique" : false, 
                            "isSparse" : false, 
                            "isPartial" : false, 
                            "indexVersion" : 2.0, 
                            "direction" : "forward", 
                            "indexBounds" : {
                                "codes.asin" : [
                                    "[\"\", {})", 
                                    "[/Line Papin/i, /Line Papin/i]"
                                ]
                            }
                        }, 
                        {
                            "stage" : "IXSCAN", 
                            "filter" : {
                                "$or" : [
                                    {
                                        "codes.isbn" : /Line Papin/i
                                    }
                                ]
                            }, 
                            "keyPattern" : {
                                "codes.isbn" : 1.0
                            }, 
                            "indexName" : "isbn", 
                            "isMultiKey" : false, 
                            "multiKeyPaths" : {
                                "codes.isbn" : [

                                ]
                            }, 
                            "isUnique" : false, 
                            "isSparse" : false, 
                            "isPartial" : false, 
                            "indexVersion" : 2.0, 
                            "direction" : "forward", 
                            "indexBounds" : {
                                "codes.isbn" : [
                                    "[\"\", {})", 
                                    "[/Line Papin/i, /Line Papin/i]"
                                ]
                            }
                        }, 
                        {
                            "stage" : "IXSCAN", 
                            "filter" : {
                                "$or" : [
                                    {
                                        "codes.isbn13" : /Line Papin/i
                                    }
                                ]
                            }, 
                            "keyPattern" : {
                                "codes.isbn13" : 1.0
                            }, 
                            "indexName" : "isbn13", 
                            "isMultiKey" : false, 
                            "multiKeyPaths" : {
                                "codes.isbn13" : [

                                ]
                            }, 
                            "isUnique" : false, 
                            "isSparse" : false, 
                            "isPartial" : false, 
                            "indexVersion" : 2.0, 
                            "direction" : "forward", 
                            "indexBounds" : {
                                "codes.isbn13" : [
                                    "[\"\", {})", 
                                    "[/Line Papin/i, /Line Papin/i]"
                                ]
                            }
                        }
                    ]
                }
            }
        }, 
        "rejectedPlans" : [
            {
                "stage" : "LIMIT", 
                "limitAmount" : 30.0, 
                "inputStage" : {
                    "stage" : "FETCH", 
                    "filter" : {
                        "status" : {
                            "$not" : {
                                "$eq" : 2.0
                            }
                        }
                    }, 
                    "inputStage" : {
                        "stage" : "OR", 
                        "inputStages" : [
                            {
                                "stage" : "IXSCAN", 
                                "filter" : {
                                    "$or" : [
                                        {
                                            "codes.asin" : /Line Papin/i
                                        }
                                    ]
                                }, 
                                "keyPattern" : {
                                    "codes.asin" : 1.0
                                }, 
                                "indexName" : "asin", 
                                "isMultiKey" : false, 
                                "multiKeyPaths" : {
                                    "codes.asin" : [

                                    ]
                                }, 
                                "isUnique" : false, 
                                "isSparse" : false, 
                                "isPartial" : false, 
                                "indexVersion" : 2.0, 
                                "direction" : "forward", 
                                "indexBounds" : {
                                    "codes.asin" : [
                                        "[\"\", {})", 
                                        "[/Line Papin/i, /Line Papin/i]"
                                    ]
                                }
                            }, 
                            {
                                "stage" : "IXSCAN", 
                                "filter" : {
                                    "$or" : [
                                        {
                                            "codes.isbn" : /Line Papin/i
                                        }
                                    ]
                                }, 
                                "keyPattern" : {
                                    "codes.isbn" : 1.0
                                }, 
                                "indexName" : "isbn", 
                                "isMultiKey" : false, 
                                "multiKeyPaths" : {
                                    "codes.isbn" : [

                                    ]
                                }, 
                                "isUnique" : false, 
                                "isSparse" : false, 
                                "isPartial" : false, 
                                "indexVersion" : 2.0, 
                                "direction" : "forward", 
                                "indexBounds" : {
                                    "codes.isbn" : [
                                        "[\"\", {})", 
                                        "[/Line Papin/i, /Line Papin/i]"
                                    ]
                                }
                            }, 
                            {
                                "stage" : "IXSCAN", 
                                "filter" : {
                                    "title" : /Line Papin/i
                                }, 
                                "keyPattern" : {
                                    "title" : 1.0, 
                                    "codes.isbn" : 1.0, 
                                    "codes.isbn13" : 1.0, 
                                    "codes.asin" : 1.0, 
                                    "status" : -1.0
                                }, 
                                "indexName" : "search", 
                                "isMultiKey" : false, 
                                "multiKeyPaths" : {
                                    "title" : [

                                    ], 
                                    "codes.isbn" : [

                                    ], 
                                    "codes.isbn13" : [

                                    ], 
                                    "codes.asin" : [

                                    ], 
                                    "status" : [

                                    ]
                                }, 
                                "isUnique" : false, 
                                "isSparse" : false, 
                                "isPartial" : false, 
                                "indexVersion" : 2.0, 
                                "direction" : "forward", 
                                "indexBounds" : {
                                    "title" : [
                                        "[\"\", {})", 
                                        "[/Line Papin/i, /Line Papin/i]"
                                    ], 
                                    "codes.isbn" : [
                                        "[MinKey, MaxKey]"
                                    ], 
                                    "codes.isbn13" : [
                                        "[MinKey, MaxKey]"
                                    ], 
                                    "codes.asin" : [
                                        "[MinKey, MaxKey]"
                                    ], 
                                    "status" : [
                                        "[MaxKey, 2.0)", 
                                        "(2.0, MinKey]"
                                    ]
                                }
                            }, 
                            {
                                "stage" : "IXSCAN", 
                                "filter" : {
                                    "$or" : [
                                        {
                                            "codes.isbn13" : /Line Papin/i
                                        }
                                    ]
                                }, 
                                "keyPattern" : {
                                    "codes.isbn13" : 1.0
                                }, 
                                "indexName" : "isbn13", 
                                "isMultiKey" : false, 
                                "multiKeyPaths" : {
                                    "codes.isbn13" : [

                                    ]
                                }, 
                                "isUnique" : false, 
                                "isSparse" : false, 
                                "isPartial" : false, 
                                "indexVersion" : 2.0, 
                                "direction" : "forward", 
                                "indexBounds" : {
                                    "codes.isbn13" : [
                                        "[\"\", {})", 
                                        "[/Line Papin/i, /Line Papin/i]"
                                    ]
                                }
                            }
                        ]
                    }
                }
            }
        ]
    },
    "ok" : 1.0
}

Indexes on books collection

1 个答案:

答案 0 :(得分:0)

刚刚找到了一种可以大幅提升搜索查询执行性能的解决方案:文本索引(text indexes)。

执行时间结果:从6秒到1秒

更新 MongoDB 索引后,我改用 `$text` 查询操作符($text query operator)重写了查询,然后……搞定(voilà)!

// search controller
const Book    = require('../../models/book.js');
const Person  = require('../../models/person.js');
const Tag     = require('../../models/tag.js');
const to      = require('await-to-js').default;
const helpers = require('../../helpers/index.js');


//                            _     
//                           | |    
//    ___  ___  __ _ _ __ ___| |__  
//   / __|/ _ \/ _` | '__/ __| '_ \ 
//   \__ \  __/ (_| | | | (__| | | |
//   |___/\___|\__,_|_|  \___|_| |_|
//                                                           

exports.search = (req, res, next) => {
  const query = `\"${helpers.cleanString(req.params.query)}\"`;

  if(query.length < 3) {
    let er = new Error('Query length must be minimum 3 characters');
    er.status = 422;
    er.error = err;
    
    return next(er);
  }
  
  const books = new Promise(async (resolve, reject) => {
    let [err, books] = await to(
      Book.find({ 
        $text: { $search: query, $caseSensitive: false },
        'status': { $ne: 2 }
      })
      .sort({'weight': -1, 'status': -1})
      .populate('contributors.contributor_id').limit(30));
    if(err) return reject(err);

    resolve(books)
  });

  const contributors = new Promise(async (resolve, reject) => {
    let [err, contributors] = await to(
      Person.find({ 
        $text: { $search: query, $caseSensitive: false }
      })
      .populate({ 
        path : 'contributions',
        populate : {
          path :'contributors.contributor_id'
        }
      })
      .limit(20)
      );
      if(err) return reject(err);
    
    resolve(contributors)
  });

  const tags = new Promise(async (resolve, reject) => {
    let [err, tags] = await to(
      Tag.find({ 
        $text: { $search: query, $caseSensitive: false, $language: 'fr' }
      }).limit(10));
    if(err) return reject(err);
    
    tags = tags.filter(tag => tag.status > 0);

    resolve(tags)
  });

  Promise.all([books, contributors, tags])
    .then(values => {
      const booksFromContributors = values[1].map(contributor => contributor.contributions).flat() || [];

      // merge books with removing duplicates
      let mergedBooks = values[0];
      booksFromContributors.forEach(contributorsBook => {
        let duplicateBook = mergedBooks.find(book => book._id === contributorsBook._id);
        
        (!duplicateBook) &&
          mergedBooks.push(contributorsBook)
      })
      
      // remove desactivated book, status = 2
      mergedBooks = mergedBooks.filter(mergedBook => mergedBook.status !== 2);

      // reorder based on status and weight
      mergedBooks = mergedBooks
        .sort((book1, book2) => (book2.status - book1.status))
        .sort((book1, book2) => (book2.weight - book1.weight));
  res.status(200).json({ books: mergedBooks, tags: values[2] });
  })
}

下一步将是重写双重排序(也许使用Lodash)。

相关问题