Mongo并发-collection.find()返回重复的文档

时间:2019-03-06 16:33:11

标签: node.js mongodb

我们使用 MongoDB 3.6.2 版和 Node.js 驱动 3.1.10 版。当不同的进程同时执行 collection.find() 和 collection.updateOne() 时,我们可以相对轻松地重现这样一种情况:collection.find() 返回了重复的文档。

我们的集合“模式”基本上是这样的:

{
  _id : "uuid",
  tenantId: "string",
  name: "string",
  created: "date",
  lastUpdated: "date"
}

我们的索引如下:

{
    "v" : 2,
    "key" : {
        "_id" : 1
    },
    "name" : "_id_",
    "ns" : "users.users"
},
{
    "v" : 2,
    "key" : {
        "tenantId" : 1,
        "name" : 1
    },
    "name" : "tenantId_1_name_1",
    "ns" : "users.users"
}
>

然后,我们同时对数据库中唯一的一条记录运行多个读取和更新操作。

要设置测试,我们执行creator.js

const MongoClient = require('mongodb').MongoClient
// Connection string for the local MongoDB server; the database name is taken from the path ("users").
const uri = 'mongodb://localhost/users'
// Name of the collection the test document is written to.
const collectionName = 'users'
// _id assigned to the single user document inserted by this script.
const userId = 'FGw8RXxaohfcVy1Ue4_qa9cFNEj1il-j'
// tenantId stored on that user document.
const tenantId = 'ksuUOItGisnBpyMdCD8a0SGoOTNqRbJc'

/**
 * Seeds the test fixture: connects to `uri`, creates the compound
 * { tenantId, name } index on `collectionName`, and inserts one user
 * document whose `created` and `lastUpdated` hold the same timestamp.
 *
 * The client connection is closed before the returned promise settles,
 * whether seeding succeeded or failed.
 *
 * @returns {Promise<void>} resolves after the user is inserted and the client is closed
 * @throws rejects with the error raised by connect, createIndex or insertOne
 */
async function start () {
  const client = await MongoClient.connect(uri, { useNewUrlParser: true })
  try {
    const usersCollection = client.db().collection(collectionName)

    await createIndex(usersCollection, { tenantId: 1, name: 1 })

    // A single Date instance is shared so a fresh document has created === lastUpdated.
    const now = new Date()

    await usersCollection.insertOne({
      _id: userId,
      tenantId,
      name: 'February',
      created: now,
      lastUpdated: now
    })

    console.log(`created user: ${userId} in tenant: ${tenantId}`)
  } finally {
    // Release the connection on every exit path instead of relying on process.exit().
    await client.close()
  }
}

/**
 * Delegates index creation to the given collection.
 *
 * @param collection object exposing a createIndex(keys, options) method
 * @param keys index specification, forwarded unchanged
 * @param options index options, forwarded unchanged (may be undefined)
 * @returns {Promise} settles with whatever collection.createIndex produces
 */
async function createIndex (collection, keys, options) {
  const outcome = await collection.createIndex(keys, options)
  return outcome
}

/**
 * Script entry point: runs start(), prints 'done!!!' and exits with status 0
 * on success; on any failure logs the error and exits with status 1.
 */
async function main () {
  try {
    await start()
    console.log('done!!!')
    process.exit(0)
  } catch (err) {
    console.log(err)
    process.exit(1)
  }
}

main()

更新从updater.js执行:

const MongoClient = require('mongodb').MongoClient
// Connection string for the local MongoDB server; the database name is taken from the path ("users").
const uri = 'mongodb://localhost/users'
// Name of the collection holding the document being updated.
const collectionName = 'users'
// _id of the user document this script updates in a loop (same value creator.js inserts).
const userId = 'FGw8RXxaohfcVy1Ue4_qa9cFNEj1il-j'

/**
 * Endlessly rewrites the test user: on every iteration it checks that the
 * document with `_id === userId` still exists, then sets `lastUpdated` to the
 * current time and flips `name` between 'January' and 'February', pausing
 * via sleep(1) between iterations.
 *
 * Only the two fields that actually change are sent in `$set`. The document
 * returned by findOne is used purely as an existence check, so stale values
 * (and the immutable `_id`) are never written back.
 *
 * The loop has no normal exit; the returned promise only ever rejects.
 * The client connection is closed before the rejection is delivered.
 *
 * @returns {Promise<never>}
 * @throws {Error} `no users with id: <userId>` when the document is missing,
 *   or any error raised by the driver
 */
async function start () {
  const client = await MongoClient.connect(uri, { useNewUrlParser: true })
  try {
    const usersCollection = client.db().collection(collectionName)

    // keep updating:
    let toggle = false
    while (true) {
      toggle = !toggle
      const userData = await usersCollection.findOne({ _id: userId })
      if (!userData) {
        throw new Error(`no users with id: ${userId}`)
      }
      await usersCollection.updateOne({
        _id: userId
      }, {
        $set: {
          lastUpdated: new Date(),
          // Alternating the value moves the document inside the { tenantId, name } index.
          name: toggle ? 'January' : 'February'
        }
      })
      await sleep(1)
    }
  } finally {
    // Release the connection on every exit path instead of relying on process.exit().
    await client.close()
  }
}

/**
 * Returns a promise that fulfils (with no value) once the given delay has elapsed.
 *
 * @param {number} milliseconds delay handed to setTimeout
 * @returns {Promise<void>}
 */
function sleep (milliseconds) {
  return new Promise(function schedule (done) {
    setTimeout(done, milliseconds)
  })
}

/**
 * Script entry point: runs start(), prints 'done!!!' and exits with status 0
 * on success; on any failure logs the error and exits with status 1.
 */
async function main () {
  try {
    await start()
    console.log('done!!!')
    process.exit(0)
  } catch (err) {
    console.log(err)
    process.exit(1)
  }
}

main()

reader.js执行读取:

const MongoClient = require('mongodb').MongoClient
// Connection string for the local MongoDB server; the database name is taken from the path ("users").
const uri = 'mongodb://localhost/users'
// Name of the collection being queried.
const collectionName = 'users'
// tenantId used as the query filter (same value creator.js stores on the test user).
const tenantId = 'ksuUOItGisnBpyMdCD8a0SGoOTNqRbJc'

/**
 * Endlessly queries the users of `tenantId`, sorted by (and hinted to use) the
 * { tenantId, name } index with a limit of 10, and fails as soon as a query
 * returns anything other than exactly one document. Pauses via sleep(1)
 * between queries.
 *
 * The loop has no normal exit; the returned promise only ever rejects.
 * The client connection is closed before the rejection is delivered.
 *
 * NOTE(review): the accompanying write-up reports that this check trips with
 * the same _id returned twice while `name` is being updated concurrently.
 * That detection is the purpose of this script, so the result is deliberately
 * not de-duplicated here.
 *
 * @returns {Promise<never>}
 * @throws {Error} `weird number of users obtained: <json>` when the result
 *   length is not 1, or any error raised by the driver
 */
async function start () {
  const client = await MongoClient.connect(uri, {
    useNewUrlParser: true
  })
  try {
    const usersCollection = client.db().collection(collectionName)

    // keep reading:
    while (true) {
      // find() returns the cursor directly; only toArray() needs awaiting.
      const cursor = usersCollection.find({
        tenantId: tenantId
      }, {
        sort: { tenantId: 1, name: 1 },
        hint: { tenantId: 1, name: 1 },
        limit: 10
      })
      const users = await cursor.toArray()
      if (users.length !== 1) {
        throw new Error(`weird number of users obtained: ${JSON.stringify(users, null, 2)}`)
      }
      await sleep(1)
    }
  } finally {
    // Release the connection on every exit path instead of relying on process.exit().
    await client.close()
  }
}

/**
 * Returns a promise that fulfils (with no value) once the given delay has elapsed.
 *
 * @param {number} milliseconds delay handed to setTimeout
 * @returns {Promise<void>}
 */
function sleep (milliseconds) {
  return new Promise(function schedule (done) {
    setTimeout(done, milliseconds)
  })
}

/**
 * Script entry point: runs start(), prints 'done!!!' and exits with status 0
 * on success; on any failure logs the error and exits with status 1.
 */
async function main () {
  try {
    await start()
    console.log('done!!!')
    process.exit(0)
  } catch (err) {
    console.log(err)
    process.exit(1)
  }
}

main()

同时运行 12 个 updater.js 实例和 3 个 reader.js 实例时,该错误可以重现(有时很快出现,有时要等几分钟);reader.js 抛出的错误如下所示:

Error: weird number of users obtained: [
  {
    "_id": "FGw8RXxaohfcVy1Ue4_qa9cFNEj1il-j",
    "tenantId": "ksuUOItGisnBpyMdCD8a0SGoOTNqRbJc",
    "name": "February",
    "created": "2019-03-07T15:46:28.238Z",
    "lastUpdated": "2019-03-07T16:01:37.504Z"
  },
  {
    "_id": "FGw8RXxaohfcVy1Ue4_qa9cFNEj1il-j",
    "tenantId": "ksuUOItGisnBpyMdCD8a0SGoOTNqRbJc",
    "name": "January",
    "created": "2019-03-07T15:46:28.238Z",
    "lastUpdated": "2019-03-07T16:01:37.507Z"
  }
]

注意:两条记录中的“_id”和“created”字段完全相同。

我们需要能够按“name”字段高效地排序,这就是我们为它建立索引的原因。我们还需要能够经常更改“name”。有没有一种方法可以在保留我们所需功能(分页/排序)的同时避免或减轻这种重复?有没有通用的方法可以在数据库层面或应用程序代码中解决此问题?

0 个答案:

没有答案