MongoDB随机挂起

时间:2019-11-05 04:53:37

标签: ruby mongodb

概述

我有一个将MongoDB用作数据库的ruby应用程序。在为此应用程序运行测试时,我正在使用Minitest为每个测试用例创建集合和索引。

使用docker compose创建测试环境,其中一个容器正在运行测试,另一个容器正在运行MongoDB。

问题

第一次运行测试时,过一会儿MongoDB卡住了。任何查询集合的请求都不会响应。

在使用命令行客户端开始运行测试之前,我已经能够连接到它。当我使用db.serverStatus()检查服务器的状态时,我看到某些操作已获得锁。通过查看globalLock字段,我了解到1个操作具有write锁,并且有2个操作正在等待获取read锁。

我无法理解为什么这些操作会挂起而不产生锁。我不知道如何进一步调试此问题。

MongoDB版本:3.6.13 Ruby驱动程序版本:2.8.0

我还尝试了其他版本3.6.x和4.0

我们非常感谢您的帮助或指导。

db.serverStatus输出

{
    "host" : "c658c885eb90",
    "version" : "3.6.14",
    "process" : "mongod",
    "pid" : NumberLong(1),
    "uptime" : 98,
    "uptimeMillis" : NumberLong(97909),
    "uptimeEstimate" : NumberLong(97),
    "localTime" : ISODate("2019-11-03T16:09:14.289Z"),
    "asserts" : {
        "regular" : 0,
        "warning" : 0,
        "msg" : 0,
        "user" : 0,
        "rollovers" : 0
    },
    "connections" : {
        "current" : 6,
        "available" : 838854,
        "totalCreated" : 11
    },
    "extra_info" : {
        "note" : "fields vary by platform",
        "page_faults" : 0
    },
    "globalLock" : {
        "totalTime" : NumberLong(97908000),
        "currentQueue" : {
            "total" : 2,
            "readers" : 2,
            "writers" : 0
        },
        "activeClients" : {
            "total" : 13,
            "readers" : 0,
            "writers" : 1
        }
    },
    "locks" : {
        "Global" : {
            "acquireCount" : {
                "r" : NumberLong(14528),
                "w" : NumberLong(12477),
                "W" : NumberLong(5)
            }
        },
        "Database" : {
            "acquireCount" : {
                "r" : NumberLong(1020),
                "w" : NumberLong(14459),
                "R" : NumberLong(3),
                "W" : NumberLong(6599)
            },
            "acquireWaitCount" : {
                "r" : NumberLong(2)
            },
            "timeAcquiringMicros" : {
                "r" : NumberLong(76077321)
            }
        },
        "Collection" : {
            "acquireCount" : {
                "R" : NumberLong(1018),
                "W" : NumberLong(8805)
            }
        },
        "Metadata" : {
            "acquireCount" : {
                "W" : NumberLong(37)
            }
        }
    },
    "logicalSessionRecordCache" : {
        "activeSessionsCount" : 3,
        "sessionsCollectionJobCount" : 1,
        "lastSessionsCollectionJobDurationMillis" : 0,
        "lastSessionsCollectionJobTimestamp" : ISODate("2019-11-03T16:07:36.407Z"),
        "lastSessionsCollectionJobEntriesRefreshed" : 0,
        "lastSessionsCollectionJobEntriesEnded" : 0,
        "lastSessionsCollectionJobCursorsClosed" : 0,
        "transactionReaperJobCount" : 0,
        "lastTransactionReaperJobDurationMillis" : 0,
        "lastTransactionReaperJobTimestamp" : ISODate("2019-11-03T16:07:36.407Z"),
        "lastTransactionReaperJobEntriesCleanedUp" : 0
    },
    "network" : {
        "bytesIn" : NumberLong(1682811),
        "bytesOut" : NumberLong(1019834),
        "physicalBytesIn" : NumberLong(1682811),
        "physicalBytesOut" : NumberLong(1019834),
        "numRequests" : NumberLong(7822),
        "compression" : {
            "snappy" : {
                "compressor" : {
                    "bytesIn" : NumberLong(0),
                    "bytesOut" : NumberLong(0)
                },
                "decompressor" : {
                    "bytesIn" : NumberLong(0),
                    "bytesOut" : NumberLong(0)
                }
            }
        },
        "serviceExecutorTaskStats" : {
            "executor" : "passthrough",
            "threadsRunning" : 6
        }
    },
    "opLatencies" : {
        "reads" : {
            "latency" : NumberLong(61374),
            "ops" : NumberLong(963)
        },
        "writes" : {
            "latency" : NumberLong(13074),
            "ops" : NumberLong(286)
        },
        "commands" : {
            "latency" : NumberLong(988232),
            "ops" : NumberLong(6570)
        }
    },
    "opReadConcernCounters" : {
        "available" : NumberLong(0),
        "linearizable" : NumberLong(0),
        "local" : NumberLong(0),
        "majority" : NumberLong(0),
        "none" : NumberLong(944)
    },
    "opcounters" : {
        "insert" : 246,
        "query" : 944,
        "update" : 40,
        "delete" : 0,
        "getmore" : 0,
        "command" : 6595
    },
    "opcountersRepl" : {
        "insert" : 0,
        "query" : 0,
        "update" : 0,
        "delete" : 0,
        "getmore" : 0,
        "command" : 0
    },
    "storageEngine" : {
        "name" : "ephemeralForTest",
        "supportsCommittedReads" : false,
        "readOnly" : false,
        "persistent" : false
    },
    "tcmalloc" : {
        "generic" : {
            "current_allocated_bytes" : 8203504,
            "heap_size" : 12496896
        },
        "tcmalloc" : {
            "pageheap_free_bytes" : 2760704,
            "pageheap_unmapped_bytes" : 0,
            "max_total_thread_cache_bytes" : 516947968,
            "current_total_thread_cache_bytes" : 1007120,
            "total_free_bytes" : 1532688,
            "central_cache_free_bytes" : 231040,
            "transfer_cache_free_bytes" : 294528,
            "thread_cache_free_bytes" : 1007120,
            "aggressive_memory_decommit" : 0,
            "pageheap_committed_bytes" : 12496896,
            "pageheap_scavenge_count" : 0,
            "pageheap_commit_count" : 9,
            "pageheap_total_commit_bytes" : 12496896,
            "pageheap_decommit_count" : 0,
            "pageheap_total_decommit_bytes" : 0,
            "pageheap_reserve_count" : 9,
            "pageheap_total_reserve_bytes" : 12496896,
            "spinlock_total_delay_ns" : 0,
            "formattedString" : "------------------------------------------------\nMALLOC:        8204080 (    7.8 MiB) Bytes in use by application\nMALLOC: +      2760704 (    2.6 MiB) Bytes in page heap freelist\nMALLOC: +       231040 (    0.2 MiB) Bytes in central cache freelist\nMALLOC: +       294528 (    0.3 MiB) Bytes in transfer cache freelist\nMALLOC: +      1006544 (    1.0 MiB) Bytes in thread cache freelists\nMALLOC: +      1204480 (    1.1 MiB) Bytes in malloc metadata\nMALLOC:   ------------\nMALLOC: =     13701376 (   13.1 MiB) Actual memory used (physical + swap)\nMALLOC: +            0 (    0.0 MiB) Bytes released to OS (aka unmapped)\nMALLOC:   ------------\nMALLOC: =     13701376 (   13.1 MiB) Virtual address space used\nMALLOC:\nMALLOC:            415              Spans in use\nMALLOC:             18              Thread heaps in use\nMALLOC:           4096              Tcmalloc page size\n------------------------------------------------\nCall ReleaseFreeMemory() to release freelist memory to the OS (via madvise()).\nBytes released to the OS take up virtual address space but no physical memory.\n"
        }
    },
    "transactions" : {
        "retriedCommandsCount" : NumberLong(0),
        "retriedStatementsCount" : NumberLong(0),
        "transactionsCollectionWriteCount" : NumberLong(0)
    },
    "transportSecurity" : {
        "1.0" : NumberLong(0),
        "1.1" : NumberLong(0),
        "1.2" : NumberLong(0),
        "1.3" : NumberLong(0),
        "unknown" : NumberLong(0)
    },
    "mem" : {
        "bits" : 64,
        "resident" : 41,
        "virtual" : 836,
        "supported" : true,
        "mapped" : 0
    },
    "metrics" : {
        "commands" : {
            "buildInfo" : {
                "failed" : NumberLong(0),
                "total" : NumberLong(2)
            },
            "count" : {
                "failed" : NumberLong(0),
                "total" : NumberLong(21)
            },
            "createIndexes" : {
                "failed" : NumberLong(0),
                "total" : NumberLong(5656)
            },
            "drop" : {
                "failed" : NumberLong(0),
                "total" : NumberLong(784)
            },
            "dropIndexes" : {
                "failed" : NumberLong(87),
                "total" : NumberLong(87)
            },
            "find" : {
                "failed" : NumberLong(0),
                "total" : NumberLong(944)
            },
            "getLog" : {
                "failed" : NumberLong(0),
                "total" : NumberLong(1)
            },
            "insert" : {
                "failed" : NumberLong(0),
                "total" : NumberLong(246)
            },
            "isMaster" : {
                "failed" : NumberLong(0),
                "total" : NumberLong(38)
            },
            "listCollections" : {
                "failed" : NumberLong(0),
                "total" : NumberLong(1)
            },
            "listIndexes" : {
                "failed" : NumberLong(1),
                "total" : NumberLong(1)
            },
            "replSetGetStatus" : {
                "failed" : NumberLong(1),
                "total" : NumberLong(1)
            },
            "serverStatus" : {
                "failed" : NumberLong(0),
                "total" : NumberLong(2)
            },
            "update" : {
                "failed" : NumberLong(0),
                "total" : NumberLong(40)
            },
            "whatsmyuri" : {
                "failed" : NumberLong(0),
                "total" : NumberLong(1)
            }
        },
        "cursor" : {
            "timedOut" : NumberLong(0),
            "open" : {
                "noTimeout" : NumberLong(0),
                "pinned" : NumberLong(0),
                "total" : NumberLong(0)
            }
        },
        "document" : {
            "deleted" : NumberLong(0),
            "inserted" : NumberLong(246),
            "returned" : NumberLong(398),
            "updated" : NumberLong(40)
        },
        "getLastError" : {
            "wtime" : {
                "num" : 0,
                "totalMillis" : 0
            },
            "wtimeouts" : NumberLong(0)
        },
        "operation" : {
            "scanAndOrder" : NumberLong(0),
            "writeConflicts" : NumberLong(0)
        },
        "query" : {
            "updateOneOpStyleBroadcastWithExactIDCount" : NumberLong(0),
            "upsertReplacementCannotTargetByQueryCount" : NumberLong(0)
        },
        "queryExecutor" : {
            "scanned" : NumberLong(435),
            "scannedObjects" : NumberLong(438)
        },
        "record" : {
            "moves" : NumberLong(0)
        },
        "repl" : {
            "executor" : {
                "pool" : {
                    "inProgressCount" : 0
                },
                "queues" : {
                    "networkInProgress" : 0,
                    "sleepers" : 0
                },
                "unsignaledEvents" : 0,
                "shuttingDown" : false,
                "networkInterface" : "\nNetworkInterfaceASIO Operations' Diagnostic:\nOperation:    Count:   \nConnecting    0        \nIn Progress   0        \nSucceeded     0        \nCanceled      0        \nFailed        0        \nTimed Out     0        \n\n"
            },
            "apply" : {
                "attemptsToBecomeSecondary" : NumberLong(0),
                "batchSize" : NumberLong(0),
                "batches" : {
                    "num" : 0,
                    "totalMillis" : 0
                },
                "ops" : NumberLong(0)
            },
            "buffer" : {
                "count" : NumberLong(0),
                "maxSizeBytes" : NumberLong(0),
                "sizeBytes" : NumberLong(0)
            },
            "initialSync" : {
                "completed" : NumberLong(0),
                "failedAttempts" : NumberLong(0),
                "failures" : NumberLong(0)
            },
            "network" : {
                "bytes" : NumberLong(0),
                "getmores" : {
                    "num" : 0,
                    "totalMillis" : 0
                },
                "ops" : NumberLong(0),
                "readersCreated" : NumberLong(0)
            },
            "preload" : {
                "docs" : {
                    "num" : 0,
                    "totalMillis" : 0
                },
                "indexes" : {
                    "num" : 0,
                    "totalMillis" : 0
                }
            }
        },
        "storage" : {
            "freelist" : {
                "search" : {
                    "bucketExhausted" : NumberLong(0),
                    "requests" : NumberLong(0),
                    "scanned" : NumberLong(0)
                }
            }
        },
        "ttl" : {
            "deletedDocuments" : NumberLong(0),
            "passes" : NumberLong(1)
        }
    },
    "ok" : 1
}

1 个答案:

答案 0 :(得分:-1)

MongoDB卡住了

您需要缩小范围。您是否可以连接到部署中的任何节点?在每个节点上运行{ismaster: 1}?执行任何查询,特别是对其他数据库和集合的查询吗?

使用服务器日志来协助调试。如果服务器出现问题,通常会将问题记录在日志中。

还可以提高日志记录级别以进行调试,以从驱动程序获取其他诊断信息:https://docs.mongodb.com/mongoid/master/tutorials/mongoid-configuration/#logging