我认为我的Go服务器正在杀死我的Mongo服务器

时间:2016-11-09 16:17:25

标签: mongodb go mgo

总而言之,我在3个AWS区域拥有许多服务器。为了减少用Node编写的服务器,我在Go中重写了它们。让我高兴和惊喜1 Go服务器可以处理10个节点服务器处理的CPU利用率更低和ELB延迟更少。这太棒了。但是,我想也许他们太快了哈哈。我们有一个(是的,我知道非常糟糕)用于记录数据的MongoDB服务器。每个请求都以特定方式记录并发送到此服务器。由于只添加了2台Go服务器,MongoDB服务器已经开始崩溃。

TRACEBACK

2016-11-09T03:06:41.240-0500 I JOURNAL  [journal writer] warning couldn't write to / rename file /data/data/db/journal/prealloc.2: Couldn't open directory '/data/data/db/journal' for flushing: errno:24 Too many open files
2016-11-09T03:06:41.240-0500 I JOURNAL  [journal writer] warning exception opening journal file couldn't open file /data/data/db/journal/j._213 for writing errno:9 Bad file descriptor
2016-11-09T03:06:41.240-0500 I JOURNAL  [journal writer] error exception in dur::journal couldn't open file /data/data/db/journal/j._213 for writing errno:9 Bad file descriptor
2016-11-09T03:06:41.242-0500 F JOURNAL  [journal writer] dbexception in journalWriterThread causing immediate shutdown: 13516 couldn't open file /data/data/db/journal/j._213 for writing errno:9 Bad file descriptor
2016-11-09T03:06:41.242-0500 I -        [journal writer] Invariant failure false src/mongo/db/storage/mmap_v1/dur_journal_writer.cpp 258
2016-11-09T03:06:41.246-0500 I JOURNAL  [durability] warning couldn't write to / rename file /data/data/db/journal/prealloc.2: couldn't open file /data/data/db/journal/prealloc.2 for writing errno:9 Bad file descriptor
2016-11-09T03:06:41.246-0500 F JOURNAL  [durability] dbexception in durThread causing immediate shutdown: 13516 couldn't open file /data/data/db/journal/j._213 for writing errno:9 Bad file descriptor
2016-11-09T03:06:41.246-0500 I -        [durability] Invariant failure false src/mongo/db/storage/mmap_v1/dur.cpp 862
2016-11-09T03:06:41.246-0500 I CONTROL  [journal writer]
 0xf51949 0xef1671 0xed6192 0xd17613 0xf9f9b4 0x7fd09697d184 0x7fd09544337d
----- BEGIN BACKTRACE -----
{"backtrace":[{"b":"400000","o":"B51949"},{"b":"400000","o":"AF1671"},{"b":"400000","o":"AD6192"},{"b":"400000","o":"917613"},{"b":"400000","o":"B9F9B4"},{"b":"7FD096975000","o":"8184"},{"b":"7FD095349000","o":"FA37D"}],"processInfo":{ "mongodbVersion" : "3.0.3", "gitVersion" : "b40106b36eecd1b4407eb1ad1af6bc60593c6105", "uname" : { "sysname" : "Linux", "release" : "3.13.0-54-generic", "version" : "#91-Ubuntu SMP Tue May 26 19:15:08 UTC 2015", "machine" : "x86_64" }, "somap" : [ { "elfType" : 2, "b" : "400000", "buildId" : "F56F80CB96B4DBFC070BEB0ADAC7D6B274BFC6B1" }, { "b" : "7FFF14FD0000", "elfType" : 3, "buildId" : "1C0D0A18FF043EED9EE11DB5E5E90A3F74729341" }, { "b" : "7FD096975000", "path" : "/lib/x86_64-linux-gnu/libpthread.so.0", "elfType" : 3, "buildId" : "31E9F21AE8C10396171F1E13DA15780986FA696C" }, { "b" : "7FD096716000", "path" : "/lib/x86_64-linux-gnu/libssl.so.1.0.0", "elfType" : 3, "buildId" : "74864DB9D5F69D39A67E4755012FB6573C469B3D" }, { "b" : "7FD09633A000", "path" : "/lib/x86_64-linux-gnu/libcrypto.so.1.0.0", "elfType" : 3, "buildId" : "AAE7CFF8351B730830BDBCE0DCABBE06574B7144" }, { "b" : "7FD096132000", "path" : "/lib/x86_64-linux-gnu/librt.so.1", "elfType" : 3, "buildId" : "E2A6DD5048A0A051FD61043BDB69D8CC68192AB7" }, { "b" : "7FD095F2E000", "path" : "/lib/x86_64-linux-gnu/libdl.so.2", "elfType" : 3, "buildId" : "DA9B8C234D0FE9FD8CAAC8970A7EC1B6C8F6623F" }, { "b" : "7FD095C2A000", "path" : "/usr/lib/x86_64-linux-gnu/libstdc++.so.6", "elfType" : 3, "buildId" : "76190E922AF7457D078F75C9B15FA184E83EB506" }, { "b" : "7FD095924000", "path" : "/lib/x86_64-linux-gnu/libm.so.6", "elfType" : 3, "buildId" : "D144258E614900B255A31F3FD2283A878670D5BC" }, { "b" : "7FD09570E000", "path" : "/lib/x86_64-linux-gnu/libgcc_s.so.1", "elfType" : 3, "buildId" : "36311B4457710AE5578C4BF00791DED7359DBB92" }, { "b" : "7FD095349000", "path" : "/lib/x86_64-linux-gnu/libc.so.6", "elfType" : 3, "buildId" : "CF699A15CAAE64F50311FC4655B86DC39A479789" }, { "b" : "7FD096B93000", "path" : "/lib64/ld-linux-x86-64.so.2", "elfType" : 3, "buildId" : "D0F537904076D73F29E4A37341F8A449E2EF6CD0" } ] }}
 mongod(_ZN5mongo15printStackTraceERSo+0x29) [0xf51949]
 mongod(_ZN5mongo10logContextEPKc+0xE1) [0xef1671]
 mongod(_ZN5mongo15invariantFailedEPKcS1_j+0xB2) [0xed6192]
 mongod(_ZN5mongo3dur13JournalWriter20_journalWriterThreadEv+0x953) [0xd17613]
 mongod(+0xB9F9B4) [0xf9f9b4]
 libpthread.so.0(+0x8184) [0x7fd09697d184]
 libc.so.6(clone+0x6D) [0x7fd09544337d]

为了解决这个问题,我增加了该机器的ulimit。但是我认为也可以从程序方面做些事情。

这是我的mongo包

package mongo

import (
    "time"

    "gopkg.in/mgo.v2"
)

var (
    // TagDB ...
    TagDB DataStore
    // LogDB ...
    LogDB DataStore
)

func init() {
    TagDB.ConnectToTagserver()
    LogDB.ConnectToLogServer()
}

// DataStore containing a pointer to a mgo session
type DataStore struct {
    Session *mgo.Session
}

// ConnectToTagserver is a helper method that connections to pubgears' tagserver
// database
func (ds *DataStore) ConnectToTagserver() {
    mongoDBDialInfo := &mgo.DialInfo{
        Addrs:    []string{"some_IP"},
        Timeout:  60 * time.Second,
        Database: "tagserver",
    }
    sess, err := mgo.DialWithInfo(mongoDBDialInfo)
    if err != nil {
        panic(err)
    }
    sess.SetMode(mgo.Monotonic, true)
    TagDB.Session = sess
}


// database SERVER IN QUESTION
func (ds *DataStore) ConnectToLogServer() {
    mongoDBDialInfo := &mgo.DialInfo{
        Addrs:    []string{"some_IP"}, 
        Timeout:  60 * time.Second,
        Database: "nginx_logs",
    }
    sess, err := mgo.DialWithInfo(mongoDBDialInfo)
    if err != nil {
        println(1)
        panic(err)
    }
    sess.SetMode(mgo.Monotonic, true)
    LogDB.Session = sess
}

// Close is a helper method that ensures the session is properly terminated
func (ds *DataStore) Close() {
    ds.Session.Close()
}

以下是使用该软件包的文件

package models

import (
    "errors"
    "fmt"
    "strconv"
    "time"

    "gopkg.in/mgo.v2/bson"

    "./mongo"
)

// RawRequests ...
type RawRequests struct {
    TagServer   string            `json:"tag_server"`
    Server      string            `json:"server"`
    Slug        string            `json:"slug"`
    Zone        string            `json:"zone"`
    Size        string            `json:"size"`
    Network     string            `json:"network"`
    TagHash     string            `json:"tag_hash"`
    Extra       string            `json:"extra"`
    Logged      time.Time         `json:"logged"`
    Date        string            `json:"date"`
    Hour        int               `json:"hour"`
    QueryParams map[string]string `json:"query_params"`
}

// RawTagRequests ...
type RawTagRequests struct {
    TagServer   string            `json:"tag_server"`
    Server      string            `json:"server"`
    Slug        string            `json:"slug"`
    Zone        string            `json:"zone"`
    Size        string            `json:"size"`
    Network     string            `json:"network"`
    TagHash     string            `json:"tag_hash"`
    Extra       string            `json:"extra"`
    Logged      time.Time         `json:"logged"`
    Date        string            `json:"date"`
    Hour        int               `json:"hour"`
    QueryParams map[string]string `json:"query_params"`
    ChainNext   string            `json:"chain_next"`
}

// LogRequest ...
func LogRequest(tagServer string, server string, slug string, zone string, size string, network string, extra string, tagHash string, queryParams map[string]string) error {
    dbsession := mongo.LogDB.Session.Copy()
    defer dbsession.Close()
    logCollection := dbsession.DB("nginx_logs").C("raw_requests")
    d := time.Now()
    hour, _, _ := d.Clock()
    year, month, day, _ := formatDate(d.Year(), int(d.Month()), d.Day())
    date := fmt.Sprintf("%s-%s-%s", year, month, day)
    return logCollection.Insert(bson.M{"tag_server": tagServer, "server": server, "slug": slug, "zone": zone, "size": size, "network": network, "tag_hash": tagHash, "extra": extra, "logged": time.Now(), "date": date, "hour": hour, "query_params": queryParams})
}

// LogTagRequests ...
func LogTagRequests(tagServer string, server string, slug string, zone string, size string, network string, extra string, tagHash string, queryParams map[string]string, chainHash string) error {
    dbsession := mongo.LogDB.Session.Copy()
    defer dbsession.Close()
    logCollection := dbsession.DB("nginx_logs").C("raw_tag_requests")
    d := time.Now()
    hour, _, _ := d.Clock()
    year, month, day, _ := formatDate(d.Year(), int(d.Month()), d.Day())
    date := fmt.Sprintf("%s-%s-%s", year, month, day)
    return logCollection.Insert(bson.M{"tag_server": tagServer, "server": server, "slug": slug, "zone": zone, "size": size, "network": network, "tag_hash": tagHash, "extra": extra, "logged": time.Now(), "date": date, "hour": hour, "query_params": queryParams, "chain_hash": chainHash})
}

func formatDate(year int, month int, day int) (y string, m string, d string, err error) {
    var tmonth, tday, tyear string
    tyear = strconv.Itoa(year)
    // convert the month and year and replace with the correct number of digits
    if month < 10 {
        tmonth = "0" + strconv.Itoa(int(month))
    } else {
        tmonth = strconv.Itoa(int(month))
    }
    if day < 10 {
        tday = "0" + strconv.Itoa(int(day))
    } else {
        tday = strconv.Itoa(int(day))
    }
    if len(tmonth) == 2 && len(tday) == 2 && len(tyear) == 4 {
        return tyear, tmonth, tday, nil
    }
    return tyear, tmonth, tday, errors.New("One of the values passed in does not meet the time format requirement month-day-year --> 1991-01-15")
}

最后在我的路线中,我只是调用我的方法go LogRequest(tagServer, host, site, channel, adSize, network, tagExtra, tagHash, queryParams)

1 个答案:

答案 0 :(得分:1)

  1. 每个请求 - 响应周期只应使用一次副本。在每次调用模型时使用它将导致消耗大量套接字。每个模型调用都在打开套接字并在退出例程时关闭它(使用延迟)。尝试在同一请求 - 响应周期中对数据库的多次调用重用复制的会话。
  2. 从日志和代码片段看,您应该注意1,除非在每个请求 - 响应周期中,您只调用模型例程执行CRUD一次。
  3. 复制使用新套接字打开连接,保留来自父会话的auth凭证,而clone使用与父会话相同的套接字(使用克隆会话时,其中一个父/子将必须在套接字上等待,直到另一个完成它)。因此,要并行处理对服务器的多个请求,最好使用每个请求处理程序的复制。(https://godoc.org/gopkg.in/mgo.v2#Session.Clone
  4. 在机器上增加ulimit后,请确保立即反映 - http://lzone.de/blog/Apply-changes-to-limits.conf-immediately