聚合mongo数据php

时间:2014-04-14 15:16:52

标签: php mongodb mongodb-php

我有一个包含113个文档的集合。这是来自大学各系的数据。共有三种类型的系:

自然科学系=> “chairtype”=“E”,

人文部门=> “chairtype”=“G”,

广告部门=> “chairtype”=“T”

每个文档都包含具有相同名称的字段:

“mijczjeqeo”

“vmfqvfjptu”

“fwtweaeeba”

...

等等

每月有113人从网络表单填写这些字段。 1人填写1份文件。填写后,我通过php脚本在每个文档中创建值的历史记录。

“mijczjeqeo”值移至“versions.0.content.mijczjeqeo”

“vmfqvfjptu”值移至“versions.0.content.vmfqvfjptu”

“fwtweaeeba”值移至“versions.0.content.fwtweaeeba”

...

等等

并保存最后的值...

所有“versions.0.content.*”的值都移至“versions.1.content.*”

所有“versions.1.content.*”的值都移至“versions.2.content.*”

所有“versions.2.content.*”的值都移至“versions.3.content.*”

...

等等

我需要获得去年按chairtype分组的每个参数的平均值

例如1份文件(“有机化学系”):

{
   "_id": ObjectId("52b85dfa32b6249513f15897"),
   "atkswlntfd": 0,
   "auwbsjqzir": 0,
   "avqrnjzbgd": 0,
   "awquatbduv": 0,
   "axdducvoxb": 2.46,
   "bkoldugcrp": 4,
   "bzccjslewi": 0,
   "cclwyezydc": 0,
   "chairtype": "E",
   "confirmed": "1",
   "covfctuuhi": 0,
   "dingrnyknr": 0,
   "dkfknpzsnt": 0,
   "dqetuhllse": 0,
   "duorlxiqbw": 5,
   "eayoicezsh": 20,
   "esrfffruoy": 0,
   "ewdunlkxue": 1,
   "ewfshjnome": 0,
   "exakqiudxg": 5,
   "fabdcybqxu": 0,
   "fsplxunmaf": 0,
   "fubxmogyam": 1,
   "fuzqrnwsks": 3,
   "fwtweaeeba": 52.31,
   "fybnnlojgb": 5,
   "gdjheqrqcx": 0,
   "gpupstzwjd": 6,
   "gxilphzzcu": 0,
   "hbahrruokf": 0,
   "hbqnleclwp": 2,
   "hchpoxxnwz": 0,
   "hmorfnbfvf": 0,
   "hqatnzynxb": 0,
   "hrqssioxdv": 0,
   "hvscavwupe": 0,
   "hyzlbtnxil": 0,
   "idzxqjoxgd": 0,
   "ikxsvguboy": 0,
   "ipjpwkbqnt": 0,
   "izqighabwk": 9000000,
   "jncncbplme": 3,
   "jxkspszlrc": 1,
   "kekarveuhb": 0,
   "klyoglzriu": 0,
   "kmvuelmdwe": 0,
   "knxzfjwnax": 5,
   "kqfhjboecc": 0,
   "kqhojbwvmo": 0,
   "lchogmhynm": 0,
   "lmuwyeqvph": 7,
   "lvcdbhisbx": 0,
   "mijczjeqeo": 8,
   "mpxzquzcat": 0,
   "mqqoetqued": 0,
   "muktdrzphw": 0,
   "nceszojuvt": 0,
   "nypnjqgxop": 1,
   "ojklibfieg": 2,
   "padotysmxb": 0,
   "parent": "47de3176-bbc3-44e0-8063-8920ac56fdc8",
   "pidwyruvfq": 35.08,
   "pkeymzxsrj": 0,
   "pnjtfvzwiv": 1,
   "pqjnpoxmcx": 0,
   "pyexnkjujx": 38,
   "qfeqdvzssg": 0,
   "qidslfqnwn": 0,
   "qvjszkahdc": 0,
   "qzoriqedoh": 0,
   "rjicuyfsmt": 0,
   "rqenalbuko": 40,
   "rxkwogbxwu": 0,
   "sbqqabqukn": 1,
   "skhgbmucrp": 0,
   "slewjrvgjn": 0,
   "tidjarsatz": 0,
   "title_ru": "Кафедра органической химии",
   "tmbagkmlgb": 0,
   "type": "chair",
   "uploaded": "1",
   "uqcdessbeu": 0,
   "url": "http:\/\/www.herzen.spb.ru\/main\/structure\/fukultets\/him\/1208434887\/",
   "uuid": "a0a39ace-694c-48c5-841d-8b351e5b91da",
   "vacoxpronz": 0,
   "vdjfydjrpa": 13,
   "versions": {
     "0": {
       "_id": ObjectId("52dbbc4cfb0a29ce4a8b45bd"),
       "content": {
         "atkswlntfd": 0,
         "auwbsjqzir": 0,
         "avqrnjzbgd": 0,
         "awquatbduv": 0,
         "axdducvoxb": 3,
         "bkoldugcrp": 4,
         "bzccjslewi": 0,
         "cclwyezydc": 0,
         "confirmed": null,
         "covfctuuhi": 0,
         "dingrnyknr": 0,
         "dkfknpzsnt": 0,
         "dqetuhllse": 2,
         "duorlxiqbw": 5,
         "eayoicezsh": 20,
         "esrfffruoy": 0,
         "ewdunlkxue": 3,
         "ewfshjnome": 0,
         "exakqiudxg": 5,
         "fabdcybqxu": 1,
         "fsplxunmaf": 0,
         "fubxmogyam": 1,
         "fuzqrnwsks": 0,
         "fwtweaeeba": 55,
         "fybnnlojgb": 0,
         "gdjheqrqcx": 0,
         "gpupstzwjd": 6,
         "gxilphzzcu": 0,
         "hbahrruokf": 0,
         "hbqnleclwp": 3,
         "hchpoxxnwz": 1849020,
         "hmorfnbfvf": 2,
         "hqatnzynxb": 0,
         "hrqssioxdv": 0,
         "hvscavwupe": 0,
         "hyzlbtnxil": 0,
         "idzxqjoxgd": 2,
         "ikxsvguboy": 1,
         "ipjpwkbqnt": 0,
         "izqighabwk": 1040000,
         "jncncbplme": 1,
         "jxkspszlrc": 1,
         "kekarveuhb": 0,
         "klyoglzriu": 1,
         "kmvuelmdwe": 0,
         "knxzfjwnax": 5,
         "kqfhjboecc": 0,
         "kqhojbwvmo": 0,
         "lchogmhynm": 2,
         "lmuwyeqvph": 7,
         "lvcdbhisbx": 0,
         "mijczjeqeo": 8,
         "mpxzquzcat": 0,
         "mqqoetqued": 1,
         "muktdrzphw": 0,
         "nceszojuvt": 0,
         "nypnjqgxop": 11,
         "ojklibfieg": 0,
         "padotysmxb": 0,
         "pidwyruvfq": 34,
         "pkeymzxsrj": 0,
         "pnjtfvzwiv": 0,
         "pqjnpoxmcx": 0,
         "pyexnkjujx": 38,
         "qfeqdvzssg": 0,
         "qidslfqnwn": 0,
         "qvjszkahdc": 0,
         "qzoriqedoh": 0,
         "rjicuyfsmt": 0,
         "rqenalbuko": 40,
         "rxkwogbxwu": 0,
         "sbqqabqukn": 1,
         "skhgbmucrp": 0,
         "slewjrvgjn": 0,
         "tidjarsatz": 0,
         "tmbagkmlgb": 1,
         "uqcdessbeu": 0,
         "vacoxpronz": 0,
         "vdjfydjrpa": 11,
         "vktxndqyhm": 0,
         "vmfqvfjptu": 0,
         "vofeebpgsc": 5,
         "wewmtafjvk": 1,
         "wfqdcsrltv": 0,
         "wzmbxalguv": 0,
         "xjllpdyool": 0,
         "xjxbwynytx": 0,
         "xmirihwycl": 0,
         "xxrsnjlmfv": 0,
         "ybgdjpexth": 0,
         "ymkmkuxlrq": 0,
         "yneoycjloj": 0,
         "yogujlfvpb": 0,
         "zahigfmqxb": 0,
         "znqqhqkjte": 0,
         "zyztudtziu": 3 
      },
       "content_hash": "816090f397962f92f5329fa5bb0a9ec1",
       "datetime": ISODate("2014-01-19T11:51:40.590Z"),
       "description_ru": "Значение показателей за этап 1",
       "label_ru": "Окончание этапа 1" 
    },
     "1": {
       "_id": ObjectId("5305372cfb0a2944638b45bc"),
       "content": {
         "atkswlntfd": 0,
         "auwbsjqzir": 0,
         "avqrnjzbgd": 1,
         "awquatbduv": 0,
         "axdducvoxb": 2.46,
         "bkoldugcrp": 4,
         "bzccjslewi": 1,
         "cclwyezydc": 0,
         "confirmed": "1",
         "covfctuuhi": 0,
         "dingrnyknr": 0,
         "dkfknpzsnt": 0,
         "dqetuhllse": 0,
         "duorlxiqbw": 5,
         "eayoicezsh": 20,
         "esrfffruoy": 0,
         "ewdunlkxue": 0,
         "ewfshjnome": 0,
         "exakqiudxg": 5,
         "fabdcybqxu": 1,
         "fsplxunmaf": 0,
         "fubxmogyam": 1,
         "fuzqrnwsks": 0,
         "fwtweaeeba": 52.31,
         "fybnnlojgb": 0,
         "gdjheqrqcx": 0,
         "gpupstzwjd": 6,
         "gxilphzzcu": 0,
         "hbahrruokf": 0,
         "hbqnleclwp": 0,
         "hchpoxxnwz": 0,
         "hmorfnbfvf": 0,
         "hqatnzynxb": 0,
         "hrqssioxdv": 0,
         "hvscavwupe": 0,
         "hyzlbtnxil": 0,
         "idzxqjoxgd": 0,
         "ikxsvguboy": 0,
         "ipjpwkbqnt": 0,
         "izqighabwk": 0,
         "jncncbplme": 0,
         "jxkspszlrc": 1,
         "kekarveuhb": 1,
         "klyoglzriu": 0,
         "kmvuelmdwe": 0,
         "knxzfjwnax": 5,
         "kqfhjboecc": 0,
         "kqhojbwvmo": 0,
         "lchogmhynm": 0,
         "lmuwyeqvph": 7,
         "lvcdbhisbx": 0,
         "mijczjeqeo": 8,
         "mpxzquzcat": 0,
         "mqqoetqued": 0,
         "muktdrzphw": 0,
         "nceszojuvt": 0,
         "nypnjqgxop": 0,
         "ojklibfieg": 1,
         "padotysmxb": 0,
         "pidwyruvfq": 34.15,
         "pkeymzxsrj": 0,
         "pnjtfvzwiv": 0,
         "pqjnpoxmcx": 0,
         "pyexnkjujx": 38,
         "qfeqdvzssg": 0,
         "qidslfqnwn": 0,
         "qvjszkahdc": 0,
         "qzoriqedoh": 0,
         "rjicuyfsmt": 0,
         "rqenalbuko": 40,
         "rxkwogbxwu": 0,
         "sbqqabqukn": 1,
         "skhgbmucrp": 0,
         "slewjrvgjn": 0,
         "tidjarsatz": 0,
         "tmbagkmlgb": 0,
         "uqcdessbeu": 0,
         "vacoxpronz": 0,
         "vdjfydjrpa": 11,
         "visited": null,
         "vktxndqyhm": 0,
         "vmfqvfjptu": 0,
         "vofeebpgsc": 4.77,
         "wewmtafjvk": 0,
         "wfqdcsrltv": 0,
         "wzmbxalguv": 0,
         "xjllpdyool": 0,
         "xjxbwynytx": 0,
         "xmirihwycl": 0,
         "xxrsnjlmfv": 0,
         "ybgdjpexth": 0,
         "ymkmkuxlrq": 0,
         "yneoycjloj": 0,
         "yogujlfvpb": 0,
         "zahigfmqxb": 0,
         "znqqhqkjte": 0,
         "zyztudtziu": 3.23 
      },
       "content_hash": "d273fb095a7c08fef69fb90ec316fcb9",
       "datetime": ISODate("2014-02-19T22:58:52.805Z"),
       "description_ru": "Значение показателей за этап 2",
       "label_ru": "Окончание этапа 2" 
    },
     "2": {
       "_id": ObjectId("532854a3fb0a2973718b45c6"),
       "content": {
         "atkswlntfd": 0,
         "auwbsjqzir": 0,
         "avqrnjzbgd": 2,
         "awquatbduv": 0,
         "axdducvoxb": 2.46,
         "bkoldugcrp": 4,
         "bzccjslewi": 0,
         "cclwyezydc": 0,
         "confirmed": "1",
         "covfctuuhi": 0,
         "dingrnyknr": 0,
         "dkfknpzsnt": 0,
         "dqetuhllse": 0,
         "duorlxiqbw": 5,
         "eayoicezsh": 20,
         "esrfffruoy": 0,
         "ewdunlkxue": 1,
         "ewfshjnome": 0,
         "exakqiudxg": 5,
         "fabdcybqxu": 1,
         "fsplxunmaf": 0,
         "fubxmogyam": 1,
         "fuzqrnwsks": 0,
         "fwtweaeeba": 52.15,
         "fybnnlojgb": 5,
         "gdjheqrqcx": 0,
         "gpupstzwjd": 6,
         "gxilphzzcu": 0,
         "hbahrruokf": 0,
         "hbqnleclwp": 2,
         "hchpoxxnwz": 0,
         "hmorfnbfvf": 0,
         "hqatnzynxb": 0,
         "hrqssioxdv": 0,
         "hvscavwupe": 0,
         "hyzlbtnxil": 0,
         "idzxqjoxgd": 0,
         "ikxsvguboy": 0,
         "ipjpwkbqnt": 0,
         "izqighabwk": 0,
         "jncncbplme": 0,
         "jxkspszlrc": 1,
         "kekarveuhb": 0,
         "klyoglzriu": 0,
         "kmvuelmdwe": 0,
         "knxzfjwnax": 5,
         "kqfhjboecc": 0,
         "kqhojbwvmo": 0,
         "lchogmhynm": 0,
         "lmuwyeqvph": 7,
         "lvcdbhisbx": 0,
         "mijczjeqeo": 8,
         "mpxzquzcat": 0,
         "mqqoetqued": 0,
         "muktdrzphw": 0,
         "nceszojuvt": 0,
         "nypnjqgxop": 0,
         "ojklibfieg": 1,
         "padotysmxb": 0,
         "pidwyruvfq": 34.62,
         "pkeymzxsrj": 0,
         "pnjtfvzwiv": 0,
         "pqjnpoxmcx": 0,
         "pyexnkjujx": 38,
         "qfeqdvzssg": 0,
         "qidslfqnwn": 0,
         "qvjszkahdc": 0,
         "qzoriqedoh": 0,
         "rjicuyfsmt": 2,
         "rqenalbuko": 40,
         "rxkwogbxwu": 0,
         "sbqqabqukn": 1,
         "skhgbmucrp": 0,
         "slewjrvgjn": 0,
         "tidjarsatz": 0,
         "tmbagkmlgb": 0,
         "uploaded": null,
         "uqcdessbeu": 0,
         "vacoxpronz": 0,
         "vdjfydjrpa": 11,
         "visited": true,
         "vktxndqyhm": 0,
         "vmfqvfjptu": 0,
         "vofeebpgsc": 4.77,
         "wewmtafjvk": 0,
         "wfqdcsrltv": 0,
         "wzmbxalguv": 0,
         "xjllpdyool": 1,
         "xjxbwynytx": 0,
         "xmirihwycl": 0,
         "xxrsnjlmfv": 0,
         "ybgdjpexth": 0,
         "ymkmkuxlrq": 0,
         "yneoycjloj": 2,
         "yogujlfvpb": 0,
         "zahigfmqxb": 0,
         "znqqhqkjte": 0,
         "zyztudtziu": 3.23 
      },
       "content_hash": "64adcf5534b5b1f77282a95f0b14ef99",
       "datetime": ISODate("2014-03-18T14:13:55.593Z"),
       "description_ru": "Значение показателей за этап 3",
       "document_id": "52b85dfa32b6249513f15897",
       "document_uuid": "a0a39ace-694c-48c5-841d-8b351e5b91da",
       "label_ru": "Окончание этапа 3" 
    } 
  },
   "visited": true,
   "vktxndqyhm": 0,
   "vmfqvfjptu": 0,
   "vofeebpgsc": 4.77,
   "wewmtafjvk": 1,
   "wfqdcsrltv": 0,
   "wzmbxalguv": 0,
   "xjllpdyool": 1,
   "xjxbwynytx": 0,
   "xmirihwycl": 0,
   "xxrsnjlmfv": 0,
   "ybgdjpexth": 0,
   "ymkmkuxlrq": 0,
   "yneoycjloj": 0,
   "yogujlfvpb": 0,
   "zahigfmqxb": 0,
   "znqqhqkjte": 0,
   "zyztudtziu": 3.23 
}

现在最早的数据历史记录位于“versions.2”,但2周后它将是“versions.3”,6周后它将成为“versions.4”,依此类推……

    **this is average of "versions.0.vofeebpgsc" values grouped by "chairtype":**
    array(2) {
      ["result"]=>
      array(3) {
        [0]=>
        array(2) {
          ["_id"]=>
          string(1) "E"
          ["MID"]=>
          float(1.3903333333333)
        }
        [1]=>
        array(2) {
          ["_id"]=>
          string(1) "T"
          ["MID"]=>
          float(0.4)
        }
        [2]=>
        array(2) {
          ["_id"]=>
          string(1) "G"
          ["MID"]=>
          float(0.72931034482759)
        }
      }
      ["ok"]=>
      float(1)
    }


/**
 * Returns the average of one indicator for one department type.
 *
 * The pipeline keeps, for every document of type "chair", the first entry
 * produced by unwinding "versions", then averages
 * "versions.content.$itemName" per "chairtype".
 *
 * @param string $itemName  Name of the indicator field inside the version content.
 * @param string $chairType Department type code as stored in "chairtype" ('E', 'T' or 'G').
 * @return float Average rounded to 2 decimals, or 0.0 when no group matches $chairType.
 */
function getMiddle($itemName,$chairType){
  $m = new MongoClient();
  $db = $m->foo_data;
  $collection = new MongoCollection($db, 'documents');

  $thisField = '$versions.content.'.$itemName;

  $out = $collection->aggregate(
      array('$match' => array('type' => 'chair')),
      array('$unwind' => '$versions'),
      array('$group' => array(
          '_id'       => '$_id',
          'chairtype' => array('$first' => '$chairtype'),
          'versions'  => array('$first' => '$versions')
      )),
      array('$group' => array(
          '_id' => '$chairtype',
          'MID' => array('$avg' => $thisField)
      ))
  );

  // $group does not guarantee the order of its output rows, so find the row
  // by its _id instead of assuming E/T/G sit at fixed positions 0/1/2.
  if (isset($out['result']) && is_array($out['result'])) {
    foreach ($out['result'] as $row) {
      if (isset($row['_id']) && $row['_id'] === $chairType) {
        return round($row['MID'], 2);
      }
    }
  }

  // No group for this chair type (unknown code or empty result).
  return 0.0;
}
echo getMiddle('vofeebpgsc','G'); //I use it in foreach cycle and send different input values

我需要按“chairtype”分组,对每个参数计算所有文档中从“versions.0.content.param_name”到“versions.11.content.param_name”的平均值(也就是每种类型的大学部门在过去一年参数历史记录上的平均值)。之所以有这个任务,是因为仅用“versions.0.content.param_name”的平均值不能很好地反映统计情况。

例如......我有60个带有“chairtype”=“E”的文件,现在版本中有'0','1'和'2'历史数组

我需要计算:

(“versions.0.content.fwtweaeeba”+

“versions.1.content.fwtweaeeba”+

“versions.2.content.fwtweaeeba”+

其余59个文档中每个文档的相同字段

)/(60 * 3)=“chairtype”=“E”时“fwtweaeeba”参数的较好平均值

9个月后,我仍将有60个“chairtype”=“E”的文档,而versions中将有'0','1','2','3','4','5','6','7','8','9','10','11'这些历史数组

我需要计算:

(“versions.0.content.fwtweaeeba”+

“versions.1.content.fwtweaeeba”+

“versions.2.content.fwtweaeeba”+

“versions.3.content.fwtweaeeba”+

“versions.4.content.fwtweaeeba”+

“versions.5.content.fwtweaeeba”+

“versions.6.content.fwtweaeeba”+

“versions.7.content.fwtweaeeba”+

“versions.8.content.fwtweaeeba”+

“versions.9.content.fwtweaeeba”+

“versions.10.content.fwtweaeeba”+

“versions.11.content.fwtweaeeba” +

其余59个文档中每个文档的相同字段

)/(60 * 12)=“chairtype”=“E”时“fwtweaeeba”参数的较好平均值

对每个“chairtype”都要这样计算

等等

我可以使用MongoDB聚合框架来做到这一点吗?该怎么做?有人能就如何在MongoDB中编写这类复杂的聚合查询给些建议吗?

1 个答案:

答案 0 :(得分:1)

这里的数据结构不是一个好的实现,它的结构有很多问题,完全不适合聚合。这里的主要问题是:

  • 您的结构实际上并没有使用任何数组:“versions”现在是一个以“0”、“1”、“2”为键的对象,而不是数组。

  • 这些各不相同的具体键名是真正的问题所在,而这本来是可以避免的。

因此,遍历此类结构的唯一方法是使用带mapReduce的JavaScript。

定义映射器:

/**
 * Map step for mapReduce: walks every entry of `this.versions` and every key
 * of that entry's `content`, emitting one value per indicator.
 *
 * Emitted key:   { type: <chairtype of the document>, key: <content key> }
 * Emitted value: the value stored under that content key.
 *
 * The bookkeeping keys "confirmed" and "visited" are skipped. The original
 * test joined the two inequalities with ||, which is true for every key, so
 * nothing was ever skipped; both names are now actually excluded.
 *
 * NOTE(review): the sample document also has a non-numeric "uploaded" key in
 * one version's content; add it to the skip list if it should not be averaged.
 */
var mapper = function () {

  for ( var n in this.versions ) {
    var content = this.versions[n].content;

    for ( var k in content ) {
      // Skip bookkeeping flags; they are not indicators.
      if ( k === 'confirmed' || k === 'visited' ) {
        continue;
      }

      emit(
        {
          type: this.chairtype,
          key: k
        },
        content[k]
      );
    }
  }

};

这段代码循环遍历每个版本条目,再遍历其“content”中的每个键。对于每个需要的内容键,把它连同“chairtype”一起作为键发出,而值就是该键对应的值。

然后是归约函数(reducer):

/**
 * Reduce step: collapses the values collected for one key into their
 * arithmetic mean (sum divided by the number of values). A sum of zero
 * short-circuits to 0.
 *
 * NOTE(review): MongoDB may invoke reduce more than once for the same key and
 * feed earlier results back in as values; a mean of partial means is only
 * exact when the partial groups have equal size. Confirm this is acceptable.
 *
 * @param {*} key - The emitted key (not used in the calculation).
 * @param {Array} values - Values emitted for that key.
 * @returns {number} The mean of `values`, or 0 when their sum is 0.
 */
var reducer = function (key,values) {

    var total = Array.sum( values );

    if ( total != 0 ) {
        return total / values.length;
    }

    return 0;

};

这只是一种简单的做法:对映射器用相同键发出的所有值求平均值。

所以,虽然这样应该可以正常工作,但你真正应该做的是改变你的结构。事实上,如果你的文档是这样的:

{
    "_id": ObjectId("52b85dfa32b6249513f15897"),
    "parent": "47de3176-bbc3-44e0-8063-8920ac56fdc8",
    "type": "chair",
    "chairtype": "E",
    "content": [
        { "key": "atkswlntfd", "value": 0, "version": 0 },
        { "key": "auwbsjqzir", "value": 0, "version": 0 },
        { "key": "avqrnjzbgd", "value": 0, "version": 0 }
    ]
}

或者大体上采用这种形式,聚合操作就会变得非常简单:

// Average "content.value" per (chairtype, content key) pair:
// $unwind yields one document per "content" entry, $group averages them.
db.collection.aggregate([
    { $unwind: "$content" },
    {
        $group: {
            _id: { chairtype: "$chairtype", key: "$content.key" },
            average: { $avg: "$content.value" }
        }
    }
])

或者根据需要采用其他变体,但这只有在改变结构之后才能实现。

因此,如果不改变文档结构,就需要使用mapReduce来执行此操作。