我收集了113份文件。这是来自大学系的数据。有三种类型的部门:
自然科学系=> “chairtype”=“E”,
人文部门=> “chairtype”=“G”,
广告部门=> “chairtype”=“T”
每个文档都包含具有相同名称的字段:
“mijczjeqeo”
“vmfqvfjptu”
“fwtweaeeba”
...
等等
每月有113人从网络表单填写这些字段。 1人填写1份文件。填写后,我通过php脚本在每个文档中创建值的历史记录。
“mijczjeqeo”值移至“versions.0.content.mijczjeqeo”
“vmfqvfjptu”值移至“versions.0.content.vmfqvfjptu”
“fwtweaeeba”值移至“versions.0.content.fwtweaeeba”
...
等等
并保存最后的值...
所有“versions.0.content.*”值都移至“versions.1.content.*”
所有“versions.1.content.*”值都移至“versions.2.content.*”
所有“versions.2.content.*”值都移至“versions.3.content.*”
...
等等
我需要获得去年按chairtype分组的每个参数的平均值
例如1份文件(“有机化学系”):
{
"_id": ObjectId("52b85dfa32b6249513f15897"),
"atkswlntfd": 0,
"auwbsjqzir": 0,
"avqrnjzbgd": 0,
"awquatbduv": 0,
"axdducvoxb": 2.46,
"bkoldugcrp": 4,
"bzccjslewi": 0,
"cclwyezydc": 0,
"chairtype": "E",
"confirmed": "1",
"covfctuuhi": 0,
"dingrnyknr": 0,
"dkfknpzsnt": 0,
"dqetuhllse": 0,
"duorlxiqbw": 5,
"eayoicezsh": 20,
"esrfffruoy": 0,
"ewdunlkxue": 1,
"ewfshjnome": 0,
"exakqiudxg": 5,
"fabdcybqxu": 0,
"fsplxunmaf": 0,
"fubxmogyam": 1,
"fuzqrnwsks": 3,
"fwtweaeeba": 52.31,
"fybnnlojgb": 5,
"gdjheqrqcx": 0,
"gpupstzwjd": 6,
"gxilphzzcu": 0,
"hbahrruokf": 0,
"hbqnleclwp": 2,
"hchpoxxnwz": 0,
"hmorfnbfvf": 0,
"hqatnzynxb": 0,
"hrqssioxdv": 0,
"hvscavwupe": 0,
"hyzlbtnxil": 0,
"idzxqjoxgd": 0,
"ikxsvguboy": 0,
"ipjpwkbqnt": 0,
"izqighabwk": 9000000,
"jncncbplme": 3,
"jxkspszlrc": 1,
"kekarveuhb": 0,
"klyoglzriu": 0,
"kmvuelmdwe": 0,
"knxzfjwnax": 5,
"kqfhjboecc": 0,
"kqhojbwvmo": 0,
"lchogmhynm": 0,
"lmuwyeqvph": 7,
"lvcdbhisbx": 0,
"mijczjeqeo": 8,
"mpxzquzcat": 0,
"mqqoetqued": 0,
"muktdrzphw": 0,
"nceszojuvt": 0,
"nypnjqgxop": 1,
"ojklibfieg": 2,
"padotysmxb": 0,
"parent": "47de3176-bbc3-44e0-8063-8920ac56fdc8",
"pidwyruvfq": 35.08,
"pkeymzxsrj": 0,
"pnjtfvzwiv": 1,
"pqjnpoxmcx": 0,
"pyexnkjujx": 38,
"qfeqdvzssg": 0,
"qidslfqnwn": 0,
"qvjszkahdc": 0,
"qzoriqedoh": 0,
"rjicuyfsmt": 0,
"rqenalbuko": 40,
"rxkwogbxwu": 0,
"sbqqabqukn": 1,
"skhgbmucrp": 0,
"slewjrvgjn": 0,
"tidjarsatz": 0,
"title_ru": "Кафедра органической химии",
"tmbagkmlgb": 0,
"type": "chair",
"uploaded": "1",
"uqcdessbeu": 0,
"url": "http:\/\/www.herzen.spb.ru\/main\/structure\/fukultets\/him\/1208434887\/",
"uuid": "a0a39ace-694c-48c5-841d-8b351e5b91da",
"vacoxpronz": 0,
"vdjfydjrpa": 13,
"versions": {
"0": {
"_id": ObjectId("52dbbc4cfb0a29ce4a8b45bd"),
"content": {
"atkswlntfd": 0,
"auwbsjqzir": 0,
"avqrnjzbgd": 0,
"awquatbduv": 0,
"axdducvoxb": 3,
"bkoldugcrp": 4,
"bzccjslewi": 0,
"cclwyezydc": 0,
"confirmed": null,
"covfctuuhi": 0,
"dingrnyknr": 0,
"dkfknpzsnt": 0,
"dqetuhllse": 2,
"duorlxiqbw": 5,
"eayoicezsh": 20,
"esrfffruoy": 0,
"ewdunlkxue": 3,
"ewfshjnome": 0,
"exakqiudxg": 5,
"fabdcybqxu": 1,
"fsplxunmaf": 0,
"fubxmogyam": 1,
"fuzqrnwsks": 0,
"fwtweaeeba": 55,
"fybnnlojgb": 0,
"gdjheqrqcx": 0,
"gpupstzwjd": 6,
"gxilphzzcu": 0,
"hbahrruokf": 0,
"hbqnleclwp": 3,
"hchpoxxnwz": 1849020,
"hmorfnbfvf": 2,
"hqatnzynxb": 0,
"hrqssioxdv": 0,
"hvscavwupe": 0,
"hyzlbtnxil": 0,
"idzxqjoxgd": 2,
"ikxsvguboy": 1,
"ipjpwkbqnt": 0,
"izqighabwk": 1040000,
"jncncbplme": 1,
"jxkspszlrc": 1,
"kekarveuhb": 0,
"klyoglzriu": 1,
"kmvuelmdwe": 0,
"knxzfjwnax": 5,
"kqfhjboecc": 0,
"kqhojbwvmo": 0,
"lchogmhynm": 2,
"lmuwyeqvph": 7,
"lvcdbhisbx": 0,
"mijczjeqeo": 8,
"mpxzquzcat": 0,
"mqqoetqued": 1,
"muktdrzphw": 0,
"nceszojuvt": 0,
"nypnjqgxop": 11,
"ojklibfieg": 0,
"padotysmxb": 0,
"pidwyruvfq": 34,
"pkeymzxsrj": 0,
"pnjtfvzwiv": 0,
"pqjnpoxmcx": 0,
"pyexnkjujx": 38,
"qfeqdvzssg": 0,
"qidslfqnwn": 0,
"qvjszkahdc": 0,
"qzoriqedoh": 0,
"rjicuyfsmt": 0,
"rqenalbuko": 40,
"rxkwogbxwu": 0,
"sbqqabqukn": 1,
"skhgbmucrp": 0,
"slewjrvgjn": 0,
"tidjarsatz": 0,
"tmbagkmlgb": 1,
"uqcdessbeu": 0,
"vacoxpronz": 0,
"vdjfydjrpa": 11,
"vktxndqyhm": 0,
"vmfqvfjptu": 0,
"vofeebpgsc": 5,
"wewmtafjvk": 1,
"wfqdcsrltv": 0,
"wzmbxalguv": 0,
"xjllpdyool": 0,
"xjxbwynytx": 0,
"xmirihwycl": 0,
"xxrsnjlmfv": 0,
"ybgdjpexth": 0,
"ymkmkuxlrq": 0,
"yneoycjloj": 0,
"yogujlfvpb": 0,
"zahigfmqxb": 0,
"znqqhqkjte": 0,
"zyztudtziu": 3
},
"content_hash": "816090f397962f92f5329fa5bb0a9ec1",
"datetime": ISODate("2014-01-19T11:51:40.590Z"),
"description_ru": "Значение показателей за этап 1",
"label_ru": "Окончание этапа 1"
},
"1": {
"_id": ObjectId("5305372cfb0a2944638b45bc"),
"content": {
"atkswlntfd": 0,
"auwbsjqzir": 0,
"avqrnjzbgd": 1,
"awquatbduv": 0,
"axdducvoxb": 2.46,
"bkoldugcrp": 4,
"bzccjslewi": 1,
"cclwyezydc": 0,
"confirmed": "1",
"covfctuuhi": 0,
"dingrnyknr": 0,
"dkfknpzsnt": 0,
"dqetuhllse": 0,
"duorlxiqbw": 5,
"eayoicezsh": 20,
"esrfffruoy": 0,
"ewdunlkxue": 0,
"ewfshjnome": 0,
"exakqiudxg": 5,
"fabdcybqxu": 1,
"fsplxunmaf": 0,
"fubxmogyam": 1,
"fuzqrnwsks": 0,
"fwtweaeeba": 52.31,
"fybnnlojgb": 0,
"gdjheqrqcx": 0,
"gpupstzwjd": 6,
"gxilphzzcu": 0,
"hbahrruokf": 0,
"hbqnleclwp": 0,
"hchpoxxnwz": 0,
"hmorfnbfvf": 0,
"hqatnzynxb": 0,
"hrqssioxdv": 0,
"hvscavwupe": 0,
"hyzlbtnxil": 0,
"idzxqjoxgd": 0,
"ikxsvguboy": 0,
"ipjpwkbqnt": 0,
"izqighabwk": 0,
"jncncbplme": 0,
"jxkspszlrc": 1,
"kekarveuhb": 1,
"klyoglzriu": 0,
"kmvuelmdwe": 0,
"knxzfjwnax": 5,
"kqfhjboecc": 0,
"kqhojbwvmo": 0,
"lchogmhynm": 0,
"lmuwyeqvph": 7,
"lvcdbhisbx": 0,
"mijczjeqeo": 8,
"mpxzquzcat": 0,
"mqqoetqued": 0,
"muktdrzphw": 0,
"nceszojuvt": 0,
"nypnjqgxop": 0,
"ojklibfieg": 1,
"padotysmxb": 0,
"pidwyruvfq": 34.15,
"pkeymzxsrj": 0,
"pnjtfvzwiv": 0,
"pqjnpoxmcx": 0,
"pyexnkjujx": 38,
"qfeqdvzssg": 0,
"qidslfqnwn": 0,
"qvjszkahdc": 0,
"qzoriqedoh": 0,
"rjicuyfsmt": 0,
"rqenalbuko": 40,
"rxkwogbxwu": 0,
"sbqqabqukn": 1,
"skhgbmucrp": 0,
"slewjrvgjn": 0,
"tidjarsatz": 0,
"tmbagkmlgb": 0,
"uqcdessbeu": 0,
"vacoxpronz": 0,
"vdjfydjrpa": 11,
"visited": null,
"vktxndqyhm": 0,
"vmfqvfjptu": 0,
"vofeebpgsc": 4.77,
"wewmtafjvk": 0,
"wfqdcsrltv": 0,
"wzmbxalguv": 0,
"xjllpdyool": 0,
"xjxbwynytx": 0,
"xmirihwycl": 0,
"xxrsnjlmfv": 0,
"ybgdjpexth": 0,
"ymkmkuxlrq": 0,
"yneoycjloj": 0,
"yogujlfvpb": 0,
"zahigfmqxb": 0,
"znqqhqkjte": 0,
"zyztudtziu": 3.23
},
"content_hash": "d273fb095a7c08fef69fb90ec316fcb9",
"datetime": ISODate("2014-02-19T22:58:52.805Z"),
"description_ru": "Значение показателей за этап 2",
"label_ru": "Окончание этапа 2"
},
"2": {
"_id": ObjectId("532854a3fb0a2973718b45c6"),
"content": {
"atkswlntfd": 0,
"auwbsjqzir": 0,
"avqrnjzbgd": 2,
"awquatbduv": 0,
"axdducvoxb": 2.46,
"bkoldugcrp": 4,
"bzccjslewi": 0,
"cclwyezydc": 0,
"confirmed": "1",
"covfctuuhi": 0,
"dingrnyknr": 0,
"dkfknpzsnt": 0,
"dqetuhllse": 0,
"duorlxiqbw": 5,
"eayoicezsh": 20,
"esrfffruoy": 0,
"ewdunlkxue": 1,
"ewfshjnome": 0,
"exakqiudxg": 5,
"fabdcybqxu": 1,
"fsplxunmaf": 0,
"fubxmogyam": 1,
"fuzqrnwsks": 0,
"fwtweaeeba": 52.15,
"fybnnlojgb": 5,
"gdjheqrqcx": 0,
"gpupstzwjd": 6,
"gxilphzzcu": 0,
"hbahrruokf": 0,
"hbqnleclwp": 2,
"hchpoxxnwz": 0,
"hmorfnbfvf": 0,
"hqatnzynxb": 0,
"hrqssioxdv": 0,
"hvscavwupe": 0,
"hyzlbtnxil": 0,
"idzxqjoxgd": 0,
"ikxsvguboy": 0,
"ipjpwkbqnt": 0,
"izqighabwk": 0,
"jncncbplme": 0,
"jxkspszlrc": 1,
"kekarveuhb": 0,
"klyoglzriu": 0,
"kmvuelmdwe": 0,
"knxzfjwnax": 5,
"kqfhjboecc": 0,
"kqhojbwvmo": 0,
"lchogmhynm": 0,
"lmuwyeqvph": 7,
"lvcdbhisbx": 0,
"mijczjeqeo": 8,
"mpxzquzcat": 0,
"mqqoetqued": 0,
"muktdrzphw": 0,
"nceszojuvt": 0,
"nypnjqgxop": 0,
"ojklibfieg": 1,
"padotysmxb": 0,
"pidwyruvfq": 34.62,
"pkeymzxsrj": 0,
"pnjtfvzwiv": 0,
"pqjnpoxmcx": 0,
"pyexnkjujx": 38,
"qfeqdvzssg": 0,
"qidslfqnwn": 0,
"qvjszkahdc": 0,
"qzoriqedoh": 0,
"rjicuyfsmt": 2,
"rqenalbuko": 40,
"rxkwogbxwu": 0,
"sbqqabqukn": 1,
"skhgbmucrp": 0,
"slewjrvgjn": 0,
"tidjarsatz": 0,
"tmbagkmlgb": 0,
"uploaded": null,
"uqcdessbeu": 0,
"vacoxpronz": 0,
"vdjfydjrpa": 11,
"visited": true,
"vktxndqyhm": 0,
"vmfqvfjptu": 0,
"vofeebpgsc": 4.77,
"wewmtafjvk": 0,
"wfqdcsrltv": 0,
"wzmbxalguv": 0,
"xjllpdyool": 1,
"xjxbwynytx": 0,
"xmirihwycl": 0,
"xxrsnjlmfv": 0,
"ybgdjpexth": 0,
"ymkmkuxlrq": 0,
"yneoycjloj": 2,
"yogujlfvpb": 0,
"zahigfmqxb": 0,
"znqqhqkjte": 0,
"zyztudtziu": 3.23
},
"content_hash": "64adcf5534b5b1f77282a95f0b14ef99",
"datetime": ISODate("2014-03-18T14:13:55.593Z"),
"description_ru": "Значение показателей за этап 3",
"document_id": "52b85dfa32b6249513f15897",
"document_uuid": "a0a39ace-694c-48c5-841d-8b351e5b91da",
"label_ru": "Окончание этапа 3"
}
},
"visited": true,
"vktxndqyhm": 0,
"vmfqvfjptu": 0,
"vofeebpgsc": 4.77,
"wewmtafjvk": 1,
"wfqdcsrltv": 0,
"wzmbxalguv": 0,
"xjllpdyool": 1,
"xjxbwynytx": 0,
"xmirihwycl": 0,
"xxrsnjlmfv": 0,
"ybgdjpexth": 0,
"ymkmkuxlrq": 0,
"yneoycjloj": 0,
"yogujlfvpb": 0,
"zahigfmqxb": 0,
"znqqhqkjte": 0,
"zyztudtziu": 3.23
}
现在保存历史数据的最早一项是“versions.2”,但2周后它将是“versions.3”,6周后将是“versions.4”,依此类推……
**This is the average of the "versions.0.content.vofeebpgsc" values grouped by "chairtype":**
array(2) {
["result"]=>
array(3) {
[0]=>
array(2) {
["_id"]=>
string(1) "E"
["MID"]=>
float(1.3903333333333)
}
[1]=>
array(2) {
["_id"]=>
string(1) "T"
["MID"]=>
float(0.4)
}
[2]=>
array(2) {
["_id"]=>
string(1) "G"
["MID"]=>
float(0.72931034482759)
}
}
["ok"]=>
float(1)
}
/**
 * Returns the average of one indicator for one chair type, rounded to two decimals.
 *
 * For every document with type "chair", only the first entry produced by
 * unwinding "versions" is kept (i.e. "versions.0"), and the value of
 * "versions.0.content.$itemName" is averaged per "chairtype".
 * NOTE(review): $unwind requires "versions" to be stored as a BSON array - confirm.
 *
 * @param string $itemName  Name of the indicator field inside "content".
 * @param string $chairType Chair type code as stored in "chairtype" ('E', 'T' or 'G').
 *
 * @return float Rounded average, or 0.0 when no group exists for $chairType.
 */
function getMiddle($itemName, $chairType)
{
    $m = new MongoClient();
    $db = $m->foo_data;
    $collection = new MongoCollection($db, 'documents');

    $thisField = '$versions.content.' . $itemName;

    $out = $collection->aggregate(
        array('$match' => array('type' => 'chair')),
        array('$unwind' => '$versions'),
        // Collapse back to one row per document, keeping only the first unwound version.
        array('$group' => array(
            '_id'       => '$_id',
            'chairtype' => array('$first' => '$chairtype'),
            'versions'  => array('$first' => '$versions'),
        )),
        array('$group' => array(
            '_id' => '$chairtype',
            'MID' => array('$avg' => $thisField),
        ))
    );

    if (empty($out['result'])) {
        return 0.0;
    }

    // $group does not guarantee the order of its output rows, so the row for
    // the requested chair type is located by its _id instead of by a fixed index.
    foreach ($out['result'] as $row) {
        if (isset($row['_id']) && $row['_id'] === $chairType) {
            return round((float) $row['MID'], 2);
        }
    }

    return 0.0;
}
echo getMiddle('vofeebpgsc','G'); //I use it in foreach cycle and send different input values
我需要针对按“chairtype”分组的文档,计算每个参数从“versions.0.content.param_name”到“versions.11.content.param_name”的平均值(即每种类型的院系在过去一年中该参数历史记录的平均值)。之所以有这个需求,是因为仅用“versions.0.content.param_name”计算出的平均值不能很好地反映统计情况。
例如......我有60个带有“chairtype”=“E”的文件,现在版本中有'0','1'和'2'历史数组
我需要计算:
(
“versions.0.content.fwtweaeeba”+
“versions.1.content.fwtweaeeba”+
“versions.2.content.fwtweaeeba”+
其余59个文件中的相同字段
)/(60 * 3)= “chairtype”=“E”时“fwtweaeeba”参数的较为合理的平均值
9个月后,我仍将有60个“chairtype”=“E”的文件,而versions中将有'0','1','2','3','4','5','6','7','8','9','10','11'这些历史数组
我需要计算:
(
“versions.0.content.fwtweaeeba”+
“versions.1.content.fwtweaeeba”+
“versions.2.content.fwtweaeeba”+
“versions.3.content.fwtweaeeba”+
“versions.4.content.fwtweaeeba”+
“versions.5.content.fwtweaeeba”+
“versions.6.content.fwtweaeeba”+
“versions.7.content.fwtweaeeba”+
“versions.8.content.fwtweaeeba”+
“versions.9.content.fwtweaeeba”+
“versions.10.content.fwtweaeeba”+
“versions.11.content.fwtweaeeba” +
其余59个文档中每个文档的相同字段
)/(60 * 12)= “chairtype”=“E”时“fwtweaeeba”参数的较为合理的平均值
每个“chairtype”的等等
我可以使用MongoDB聚合框架来实现吗?应该怎么做?有人能就如何在MongoDB中编写复杂的聚合查询给些建议吗?
答案 0 :(得分:1)
这里的数据结构不是一个好的实现,它的结构有很多问题,完全不适合聚合。这里的主要问题是:
您的结构目前实际上并没有使用任何数组。
所有这些特定的键名(key)是一个真正的问题,而这本来是可以避免的。
因此,遍历此类结构的唯一方法是使用带mapReduce的JavaScript。
定义映射器:
// Map function for mapReduce: for every history entry in `versions`, emits one
// value per indicator found in that entry's `content`, keyed by the document's
// chair type together with the indicator name.
var mapper = function () {
    // Flag fields stored alongside the indicators inside `content`; they must
    // not be averaged. The original test used `||` between two `!=` checks,
    // which is always true, so nothing was ever filtered out.
    var skipped = { confirmed: true, visited: true, uploaded: true };

    for ( var n in this.versions ) {
        var content = this.versions[n].content;
        for ( var k in content ) {
            if ( skipped.hasOwnProperty( k ) )
                continue;
            emit(
                {
                    type: this.chairtype,
                    key: k
                },
                content[k]
            );
        }
    }
};
这段代码会循环遍历每个版本条目,再遍历其 content 中的所有字段。对于每个需要的 content 键,把它和“chairtype”一起组成键并发出(emit),而发出的值就是该字段对应的值。
然后是减速器:
// Reduce function for mapReduce: returns the arithmetic mean of the values
// received for one key, or 0 when their sum is zero.
// NOTE(review): MongoDB may invoke reduce again on its own earlier outputs for
// the same key; in that case this produces an average of averages - confirm
// against the mapReduce documentation before relying on the result.
var reducer = function (key,values) {
    var total = Array.sum( values );
    if ( total != 0 ) {
        return total / values.length;
    }
    return 0;
};
这只是一种简单的做法:对映射器以相同的键发出的所有值计算平均值。
所以,虽然这样应该可以正常工作,但你真正应该做的是改变你的文档结构。事实上,如果你的文档是这样的:
{
"_id": ObjectId("52b85dfa32b6249513f15897"),
"parent": "47de3176-bbc3-44e0-8063-8920ac56fdc8",
"type": "chair",
"chairtype": "E",
"content": [
{ "key": "atkswlntfd", "value": 0, "version": 0 },
{ "key": "auwbsjqzir", "value": 0, "version": 0 },
{ "key": "avqrnjzbgd", "value": 0, "version": 0 }
]
}
或者通常或多或少地采用这种形式,聚合操作变得非常简单:
// Stage 1: produce one document per element of the "content" array.
var unwindContent = { "$unwind": "$content" };

// Stage 2: average "content.value" for every (chairtype, content.key) pair.
var averageByChairtypeAndKey = {
    "$group": {
        "_id": {
            "chairtype": "$chairtype",
            "key": "$content.key"
        },
        "average": { "$avg": "$content.value" }
    }
};

db.collection.aggregate([ unwindContent, averageByChairtypeAndKey ])
或者按需要做其他任何变体,改变结构之后这些都变得可行了。
因此,如果不改变文档结构,就需要使用mapReduce来执行此操作。