我有一组看起来像这样的数据:
#API name, min, max, average
['findProductByPartNumber', '336.0', '336.0', '336.0']
['findProductByPartNumber', '336.0', '339.0', '337.5']
['findProductByPartNumber', '336.0', '339.0', '338.0']
['findProductByPartNumber', '336.0', '341.0', '338.75']
['findProductByPartNumber', '336.0', '353.0', '341.6']
['findProductById', '841.0', '841.0', '841.0']
['findProductByPartNumber', '336.0', '920.0', '438.0']
['findProductByPartNumber', '336.0', '944.0', '510.29']
['findProductByPartNumber', '336.0', '952.0', '565.5']
['findProductByPartNumber', '336.0', '975.0', '611.0']
['findProductsByCategory', '113.0', '113.0', '113.0']
['findProductById', '161.0', '841.0', '501.0']
['findProductByPartNumber', '255.0', '975.0', '575.4']
我想要做的是,对于每个单独的API,生成如下内容:
API, Min, Max, Average, 90th Percentile
findProductByPartNumber, 278.69, 770.25, 458.69, 565.5
findProductById, 373.0, 841.0, 571.67, 501.0
findProductsByCategory, 112.33, 187.17, 154.46, 167.75
这是上面每个API的汇总结果。在Python中执行此操作的最佳方法是什么?
编辑:
我有以下Java代码,它实现了我想要的功能。Java是我最擅长的语言,我正在尝试学习Python,但还不熟悉它的数据结构。
// Pack the three figures in the order min, max, average and hand them
// to the per-API registry under the given name.
double[] apiValues = {
    Double.valueOf(min),
    Double.valueOf(max),
    Double.valueOf(average)
};
parseAPILogs.registerAPI(name, apiValues);
...
...
/**
 * Feeds one set of values into the accumulator stored for {@code apiName}.
 *
 * If the name is already present in {@code averagePerAPI}, the values are
 * added to the existing entry. Otherwise a new {@code APIData} is created,
 * given the values, and then stored under that name.
 */
private static void registerAPI(String apiName, double[] apiValues) {
    // Known API: extend the entry that is already registered.
    if (averagePerAPI.containsKey(apiName)) {
        averagePerAPI.get(apiName).addValues(apiValues);
        return;
    }

    // First sighting of this API: populate a fresh entry before storing it.
    APIData fresh = new APIData();
    fresh.addValues(apiValues);
    averagePerAPI.put(apiName, fresh);
}
APIData 这个Java类比较大,不便全部贴在这里,但你应该能看出我的思路。
答案 0 :(得分:2)
这样的事情怎么样:
def aggregate(stats):
    """Combine per-sample rows into one summary entry per API name.

    Each row in ``stats`` is ``[name, min, max, average]``; the three
    numeric fields may be strings or numbers accepted by ``float``.

    Returns a dict mapping each name to a dict with the keys:
      * ``"avg"`` -- list of every row's average, in input order
      * ``"min"`` -- smallest ``min`` value seen for that name
      * ``"max"`` -- largest ``max`` value seen for that name

    The input rows are left unmodified.
    """
    aggregated = {}
    for stat in stats:
        key = stat[0]
        # Build a real list: on Python 3 ``map`` returns an iterator that
        # cannot be indexed, and slicing (instead of ``pop(0)``) keeps the
        # caller's rows intact.
        values = [float(value) for value in stat[1:]]
        if key not in aggregated:
            aggregated[key] = {"avg": [], "min": values[0], "max": values[1]}
        entry = aggregated[key]
        entry["min"] = min(values[0], entry["min"])
        entry["max"] = max(values[1], entry["max"])
        entry["avg"].append(values[2])
    return aggregated
def print_stats(aggregated):
    """Print one line per API: its name followed by ``key: value`` pairs.

    ``aggregated`` maps each API name to a dict of summary fields. The
    ``'avg'`` field is expected to hold a non-empty list of numbers and is
    reported as their arithmetic mean; every other field is printed as-is.
    Fields appear in the dict's own iteration order, separated by single
    spaces.
    """
    for name, summary in aggregated.items():
        parts = [str(name)]
        for field, value in summary.items():
            if field == 'avg':
                # 'avg' stores the individual averages; report their mean.
                value = sum(value) / len(value)
            parts.append("%s: %s" % (field, value))
        # A single pre-joined string prints identically under the Python 2
        # print statement and the Python 3 print function.
        print(" ".join(parts))
# Sample input: one row per measurement, laid out as
# [API name, min, max, average], with the numbers given as strings.
stats = [
    ['findProductByPartNumber', '336.0', '336.0', '336.0'],
    ['findProductByPartNumber', '336.0', '339.0', '337.5'],
    ['findProductByPartNumber', '336.0', '339.0', '338.0'],
    ['findProductByPartNumber', '336.0', '341.0', '338.75'],
    ['findProductByPartNumber', '336.0', '353.0', '341.6'],
    ['findProductById', '841.0', '841.0', '841.0'],
    ['findProductByPartNumber', '336.0', '920.0', '438.0'],
    ['findProductByPartNumber', '336.0', '944.0', '510.29'],
    ['findProductByPartNumber', '336.0', '952.0', '565.5'],
    ['findProductByPartNumber', '336.0', '975.0', '611.0'],
    ['findProductsByCategory', '113.0', '113.0', '113.0'],
    ['findProductById', '161.0', '841.0', '501.0'],
    ['findProductByPartNumber', '255.0', '975.0', '575.4'],
]

# Summarise the rows per API and print one line for each.
print_stats(aggregate(stats))
<strong>输出</strong>
findProductsByCategory max: 113.0 avg: 113.0 min: 113.0
findProductById max: 841.0 avg: 671.0 min: 161.0
findProductByPartNumber max: 975.0 avg: 439.204 min: 255.0
至于第90百分位数,在没有各个原始样本的情况下是无法计算的,除非您只是用 0.9 × 平均值来粗略代替。
答案 1 :(得分:1)
# Sample input: one row per measurement, laid out as
# [API name, min, max, average], with the numbers given as strings.
arr = [
    ['findProductByPartNumber', '336.0', '336.0', '336.0'],
    ['findProductByPartNumber', '336.0', '339.0', '337.5'],
    ['findProductByPartNumber', '336.0', '339.0', '338.0'],
    ['findProductByPartNumber', '336.0', '341.0', '338.75'],
    ['findProductByPartNumber', '336.0', '353.0', '341.6'],
    ['findProductById', '841.0', '841.0', '841.0'],
    ['findProductByPartNumber', '336.0', '920.0', '438.0'],
    ['findProductByPartNumber', '336.0', '944.0', '510.29'],
    ['findProductByPartNumber', '336.0', '952.0', '565.5'],
    ['findProductByPartNumber', '336.0', '975.0', '611.0'],
    ['findProductsByCategory', '113.0', '113.0', '113.0'],
    ['findProductById', '161.0', '841.0', '501.0'],
    ['findProductByPartNumber', '255.0', '975.0', '575.4'],
]

# d maps each API name to [min, max, average, 0]. The fourth slot is the
# column reserved for the 90th percentile; it stays 0 because nothing in
# this script computes it.
d = {}
# totals maps each API name to [sum of averages, number of rows].
totals = {}
for row in arr:
    name = row[0]
    # Convert once so min/max compare numbers rather than strings.
    low, high, avg = float(row[1]), float(row[2]), float(row[3])
    if name not in d:
        d[name] = [low, high, 0, 0]
        totals[name] = [0.0, 0]
    entry = d[name]
    entry[0] = min(low, entry[0])
    entry[1] = max(high, entry[1])
    totals[name][0] += avg
    totals[name][1] += 1

# One pass over the totals replaces recomputing the mean for every row.
for name, (avg_sum, count) in totals.items():
    d[name][2] = avg_sum / count

for k in d:
    # A single string argument prints the same on Python 2 and Python 3.
    print("{0} {1} {2} {3} {4}".format(k, d[k][0], d[k][1], d[k][2], d[k][3]))