我在csv文件中有一些数据如下所示(此处仅显示部分数据)。
SourceID local_date local_time Vge BSs PC hour Type
7208 8/01/2015 11:00:19 15.4 87 +BC_MSG 11 MAIN
11060 8/01/2015 11:01:56 14.9 67 +AB_MSG 11 MAIN
3737 8/01/2015 11:02:09 15.4 88 +AB_MSG 11 MAIN
9683 8/01/2015 11:07:19 14.9 69 +AB_MSG 11 MAIN
9276 8/01/2015 11:07:52 15.4 88 +AB_MSG 11 MAIN
7754 8/01/2015 11:09:26 14.7 62 +AF_MSG 11 MAIN
11111 8/01/2015 11:10:06 15.2 80 +AF_MSG 11 MAIN
9276 8/01/2015 11:10:52 15.4 88 +AB_MSG 11 MAIN
11111 8/01/2015 11:12:56 15.2 80 +AB_MSG 11 MAIN
6148 8/01/2015 11:15:29 15 70 +AB_MSG 11 MAIN
11111 8/01/2015 11:15:56 15.2 80 +AB_MSG 11 MAIN
9866 8/01/2015 11:16:28 4.102 80 +SUB_MSG 11 SUB
9866 8/01/2015 11:16:38 15.1 78 +AH_MSG 11 MAIN
9866 8/01/2015 11:16:38 4.086 78 +SUB_MSG 11 SUB
20729 8/01/2015 11:23:21 11.6 82 +AB_MSG 11 MAIN
9276 8/01/2015 11:25:52 15.4 88 +AB_MSG 11 MAIN
11111 8/01/2015 11:34:16 15.2 80 +AF_MSG 11 MAIN
20190 8/01/2015 11:36:09 11.2 55 +AF_MSG 11 MAIN
7208 8/01/2015 11:37:09 15.3 85 +AB_MSG 11 MAIN
7208 8/01/2015 11:38:39 15.3 86 +AB_MSG 11 MAIN
7754 8/01/2015 11:39:16 14.7 61 +AB_MSG 11 MAIN
8968 8/01/2015 11:39:39 15.5 91 +AB_MSG 11 MAIN
3737 8/01/2015 11:41:09 15.4 88 +AB_MSG 11 MAIN
9683 8/01/2015 11:41:39 14.9 69 +AF_MSG 11 MAIN
20729 8/01/2015 11:44:36 11.6 81 +AB_MSG 11 MAIN
9704 8/01/2015 11:45:20 14.9 68 +AF_MSG 11 MAIN
11111 8/01/2015 11:46:06 4.111 87 +SUB_MSG 11 PAN
我有以下使用 pandas 处理此输入的 Python 程序:
import sys
import csv
import operator
import os
from glob import glob
import fileinput
from relativeDates import *
import datetime
import math
import pprint
import numpy as np
import pandas as pd
from io import StringIO
# Script: load the message log, derive an absolute "hour" bucket and a
# MAIN/SUB "Type" label per row, print the mean BSs per (hour, Type) and write
# the enriched table back out.
COLLECTION = 'NEW'
BATTERY = r'C:\MyFolder\Analysis\\{}'.format(COLLECTION)
# NOTE(review): the paths were previously built from a name `Pandas` that is
# not defined anywhere in the visible script (NameError unless the star import
# from relativeDates happens to supply it). BATTERY is the only base directory
# defined here and was otherwise unused -- confirm it is the intended folder.
INPUT_FILE = BATTERY + r'\in.csv'
OUTPUT_FILE = BATTERY + r'\out.csv'

# read_csv opens and closes the file itself; no separate open() is needed.
df = pd.read_csv(
    INPUT_FILE,
    usecols=["SourceID", "local_date", "local_time", "Vge", 'BSs', 'PC'],
    header=0,
)
df.drop_duplicates(inplace=True)

# Build the hour bucket row by row so it always lines up with its own row.
# Rows of the first date (in sorted order of the date strings, which is the
# order groupby used before) keep the hour text as-is; rows of the k-th later
# date get hour + 24*k. This works for any number of distinct dates and for
# files that are not sorted by date, where the previous two-group loop either
# raised or attached hours to the wrong rows.
hour_of_day = df['local_time'].str.split(':').str[0]
day_number, _ = pd.factorize(df['local_date'], sort=True)
df['hour'] = [
    hh if day == 0 else str(int(hh) + 24 * day)
    for hh, day in zip(hour_of_day, day_number)
]

df.set_index(['SourceID', 'local_date', 'local_time', 'Vge'], inplace=True)

# A message whose PC code contains "SUB" is a SUB message; everything else is MAIN.
df['Type'] = ['SUB' if 'SUB' in msg else 'MAIN' for msg in df['PC']]

# Plain (unweighted) mean of BSs for every (hour, Type) group.
print(df.groupby(['hour', 'Type'])['BSs'].aggregate(np.mean))

df.to_csv(OUTPUT_FILE)
我希望获得 `BSs` 字段随时间变化的平均值。这就是我在上面的 `print(df.groupby(['hour','Type'])['BSs'].aggregate(np.mean))` 中尝试做的事情。
不过,有几点需要考虑。
`Vge` 值可根据 `Type` 字段归类为两种类型(2 types)。`Vge` 值的数量可能因时而异。`Vge` 值是从各个 `SourceID` 接收的。不同 `SourceID` 的 `Vge` 值
之间略有不同,但在同一时间间隔(同一小时)内应该有些相似在这种情况下,如上所述
/// Dequeues an `OMCFeedCell` and fills it from the JSON stored in the post's `content`.
///
/// The JSON's `"image"` entry is handed to `uploadAPI().tryGetImage`, and its
/// `"text"` entry (when it is a string) becomes the cell's text. If the post has
/// no content, or the content cannot be encoded or parsed as JSON, the dequeued
/// cell is returned without any of that configuration instead of crashing.
override func tableView(tableView: UITableView, cellForRowAtIndexPath indexPath: NSIndexPath) -> UITableViewCell {
    let cell = tableView.dequeueReusableCellWithIdentifier("OMCFeedCell", forIndexPath: indexPath) as! OMCFeedTableViewCell
    cell.selectionStyle = .None

    let p = posts[indexPath.row]

    // Missing content, a failed UTF-8 conversion or malformed JSON used to hit
    // `!` / `try!` and take the whole table view down; bail out gracefully instead.
    // NOTE(review): a reused cell keeps whatever it showed before on this path --
    // confirm whether it should be cleared here.
    guard let rawContent = p.content,
        let data = rawContent.dataUsingEncoding(NSUTF8StringEncoding, allowLossyConversion: false),
        let object = try? NSJSONSerialization.JSONObjectWithData(data, options: NSJSONReadingOptions.MutableContainers) else {
        return cell
    }
    let content = JSON(object)

    let image = content["image"].string
    let uploads = uploadAPI()
    // NOTE(review): if tryGetImage calls back asynchronously, `cell` may already
    // be showing a different row by then -- confirm.
    uploads.tryGetImage(image) {
        (result: UIImage?) in
        if let i = result {
            cell.postImage.image = i
            cell.postImage.contentMode = .ScaleAspectFill
            // Height the image needs to fill the container width at its own aspect ratio.
            let containerWidth = cell.postImage.frame.width
            let aspectRatio = i.size.height / i.size.width
            let scaledHeight = aspectRatio * containerWidth
            // do something with the calculated height
        } else {
            // kill the imageview
        }
    }

    if let text = content["text"].string {
        cell.postText.text = text
        // calculate the textview height
    } else {
        // kill the text section
    }
    return cell
}
/// Returns the height stored in `heights` for the row.
///
/// When nothing is stored for the row yet, `UITableViewAutomaticDimension` is
/// returned so the table view can size the row itself; previously the missing
/// entry was force-unwrapped, which crashed.
override func tableView(tableView: UITableView, heightForRowAtIndexPath indexPath: NSIndexPath) -> CGFloat {
    guard let cachedHeight = heights[indexPath.row] else {
        // calculate the correct overall height
        return UITableViewAutomaticDimension
    }
    return cachedHeight
}
`print(df.groupby(['hour','Type'])['BSs'].aggregate(np.mean))` 的简单均值计算并不充分,因为不同时间段(小时)内的样本数量不同。
在这种情况下应该使用什么函数?