我有一个 pandas 数据框:
/* Nanoseconds per second; get_current_time_seconds3() multiplies a tv_sec
 * difference by this to express it in nanoseconds. The L suffix makes the
 * constant a long. */
#define BILLION 1000000000L
/* NOTE(review): LIMIT_I and LIMIT_J are not referenced anywhere in the visible
 * part of this file -- presumably loop bounds used elsewhere; confirm. */
#define LIMIT_I 1000
#define LIMIT_J 1000
/*
 * Print the current local time and return the current calendar time.
 *
 * Reads the clock with time(), prints the asctime() rendering of the local
 * time to stdout, and returns the time() value converted to double (whole
 * seconds since the epoch on POSIX systems).
 *
 * Returns -1.0 if time() fails. If localtime() cannot convert the value,
 * nothing is printed but the time value is still returned.
 */
double get_current_time_seconds1(void)
{
    time_t now = time(NULL);
    if (now == (time_t)-1) {
        return -1.0;
    }

    const struct tm *local = localtime(&now);
    if (local != NULL) {
        /* asctime() already ends its string with '\n'; the format string adds
         * a second newline, matching the original output. */
        printf("%s\n", asctime(local));
    }

    /* Return the time value itself. The previous code cast the struct tm
     * pointer to double, which is not a valid conversion in C. */
    return (double)now;
}
/*
 * Sample CLOCK_REALTIME twice back-to-back, print and return the interval
 * between the two samples in seconds.
 *
 * NOTE(review): despite the name, this does not return the current time; it
 * returns the (very small) elapsed time between two consecutive clock reads.
 *
 * Returns -1.0 if either clock_gettime() call fails.
 */
double get_current_time_seconds2(void)
{
    const double nsec_per_sec = 1e9;
    struct timespec start, stop;

    if (clock_gettime(CLOCK_REALTIME, &start) != 0 ||
        clock_gettime(CLOCK_REALTIME, &stop) != 0) {
        return -1.0;
    }

    /* tv_sec is in seconds and tv_nsec in nanoseconds: scale the nanosecond
     * part before adding so the sum is in seconds. The nanosecond difference
     * may be negative when the second counter rolled over; the sum is still
     * correct. */
    double elapsed = (double)(stop.tv_sec - start.tv_sec)
                   + (double)(stop.tv_nsec - start.tv_nsec) / nsec_per_sec;

    printf("%lf\n", elapsed);
    return elapsed;
}
/*
 * Measure how long a sleep(5) call takes using CLOCK_MONOTONIC, print the
 * result and return it.
 *
 * The returned value is the elapsed time in NANOSECONDS (not seconds),
 * converted to double. Returns -1.0 if either clock_gettime() call fails.
 *
 * NOTE(review): despite the name, this does not return the current time, and
 * it blocks the caller for the duration of the sleep.
 */
double get_current_time_seconds3(void)
{
    struct timespec start, end;

    if (clock_gettime(CLOCK_MONOTONIC, &start) != 0) {
        return -1.0;
    }
    sleep(5);
    if (clock_gettime(CLOCK_MONOTONIC, &end) != 0) {
        return -1.0;
    }

    /* Widen to 64 bits before multiplying: with a 32-bit long/time_t the
     * product BILLION * seconds overflows for any interval above ~2 s. The
     * tv_nsec difference may be negative; the signed sum handles that. */
    int64_t elapsed_ns = (int64_t)(end.tv_sec - start.tv_sec) * BILLION
                       + (end.tv_nsec - start.tv_nsec);
    uint64_t diff = (uint64_t)elapsed_ns;

    printf("elapsed time = %llu nanoseconds\n", (long long unsigned int)diff);
    return (double)diff;
}
和一个变量:
col1
0 1
1 1
3 1
4 2
5 2
6 1
7 1
8 1
9 2
10 2
我想把它转换成另一个数据框:先把前 x 行分为一组并计算平均值,再把接下来的 y 行分为一组并计算平均值,依此类推。上面 df 的结果应该是:
x = 3
y = 2
答案 0 :(得分:0)
可以试试 np.put,然后用 diff 和 cumsum 创建分组键
# Add a helper column that will hold a group marker for each row.
df['new']=0
# Write the pattern "x ones followed by y twos" into 'new' at positions 0..len(df)-1.
# NOTE(review): the pattern has only x+y entries; this relies on np.put repeating
# it to cover every row -- confirm against the numpy documentation.
np.put(df['new'],np.arange(len(df)),x*[1]+y*[2])
# A new group starts wherever the marker changes (diff != 0); cumsum numbers the
# groups consecutively, and col1 is then averaged within each group.
df.groupby(df.new.diff().ne(0).cumsum()).col1.mean()
Out[588]:
new
1 1
2 2
3 1
4 2
Name: col1, dtype: int64