/*
 * Create (truncating if it already exists) the zip archive `upload_zip_name`
 * and add every path listed in the cJSON string array `filelist`.  Each
 * entry is stored under its path relative to `base` (entries outside `base`
 * are skipped with a warning).  Directories, symlinks (stored as links) and
 * regular files are supported; each entry's Unix mode is preserved via the
 * zip external attributes.
 *
 * Returns 0 on success, 1 on a fatal archiving error, -1 if the archive
 * could not be opened or created.
 */
int list_zip_it(char *upload_zip_name, char *base, cJSON *filelist)
{
    int result = 0;
    int error_n = 0;
    struct zip *archive = zip_open(upload_zip_name, ZIP_TRUNCATE | ZIP_CREATE, &error_n);
    if (!archive)
    {
        /* BUG FIX: was printf(stderr, ...) — stderr is not a format string. */
        fprintf(stderr, "could not open or create archive\n");
        return -1;
    }

    size_t base_len = strlen(base);
    cJSON *item;
    cJSON_ArrayForEach(item, filelist)
    {
        char *path = item->valuestring;

        /* BUG FIX: `mode` previously lived outside the loop, so a failed
         * stat() silently reused the previous entry's mode.  Also use
         * lstat() instead of stat(): stat() follows symlinks, which made the
         * S_ISLNK branch below unreachable. */
        struct stat sb;
        if (lstat(path, &sb) != 0)
        {
            fprintf(stderr, "could not stat %s, skipping\n", path);
            continue;
        }
        mode_t mode = sb.st_mode;
        /* Unix mode lives in the high 16 bits of the external attributes. */
        zip_uint32_t attr = ((zip_uint32_t)mode) << 16;

        /* BUG FIX: the prefix test compared against CI_PROJECT_DIR while
         * taking strlen(base) — string and length must come from the same
         * variable; the relative path below is derived from `base`. */
        char rel_file[1024];
        if (strncmp(path, base, base_len) == 0)
        {
            /* +1 skips the '/' separator; assumes base has no trailing slash. */
            snprintf(rel_file, sizeof rel_file, "%s", path + base_len + 1);
            printf("archive filename: %s\n", rel_file);
        }
        else
        {
            fprintf(stderr, "filename outside base-directory\n");
            continue;
        }

        if (S_ISDIR(mode))
        {
            zip_int64_t index = zip_add_dir(archive, rel_file);
            /* BUG FIX: index 0 is a valid entry — test >= 0, not > 0. */
            if (index >= 0)
                zip_file_set_external_attributes(archive, (zip_uint64_t)index, 0, ZIP_OPSYS_UNIX, attr);
        }
        else if (S_ISLNK(mode)) /* symlink: store the link target as entry data */
        {
            /* BUG FIX: the original handed zip_source_buffer() a stack
             * buffer with freep=0; libzip reads that buffer at zip_close(),
             * long after it went out of scope.  Heap-allocate instead and
             * let libzip free it on success (freep=1). */
            char *link = calloc(1, 1024);
            if (!link)
            {
                result = 1;
                break;
            }
            ssize_t size_link = readlink(path, link, 1023);
            if (size_link > 0)
            {
                struct zip_source *source = zip_source_buffer(archive, link, (zip_uint64_t)size_link, 1);
                if (source)
                {
                    zip_int64_t index = zip_add(archive, rel_file, source);
                    if (index >= 0)
                        zip_file_set_external_attributes(archive, (zip_uint64_t)index, 0, ZIP_OPSYS_UNIX, attr);
                    else
                        zip_source_free(source); /* zip_add failed: source (and buffer) not consumed */
                }
                else
                {
                    /* BUG FIX: was printf(stderr, ...) and freed a NULL source. */
                    fprintf(stderr, "failed to create source buffer: %s \n", zip_strerror(archive));
                    free(link);
                }
            }
            else
            {
                /* BUG FIX: was error(fmt, ...) — GNU error(3) takes
                 * (status, errnum, fmt, ...); this was a misuse. */
                fprintf(stderr, "failed to read link: %s \n", path);
                free(link);
            }
        }
        else if (S_ISREG(mode))
        {
            struct zip_source *source = zip_source_file(archive, path, 0, 0);
            if (source == NULL)
            {
                fprintf(stderr, "failed to create source buffer: %s \n", zip_strerror(archive));
                result = 1;
                break;
            }
            zip_int64_t index = zip_add(archive, rel_file, source);
            if (index < 0)
            {
                int zep, sep;
                zip_error_get(archive, &zep, &sep);
                fprintf(stderr, "failed to add file to archive: %s \n", zip_strerror(archive));
                zip_source_free(source); /* not consumed on failure */
                /* A duplicate entry is tolerated; any other error aborts. */
                if (zep != ZIP_ER_EXISTS)
                {
                    result = 1;
                    break;
                }
            }
            else
            {
                zip_file_set_external_attributes(archive, (zip_uint64_t)index, 0, ZIP_OPSYS_UNIX, attr);
            }
        }
        /* Other file types (sockets, fifos, devices) are silently skipped. */
    }

    /* BUG FIX: zip_close() performs the actual writes and can fail; the
     * original ignored its return value and could report success for a
     * broken archive. */
    if (zip_close(archive) < 0)
    {
        fprintf(stderr, "failed to close archive: %s \n", zip_strerror(archive));
        zip_discard(archive);
        result = 1;
    }
    return result;
}
请注意,对于单个群组('element_id')我在填充方面没有问题:
第一组(group1):
pading
所以我在它上面应用填充(效果很好):
{'date': {88: datetime.date(2017, 10, 3), 43: datetime.date(2017, 9, 26), 159: datetime.date(2017, 11, 8)}, u'element_id': {88: 122, 43: 122, 159: 122}, u'VALUE': {88: '8.0', 43: '2.0', 159: '5.0'}}
我希望通过print group1.set_index('date').asfreq('D', method='pad').head()
另一组(group2):
groupby
我得到以下错误:
{'date': {88: datetime.date(2017, 10, 3), 43: datetime.date(2017, 9, 26), 159: datetime.date(2017, 11, 8)}, u'element_id': {88: 122, 43: 122, 159: 122}, u'VALUE': {88: '8.0', 43: '2.0', 159: '5.0'}}
group_data=pd.concat([group1,group2],axis=0)
group_data.groupby(['element_id']).set_index('date').resample('D').asfreq()
答案 0 :(得分:1)
首先,有一个问题:您的 date 列的 dtype 是 object(对象)而不是 datetime(日期时间),因此首先需要用 to_datetime 将其转换。
然后可以使用GroupBy.apply
:
group_data['date'] = pd.to_datetime(group_data['date'])
df = (group_data.groupby(['element_id'])
.apply(lambda x: x.set_index('date').resample('D').ffill()))
print (df.head())
VALUE element_id
element_id date
122 2017-09-26 2.0 122
2017-09-27 2.0 122
2017-09-28 2.0 122
2017-09-29 2.0 122
2017-09-30 2.0 122
df = group_data.set_index('date').groupby(['element_id']).resample('D').ffill()
print (df.head())
VALUE element_id
element_id date
122 2017-09-26 2.0 122
2017-09-27 2.0 122
2017-09-28 2.0 122
2017-09-29 2.0 122
2017-09-30 2.0 122
编辑:
编辑:如果问题出在重复值上,解决方案是为具有唯一 dates 的各个子组添加一个新列。如果使用 concat,则可以利用其 keys 参数:
group1 = pd.DataFrame({'date': {88: datetime.date(2017, 10, 3),
43: datetime.date(2017, 9, 26),
159: datetime.date(2017, 11, 8)},
u'element_id': {88: 122, 43: 122, 159: 122},
u'VALUE': {88: '8.0', 43: '2.0', 159: '5.0'}})
d = {'level_0':'g'}
group_data=pd.concat([group1,group1], keys=('a','b')).reset_index(level=0).rename(columns=d)
print (group_data)
g VALUE date element_id
43 a 2.0 2017-09-26 122
88 a 8.0 2017-10-03 122
159 a 5.0 2017-11-08 122
43 b 2.0 2017-09-26 122
88 b 8.0 2017-10-03 122
159 b 5.0 2017-11-08 122
group_data['date'] = pd.to_datetime(group_data['date'])
df = (group_data.groupby(['g','element_id'])
.apply(lambda x: x.set_index('date').resample('D').ffill()))
print (df.head())
g VALUE element_id
g element_id date
a 122 2017-09-26 a 2.0 122
2017-09-27 a 2.0 122
2017-09-28 a 2.0 122
2017-09-29 a 2.0 122
2017-09-30 a 2.0 122