考虑这个简单的例子
# Example data: every measurement column (x, other_var, y, another_var) has a
# companion label_<name> column holding the display name for that variable.
# Built with tibble() because data_frame() is deprecated in the tibble package.
mytest <- tibble(
  group = c("a", "a", "a", "b", "b", "b"),
  x = c(NA, NA, NA, 5, 6, 7),
  other_var = c(NA, NA, NA, 1, 2, 3),
  y = c(3, 5, 6, NA, NA, NA),
  another_var = c(1, 2, 3, NA, NA, NA),
  label_x = c("hello", "hello", "hello", "world", "world", "world"),
  label_y = c("bada", "bada", "bada", "boom", "boom", "boom"),
  label_other_var = c("ak", "ak", "ak", "run", "run", "run"),
  label_another_var = c("noo", "noo", "noo", "bie", "bie", "bie")
)
# A tibble: 6 x 9
group x other_var y another_var label_x label_y label_other_var label_another_var
<chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr>
1 a NA NA 3 1 hello bada ak noo
2 a NA NA 5 2 hello bada ak noo
3 a NA NA 6 3 hello bada ak noo
4 b 5 1 NA NA world boom run bie
5 b 6 2 NA NA world boom run bie
6 b 7 3 NA NA world boom run bie
在这里,我需要按 group 进行 nest(),并且能够提取(每个嵌套数据框中)不是NA的变量的列名。诀窍是,变量的实际名称显示在 label_ 列中。
例如,这是所需的输出:
# A tibble: 4 x 2
group var
<chr> <chr>
1 a bada
2 a noo
3 b world
4 b run
确实,以 a 组为例。仅有的非缺失变量是 y 和 another_var。但是,y 的名称为 bada(如 label_y 变量所示),another_var 的名称为 noo。对于 b 组的推理也是如此。
运行以下代码之后,我不知道如何调用 map:
mytest %>% group_by(group) %>% nest()
# A tibble: 2 x 2
group data
<chr> <list>
1 a <tibble [3 x 8]>
2 b <tibble [3 x 8]>
# Smaller version of the example data (two measurement columns only).
# The console continuation prompts ("+") were removed so the snippet parses,
# and tibble() replaces the deprecated data_frame().
mytest <- tibble(
  group = c("a", "a", "a", "b", "b", "b"),
  x = c(NA, NA, NA, 5, 6, 7),
  y = c(3, 5, 6, NA, NA, NA),
  label_x = c("hello", "hello", "hello", "world", "world", "world"),
  label_y = c("bada", "bada", "bada", "boom", "boom", "boom")
)
有什么想法吗? 谢谢!
编辑:最初的,较小的,有问题的提议如下
/**
 * Builds a text dump of the call log, one record per line.
 *
 * Each record is written as {@code direction||number||duration||date;\n},
 * where direction is OUTGOING, INCOMING, MISSED, or UNKNOWN for any other
 * call type, and date is the {@link Date#toString()} of the call timestamp.
 *
 * @return the concatenated records; an empty string when the query yields
 *         no cursor or no rows
 */
private String getCallDetails()
{
    StringBuilder sb = new StringBuilder();

    // Query through the ContentResolver instead of the deprecated
    // managedQuery(): a cursor returned by managedQuery() is owned by the
    // Activity and must not be closed by the caller, whereas this cursor is
    // ours and is closed in the finally block below.
    // NOTE(review): assumes the enclosing class is a Context (the original
    // managedQuery() call implies an Activity) - confirm.
    Cursor cursor = getContentResolver().query(CallLog.Calls.CONTENT_URI,
            null, null, null, null);
    if (cursor == null) {
        // The provider may return null (e.g. provider unavailable).
        return sb.toString();
    }

    try {
        int number = cursor.getColumnIndex(CallLog.Calls.NUMBER);
        int type = cursor.getColumnIndex(CallLog.Calls.TYPE);
        int date = cursor.getColumnIndex(CallLog.Calls.DATE);
        int duration = cursor.getColumnIndex(CallLog.Calls.DURATION);

        while (cursor.moveToNext()) {
            String phNumber = cursor.getString(number);
            String callDuration = cursor.getString(duration);
            // Read numeric columns directly rather than parsing strings,
            // which threw NumberFormatException on a null value.
            Date callDayTime = new Date(cursor.getLong(date));
            int callDirectionCode = cursor.getInt(type);

            String callDirection;
            switch (callDirectionCode)
            {
                case CallLog.Calls.OUTGOING_TYPE:
                    callDirection = "OUTGOING";
                    break;
                case CallLog.Calls.INCOMING_TYPE:
                    callDirection = "INCOMING";
                    break;
                case CallLog.Calls.MISSED_TYPE:
                    callDirection = "MISSED";
                    break;
                default:
                    // Any other call type; previously this printed "null".
                    callDirection = "UNKNOWN";
                    break;
            }

            sb.append(callDirection).append("||")
              .append(phNumber).append("||")
              .append(callDuration).append("||")
              .append(callDayTime).append(";\n");
        }
    } finally {
        // Release the cursor even if reading a row throws.
        cursor.close();
    }
    return sb.toString();
}
答案 0 :(得分:3)
按'group'分组并 nest 之后,用 map 在'data'列中循环:用 summarise 提取各'label'列中第一个(first)非NA元素,用 gather 移至单个列,同时移除 NA(na.rm = TRUE),select 'var'列,然后执行 unnest(仅保留之后感兴趣的列)
# For each group, report the label of whichever of x / y is not missing.
mytest %>%
  group_by(group) %>%
  nest() %>%
  mutate(var = map(data, function(d) {
    d %>%
      # For each measurement, take the first label on a row where that
      # measurement is present; NA when the measurement is missing throughout.
      summarise(
        label_x = label_x[!is.na(x)][1],
        label_y = label_y[!is.na(y)][1]
      ) %>%
      # Stack the two labels into one column, dropping the NA entries.
      gather(key, var, na.rm = TRUE) %>%
      select(var)
  })) %>%
  select(-data) %>%
  # Name the list-column explicitly: calling unnest() with no columns is
  # deprecated in tidyr >= 1.0, while unnest(var) is accepted by old and new
  # releases.
  unnest(var) %>%
  # nest() keeps the grouping in newer tidyr releases; drop it so the result
  # is a plain tibble.
  ungroup()
# A tibble: 2 x 2#
# group var
# <chr> <chr>
#1 a bada
#2 b world
如果有更多列,请创建唯一的列名称,然后使用map2
遍历相应的列名称
# Measurement column names (nm1) and their companion label columns (nm2).
# The prefix is anchored with "^" so only a leading "label_" is stripped and a
# name that merely contains "label_" is left untouched.
nm1 <- unique(sub("^label_", "", setdiff(names(mytest), "group")))
nm2 <- paste0("label_", nm1)

mytest %>%
  group_by(group) %>%
  nest() %>%
  mutate(var = map(data, function(d) {
    # Pair each measurement column with its label column and keep the first
    # label on a row where the measurement is present (NA if never present).
    # Columns are picked with `[` because select(nm1) with an external
    # character vector is ambiguous if the data has a column called "nm1".
    labels <- map2_chr(d[nm1], d[nm2], ~ .y[!is.na(.x)][1])
    # Drop the all-missing measurements; plain indexing avoids the
    # "na.action" attribute that na.omit() would attach to the column.
    tibble(var = labels[!is.na(labels)])
  })) %>%
  select(-data) %>%
  # Name the list-column explicitly: calling unnest() with no columns is
  # deprecated in tidyr >= 1.0, while unnest(var) is accepted by old and new
  # releases.
  unnest(var) %>%
  # nest() keeps the grouping in newer tidyr releases; drop it so the result
  # is a plain tibble.
  ungroup()
# A tibble: 4 x 2
# group var
# <chr> <chr>
#1 a bada
#2 a noo
#3 b world
#4 b run
答案 1 :(得分:1)
这将输出您想要的结果:
# Example data with two measurement columns (x, y) and their label columns.
# tibble() replaces data_frame(), which is deprecated in the tibble package.
mytest <- tibble(
  group = c("a", "a", "a", "b", "b", "b"),
  x = c(NA, NA, NA, 5, 6, 7),
  y = c(3, 5, 6, NA, NA, NA),
  label_x = c("hello", "hello", "hello", "world", "world", "world"),
  label_y = c("bada", "bada", "bada", "boom", "boom", "boom")
)
#' Look up the label of the non-missing measurement for one group.
#'
#' Keeps the rows of `df` whose `group` equals `subgroup`. If `x` contains any
#' NA in those rows the label is read from `label_y`; otherwise, if `y`
#' contains any NA, it is read from `label_x`.
#'
#' @param df Data frame with columns `group`, `x`, `y`, `label_x`, `label_y`.
#' @param subgroup Single group value to look up.
#' @return A one-row tibble with columns `group` and `var`. `var` is
#'   `NA_character_` (with a warning) when neither `x` nor `y` has a missing
#'   value in the selected rows, which includes the case of no matching rows.
extract_good_colnames <- function(df, subgroup) {
  # Named `rows` rather than `subset` so base::subset() is not shadowed.
  rows <- filter(df, group == subgroup)

  if (anyNA(rows$x)) {
    colname <- "label_y"
  } else if (anyNA(rows$y)) {
    colname <- "label_x"
  } else {
    # Previously `colname` was left undefined here and the function failed
    # with "object 'colname' not found".
    warning(
      "No missing values in `x` or `y` for group '", subgroup,
      "'; returning NA.",
      call. = FALSE
    )
    return(tibble(group = subgroup, var = NA_character_))
  }

  # `[[` extracts the column as a vector, so as.character() sees the value
  # itself instead of a one-cell data frame.
  tibble(group = subgroup, var = as.character(rows[[colname]][1]))
}
# Look up the label for every distinct group and stack the one-row results
# into a single data frame.
groups <- unique(mytest$group)
map_df(groups, ~ extract_good_colnames(mytest, .x))