# Example data: three families of five children each, with one CHANGE value
# per child.
FAMILY <- rep(c("FAMILYA", "FAMILYB", "FAMILYC"), each = 5)
CHILDREN <- c(
  "JAKE", "PETE", "JASON", "KEVIN", "ALFRED",
  "DALE", "STEVE", "MELISSA", "DAN", "THOMAS",
  "CAIT", "BRANDON", "DEAN", "ADAM", "KELSEY"
)
CHANGE <- c(
  1000, -1000, 2000, 3000, 5000,
  100, 300, 1234, -1022, -1111,
  -1112, 1000, 1002, 2131, 1231
)
# Combine the three parallel vectors into one data frame and print it.
df1 <- data.frame(FAMILY, CHILDREN, CHANGE)
df1
FAMILY CHILDREN CHANGE
1 FAMILYA JAKE 1000
2 FAMILYA PETE -1000
3 FAMILYA JASON 2000
4 FAMILYA KEVIN 3000
5 FAMILYA ALFRED 5000
6 FAMILYB DALE 100
7 FAMILYB STEVE 300
8 FAMILYB MELISSA 1234
9 FAMILYB DAN -1022
10 FAMILYB THOMAS -1111
11 FAMILYC CAIT -1112
12 FAMILYC BRANDON 1000
13 FAMILYC DEAN 1002
14 FAMILYC ADAM 2131
15 FAMILYC KELSEY 1231
我希望为此数据帧生成4个新的额外列:前两列显示 1) CHANGE 最大的孩子,2) CHANGE 第二大的孩子;最后两列显示 3) CHANGE 最小的孩子,4) CHANGE 第二小的孩子。
我还希望在每个孩子的名字旁边显示其对应的 CHANGE 值。
最终格式应如下所示:
FAMILY TOTAL CHANGE INCREASE #1 INCREASE #2 DECREASE #1 DECREASE #2
FAMILYA 10000 ALFRED: 5000 KEVIN: 3000 PETE: -1000 JAKE: 1000
FAMILYB -499 MELISSA: 1234 STEVE: 300 THOMAS: -1111 DAN: -1022
FAMILYC 4252 ADAM: 2131 KELSEY: 1231 CAIT: -1112 BRANDON: 1000
如果您认为把每个孩子的值放在其旁边的单独一列中会更容易,那也可以;不过这同样是我需要帮助实现的思路。
任何帮助都会很棒,谢谢!
答案 0 :(得分:2)
library(dplyr)
library(tidyr)
# Return the value of a given rank among the distinct values of `x`.
#
# Args:
#   x: a vector of sortable values (e.g. numeric); duplicates are collapsed
#      before ranking.
#   y: the rank to fetch. A positive `y` counts from the largest value
#      (2 = second largest); a negative `y` counts from the smallest value
#      (-2 = second smallest).
#
# Returns:
#   The distinct value at the requested rank, or NA when `x` has fewer
#   distinct values than the rank requires.
myfun <- function(x, y) {
  u <- sort(unique(x), decreasing = TRUE)
  if (y < 0) {
    # Index from the end of the de-duplicated vector `u`. Indexing with
    # length(x) is wrong whenever `x` contains repeated values, because `u`
    # is then shorter than `x`.
    pos <- length(u) + y + 1
    # Guard against zero/negative positions, which would otherwise return an
    # empty vector or drop elements instead of signalling "no such rank".
    if (pos < 1) NA else u[pos]
  } else {
    u[y]
  }
}
# Per-family summary: the child with the largest (a1/a2), second largest
# (b1/b2), smallest (c1/c2) and second smallest (d1/d2) CHANGE. The "*1"
# columns hold the child, the "*2" columns hold the value.
#
# which.max()/which.min()/match() each return a single position, so a tie in
# CHANGE picks the first matching child instead of handing summarise() a
# multi-element result. match() also yields NA (rather than a zero-length
# result) when myfun() finds no such rank.
#
# The column order (a1, a2, b2, b1, c1, c2, d2, d1) is kept as is: b1 and d1
# are computed from b2 and d2, so they have to come after them.
df2 <- df1 %>%
  group_by(FAMILY) %>%
  summarise(
    a1 = CHILDREN[which.max(CHANGE)], a2 = max(CHANGE),
    b2 = myfun(CHANGE, 2), b1 = CHILDREN[match(b2, CHANGE)],
    c1 = CHILDREN[which.min(CHANGE)], c2 = min(CHANGE),
    d2 = myfun(CHANGE, -2), d1 = CHILDREN[match(d2, CHANGE)]
  )
#df2
#   FAMILY      a1    a2    b2     b1     c1    c2    d2      d1
#   <fctr>  <fctr> <dbl> <dbl> <fctr> <fctr> <dbl> <dbl>  <fctr>
#1 FAMILYA  ALFRED  5000  3000  KEVIN   PETE -1000  1000    JAKE
#2 FAMILYB MELISSA  1234   300  STEVE THOMAS -1111 -1022     DAN
#3 FAMILYC    ADAM  2131  1231 KELSEY   CAIT -1112  1000 BRANDON
# A little clumsy here... suggestions for a more efficient way of uniting
# the columns are welcome.
# Each step pastes one child column and its value column into a single
# "CHILD:VALUE" column. Columns are selected by name rather than by position
# so the steps keep working if columns are added to or reordered in df2, and
# every result column gets its own name (A1, B1, C1, D1) so the final data
# frame has no duplicated column names.
df3 <- unite(df2, "A1", a1, a2, sep = ":")
df4 <- unite(df3, "B1", b1, b2, sep = ":")
df5 <- unite(df4, "C1", c1, c2, sep = ":")
df6 <- unite(df5, "D1", d1, d2, sep = ":")
#df6
#   FAMILY           A1          B1           C1           D1
#   <fctr>        <chr>       <chr>        <chr>        <chr>
#1 FAMILYA  ALFRED:5000  KEVIN:3000   PETE:-1000    JAKE:1000
#2 FAMILYB MELISSA:1234   STEVE:300 THOMAS:-1111    DAN:-1022
#3 FAMILYC    ADAM:2131 KELSEY:1231   CAIT:-1112 BRANDON:1000
注意:上面忘记添加 TOTAL CHANGE 列。在 summarise() 中添加 `TOTAL CHANGE` = sum(CHANGE) 作为第一个汇总列即可;如果 unite() 是按列位置索引选择列的,则需要把这些索引各加 1。
答案 1 :(得分:1)
以下是使用自定义函数和 do(来自 dplyr)将其应用于每个家庭组的方法。自定义函数本身也使用了 dplyr。
首先,自定义函数把每个孩子与其变化值拼接起来,并按变化值从大到小排序。然后,它返回总变化(总和)以及排序后的前两个和最后两个变化。结果必须以 data.frame 的形式返回,才能与 do 一起正常使用。
# Summarise one family's changes as a single-row data frame.
#
# Args:
#   CHILDREN: vector of child names, one entry per child.
#   CHANGE:   numeric vector of changes, parallel to CHILDREN.
#
# Returns:
#   A one-row data.frame with five character columns:
#   "Total Change" (the sum of CHANGE, stored as text because it shares a
#   vector with the labels), "Biggest Change", "Second Biggest Change",
#   "Second Smallest Change" and "Smallest Change". The last four hold
#   "CHILD: CHANGE" labels. With fewer than two children the ranks that do
#   not exist are NA, so the result always has the same five columns.
myFamFunction <- function(CHILDREN, CHANGE) {
  labels <- paste(CHILDREN, CHANGE, sep = ": ")
  ranked <- labels[order(CHANGE, decreasing = TRUE)]

  # Positions of the two largest and the two smallest entries. Positions that
  # fall outside 1..n are turned into NA so that indexing pads with NA
  # instead of returning fewer than four labels.
  n <- length(ranked)
  picks <- c(1L, 2L, n - 1L, n)
  picks[picks < 1L | picks > n] <- NA

  values <- c(sum(CHANGE), ranked[picks])
  out <- data.frame(matrix(values, nrow = 1L), stringsAsFactors = FALSE)
  setNames(
    out,
    c(
      "Total Change",
      "Biggest Change",
      "Second Biggest Change",
      "Second Smallest Change",
      "Smallest Change"
    )
  )
}
请注意,如果某个家庭的孩子少于2个,这可能会引发错误(不过,如果少于4个,结果本身就已经值得怀疑了)。如果您的实际数据更复杂,请告诉我们您希望在这些情况下得到什么结果,以便对这些边缘情况加以防范。
然后,只需先 group_by,再把需要的列传递给该函数,瞧:
# Run myFamFunction() once per FAMILY group; inside do(), `.` is the subset
# of df1 belonging to the current group.
df1 %>%
  group_by(FAMILY) %>%
  do(myFamFunction(CHILDREN = .$CHILDREN, CHANGE = .$CHANGE))
返回:
FAMILY `Total Change` `Biggest Change` `Second Biggest Change` `Second Smallest Change` `Smallest Change`
<fctr> <chr> <chr> <chr> <chr> <chr>
1 FAMILYA 10000 ALFRED: 5000 KEVIN: 3000 JAKE: 1000 PETE: -1000
2 FAMILYB -499 MELISSA: 1234 STEVE: 300 DAN: -1022 THOMAS: -1111
3 FAMILYC 4252 ADAM: 2131 KELSEY: 1231 BRANDON: 1000 CAIT: -1112