我需要该社区的R专家帮助:
# Build a small reproducible example: four yearly snapshots (year = 1..4)
# that are stacked into one data frame and then ordered by id and year.
set.seed(1234)  # fix the RNG seed so the sample() draws can be reproduced
# Use TRUE rather than T: T is an ordinary variable that can be reassigned.
x <- data.frame(id = 1:5, value1 = sample(1:5, 5, replace = TRUE),
                value2 = sample(1:5, 5), year = 1)
y <- data.frame(id = 1:5, value1 = sample(1:10, 5),
                value2 = sample(1:5, 5), year = 2)
# Years 3 and 4 only cover a subset of the ids.
z <- data.frame(id = sample(1:4, 3), value1 = sample(1:10, 3),
                value2 = sample(1:5, 3), year = 3)
f <- data.frame(id = sample(1:2, 2), value1 = sample(1:10, 2),
                value2 = sample(1:5, 2), year = 4)
# The outer parentheses print the combined data frame as well as assigning it.
(df <- rbind(x, y, z, f))
# Sort by id first, then by year within each id.
df1 <- df[order(df$id, df$year), ]
df1
id value1 value2 year
1 1 4 1
1 10 1 2
1 2 4 3
1 3 3 4
2 4 1 1
2 2 3 2
2 1 3 3
2 10 1 4
3 4 5 1
3 3 5 2
3 7 5 3
4 4 2 1
4 9 2 2
5 5 3 1
5 7 4 2
我想获得以下输出:
id value1 value2 year
1 9 -3 2
1 1 0 3
1 2 -1 4
2 -2 2 2
2 -3 2 3
2 6 0 4
3 -1 0 2
3 3 0 3
4 5 0 2
5 2 1 2
感谢您的帮助!
答案 0 :(得分:0)
按'id'分组后,使用mutate_at
将各'value'列的值减去同一组中'year'为1时对应的值,然后用filter
去掉'year'为1的行
library(dplyr)
# For every id, express value1 and value2 as the change relative to that
# id's year-1 row, then drop the year-1 baseline rows themselves.
# NOTE(review): assumes each id has exactly one row with year == 1.
df1 %>%
  group_by(id) %>%
  # `.` is the current column within the group and `.[year == 1]` its
  # baseline value. A formula lambda replaces the deprecated funs() helper;
  # with dplyr >= 1.0.0 the same step can also be written as
  # mutate(across(value1:value2, ~ .x - .x[year == 1])).
  mutate_at(vars(value1:value2), ~ . - .[year == 1]) %>%
  filter(year != 1)
# A tibble: 10 x 4
# Groups: id [5]
# id value1 value2 year
# <int> <int> <int> <int>
# 1 1 9 -3 2
# 2 1 1 0 3
# 3 1 2 -1 4
# 4 2 -2 2 2
# 5 2 -3 2 3
# 6 2 6 0 4
# 7 3 -1 0 2
# 8 3 3 0 3
# 9 4 5 0 2
#10 5 2 1 2
或者使用data.table
library(data.table)
# Columns to rebase. One name vector drives both the `:=` target and
# .SDcols, so the update no longer depends on the columns sitting at
# positions 2 and 3.
value_cols <- c("value1", "value2")
# setDT() turns df1 into a data.table in place; `:=` then overwrites the
# value columns, per id, with their difference from the year-1 value.
# The chained [year != 1] drops the baseline rows from the printed result.
# NOTE(review): assumes each id has exactly one row with year == 1.
setDT(df1)[, (value_cols) := lapply(.SD, function(v) v - v[year == 1]),
           by = id, .SDcols = value_cols][year != 1]
在base R
中,我们可以这样做
# Columns whose values are rebased on the year-1 row of the same id.
nm1 <- c("value1", "value2")
# One baseline row per id (the row where year == 1).
baseline <- df1[df1$year == 1, ]
# Align every row with its id's baseline via match() on id instead of
# relying on row position, so the result does not depend on df1 being
# sorted by id. Ids without a year-1 row yield NA rather than a silently
# misaligned difference.
df1[nm1] <- df1[nm1] - baseline[match(df1$id, baseline$id), nm1]
# Show only the rows after the baseline year.
df1[df1$year != 1, ]
# Example input: 15 rows, one per (id, year) pair, ordered by id and year.
# Ids 1-2 have four years, id 3 has three, ids 4-5 have two.
df1 <- data.frame(
  id = rep(1:5, times = c(4L, 4L, 3L, 2L, 2L)),
  value1 = c(
    1L, 10L, 2L, 3L,
    4L, 2L, 1L, 10L,
    4L, 3L, 7L,
    4L, 9L,
    5L, 7L
  ),
  value2 = c(
    4L, 1L, 4L, 3L,
    1L, 3L, 3L, 1L,
    5L, 5L, 5L,
    2L, 2L,
    3L, 4L
  ),
  year = sequence(c(4L, 4L, 3L, 2L, 2L))
)
答案 1 :(得分:0)
请考虑使用by
(按因子把数据框拆分成子数据框,并对每个子数据框应用一个函数),并在该函数中调用subset
(用于去掉year为1的行)和within
(用于重新计算各列)。by
会返回一个数据框列表,您可以用do.call(rbind, ...)
把它们合并成一个数据框。
数据
# The example data as whitespace-delimited text; the first line holds the
# column names.
txt <- "id value1 value2 year
1 1 4 1
1 10 1 2
1 2 4 3
1 3 3 4
2 4 1 1
2 2 3 2
2 1 3 3
2 10 1 4
3 4 5 1
3 3 5 2
3 7 5 3
4 4 2 1
4 9 2 2
5 5 3 1
5 7 4 2"
# Parse the text into a data frame, taking column names from the header.
df1 <- read.table(header = TRUE, text = txt)
过程
# Split df1 by id; within each piece, rebase value1/value2 on that id's
# year-1 row, then keep only the later years.
# NOTE(review): assumes each id has exactly one row with year == 1.
df_list <- by(df1, df1$id, function(sub) {
  rebased <- within(sub, {
    # Look the baseline up by year rather than taking the first row, so
    # the result does not depend on the rows being ordered by year.
    value1 <- value1 - value1[year == 1]
    value2 <- value2 - value2[year == 1]
  })
  subset(rebased, year > 1)
})
# Stack the per-id pieces back into one data frame. unname() keeps the
# group labels out of the row names, and row.names = NULL renumbers the
# rows from 1.
new_df <- data.frame(do.call(rbind, unname(df_list)),
                     row.names = NULL)
new_df
# id value1 value2 year
# 1 1 9 -3 2
# 2 1 1 0 3
# 3 1 2 -1 4
# 4 2 -2 2 2
# 5 2 -3 2 3
# 6 2 6 0 4
# 7 3 -1 0 2
# 8 3 3 0 3
# 9 4 5 0 2
# 10 5 2 1 2