下面是我拥有的数据集。
Name Price
A 3
A 0.5
A 0.8
A 0.1
A 0.1
B 0.5
B 0.9
B 1
B 0.4
B 5
我需要新增一列 Result:如果价格(或累计的价格总和)已达到1,则保留该值;否则将其累加到下一行的价格上,直到总和达到1为止。我需要如下输出:
Name Price Result
A 3 3
A 0.5
A 0.8 1.3 #(Since the below price is 0.1 and 1.3+0.1 is 1.4,
A 0.1 #can we set a limit)
A 0.1 0.2
B 0.5
B 0.9 1.4
B 1 1
B 0.4
B 5 5.4
答案 0 :(得分:1)
使用R的向量化方法可能会有更好的解决办法,但下面是一个传统的 for 循环实现:
# Carry-forward accumulation over the rows of `df`, in row order.
#
# Reads:  df$Price (numeric).
# Writes: df$Result -- the running total on the row where that total first
#         reaches 1 (the total then restarts from 0), and NA on every row
#         where the total is still below 1.
#
# Grouping by Name is not considered in this version: the carry crosses group
# boundaries, and a trailing total that never reaches 1 is left as NA.

# Initialise values
# rep() rather than a bare NA so the assignment also works on a 0-row df.
df$Result <- rep(NA, nrow(df)) # to store results
sum_vec <- 0                   # running total carried over from earlier rows
is_full <- TRUE                # TRUE when nothing is currently carried over

# seq_len() yields an empty sequence for a 0-row data frame, whereas
# 1:nrow(df) would iterate over c(1, 0).
for (i in seq_len(nrow(df))) {
  # `&&` is the scalar, short-circuiting operator intended for `if`.
  if (df$Price[i] >= 1 && is_full) {
    # Price already reaches 1 and there is no carry: keep it as is.
    df$Result[i] <- df$Price[i]
    is_full <- TRUE
  } else {
    # Add the current price to whatever has been carried so far.
    sum_vec <- sum_vec + df$Price[i]
    if (sum_vec >= 1) {
      # Threshold reached: emit the total and reset the carry.
      df$Result[i] <- sum_vec
      is_full <- TRUE
      sum_vec <- 0
    } else {
      # Still below 1: nothing to report on this row, keep carrying.
      df$Result[i] <- NA
      is_full <- FALSE
    }
  }
}
df
# Name Price Result
#1 A 3.0 3.0
#2 A 0.5 NA
#3 A 0.8 1.3
#4 A 0.1 NA
#5 A 2.0 2.1
#6 B 0.5 NA
#7 B 0.9 1.4
#8 B 1.0 1.0
#9 B 0.4 NA
#10 B 5.0 5.4
对于更新后的示例,我们可以按 Name 分组对数据进行 split,然后对每个组再次应用相同的逻辑:
# Per-group carry-forward accumulation.
#
# Reads:  df$Price (numeric) and df$Name (grouping variable).
# Writes: df$Result -- within each Name group, the running total on the row
#         where it first reaches 1 (the total then restarts from 0), NA on
#         rows still below 1. The last row of each group always receives
#         whatever total is pending, even if it is below 1.
#
# unsplit() puts each group's values back at the row positions they came
# from. unlist() would simply concatenate the groups in level order, which
# only lines up with the rows when df is already sorted by Name.
df$Result <- unsplit(
  lapply(split(df$Price, df$Name), function(x) {
    result <- rep(NA_real_, length(x))
    sum_vec <- 0 # running total carried over within this group
    # seq_along() is empty for an empty group; 1:length(x) would be c(1, 0).
    for (i in seq_along(x)) {
      sum_vec <- sum_vec + x[i]
      # `||` is the scalar operator intended for `if`.
      # Emit when the threshold is reached, or flush on the group's last row.
      if (sum_vec >= 1 || i == length(x)) {
        result[i] <- sum_vec
        sum_vec <- 0
      }
    }
    result
  }),
  df$Name
)
df
# Name Price Result
#1 A 3.0 3.0
#2 A 0.5 NA
#3 A 0.8 1.3
#4 A 0.1 NA
#5 A 0.1 0.2
#6 B 0.5 NA
#7 B 0.9 1.4
#8 B 1.0 1.0
#9 B 0.4 NA
#10 B 5.0 5.4
答案 1 :(得分:0)
这是一个使用 ave 的解决方案,它按 Name 分组对数据进行拆分,并把结果放回原来的行位置。
# Per-group carry-forward accumulation using ave().
#
# Reads:  df1$Price and df1$Name.
# Writes: df1$Result -- within each Name group, the running total on the row
#         where it reaches 1 (the total then restarts from 0); other rows
#         stay NA. A positive total still pending after the group's last row
#         is written onto that last row.
df1$Result <- ave(df1$Price, df1$Name, FUN = function(x) {
  n <- length(x)
  out <- rep(NA, n)
  carry <- 0
  for (k in seq_len(n)) {
    carry <- carry + x[k]
    if (carry < 1) {
      # Below the threshold: leave this row as NA and keep carrying.
      next
    }
    out[k] <- carry
    carry <- 0
  }
  # Flush a leftover partial total onto the final row of the group.
  if (carry > 0) {
    out[n] <- carry
  }
  out
})
df1
# Name Price Result
#1 A 3.0 3.0
#2 A 0.5 NA
#3 A 0.8 1.3
#4 A 0.1 NA
#5 A 0.1 0.2
#6 B 0.5 NA
#7 B 0.9 1.4
#8 B 1.0 1.0
#9 B 0.4 NA
#10 B 5.0 5.4
数据。
# Sample data: ten rows with a Name column (groups A and B, five rows each)
# and a Price column, parsed from an inline whitespace-separated table whose
# first non-blank line is the header.
df1 <- read.table(
  header = TRUE,
  text = "
Name Price
A 3
A 0.5
A 0.8
A 0.1
A 0.1
B 0.5
B 0.9
B 1
B 0.4
B 5
"
)
答案 2 :(得分:0)
如果要按某个分类变量对数据进行分组处理,可以尝试 tapply() 或 aggregate()。
# Per-group carry-forward accumulation using tapply().
#
# Reads:  df$Price and df$Name.
# Writes: Result     -- tapply() output holding one processed vector per group.
#         df$Result  -- those vectors placed back at their original rows.
#
# Within a group, a value below 1 is replaced by NA and added to the next
# row; the last row of the group keeps whatever has accumulated on it.
Result <- tapply(df$Price, df$Name, function(x) {
  temp <- x
  # Visit every row except the last. seq_len() gives an empty loop for a
  # one-row group, where 1:(length(x) - 1) would iterate over c(1, 0).
  for (i in seq_len(max(length(x) - 1L, 0L))) {
    if (temp[i] < 1) {
      # Carry the accumulated value temp[i], not the raw x[i], so that a
      # carry spanning more than two rows is not lost.
      temp[i + 1L] <- x[i + 1L] + temp[i]
      temp[i] <- NA
    }
  }
  temp
})
# unsplit() restores the original row order; unlist() would concatenate the
# groups in level order, which only matches when df is sorted by Name.
df$Result <- unsplit(Result, df$Name)
df
# Name Price Result
# 1 A 3.0 3.0
# 2 A 0.5 NA
# 3 A 0.8 1.3
# 4 A 0.1 NA
# 5 A 2.0 2.1
# 6 B 0.5 NA
# 7 B 0.9 1.4
# 8 B 1.0 1.0
# 9 B 0.4 NA
# 10 B 5.0 5.4