添加具有自定义结果的列

时间:2018-11-29 12:35:54

标签: r

下面是我拥有的数据集。

Name  Price  
A      3     
A      0.5
A      0.8   
A      0.1
A      0.1  
B      0.5
B      0.9
B      1 
B      0.4
B      5

我需要新增一列 Result:如果价格大于等于 1,则保持原样;否则将其与后面的价格依次累加,直到累计值达到 1,并把累计值写在达到 1 的那一行。我需要得到下面的输出:

Name  Price   Result
A      3      3
A      0.5
A      0.8   1.3 #(Since the below price is 0.1 and 1.3+0.1 is 1.4, 
A      0.1        #can we set a limit)
A      0.1   0.2 
B      0.5
B      0.9   1.4
B      1     1
B      0.4   
B      5     5.4

3 个答案:

答案 0 :(得分:1)

利用 R 的向量化特性也许有更好的解决办法,但下面是传统的 for 循环写法:

# Walk the rows of `df` in order and fill `df$Result`.
# A price of at least 1 with nothing pending is copied as-is; otherwise prices
# are summed until the running total reaches 1, and that total is written on
# the row that completes it. Rows that only feed a pending total stay NA, and
# a total still pending after the last row is not written anywhere.
df$Result <- rep(NA_real_, nrow(df))  # also valid for a zero-row data frame
sum_vec <- 0      # running total of prices not yet written to Result
is_full <- TRUE   # TRUE while nothing is pending in sum_vec

# seq_len() iterates zero times on an empty data frame, unlike 1:nrow(df)
for (i in seq_len(nrow(df))) {
  # `&&` is the scalar, short-circuiting operator intended for `if` conditions
  if (df$Price[i] >= 1 && is_full) {
    # Large enough on its own and nothing pending: keep the price unchanged
    df$Result[i] <- df$Price[i]
    is_full <- TRUE
  } else {
    # Either the price is below 1 or an earlier partial sum is still pending
    sum_vec <- sum_vec + df$Price[i]
    if (sum_vec >= 1) {
      df$Result[i] <- sum_vec
      is_full <- TRUE
      sum_vec <- 0  # start a fresh accumulation on the next row
    } else {
      is_full <- FALSE  # Result[i] stays NA; the sum carries to the next row
    }
  }
}

df
#   Name Price Result
#1     A   3.0    3.0
#2     A   0.5     NA
#3     A   0.8    1.3
#4     A   0.1     NA
#5     A   2.0    2.1
#6     B   0.5     NA
#7     B   0.9    1.4
#8     B   1.0    1.0
#9     B   0.4     NA
#10    B   5.0    5.4

对于更新后的示例,我们可以用 split 按 Name 分组,然后对每一组应用相同的逻辑:

# Per-Name running totals: within each Name group, prices are summed until the
# running total reaches 1 (or the group ends); the total is written on that
# row and the other rows of the run stay NA.
#
# The per-group results are written back through `split<-`, which places each
# value on the row it came from. Concatenating the groups with unlist() would
# return them in group order and misalign Result whenever `df` is not sorted
# by Name.
df$Result <- local({
  out <- rep(NA_real_, nrow(df))
  split(out, df$Name) <- lapply(split(df$Price, df$Name), function(x) {
    result <- rep(NA_real_, length(x))
    sum_vec <- 0  # running total not yet written out
    # seq_along() iterates zero times for an empty group, unlike 1:length(x)
    for (i in seq_along(x)) {
      sum_vec <- sum_vec + x[i]
      # Flush on reaching 1, and always on the group's last row so that a
      # leftover partial sum is still reported
      if (sum_vec >= 1 || i == length(x)) {
        result[i] <- sum_vec
        sum_vec <- 0
      }
    }
    result
  })
  out
})

df
#   Name Price Result
#1     A   3.0    3.0
#2     A   0.5     NA
#3     A   0.8    1.3
#4     A   0.1     NA
#5     A   0.1    0.2
#6     B   0.5     NA
#7     B   0.9    1.4
#8     B   1.0    1.0
#9     B   0.4     NA
#10    B   5.0    5.4

答案 1 :(得分:0)

下面是使用 ave 的解决方案,它会按 Name 分组对数据进行处理。

# Per-Name running totals via ave(): inside each Name group, prices are added
# up until the running total reaches 1; the total is stored on that row and
# the accumulation restarts. Rows that only feed a pending total stay NA.
df1$Result <- ave(df1$Price, df1$Name, FUN = function(x) {
  totals <- rep(NA, length(x))
  pending <- 0
  for (pos in seq_along(x)) {
    pending <- pending + x[pos]
    if (pending < 1) next  # still below the threshold: keep accumulating
    totals[pos] <- pending
    pending <- 0
  }
  # A positive leftover at the end of the group goes on the group's last row
  if (pending > 0) totals[length(x)] <- pending
  totals
})

df1
#   Name Price Result
#1     A   3.0    3.0
#2     A   0.5     NA
#3     A   0.8    1.3
#4     A   0.1     NA
#5     A   0.1    0.2
#6     B   0.5     NA
#7     B   0.9    1.4
#8     B   1.0    1.0
#9     B   0.4     NA
#10    B   5.0    5.4

数据。

# Sample data: ten rows, five for each of the groups "A" and "B".
df1 <- data.frame(
  Name = rep(c("A", "B"), each = 5),
  Price = c(
    3, 0.5, 0.8, 0.1, 0.1,  # group A
    0.5, 0.9, 1, 0.4, 5     # group B
  )
)

答案 2 :(得分:0)

如果要按分类变量对数据进行分组处理,可以尝试 tapply() 或 aggregate():

# Per-Name running totals with tapply(): within each group, prices are summed
# until the running total reaches 1 (or the group ends); the total is written
# on that row and the other rows of the run are NA.
#
# The total is kept in `carry`, so runs of three or more sub-1 prices
# accumulate fully instead of only adding the immediately preceding price.
# factor() drops unused levels so every group returned by tapply() is
# non-NULL; simplify = FALSE keeps one list element per group even when every
# group has a single row.
Result <- tapply(df$Price, factor(df$Name), function(x) {
  temp <- rep(NA_real_, length(x))
  carry <- 0  # running total not yet written out
  # seq_along() also handles one-row groups, unlike 1:(length(x) - 1)
  for (i in seq_along(x)) {
    carry <- carry + x[i]
    # Flush on reaching 1, and always on the group's last row
    if (carry >= 1 || i == length(x)) {
      temp[i] <- carry
      carry <- 0
    }
  }
  temp
}, simplify = FALSE)

# Write each group's values back onto its own rows via `split<-`; unlist()
# would return them in group order and misalign Result when `df` is not
# sorted by Name.
df$Result <- local({
  out <- rep(NA_real_, nrow(df))
  split(out, factor(df$Name)) <- Result
  out
})
df

#    Name Price Result
# 1     A   3.0    3.0
# 2     A   0.5     NA
# 3     A   0.8    1.3
# 4     A   0.1     NA
# 5     A   2.0    2.1
# 6     B   0.5     NA
# 7     B   0.9    1.4
# 8     B   1.0    1.0
# 9     B   0.4     NA
# 10    B   5.0    5.4