在R数据框中筛选子集并标记数据

时间:2017-11-03 13:44:43

标签: r

通过R课程,我遇到了一个我似乎无法解决的课程问题。

我有一个读入为 delivery_data 的 csv 文件。我需要先找出 pickup_zipcode 为 94107 的所有行,再从中找出 pickup_name 字段包含 "Whole Foods Market"、"Safeway" 或 "Pizzeria Delfina" 的所有行,然后把这些 pickup_name 标记为热点。

这是我尝试过的代码:

# First attempt: split the data by hotspot name, then try to tag each piece.
# Read the delivery records into a data frame.
delivery_data = read.csv('Delivery_Data_Problem.csv')

# Vector holding the pickup names that count as hotspots.
hotspot_locations = c("Whole Foods Market", "Safeway", "Pizzeria Delfina")
pickup_sub = list() # starts empty; filled by name in the loop below

# For each hotspot name, store the rows of delivery_data whose pickup_name
# equals that name. Each list element is a separate subset (a copy), and no
# pickup_zipcode filter is applied at this point.
for( i in hotspot_locations){
  pickup_sub[[i]] = delivery_data[delivery_data$pickup_name == i, ]
}
# Tries to apply the tags to each subset stored in pickup_sub.
# NOTE: x is local to the anonymous function and the value returned by sapply()
# is never assigned, so neither pickup_sub nor delivery_data is changed.
# NOTE: x$pickup_name == "..." yields one value per row of x, but if() expects a
# single TRUE/FALSE. R >= 4.2.0 signals an error when the condition is longer
# than one; earlier versions only looked at the first element (with a warning).
# NOTE: paste() separates its arguments with a space by default, so the tagged
# value would read "Whole Foods Market -H1"; paste0() joins without a space.
sapply(pickup_sub, function(x){
  if(x$pickup_name == "Whole Foods Market"){
    x$pickup_name = paste(x$pickup_name, '-H1')
  }else if(x$pickup_name == "Safeway"){
    x$pickup_name = paste(x$pickup_name, '-H2')
  }else if(x$pickup_name == "Pizzeria Delfina"){
    x$pickup_name = paste(x$pickup_name, '-H3')
  }
})
# Asker's note: this code works, but it isn't exactly what we want -- the tags
# need to end up in the original data frame, not in the subsets.


# Second attempt: try to collect row indexes and tag the original data frame.
# Asker's note: this is the first attempt to get just the indexes, but it's not
# returning just the indexes. Should which() be used here?
# NOTE: indexing with [condition, ] returns the matching rows themselves (a data
# frame); which(condition) on its own is what returns the row numbers.
full_list = delivery_data[(delivery_data$pickup_name == "Whole Foods Market" | delivery_data$pickup_name == 'Safeway' | delivery_data$pickup_name == "Pizzeria Delfina") & delivery_data$pickup_zipcode == 94107,]
# hs_zip is meant to be a vector of indexes used to tag the original data frame.
# NOTE: because which() is wrapped in delivery_data[ , ], hs_zip is a data frame
# holding the rows with pickup_zipcode 94107, not a vector of indexes.
hs_zip = delivery_data[which(delivery_data$pickup_zipcode == 94107),]
# Then iterate over the rows and attempt to tag them.
# NOTE: delivery_data[hs_zip, ] uses a data frame (a list) as the row subscript,
# which is what raises "invalid subscript type 'list'".
# NOTE: for() over a data frame visits its columns, not its rows, and i is a
# copy, so assigning to i would not modify delivery_data either.
for(i in delivery_data[hs_zip,]){
  if(i$pickup_name == "Whole Foods Market"){
    i$pickup_name = paste(i$pickup_name, '-H1')
  }else if(i$pickup_name == "Safeway"){
    i$pickup_name = paste(i$pickup_name, '-H2')
  }else if(i$pickup_name == "Pizzeria Delfina"){
    i$pickup_name = paste(i$pickup_name, '-H3')
  }
}

最后一个循环给出以下错误。如何标记原始数据框,而不是副本?

  

Error in xj[i] : invalid subscript type 'list'(xj[i] 中的错误:无效的下标类型 'list')

1 个答案:

答案 0 :(得分:1)

无需任何循环,只需运行ifelse()

# Tag the hotspot pickup names in place, one hotspot per ifelse() call.
# ifelse() is vectorised: for every row where the test is TRUE it takes the
# tagged name, otherwise it keeps the existing name, so the original data frame
# is updated without any loop.
# The test combines both conditions element-wise with `&`; both columns are
# referenced as delivery_data$<column> (the earlier `delivery_data[pickup_zipcode`
# left a bracket unclosed and did not parse).
# NOTE(review): assumes pickup_name is a character column (the read.csv()
# default since R 4.0). If it is a factor, convert it with as.character()
# first, because ifelse() would return the integer codes for untouched rows.
delivery_data$pickup_name <- ifelse(
  delivery_data$pickup_zipcode == 94107 &
    delivery_data$pickup_name == "Whole Foods Market",
  paste0(delivery_data$pickup_name, "-H1"),
  delivery_data$pickup_name
)

delivery_data$pickup_name <- ifelse(
  delivery_data$pickup_zipcode == 94107 &
    delivery_data$pickup_name == "Safeway",
  paste0(delivery_data$pickup_name, "-H2"),
  delivery_data$pickup_name
)

delivery_data$pickup_name <- ifelse(
  delivery_data$pickup_zipcode == 94107 &
    delivery_data$pickup_name == "Pizzeria Delfina",
  paste0(delivery_data$pickup_name, "-H3"),
  delivery_data$pickup_name
)

或者,使用嵌套的ifelse()

# Same tagging in a single pass with nested ifelse() calls.
# The outer ifelse() restricts tagging to rows whose pickup_zipcode is 94107;
# the inner chain picks the suffix for each hotspot name and falls through to
# the unchanged name when none of them matches.
# local() keeps the two short aliases out of the calling environment.
# NOTE(review): assumes pickup_name is a character column; convert a factor
# with as.character() beforehand.
delivery_data$pickup_name <- local({
  name <- delivery_data$pickup_name
  in_zip <- delivery_data$pickup_zipcode == 94107
  ifelse(
    in_zip,
    ifelse(
      name == "Whole Foods Market",
      paste0(name, "-H1"),
      ifelse(
        name == "Safeway",
        paste0(name, "-H2"),
        ifelse(name == "Pizzeria Delfina", paste0(name, "-H3"), name)
      )
    ),
    name
  )
})

使用with()可以写得更简洁:

# Same nested ifelse() tagging, evaluated inside with() so the columns can be
# referenced by bare name instead of delivery_data$<column>.
# tag_if() builds one level of the chain: rows whose pickup_name equals `shop`
# get `suffix` appended, every other row takes the value of `otherwise`.
# The helper lives only inside the with() call.
# NOTE(review): assumes pickup_name is a character column; convert a factor
# with as.character() beforehand.
delivery_data$pickup_name <- with(delivery_data, {
  tag_if <- function(shop, suffix, otherwise) {
    ifelse(pickup_name == shop, paste0(pickup_name, suffix), otherwise)
  }
  ifelse(
    pickup_zipcode == 94107,
    tag_if(
      "Whole Foods Market", "-H1",
      tag_if(
        "Safeway", "-H2",
        tag_if("Pizzeria Delfina", "-H3", pickup_name)
      )
    ),
    pickup_name
  )
})