R - 如何使用mutate和filter添加多个条件

时间:2018-04-21 17:32:09

标签: r subset mutate

我正在处理一个数据集。我知道如何用mutate和filter设置单个条件,但我想根据user_location的值设置多个条件。另外,如果user_location不属于任何一个国家/地区,我想把country设为“其他”(Other)。

# 1. Some values from the original dataframe
# NOTE(review): one `lat` value (-102.5527840) is outside [-90, 90], so the
# two coordinate columns look swapped -- confirm against the real data.
lat <- c(-58.3815591, -63.6166720, -58.3815591, -102.5527840, -58.3815591)
lon <- c(-34.6036844, -38.4160970, -34.6036844, 23.6345010, -34.6036844)
user_location <- c(
  "Buenos Aires, Argentina",
  "Argentina",
  "Buenos Aires",
  "México",
  "Ciudad Autónoma de Buenos Aire"
)
n <- c(88, 81, 71, 29, 26)

# Assemble the four vectors into the sample data frame, one row per location.
b <- data.frame(lat, lon, user_location, n)


# 2. Filter
# Lookup vectors: the location strings that should map to each country.
# NOTE: `%in%` compares whole strings exactly, so `*` is not a wildcard there.
# Just for the example:
c_argentina <- c(
  "Argentina", "Buenos Aires", "Bs As", "Bs Aires", "iudad   Aut*noma",
  "San Telmo"
)
c_mexico <- c("M*xico", "Veracruz")

# For the complete dataset, I will be using filters for 10+ countries
c_peru <- c("Per*", "Lima")
c_brazil <- c("Brazil", "Brasil", "Brasilia", "Rio de Janeiro", "Sao Paulo")
c_chile <- c("Chile", "Santiago")
c_uruguay <- c("Uruguay", "Montevideo", "Durazno")
c_ecuador <- c("Ecuador", "Guayaquil")
c_united_states <- c(
  "USA", "Washington", "DC", "Chicago", "United States", "NY", "Miami"
)
c_canada <- c("Canada", "Montreal", "Alberta", "Ottawa")

# 3. Create a new column
# For now, I understand how to setup just one condition.
# Needs dplyr attached for `%>%`, as_tibble() and mutate().
# Keeps only the rows whose user_location is exactly one of the c_argentina
# entries, then labels every remaining row as "Argentina".
b <- b %>%
  as_tibble() %>%
  subset(user_location %in% c_argentina) %>%
  mutate(country = "Argentina")

1 个答案:

答案 0 :(得分:0)

原始数据

# Sample rows standing in for the original data frame.
lat <- c(-58.3815591, -63.6166720, -58.3815591, -102.5527840, -58.3815591)
lon <- c(-34.6036844, -38.4160970, -34.6036844, 23.6345010, -34.6036844)
user_location <- c(
  "Buenos Aires, Argentina",
  "Argentina",
  "Buenos Aires",
  "México",
  "Ciudad Autónoma de Buenos Aire"
)
n <- c(88, 81, 71, 29, 26)

# Keep user_location as character (not factor) on every R version; spell out
# FALSE because `F` is an ordinary variable that can be reassigned.
b <- data.frame(lat, lon, user_location, n, stringsAsFactors = FALSE)

预期结果

# Expected output: the sample rows plus one extra "London" row (no
# coordinates) showing that an unmatched location should become "Other".
# NOTE(review): the lookup table built later labels Mexican rows "Mexico"
# (no accent) while this vector uses "México" -- confirm which is intended.
lat <- c(-58.3815591, -63.6166720, -58.3815591, -102.5527840, -58.3815591, NA)
lon <- c(-34.6036844, -38.4160970, -34.6036844, 23.6345010, -34.6036844, NA)
user_location <- c(
  "Buenos Aires, Argentina",
  "Argentina",
  "Buenos Aires",
  "México",
  "Ciudad Autónoma de Buenos Aire",
  "London"
)
n <- c(88, 81, 71, 29, 26, 101)
country <- c(
  "Argentina", "Argentina", "Argentina", "México", "Argentina", "Other"
)

# stringsAsFactors = FALSE keeps the text columns as character on R < 4.0,
# matching how the input data frame `b` is built.
result <- data.frame(
  lat, lon, user_location, n, country,
  stringsAsFactors = FALSE
)

标记过滤器

# Location fragments to look for, one vector per target country.
c_united_states <- c(
  "USA", "Washington", "DC", "Chicago", "United States", "NY", "Miami"
)
c_argentina <- c("Buenos Aires", "Buenos Aire", "Argentina")
c_mexico <- c("México")

解决方案

library(dplyr)

1. 用竖线(`|`,正则表达式中的“或”)把每个国家/地区要匹配的城市名连接起来
 # Collapse each vector into a single "a|b|c" alternation string.
 c1 <- paste(c_united_states, collapse = "|")
 c2 <- paste(c_argentina, collapse = "|")
 c3 <- paste(c_mexico, collapse = "|")

2.为匹配的城市和国家/地区名称创建数据框。

# Lookup table with one row per country: the first column holds the
# alternation pattern, the second the country label. The columns are later
# read as X1 and X2.
# FALSE is spelled out because `F` is an ordinary, reassignable variable.
c_country <- rbind(c(c1, "US"), c(c2, "Argentina"), c(c3, "Mexico")) %>%
  data.frame(stringsAsFactors = FALSE)

3. 先把新列country的所有值初始化为“Other”(其他)

# Start every row with the fallback label "Other".
b[["country"]] <- "Other"

4. 使用循环逐个进行匹配,因为grep / grepl的pattern参数一次只能接受一个模式。

# Apply each lookup row in turn: rows of `b` whose user_location matches the
# pattern take that country label, all others keep their current value.
# A location matching several patterns ends up with the label of the last
# matching row of c_country.
# seq_len() is used so an empty lookup table skips the loop; 1:nrow() would
# iterate over c(1, 0) instead.
for (i in seq_len(nrow(c_country))) {
  b$country <- ifelse(
    grepl(pattern = c_country$X1[i], x = b$user_location),
    c_country$X2[i],
    b$country
  )
}

您的最终结果如下。

enter image description here