我正在处理一个数据集。我知道如何使用mutate和filter设置单个条件,但我想根据user_location的值设置多个条件。此外,如果user_location不属于任何已列出的国家/地区,我想将country设置为“其他”。
# 1. Some values from the original dataframe
# Sample rows from the original data frame: two coordinate columns, the
# free-text location string, and a numeric column `n`.
# NOTE(review): -102.55 is outside the valid latitude range, so the `lat` and
# `lon` values look swapped -- confirm against the source data.
lat <- c(-58.3815591, -63.6166720, -58.3815591, -102.5527840, -58.3815591)
lon <- c(-34.6036844, -38.4160970, -34.6036844, 23.6345010, -34.6036844)
user_location <- c(
  "Buenos Aires, Argentina",
  "Argentina",
  "Buenos Aires",
  "México",
  "Ciudad Autónoma de Buenos Aire"
)
n <- c(88, 81, 71, 29, 26)
b <- data.frame(lat = lat, lon = lon, user_location = user_location, n = n)
# 2. Filter
#Just for the example:
# Keyword lists, one character vector per country.
# NOTE(review): in a regular expression `*` is a quantifier, not a wildcard,
# and exact matching such as `%in%` treats it literally -- confirm how the
# entries containing `*` are meant to match.
c_argentina <- c(
  "Argentina", "Buenos Aires", "Bs As", "Bs Aires", "iudad Aut*noma",
  "San Telmo"
)
c_mexico <- c("M*xico", "Veracruz")

# For the complete dataset, filters for 10+ countries will be used.
c_peru <- c("Per*", "Lima")
c_brazil <- c("Brazil", "Brasil", "Brasilia", "Rio de Janeiro", "Sao Paulo")
c_chile <- c("Chile", "Santiago")
c_uruguay <- c("Uruguay", "Montevideo", "Durazno")
c_ecuador <- c("Ecuador", "Guayaquil")
c_united_states <- c(
  "USA", "Washington", "DC", "Chicago", "United States", "NY", "Miami"
)
c_canada <- c("Canada", "Montreal", "Alberta", "Ottawa")
# 3. Create a new column
# For now, I understand how to setup just one condition.
# Keep only the rows whose user_location is exactly one of the c_argentina
# entries and label them "Argentina". `%in%` compares whole strings, so
# "Buenos Aires, Argentina" is not kept by this filter.
b <- b %>%
  as_tibble() %>%
  subset(user_location %in% c_argentina) %>%
  mutate(country = "Argentina")
答案 0 :(得分:0)
原始数据
# Raw input for the answer. user_location is kept as character (not factor)
# so that pattern matching and label assignment work on plain strings.
lat <- c(-58.3815591, -63.6166720, -58.3815591, -102.5527840, -58.3815591)
lon <- c(-34.6036844, -38.4160970, -34.6036844, 23.6345010, -34.6036844)
user_location <- c(
  "Buenos Aires, Argentina",
  "Argentina",
  "Buenos Aires",
  "México",
  "Ciudad Autónoma de Buenos Aire"
)
n <- c(88, 81, 71, 29, 26)
# Spell out FALSE: `F` is an ordinary variable that can be reassigned.
b <- data.frame(lat, lon, user_location, n, stringsAsFactors = FALSE)
预期结果
# Expected output: the input rows plus one unmatched location ("London"),
# each with the country label the solution should assign.
lat <- c(-58.3815591, -63.6166720, -58.3815591, -102.5527840, -58.3815591, NA)
lon <- c(-34.6036844, -38.4160970, -34.6036844, 23.6345010, -34.6036844, NA)
user_location <- c(
  "Buenos Aires, Argentina",
  "Argentina",
  "Buenos Aires",
  "México",
  "Ciudad Autónoma de Buenos Aire",
  "London"
)
n <- c(88, 81, 71, 29, 26, 101)
# The label for the México row is "Mexico", the name the solution writes.
country <- c("Argentina", "Argentina", "Argentina", "Mexico", "Argentina", "Other")
# Keep strings as character so the frame is comparable with the input frame.
result <- data.frame(lat, lon, user_location, n, country,
                     stringsAsFactors = FALSE)
标记过滤器
# Keywords that identify each country inside a free-text location string.
c_united_states <- c(
  "USA", "Washington", "DC", "Chicago", "United States", "NY", "Miami"
)
c_argentina <- c("Buenos Aires", "Buenos Aire", "Argentina")
c_mexico <- "México"
解决方案
library(dplyr)
1.用竖线(|)将匹配的城市连接起来
c1 <- paste0(c_united_states,collapse = "|")
# Join each country's keywords with "|" so the result can be used as a single
# regular-expression alternation.
c2 <- paste0(c_argentina, collapse = "|")
c3 <- paste0(c_mexico, collapse = "|")
2.为匹配的城市和国家/地区名称创建数据框。
# Lookup table with one row per country: X1 holds the "|"-joined keyword
# pattern, X2 the country label to assign. The column names X1/X2 are spelled
# out because later code reads c_country$X1 and c_country$X2.
c_country <- data.frame(
  X1 = c(c1, c2, c3),
  X2 = c("US", "Argentina", "Mexico"),
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
  stringsAsFactors = FALSE
)
3.将所有新的国家/地区名称默认设为“其他”
# Start every row with the fallback label "Other".
b[["country"]] <- "Other"
4.使用循环来进行匹配,因为grep / grepl的匹配条件一次只能使用一个。
# Relabel rows one country at a time: wherever user_location matches the i-th
# keyword pattern (X1), write that country's label (X2); all other rows keep
# their current label. A row matching several patterns ends up with the label
# of the last matching row of c_country.
# seq_len() makes the loop a no-op when c_country has no rows, whereas
# 1:nrow() would iterate over c(1, 0).
for (i in seq_len(nrow(c_country))) {
  is_match <- grepl(pattern = c_country$X1[i], x = b$user_location)
  b$country <- ifelse(is_match, c_country$X2[i], b$country)
}
您的最终结果如下。