我有一个名为'names'的数据集,如下所示。 'expected.entry.in.this.col'列目前是空的,但下面我已经展示了它应该是什么样子。我该如何编写逻辑?
基本上,我认为需要逐行循环,用 'if' 条件检查每一行的格式(format),然后把相应的结果写入 'expected.entry.in.this.col'。具体该怎么做呢?(我对用 R 语法完成这类任务还不太熟悉。)
names
编辑:第 3 行有误,应为 williams.harry。
答案 0 :(得分:1)
尝试这样的事情:
# Two-row example: one person per row, plus the username pattern ("format")
# that the row should be rendered with.
df <- data.frame(
  first = c("Kevin", "Megan"),
  last = c("Spacey", "Fox"),
  format = c("f.last", "F.L.")
)

# Pieces shared by both patterns.
first_initial <- substr(df$first, 1, 1)
last_initial <- substr(df$last, 1, 1)

# "f.last" -> lower-cased first initial + "." + last name
# "F.L."   -> first initial + "." + last initial, case kept, no trailing dot
# Any other pattern is left as NA.
df$new <- ifelse(
  df$format == "f.last",
  tolower(paste0(first_initial, ".", df$last)),
  ifelse(
    df$format == "F.L.",
    paste0(first_initial, ".", last_initial),
    NA
  )
)
df
first last format new
1 Kevin Spacey f.last k.spacey
2 Megan Fox F.L. M.F
答案 1 :(得分:0)
我是这样做的,希望你能理解其中的逻辑!请告诉我这是否是你想要的结果。
# Example data: one person per row plus the username pattern ("format") that
# row should be rendered with.
# NOTE(review): `names` and `format` shadow base R functions of the same name;
# the identifiers are kept because the code below refers to them.
first <- c("John", "Michael", "Harry", "Stephen", "Simon", "Rachael", "Paul")
last <- c("smith", "Johnson", "Williams", "Jones", "Adams", "Moore", "Taylor")
format <- c(
  "first.last", "firstlast", "last.first", "f.last", "flast", "f_last",
  "f_last"
)

# Build the data frame directly from the vectors. stringsAsFactors = FALSE
# keeps all three columns as character on every R version, so no cbind()
# matrix detour and no as.character() repair step is needed afterwards.
names <- data.frame(first, last, format, stringsAsFactors = FALSE)
library(stringr)
# Fill names$new_var with the username implied by each row's `format`.
#
# Every supported pattern is computed for all rows at once (vectorized), and
# each row then picks the column that matches its own format. This works for
# a zero-row data frame and for NA formats, where a 1:n row loop with
# if/else would fail.
#
# Rows whose format is not one of the six patterns below (or is NA) get
# NA_character_ rather than being rendered with a guessed pattern.

given <- tolower(names[["first"]])
family <- tolower(names[["last"]])
initial <- tolower(str_sub(names[["first"]], 1, 1))

# One column per supported pattern; the column name is the pattern itself.
candidates <- cbind(
  first.last = paste(given, family, sep = "."),
  firstlast = paste0(given, family),
  last.first = paste(family, given, sep = "."),
  f.last = paste(initial, family, sep = "."),
  flast = paste0(initial, family),
  f_last = paste(initial, family, sep = "_")
)

# Column to read for each row; match() yields NA for unrecognised formats.
pattern_col <- match(names[["format"]], colnames(candidates))

# A two-column (row, column) index matrix picks one cell per row; an index
# row containing NA produces NA in the result.
names[["new_var"]] <- candidates[cbind(seq_len(nrow(names)), pattern_col)]
names
first last format new_var
1 John smith first.last john.smith
2 Michael Johnson firstlast michaeljohnson
3 Harry Williams last.first williams.harry
4 Stephen Jones f.last s.jones
5 Simon Adams flast sadams
6 Rachael Moore f_last r_moore
7 Paul Taylor f_last p_taylor
>
答案 2 :(得分:0)
这是一个使用"查找表"、不需要 'if' 的解决方案:
# Sample input: one person per row, the username pattern to apply ("format"),
# and an empty "expected" column that is filled in further down.
mydf <- data.frame(
  first = c(
    "John", "Michael", "Harry", "Stephen", "Simon", "Rachael", "Paul"
  ),
  last = c(
    "Smith", "Johnson", "Williams", "Jones", "Adams", "Moore", "Taylor"
  ),
  format = c(
    "first.last", "firstlast", "last.first", "f.last", "flast", "f_last",
    "f_last"
  ),
  # A single empty string is recycled to one "" per row.
  expected = "",
  stringsAsFactors = FALSE
)
library(dplyr)
# Lookup-table solution (no if/else). Each known format is described by three
# small tables: which name leads, whether the first name is cut down to its
# initial, and which separator joins the two parts.

# Formats in which the first name (or its initial) comes before the last name.
firstList <- c("first.last", "firstlast", "f.last", "flast", "f_last")
# Formats that use only the first letter of the first name.
initialList <- c("f.last", "flast", "f_last")
# Separator placed between the two parts, keyed by format.
separators <- c(
  first.last = ".", firstlast = "", last.first = ".",
  f.last = ".", flast = "", f_last = "_"
)

# as.character() so a factor column would index `separators` by label rather
# than by integer code.
fmt <- as.character(mydf$format)
given <- tolower(mydf$first)
family <- tolower(mydf$last)

# Shorten the first name to its initial where the format asks for it.
use_initial <- fmt %in% initialList
given[use_initial] <- substr(given[use_initial], 1, 1)

# Default to last-name-first, then swap the parts for the formats in firstList.
first_leads <- fmt %in% firstList
lead <- family
lead[first_leads] <- given[first_leads]
trail <- given
trail[first_leads] <- family[first_leads]

# Only rows with a recognised format are written; any other row keeps its
# current `expected` value.
known <- fmt %in% names(separators)
mydf$expected[known] <- paste0(lead, separators[fmt], trail)[known]