我在 if/else 语句中对字符串使用 any() 函数时遇到了问题。请注意,代码中的 print("A") 只是一个示例;实际上,如果某列包含特定的值,我需要执行一系列操作。
随机生成的数据
# Answer options for the Likert-type items. NA is part of the pool so that
# the sampled columns contain missing responses.
level <- c(
  "Strongly Agree", "Agree", "Neither agree or disagree",
  "Disagree", "Strongly disagree", NA
)

# Toy survey data: 30 rows with two "pre_" and two "post_" items.
# sample() on a character vector already returns character, so no
# as.character() wrapper is needed; stringsAsFactors = TRUE then stores
# every column as a factor.
df <- data.frame(
  pre_1 = sample(c("Yes", "No", NA), 30, replace = TRUE),
  pre_2 = sample(level, 30, replace = TRUE),
  post_1 = sample(level, 30, replace = TRUE),
  post_2 = sample(c("<90%", "0-80%", ">90", NA), 30, replace = TRUE),
  stringsAsFactors = TRUE
)
选取数据框中所需的部分(以 "post_" 开头的列),并根据各列的取值打印相应的内容。在本例中,如果某列包含以下任一取值,我需要打印 "A":"Strongly Agree", "Agree", "Neither agree or disagree","Disagree", "Strongly disagree"
# Loop over the positions of the "post_" columns and print "A" when the
# column contains "Neither agree or disagree", otherwise "B".
select(df, starts_with("post_")) %>%
length() %>%
seq(1,.,1) %>%
for (i in .){
# NOTE: `==` yields NA for missing cells, and any() returns NA when no
# comparison is TRUE but at least one is NA. if () cannot branch on NA and
# stops with "missing value where TRUE/FALSE needed".
# any(..., na.rm = TRUE) or %in% would avoid the NA.
if (any(c("Neither agree or disagree") == (select(df, starts_with("post_"))[i]))){
print ("A")
} else {print ("B")}
}
这给出了错误
Error in if (any(c("Neither agree or disagree") == (select(df, starts_with("post_"))[i]))) { :
missing value where TRUE/FALSE needed
请注意,如果我运行下面这段代码,它可以正常工作:
# Prints "A" if any comparison against the third column of df is TRUE,
# otherwise "B". It runs without error whenever at least one comparison is
# TRUE, because any() then returns TRUE even if other comparisons are NA.
# NOTE: `==` recycles the two strings element-wise along the column rather
# than testing membership; %in% would test each cell against the whole set.
if (any(c("Neither agree or disagree","Agree") == df[3])){print ("A")} else {
print ("B")}
任何帮助表示赞赏
答案 0 :(得分:0)
尽可能避免循环。 R的优势是矢量计算!
尝试以下
# Label every row "A" if any of its cells equals the search term, else "B".
# Start from one "B" per row so rows without a match keep the default
# (assigning a single "B" would shrink the vector to length 1 and leave NA
# in the non-matching positions after the indexed assignment below).
results <- rep("B", nrow(df))
# Rows that contain the search term at least once; NA cells are ignored.
at_least_one <- rowSums(df == "Strongly Agree", na.rm = TRUE) > 0
results[at_least_one] <- "A" # flag the matching rows
您可以对要查找的各个取值(例如 "Strongly Agree"、"Agree")进行循环,并依次覆盖结果向量!希望对您有所帮助!
答案 1 :(得分:0)
一种有点朴素的方法:用 T/F 去索引 LETTERS:
1. 通过 grepl 选择列名符合模式 colptrn 的列;
2. 将 df 转换为列表;
3. 用 sapply 遍历列表项,将它们与您的清单进行比较;
4. 如果有匹配(any 为 TRUE),取反后 FALSE + 1 对应 "A";如果没有匹配,取反后 TRUE + 1 对应 "B";
# Label each column whose name matches `colptrn` with "A" if the column
# contains any of the values in `i`, otherwise "B".
#
#   df       data frame to inspect
#   i        vector of values to look for
#   colptrn  regular expression matched against the column names
#   na.rm    passed on to any()
#
# Returns the sapply() result over the selected columns: a named character
# vector of "A"/"B" when at least one column name matches `colptrn`.
fu <- function(df, i, colptrn, na.rm = T){
  keep <- grepl(colptrn, colnames(df))
  columns <- as.list(df[keep])
  label_column <- function(li) {
    found <- any(i %in% li, na.rm = na.rm)
    if (found) "A" else "B"
  }
  sapply(columns, label_column)
}
## Test
fu(df, c("Neither agree or disagree", "Agree"), "post_")
post_1 post_2
"A" "B"
fu(df, c("Neither agree or disagree", "Agree"), ".*")
pre_1 pre_2 post_1 post_2
"B" "A" "A" "B"
fu(df, c("Neither agree or disagree", "Agree"), "postpostup")
named list()
答案 2 :(得分:0)
我采纳了 @tobiaspk1 的建议,使用 rowSums 来检查列中是否包含某些取值。问题在于,我希望纳入更多条件(每一列的所有因子水平),以确保该函数在其他情形下也能正常工作(例如,当缺少中间类别时)。
# Ordered answer scales recognised by dfplot(). `marker` is the response
# whose presence in a column identifies the scale; `levels` is the order in
# which the categories are shown on the x axis. The list order matters:
# the first scale whose marker is found wins.
dfplot_scales <- list(
  list(
    marker = "Agree",
    levels = c("Strongly Agree", "Agree", "Neither agree or disagree",
               "Disagree", "Strongly disagree")
  ),
  list(
    marker = "51-75%",
    levels = c("1-25%", "26-50%", "51-75%", "75-90%", "91-100%")
  ),
  list(
    marker = "Somewhat too easy",
    levels = c("Very easy", "Somewhat too easy", "About right",
               "Somewhat challenging", "Very challenging")
  ),
  list(
    marker = "Too slow",
    levels = c("Too slow", "Slow", "About right", "Fast", "Too fast")
  ),
  list(
    marker = "Between 3 and 4 hours",
    levels = c("Less than 2 hours", "Between 2 and 3 hours",
               "Between 3 and 4 hours", "Between 4 and 5 hours",
               "More than 5 hours")
  )
)

# Build a relative-frequency bar chart for one column of responses.
#
#   values  vector or factor of responses; NA entries are dropped
#   title   plot title
#
# Returns the ggplot object without printing it.
dfplot_bar <- function(values, title) {
  plot_data <- na.omit(data.frame(response = values))
  ggplot(plot_data, aes(x = response)) +
    geom_bar(aes(y = (..count..) / sum(..count..)), stat = "count") +
    geom_text(
      aes(
        label = paste(round((..count..) / sum(..count..) * 100), "%"),
        y = (..count..) / sum(..count..)
      ),
      stat = "count",
      vjust = -.5
    ) +
    scale_y_continuous(labels = percent, limits = c(-0, 1)) +
    # drop = FALSE keeps categories nobody chose visible on the axis
    scale_x_discrete(drop = FALSE) +
    ylab("Relative Frequencies (%)") +
    ggtitle(title) +
    theme_light(base_size = 12) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    theme(plot.title = element_text(hjust = 0.5, size = 10))
}

# Print one relative-frequency bar chart per column of `df` whose name
# starts with `prefix`.
#
#   df      data frame holding the survey responses
#   prefix  column-name prefix selecting the columns to plot
#
# Columns for which dummy() is FALSE and that contain the marker of one of
# the scales in `dfplot_scales` are converted to an ordered factor with
# that scale's levels (responses outside the scale become NA and are
# dropped); all other columns are plotted as they are.
#
# Assumes dplyr, ggplot2 and scales are attached and that dummy() is
# defined elsewhere -- TODO confirm what dummy() returns.
#
# Called for its side effect (printing the plots); returns NULL invisibly.
dfplot <- function(df, prefix) {
  selected <- select(df, starts_with(prefix))
  # seq_along() gives an empty loop when no column matches the prefix
  for (i in seq_along(selected)) {
    values <- selected[[i]]
    if (dummy(as.character(values)) == FALSE) {
      for (answer_scale in dfplot_scales) {
        # %in% never yields NA, so missing responses cannot break the test
        if (answer_scale$marker %in% values) {
          values <- factor(values, levels = answer_scale$levels,
                           ordered = TRUE)
          break
        }
      }
    }
    print(dfplot_bar(values, names(selected)[i]))
  }
  invisible(NULL)
}