我通过调用 LimeSurvey 的 API,将数据导入我正在开发的 R Shiny 应用程序中。随后我对数据框进行处理,使其只保留某一个人在不同时间提交的回答。数据框大致如下所示:
# Example survey answers: one element per submission (nine submissions),
# each either "Yes" or "No".
Appetite <- c("No", "Yes", "No", "No", "No", "No", "No", "No", "No")
Dental.Health <- c("No", "Yes", "No", "No", "No", "No", "Yes", "Yes", "No")
Dry.mouth <- c("No", "Yes", "Yes", "Yes", "Yes", "No", "Yes", "Yes", "No")
Mouth.opening <- c("No", "No", "Yes", "Yes", "Yes", "No", "Yes", "Yes", "No")
Pain.elsewhere <- c("No", "Yes", "No", "No", "No", "No", "No", "No", "No")
Sleeping <- c("No", "No", "No", "No", "No", "Yes", "No", "No", "No")
Sore.mouth <- c("No", "No", "Yes", "Yes", "No", "No", "No", "No", "No")
Swallowing <- c("No", "No", "No", "No", "Yes", "No", "No", "No", "No")
Cancer.treatment <- c("No", "No", "Yes", "Yes", "No", "Yes", "No", "No", "No")
Support.for.my.family <- c("No", "No", "Yes", "Yes", "No", "No", "No", "No", "No")
Fear.of.cancer.coming.back <- c("No", "No", "Yes", "Yes", "No", "No", "Yes", "No", "No")
Intimacy <- c("Yes", "No", "No", "No", "No", "No", "No", "No", "No")
Dentist <- c("No", "Yes", "No", "No", "No", "No", "No", "No", "No")
Dietician <- c("No", "No", "Yes", "Yes", "No", "No", "No", "No", "No")

# Submission timestamps, kept as plain text; every one carries the same
# midnight time part, so it is appended to the date part in one go.
Date.submitted <- paste(
  c(
    "2002-07-25", "2002-09-05", "2003-01-09",
    "2003-01-09", "2003-07-17", "2003-11-06",
    "2004-12-17", "2005-06-03", "2005-12-17"
  ),
  "00:00:00"
)

# Assemble everything into one data frame, timestamp column first.
theDataFrame <- data.frame(
  Date.submitted,
  Appetite,
  Dental.Health,
  Dry.mouth,
  Mouth.opening,
  Pain.elsewhere,
  Sleeping,
  Sore.mouth,
  Swallowing,
  Cancer.treatment,
  Support.for.my.family,
  Fear.of.cancer.coming.back,
  Intimacy,
  Dentist,
  Dietician
)
需要说明的是,与上述示例相比,实际的数据框可能包含更多(或更少)的观测值,以及更多(或更少)的变量。
我的目标是制作如下所示的动态直方图:
library(dplyr)
library(ggplot2)
library(tidyr)
# Demo data: one row per day for the previous ten days, with six randomly
# answered Yes/No questions (q3..q8). No seed is set, so the answers differ
# on every run.
df <- data.frame(
  timeline = Sys.Date() - 1:10,
  q3 = sample(c("Yes", "No"), size = 10, replace = TRUE),
  q4 = sample(c("Yes", "No"), size = 10, replace = TRUE),
  q5 = sample(c("Yes", "No"), size = 10, replace = TRUE),
  q6 = sample(c("Yes", "No"), size = 10, replace = TRUE),
  q7 = sample(c("Yes", "No"), size = 10, replace = TRUE),
  q8 = sample(c("Yes", "No"), size = 10, replace = TRUE),
  # Spelled out as FALSE: the shorthand F is an ordinary variable that can be
  # reassigned, which would silently change the result.
  stringsAsFactors = FALSE
) %>%
  # Recode every question column in one step: 1 for "Yes", 0 otherwise.
  mutate(across(q3:q8, ~ ifelse(.x == "Yes", 1, 0))) %>%
  # Reshape to long format: one row per (timeline, question) pair.
  # NOTE(review): gather() is superseded by tidyr::pivot_longer(); it is kept
  # here so the class and row order of `df` stay as they were.
  gather(key = question, value = value, q3, q4, q5, q6, q7, q8)

# Bars take their height from `value` and are filled by question;
# geom_col() is the direct equivalent of geom_bar(stat = "identity").
g <- ggplot(df, aes(x = timeline, y = value, fill = question)) +
  geom_col()
g
我想我需要用 library(lubridate) 来处理时间轴,因为整个数据框都是纯文本。我按如下方式处理列名中的 '.':
# Derive human-readable labels from the column names: drop a leading "X.."
# prefix, then turn every remaining dot into a space.
myColNames <- colnames(theDataFrame)
myNames <- gsub("\\.", " ", gsub("^X\\.\\.", "", myColNames))
names(theDataFrame) <- myNames # the entries of myChoices take their "labels" from myNames
但最具挑战性的部分是让它动态运行。数据集只会包含 Date.submitted 列,以及数量不定(x 个)、取值只有 "Yes" 或 "No" 的其他列。
我希望我已经提供了足够的信息(这是我在Stack Exchange上的第一个问题!)
答案 0 :(得分:1)
我们可以使用base R
# Recode every column except the first from "Yes"/other to 1/0: the comparison
# gives a logical matrix, and adding 0L turns it into integers.
theDataFrame[-1] <- (theDataFrame[-1] == "Yes") + 0L
或者当数据集很大时使用lapply
# Same recoding done column by column: each column after the first becomes an
# integer vector holding 1 where the answer is "Yes" and 0 elsewhere.
theDataFrame[-1] <- lapply(
  theDataFrame[-1],
  function(answers) as.integer(answers == "Yes")
)
答案 1 :(得分:0)
您还可以使用 dplyr::mutate_all 和 purrr::map。
注意:我在创建 theDataFrame 时关闭了 stringsAsFactors(即不把字符串转换为因子):
# Rebuild the data frame with the answers kept as character vectors instead of
# factors. FALSE is spelled out because the shorthand F is an ordinary
# variable that can be reassigned.
theDataFrame <- data.frame(
  Date.submitted,
  Appetite,
  Dental.Health,
  Dry.mouth,
  Mouth.opening,
  Pain.elsewhere,
  Sleeping,
  Sore.mouth,
  Swallowing,
  Cancer.treatment,
  Support.for.my.family,
  Fear.of.cancer.coming.back,
  Intimacy,
  Dentist,
  Dietician,
  stringsAsFactors = FALSE
)
创建一个函数来进行所需的转换,例如:
#' Recode "Yes"/"No" answers as integers
#'
#' Works on a single value or on a whole vector. Missing values no longer
#' raise an error in the `if` condition, and vectors longer than one are
#' handled element-wise.
#'
#' @param x A value or vector of answers.
#' @return If `x` holds at least one "Yes"/"No" and nothing else apart from
#'   `NA`, an integer vector of the same length with 1L for "Yes", 0L for "No"
#'   and `NA` where `x` is missing. Otherwise `x` is returned unchanged, so
#'   other values (e.g. a timestamp string) pass straight through.
ConvertYesNo <- function(x) {
  missing_value <- is.na(x)
  is_yes <- !missing_value & x == "Yes"
  is_no <- !missing_value & x == "No"
  recognised <- is_yes | is_no

  # Anything that is not purely Yes/No (ignoring NAs) is left untouched; this
  # also covers empty and all-NA input.
  if (!any(recognised) || !all(recognised | missing_value)) {
    return(x)
  }

  y <- as.integer(is_yes)
  y[missing_value] <- NA_integer_
  y
}
- 使用 mutate_all(它会处理所有列),或者使用 mutate_at 选择所需的列,并配合 map 函数,如下所示:
# Recode every answer column to integer 0/1, leaving Date.submitted untouched.
# - mutate_at() with a formula replaces mutate_all(funs(...)): funs() is
#   deprecated in dplyr, and the date column does not need converting.
# - map_int() replaces map_chr(), which turned the 0/1 codes into the strings
#   "0"/"1" and left the columns unusable as numbers.
# - purrr is called through its namespace, so it does not have to be attached.
theDataFramex <- theDataFrame %>%
  mutate_at(vars(-Date.submitted), ~ purrr::map_int(., ConvertYesNo))
> head(theDataFramex,3 )
Date.submitted Appetite Dental.Health Dry.mouth Mouth.opening Pain.elsewhere Sleeping
1 2002-07-25 00:00:00 0 0 0 0 0 0
2 2002-09-05 00:00:00 1 1 1 0 1 0
3 2003-01-09 00:00:00 0 0 1 1 0 0
Sore.mouth Swallowing Cancer.treatment Support.for.my.family Fear.of.cancer.coming.back
1 0 0 0 0 0
2 0 0 0 0 0
3 1 0 1 1 1
Intimacy Dentist Dietician
1 1 0 0
2 0 1 0
3 0 0 1