有没有一种简单的方法可以在R中生成游泳者情节?与KM曲线中的数据相同,但每个单独的生存表示为一条线。例如:
我搜索过stack -flow,这是一个R-help邮件列表,并且在没有明显答案的情况下咨询了谷歌博士,尽管我的搜索技术可能不是最理想的。谢谢!
**** ADDENDED **** 抱歉没有恰当地提出问题 - 这是我第一次!到处玩,我已经能够做到以下几点:
OS DeathYN TreatmentGroup
4 444 days 1 0
5 553 days 1 0
8 812 days 0 0
1 844 days 0 0
10 1071 days 0 0
9 1147 days 0 0
6 1349 days 0 0
3 1375 days 0 0
2 1384 days 0 1
7 1687 days 0 0
orderedData$GroupColor[orderedData$TreatmentGroup==0] <- "yellow"
orderedData$GroupColor[orderedData$TreatmentGroup==1] <- "red"
orderedData$YCoord <- barplot(as.numeric(orderedData$OS), horiz=TRUE, col=orderedData$GroupColor, xlim=c(0,max(orderedData$OS) + 50), xlab="Overall Survival")
points(x=20+as.numeric(orderedData$OS), y=orderedData$YCoord,pch=62, col="green")
legend(1000,2, c("Control", "Treatment", "still living"), col=c("yellow","red", "green"), lty=1, lwd=c(10,10,0),pch=62)
这让我现在足够接近,但美学并不完美。如果有一个包或更好的解决方案,有人可以建议我很乐意看到它!
答案 0 :(得分:10)
你要求&#34;容易&#34;生成游泳者情节的方法。这可能比您希望的更复杂,但它与您发布的内容非常接近。如果您需要制作大量的游泳运动员情节,您可以将其调整为适合您的东西,然后将其转换为函数。
首先创建一些假数据:
library(ggplot2)
library(reshape2)
library(dplyr)
library(grid)
set.seed(33)
dat = data.frame(Subject = 1:10,
Months = sample(4:20, 10, replace=TRUE),
Treated=sample(0:1, 10, replace=TRUE),
Stage = sample(1:4, 10, replace=TRUE),
Continued=sample(0:1, 10, replace=TRUE))
dat = dat %>%
group_by(Subject) %>%
mutate(Complete=sample(c(4:(max(Months)-1),NA), 1,
prob=c(rep(1, length(4:(max(Months)-1))),5), replace=TRUE),
Partial=sample(c(4:(max(Months)-1),NA), 1,
prob=c(rep(1, length(4:(max(Months)-1))),5), replace=TRUE),
Durable=sample(c(-0.5,NA), 1, replace=TRUE))
# Order Subjects by Months
dat$Subject = factor(dat$Subject, levels=dat$Subject[order(dat$Months)])
# Melt part of data frame for adding points to bars
dat.m = melt(dat %>% select(Subject, Months, Complete, Partial, Durable),
id.var=c("Subject","Months"))
现在的情节:
ggplot(dat, aes(Subject, Months)) +
geom_bar(stat="identity", aes(fill=factor(Stage)), width=0.7) +
geom_point(data=dat.m,
aes(Subject, value, colour=variable, shape=variable), size=4) +
geom_segment(data=dat %>% filter(Continued==1),
aes(x=Subject, xend=Subject, y=Months + 0.1, yend=Months + 1),
pch=15, size=0.8, arrow=arrow(type="closed", length=unit(0.1,"in"))) +
coord_flip() +
scale_fill_manual(values=hcl(seq(15,375,length.out=5)[1:4],100,70)) +
scale_colour_manual(values=c(hcl(seq(15,375,length.out=3)[1:2],100,40),"black")) +
scale_y_continuous(limits=c(-1,20), breaks=0:20) +
labs(fill="Disease Stage", colour="", shape="",
x="Subject Recevied Study Drug") +
theme_bw() +
theme(panel.grid.minor=element_blank(),
panel.grid.major=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank())
答案 1 :(得分:0)
鉴于此处的游泳者数据帧已转换为数据框https://blogs.sas.com/content/graphicallyspeaking/files/2014/06/Swimmer_93.txt
df %>% dplyr::glimpse()
##观察:15
##变量:9
## $ subjectID&#34; 1&#34;,&#34; 2&#34;,&#34; 3&#34;,&#34; 3&#34;,&#34; 4&#34;,& #34; 4&#34;,&#34; 5&#34;,&#34; 5&#34;,&#34; 5&#34;,...
## $阶段第1阶段,阶段2,阶段3,阶段3,阶段4,...
## $ startTime 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 ...... ## $ endTime 18.5,17.0,14.0,14.0,13.5,13.5,12.5,12 ...
## $ isContinued TRUE,FALSE,TRUE,TRUE,TRUE,TRUE,TRUE,T ... ## $ responseType&#34;完成回复&#34;,&#34;完成回复&#34;,&#34; P ...
## $ responseStartTime 6.5,10.5,2.5,6.0,7.0,11.5,3.5,6.5,10 ......
## $ responseEndTime 13.5,17.0,3.5,NA,11.0,NA,4.5,8.5,NA,...
## $ Durable -0.25,-0.25,-0.25,-0.25,NA,NA,-0.25,-...
df.shapes <- df %>%
# Get just the subject and response time columns
dplyr::select(subjectID, responseType, responseStartTime) %>%
# Melt the data frame, so one row per response value.
reshape2::melt(id.vars=c("subjectID", "responseType"),
value.name="time") %>%
# Remove na values
dplyr::filter(!is.na(time)) %>%
# Remove response variable column
dplyr::select(-variable) %>%
# Add 'start' to the end of the response type
dplyr::mutate(responseType=paste(responseType, "start", sep=" "))
# Add the end time for each
df.shapes %<>%
dplyr::bind_rows(df %>%
dplyr::select(subjectID, endTime, responseEndTime,
isContinued) %>%
# Place endtime as response endtime if not
# continuing and responseEndTime is NA
dplyr::mutate(responseEndTime=dplyr::if_else(
!isContinued & is.na(responseEndTime),
endTime, responseEndTime)) %>%
dplyr::select(-endTime, -isContinued) %>%
# Remove other existing NA responseEndTimes
dplyr::filter(!is.na(responseEndTime)) %>%
dplyr::mutate(responseType="Response end") %>%
dplyr::rename(time=responseEndTime))
# Append on the durable column
df.shapes %<>%
dplyr::bind_rows(df %>%
dplyr::select(subjectID, Durable) %>%
dplyr::filter(!is.na(Durable)) %>%
dplyr::mutate(responseType="Durable") %>%
dplyr::rename(time=Durable))
# Add on the arrow sets
df.shapes %<>%
dplyr::bind_rows(df %>%
dplyr::select(subjectID, endTime, isContinued) %>%
dplyr::filter(isContinued) %>%
dplyr::select(-isContinued) %>%
dplyr::mutate(responseType="Continued Treatment") %>%
dplyr::mutate(endTime=endTime+0.25) %>%
dplyr::rename(time=endTime))
responseLevels = c("Complete response start",
"Partial response start",
"Response end", "Durable", "Continued Treatment")
# Convert responseType to factor and set the levels
df.shapes %<>%
dplyr::mutate(responseType = factor(responseType,
levels=responseLevels)) %>%
# Order by response type
dplyr::arrange(desc(responseType))
设置Unicode变量。
unicode = list(triangle=sprintf('\u25B2'),
circle=sprintf('\u25CF'),
square=sprintf('\u25A0'),
arrow=sprintf('\u2794'))
df.shapes数据框应该看起来像这样
df %>% dplyr::glimpse()
##观察:45
##变量:3
## $ subjectID&#34; 1&#34;,&#34; 3&#34;,&#34; 3&#34;,&#34; 4&#34;,&#34; 4&#34;,& #34; 5&#34;,&#34; 5&#34;,&#34; 5&#34;,&#34; 6&#34;,&#34; 6&#34;,...
## $ responseType继续治疗,继续治疗,继续...
## $ time 18.75,14.25,14.25,13.75,13.75,12.75,12.75,......
现在将数据框传输到ggplot
df %>%
# Get just the variables we need for the base of the plot
dplyr::select(subjectID, endTime, stage) %>%
# Remove duplicate rows
dplyr::distinct() %>%
# Order subject ID by numeric value
dplyr::mutate(subjectID=forcats::fct_reorder(.f=subjectID,
.x=as.numeric(subjectID),
.desc = TRUE)) %>%
# Pipe into ggplot
ggplot(aes(subjectID, endTime)) + # Base axis
geom_bar(stat="identity", aes(fill=factor(stage))) + # Bar plot
geom_point(data=df.shapes, size=5, # Use df.shapes to add reponse points
aes(subjectID, time, colour=responseType,
shape=responseType)) +
coord_flip() + # Flip to horizonal bar plot.
scale_colour_manual(values=c(RColorBrewer::brewer.pal(3, "Set1")[1:2],
rep("black", 3))) + # Add colours
scale_shape_manual(values=c(rep(unicode[["triangle"]], 2), # Add shapes
unicode[["circle"]], unicode[["square"]],
unicode[["arrow"]])) +
scale_y_continuous(limits=c(-0.5, 20), breaks=0:20) + # Set time limits
labs(fill="Disease Stage", colour="Symbol Key", shape="Symbol Key",
x="Subject ID ", y="Months since diagnosis",
title="Swimmer Plot",
caption=paste(c("Durable defined as subject with six months",
"or more of confirmed response", sep=" ") +
theme(plot.title = element_text(hjust = 0.5), # Put title in middle
plot.caption = element_text(size=7, hjust=0)) # Make caption small
可在此处找到完整说明:http://rpubs.com/alexiswl/swimmer