R - ggplot2在线性区域外推回归线

时间:2014-01-03 14:04:26

标签: r ggplot2

我有以下数据框(附件)。我已经为各种准直器/头部组合绘制了CR与A的对比度。

(p <- ggplot(df,aes(x=A,y=CR,col=Head))+geom_point()+geom_line() +facet_grid(Collimator~Head, scales="fixed")+scale_x_continuous("Activity [MBq]", expand = c(0,0))+ylim(0,80000)+ ylab("Count Rate [cps]") + theme_bw()+theme(legend.position = "none"))

enter image description here

在理想世界中,上述情节将是线性的。实际上,由于系统死区时间增加,CR将开始因A而下降。我想在每个方面添加的是直线拟合,它仅通过前2个数据点 - 这是在死区时间开始之前。

ggplot2中有一种简单的方法吗?我可以将geom_smooth(method = "lm")与其他选项一起使用吗?

structure(list(A0 = c(76L, 274L, 786L, 1060L, 1294L, 2092L, 2639L, 
3437L, 4223L, 76L, 274L, 786L, 1060L, 1294L, 2092L, 2639L, 3437L, 
4223L, 76L, 274L, 786L, 1060L, 1294L, 2092L, 2639L, 3437L, 4223L, 
76L, 274L, 786L, 1060L, 1294L, 2092L, 2639L, 3437L, 4223L, 76L, 
274L, 786L, 1060L, 1294L, 2092L, 2639L, 3437L, 4223L, 76L, 274L, 
786L, 1060L, 1294L, 2092L, 2639L, 3437L, 4223L), T = c(85L, 87L, 
88L, 89L, 89L, 90L, 91L, 92L, 93L, 97L, 98L, 99L, 100L, 101L, 
102L, 103L, 103L, 104L, 306L, 308L, 310L, 311L, 313L, 315L, 316L, 
317L, 321L, 328L, 330L, 331L, 332L, 336L, 338L, 340L, 341L, 342L, 
352L, 354L, 357L, 358L, 361L, 363L, 364L, 366L, 368L, 376L, 378L, 
379L, 380L, 385L, 386L, 388L, 389L, 390L), A = c(64.8860944957, 
233.0628375794, 667.3247389509, 898.2821229937, 1096.5821388243, 
1769.5416286837, 2228.0796200189, 2896.4301555482, 3552.1951906822, 
63.4538798403, 228.3428223318, 653.8100019998, 880.0900106808, 
1072.3775535078, 1730.4829756141, 2178.8997717016, 2837.7713207042, 
3480.2557273313, 43.0160516527, 154.5083447781, 441.5789350636, 
594.4069507774, 722.9307781622, 1164.4170158664, 1466.1502053416, 
1905.9469999005, 2324.4554021136, 41.2913626085, 148.3134747414, 
424.6633733747, 571.6370071823, 692.6559941892, 1115.653739112, 
1402.1405840581, 1822.7365994889, 2235.4125025879, 39.4886520314, 
141.8383609947, 404.6153215597, 544.6504360348, 661.1848647194, 
1064.9635212237, 1340.925513839, 1739.9197611259, 2129.880289335, 
37.7646447282, 135.6459396877, 388.3926422248, 522.8131775262, 
632.318659446, 1020.3651426265, 1282.3829893435, 1667.0556688926, 
2044.484697239), Counts = c(102830L, 328231L, 784020L, 1010212L, 
1160531L, 1582051L, 1760850L, 1888034L, 1897347L, 99780L, 317952L, 
749548L, 965314L, 1106831L, 1488386L, 1672990L, 1793667L, 1789803L, 
129507L, 453800L, 1053106L, 1327867L, 1473197L, 1900706L, 2075742L, 
1991265L, 1756820L, 121230L, 424329L, 994864L, 1237568L, 1374478L, 
1734922L, 1921046L, 1878514L, 1664225L, 213389L, 712467L, 1498082L, 
1777791L, 1882367L, 1824631L, 1525162L, 1250229L, 1072038L, 193591L, 
651249L, 1354850L, 1594421L, 1653835L, 1669993L, 1436444L, 1144518L, 
1015859L), CR = c(3428L, 10941L, 26134L, 33674L, 38684L, 52735L, 
58695L, 62934L, 63245L, 3326L, 10598L, 24985L, 32177L, 36894L, 
49613L, 55766L, 59789L, 59660L, 4317L, 15127L, 35104L, 44262L, 
49107L, 63357L, 69191L, 66376L, 58561L, 4041L, 14144L, 33162L, 
41252L, 45816L, 57831L, 64035L, 62617L, 55474L, 7113L, 23749L, 
49936L, 59260L, 62746L, 60821L, 50839L, 41674L, 35735L, 6453L, 
21708L, 45162L, 53147L, 55128L, 55666L, 47881L, 38151L, 33862L
), Head = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("H1", 
"H2"), class = "factor"), Collimator = structure(c(1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L), .Label = c("HRGP", "LEGP", "LEHS"), class = "factor")), .Names = c("A0", 
"T", "A", "Counts", "CR", "Head", "Collimator"), row.names = c(NA, 
-54L), class = "data.frame")

2 个答案:

答案 0 :(得分:8)

这应该这样做:

library(ggplot2)
(p <- ggplot(df,aes(x=A,y=CR,col=Head))+
    geom_point()+geom_line() +
    facet_grid(Collimator~Head, scales="fixed")+
    scale_x_continuous("Activity [MBq]",
                       expand = c(0,0))+ylim(0,80000)+
  ylab("Count Rate [cps]") + theme_bw()+theme(legend.position = "none"))

 library(plyr)
 subdf <- ddply(df,c("Collimator","Head"),
                function(x) x[1:2,])
 p + geom_smooth(method="lm",data=subdf,colour="gray",se=FALSE,
                 fullrange=TRUE)

enter image description here

答案 1 :(得分:6)

一种方法是为CollimatorHead的每个组合制作包含pnly前两个值的新数据框。然后计算这两个值的斜率和截距值,并在geom_abline()中使用它们绘制直线。

 library(plyr)
#subset only firt two values
 df2<-ddply(df,.(Collimator,Head),function(x) head(x,2))
 head(df2)
   A0   T         A Counts    CR Head Collimator
1  76  97  63.45388  99780  3326   H1       HRGP
2 274  98 228.34282 317952 10598   H1       HRGP
3  76  85  64.88609 102830  3428   H2       HRGP
4 274  87 233.06284 328231 10941   H2       HRGP
5  76 328  41.29136 121230  4041   H1       LEGP
6 274 330 148.31347 424329 14144   H1       LEGP

#caclulate slope and intercept
 df3<-ddply(df2,.(Collimator,Head),summarise, int=coefficients(lm(CR~A))[1],
                      slop=coefficients(lm(CR~A))[2])
 df3
  Collimator Head      int      slop
1       HRGP   H1 527.5309  44.10241
2       HRGP   H2 529.3279  44.67324
3       LEGP   H1 143.0519  94.40105
4       LEGP   H2 146.2766  96.95737
5       LEHS   H1 567.3029 155.85205
6       LEHS   H2 694.4843 162.54077

ggplot(df,aes(x=A,y=CR,col=Head))+geom_point()+geom_line() + 
  facet_grid(Collimator~Head, scales="fixed") + 
  scale_x_continuous("Activity [MBq]", expand = c(0,0))+ ylim(0,80000) + 
  ylab("Count Rate [cps]") + 
  theme_bw()+theme(legend.position = "none")+
  geom_abline(data=df3,aes(intercept=int,slope=slop,color=Head))

enter image description here