如何编写一个函数来为每个Facet移动回归线?

时间:2015-10-01 20:19:24

标签: r ggplot2

fit<-lm(log(all$fd) ~ log(all$area))
fintercept<-fit$coefficients[[1]] #intercept
fslope<-fit$coefficients[[2]] #slope
interceptmax<-max(log(all$fd)-fslope*log(all$area))

ggplot(all, aes(x=log(area), y=log(fd))) +
  geom_point()+
  geom_abline(aes(intercept=interceptmax,slope=fslope))+ #shifted regression line
  #facet_wrap(~id)+
  theme_bw()+
  theme(panel.grid.major = element_line(colour = "#808080"))+
  ggsave('test.png',width=6, height=4,dpi=300)

这个ggplot2给了我一个包络曲线(一个移位的回归线)。如果我删除facet_wrap(~id),它按预期工作。但是我希望分别为每个方面计算线条。有什么建议我怎么能为每个方面做这个? (类似于使用&#39; lm&#39;在ggplot2中工作)

这里所有&#39;的子集是:

structure(list(fd = c(11, 7.75, 55.25, 45.25, 9.5, 89, 14.5, 
8, 84.25, 5.25, 79.5, 7.75, 71, 38.5, 242.25, 33, 32, 19, 58.5, 
249.25, 19, 72.5, 6.25, 27.0333333333333, 26.5, 81, 30, 29, 39.75, 
18.5, 64.25, 91, 4.5, 30.5, 74, 256.75, 9, 81, 27, 7.5, 107, 
26.75, 47.25, 16, 57, 37, 48.25, 48, 36, 147.25, 23.5, 42, 1.08333333333333, 
21.5, 6.51666666666667, 198, 47.5, 8.75, 16, 43.5, 34.75, 30.25, 
132.25, 2.25, 12.5, 225, 37, 17.25, 63.5, 48, 19.75, 12, 62.5, 
64, 27, 11, 72.25, 246, 27.75, 15.5, 178, 93.75, 3.75, 3, 46.25, 
4, 6.25, 5.25, 20, 44, 44.5, 1, 33, 18.25, 14.5, 29.25, 9, 33, 
133, 67), area = c(20168.2374, 432.528, 5780.8535, 1411.5435, 
543.8975, 660.447, 24995.9752, 543.8975, 2659.9178, 277.1287, 
147.8883, 153.0683, 2217.0298, 1188.8045, 4237.2205, 489.5078, 
1051.5352, 1362.3337, 1401.1836, 169.3852, 1333.8439, 1051.5352, 
29.5259, 1429.6734, 668.2169, 17068.0216, 660.447, 16860.8226, 
116.5495, 3820.2325, 784.7664, 841.7461, 1696.4422, 85.9876, 
2066.8105, 2090.1204, 121.7294, 9999.9441, 127.4274, 1152.5447, 
934.9857, 1424.4935, 1774.1419, 543.8975, 784.7664, 237.5019, 
3486.124, 1080.025, 6164.1717, 3348.8546, 236.2069, 510.2277, 
31.5979, 916.8558, 1994.2908, 3501.6639, 1152.5447, 486.9178, 
533.5376, 271.9488, 336.6985, 1103.3349, 9764.2552, 36.7778, 
2375.0191, 512.8176, 422.1681, 859.8761, 1118.8749, 237.5019, 
133.6434, 453.2479, 1787.0918, 6063.1622, 237.5019, 1383.0537, 
668.2169, 1424.4935, 621.5971, 1486.6532, 7355.5662, 984.1955, 
68.8937, 154.6223, 116.5495, 1665.3624, 43.5118, 450.6579, 510.2277, 
1494.4231, 404.0381, 598.2873, 339.2884, 12406.043, 274.5387, 
151.2553, 753.6865, 29059.6666, 3470.5841, 1885.5113), id = structure(c(3L, 
2L, 2L, 4L, 4L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 3L, 3L, 3L, 
4L, 3L, 1L, 3L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 4L, 4L, 3L, 4L, 4L, 
3L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 4L, 2L, 4L, 3L, 4L, 4L, 3L, 4L, 
3L, 4L, 4L, 4L, 3L, 4L, 3L, 3L, 4L, 3L, 4L, 4L, 3L, 4L, 4L, 4L, 
4L, 4L, 2L, 3L, 4L, 2L, 3L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 3L, 2L, 
1L, 4L, 2L, 4L, 2L, 4L, 2L, 4L, 4L, 4L, 4L, 3L, 4L, 4L, 3L, 4L, 
2L, 2L, 4L), .Label = c("Csb", "Dfb(E)", "Dfa", "Cfa"), class = "factor")), .Names = c("fd", 
"area", "id"), row.names = c("65775", "61848", "11286", "22467", 
"34601", "53841", "14661", "8941", "88607", "47681", "7898", 
"9598", "40615", "35550", "70985", "64634", "43864", "88065", 
"37520", "100794", "74916", "76759", "48928", "17575", "7424", 
"30848", "53970", "65662", "27392", "82248", "48004", "93560", 
"51147", "42321", "30070", "22096", "25575", "49067", "23702", 
"65877", "53617", "90727", "59449", "34568", "48155", "96101", 
"31358", "40150", "75458", "71566", "1687", "86603", "77451", 
"68075", "14478", "69151", "66028", "77394", "68580", "16328", 
"26945", "73615", "24179", "19497", "82448", "25098", "23565", 
"8113", "66312", "96062", "60313", "34767", "97877", "48598", 
"96135", "36877", "7309", "71065", "49622", "65051", "55340", 
"105109", "26480", "4550", "18935", "61714", "31667", "55035", 
"27563", "17332", "35694", "20092", "44334", "21344", "22303", 
"64816", "12814", "12391", "52770", "84979"), class = "data.frame")

跟进:How can I shift the regression line?

1 个答案:

答案 0 :(得分:3)

为了使每个面具有不同的线,您可以分别为每个id计算斜率和所需的截距。简单地创建所需值的新数据集并在geom_abline中使用它通常是最简单的。此新数据集需要包含id列以及要绘制的值,以便您可以获得每个方面的单独信息。

您可以通过多种方式创建此数据集,包括逐个拟合回归并“手动”收集值。我将使用包 dplyr 中的函数来计算每id的回归,保存斜率,并使用您的公式计算新的截距。

    library(dplyr)

    all2 = all %>%
        group_by(id) %>%
        do({model = lm(log(fd) ~ log(area), data = .)
        data.frame(fslope = coef(model)[2], interceptmax = max(log(.$fd) - coef(model)[2]*log(.$area)))
        })
    all2

Source: local data frame [4 x 3]
Groups: id [4]

      id     fslope interceptmax
  (fctr)      (dbl)        (dbl)
1    Csb -0.5556930    8.3703705
2 Dfb(E)  0.5378457    0.5057893
3    Dfa  0.1227013    4.6143276
4    Cfa  0.3247770    3.3895178

现在使用geom_abline中的新数据集。

ggplot(all, aes(x=log(area), y=log(fd))) +
    geom_point() +
    geom_abline(data = all2, aes(intercept=interceptmax, slope=fslope)) + #shifted regression line
    facet_wrap(~id) +
    theme_bw() +
    theme(panel.grid.major = element_line(colour = "#808080"))

结果图表的每个方面都有不同的线条。

enter image description here