困惑于为R

时间:2019-01-23 05:13:38

标签: r dplyr multiple-columns tidyr anova

概述:

我有一个名为'Subset_leaf_1_New'的数据框,其中有两列有趣的栏目:(1) Urbanisation_index 包含不同的子级别(1-4) Canopy_Index 。这些观察结果凸显了城市化程度如何影响称为栎栎(Quercus petraea)”的橡树树种的冠层覆盖程度。

我特别想通过使用ANOVA 确定城市化指数的不同类别或子级别之间的 Canopy_Index中是否存在显着变化( 1-4)栎属。

首先,我将数据帧重新格式化为正确的格式以执行ANOVA,然后执行ANOVA。但是,我真的不确定是否可以正确地重新构造数据框架以及是否使用了正确的方法。

为了进行比较,我使用 excel 进行了ANOVA(请参阅所需结果),这可能是正确的所需结果;但是,我对自己的方法是否正确感到非常困惑,最好是我想使用R进行分析。

如果有人可以提供帮助,我将非常感激。

所需结果:

enter image description here

R代码

library(tidyr)
library(purrr)
library(dplyr)

##Subset the dataframe to include just two columns: 
##(1) Urbanisation Index 
##(2) Canopy Index

df1<-Subset_leaf_1_New %>% select(Urbanisation_index, Canopy_Index)

   ##Subset for ANOVA
    ##Produce New Dataframe to perform the ANOVA

Canopy_Summarised_1 <- map(split(df1, df1$Urbanisation_index), 2)

Canopy_Summarised_1 <- lapply(b, function(x){
  x1<-x
  length(x1) <- max(sapply(Canopy_Summarised_1, length))
  x1
}) %>% data.frame()

colnames(Canopy_Summarised_1) <- paste("sub_level", 1:4, sep = "_")

##Change NA's in the data frame to zeros

Canopy_Summarised_1[is.na(Canopy_Summarised_1)] <- 0

##Produce new dataframe containing the zero's
Canopy_Summarised_1_New<-data.frame(Canopy_Summarised_1)

####One way Anova between the Urbanisation_index and the Canopy_Index
Obs_1_canopy<-aov(Canopy_Summarised_1_New$sub_level_1~., data=Canopy_Summarised_1_New[ , 2:4] )

summary(aov(Canopy_Summarised_1_New$sub_level_1~., data=Canopy_Summarised_1_New[ , 2:4]))

输出

enter image description here

数据

structure(list(X = c(9L, 10L, 26L, 27L, 40L, 41L, 52L, 53L, 54L, 
73L, 76L, 77L, 92L, 94L, 98L, 99L, 100L, 101L, 109L, 110L, 111L, 
132L, 133L, 134L, 156L, 157L, 209L, 211L, 213L, 222L, 223L, 224L, 
238L, 240L, 241L, 242L, 243L, 244L, 246L, 247L, 248L, 249L, 250L, 
251L, 252L, 253L, 258L, 269L, 270L, 271L, 272L, 273L, 274L, 275L
), Obs_.no = c(9L, 10L, 26L, 27L, 40L, 41L, 52L, 53L, 54L, 73L, 
76L, 77L, 92L, 94L, 98L, 99L, 100L, 101L, 109L, 110L, 111L, 132L, 
133L, 134L, 156L, 157L, 209L, 211L, 213L, 222L, 223L, 224L, 238L, 
240L, 241L, 242L, 243L, 244L, 247L, 248L, 249L, 250L, 251L, 252L, 
253L, 254L, 259L, 270L, 271L, 272L, 273L, 274L, 275L, 276L), 
    Date_observed = structure(c(2L, 2L, 6L, 6L, 7L, 7L, 8L, 8L, 
    8L, 6L, 6L, 6L, 4L, 4L, 9L, 9L, 9L, 9L, 4L, 4L, 4L, 6L, 6L, 
    6L, 2L, 2L, 2L, 2L, 11L, 6L, 6L, 6L, 5L, 5L, 9L, 5L, 5L, 
    5L, 10L, 1L, 1L, 1L, 1L, 5L, 5L, 5L, 12L, 3L, 3L, 3L, 10L, 
    10L, 10L, 10L), .Label = c("10/14/18", "10/19/18", "10/20/18", 
    "10/21/18", "10/22/18", "10/23/18", "10/24/18", "10/25/18", 
    "10/27/18", "10/28/18", "10/29/18", "11/6/18"), class = "factor"), 
    Latitude = c(52.947709, 52.947709, 51.60157, 51.60157, 50.446841, 
    50.446841, 52.01434, 52.01434, 52.01434, 55.919722, 50.114277, 
    50.114277, 51.78666, 51.78666, 53.38728, 53.38728, 53.38728, 
    53.38728, 53.582285, 53.582285, 53.582285, 50.79387, 50.79387, 
    50.79387, 52.04252, 52.04252, 53.675996, 53.675996, 55.43828, 
    52.36286, 52.36286, 52.36286, 52.16104, 52.16104, 54.7311, 
    51.61842, 51.61842, 51.61842, 55.91913, 51.06433, 51.06433, 
    51.06433, 51.06433, 55.920966, 55.920966, 55.920966, 57.158724, 
    53.37687, 53.37687, 53.37687, 54.27745, 54.27745, 54.27745, 
    54.27745), Longitude = c(-1.435407, -1.435407, -3.67111, 
    -3.67111, -3.706923, -3.706923, 1.04007, 1.04007, 1.04007, 
    -3.210278, -5.541128, -5.541128, -0.71433, -0.71433, -2.95811, 
    -2.95811, -2.95811, -2.95811, -2.802239, -2.802239, -2.802239, 
    0.26684, 0.26684, 0.26684, -2.43733, -2.43733, -1.272824, 
    -1.272824, -4.64226, -2.06327, -2.06327, -2.06327, 0.18702, 
    0.18702, -5.8041, -0.16034, -0.16034, -0.16034, -3.20987, 
    -1.79923, -1.79923, -1.79923, -1.79923, -3.193503, -3.193503, 
    -3.193503, -2.166099, -1.34506, -1.34506, -1.34506, -0.47911, 
    -0.47911, -0.47911, -0.47911), Altitude = c(104L, 104L, 184L, 
    184L, 36L, 36L, 47L, 47L, 47L, 110L, 9L, 9L, 166L, 166L, 
    0L, 0L, 0L, 0L, 36L, 36L, 36L, 18L, 18L, 18L, 45L, 45L, 0L, 
    0L, 0L, 123L, 123L, 123L, 75L, 75L, 0L, 73L, 73L, 73L, 109L, 
    0L, 0L, 0L, 0L, 115L, 115L, 115L, 119L, 34L, 34L, 34L, 0L, 
    0L, 0L, 0L), Species = c("Quercus petraea", "Quercus petraea", 
    "Quercus petraea", "Quercus petraea", "Quercus petraea", 
    "Quercus petraea", "Quercus petraea", "Quercus petraea", 
    "Quercus petraea", "Quercus petraea", "Quercus petraea", 
    "Quercus petraea", "Quercus petraea", "Quercus petraea", 
    "Quercus petraea", "Quercus petraea", "Quercus petraea", 
    "Quercus petraea", "Quercus petraea", "Quercus petraea", 
    "Quercus petraea", "Quercus petraea", "Quercus petraea", 
    "Quercus petraea", "Quercus petraea", "Quercus petraea", 
    "Quercus petraea", "Quercus petraea", "Quercus petraea", 
    "Quercus petraea", "Quercus petraea", "Quercus petraea", 
    "Quercus petraea", "Quercus petraea", "Quercus petraea", 
    "Quercus petraea", "Quercus petraea", "Quercus petraea", 
    "Quercus petraea", "Quercus petraea", "Quercus petraea", 
    "Quercus petraea", "Quercus petraea", "Quercus petraea", 
    "Quercus petraea", "Quercus petraea", "Quercus petraea", 
    "Quercus petraea", "Quercus petraea", "Quercus petraea", 
    "Quercus petraea", "Quercus petraea", "Quercus petraea", 
    "Quercus petraea"), Tree_diameter = c(569, 892, 46.5, 27.7, 
    44.6, 22.6, 48.1, 83.7, 77.9, 85, 82.9, 52.9, 40.6, 68.6, 
    117, 240, 210, 310, 57, 73.9, 37.1, 81.5, 29.3, 43.3, 41.9, 
    43.8, 240, 122, 21, 124, 133, 119, 79.3, 62.4, 70, 115.9, 
    111.1, 88.9, 90.8, 36, 31, 37.5, 42.3, 73, 54, 75, 159, 49, 
    57.9, 54.8, 53.5, 88.8, 71.3, 101.9), Urbanisation_index = c(2, 
    2, 4, 4, 3, 3, 4, 4, 4, 2, 4, 3, 4, 4, 1, 1, 1, 1, 3, 3, 
    3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 4, 4, 3, 2, 2, 2, 1, 
    2, 2, 2, 2, 2, 2, 2, 1, 4, 4, 4, 4, 4, 4, 4), Stand_density_index = c(3L, 
    4L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 1L, 3L, 2L, 3L, 3L, 2L, 2L, 
    2L, 1L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 
    2L, 4L, 4L, 4L, 2L, 1L, 1L, 4L, 2L, 2L, 2L, 2L, 4L, 4L, 4L, 
    2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), Canopy_Index = c(65, 75, 
    55, 85, 85, 85, 95, 85, 85, 45, 65, 75, 75, 65, 35, 75, 65, 
    85, 65, 95, 75, 75, 75, 65, 75, 65, 75, 95, 95, 85, 85, 85, 
    75, 75, 65, 85, 75, 65, 55, 95, 95, 95, 95, 45, 55, 35, 55, 
    65, 95, 95, 45, 65, 45, 55), Phenological_Index = c(2L, 2L, 
    4L, 3L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 3L, 1L, 2L, 4L, 3L, 2L, 
    1L, 3L, 2L, 3L, 2L, 3L, 3L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 3L, 3L, 2L, 2L, 1L, 1L, 4L, 4L, 4L, 2L, 
    2L, 2L, 2L, 3L, 2L, 3L, 2L)), row.names = c(NA, -54L), class = c("data.table", 
"data.frame"), .internal.selfref = <pointer: 0x0>, index = structure(integer(0), "`__Species`" = integer(0)))

0 个答案:

没有答案