确定两个分组变量的每个组合中具有最高计数和总和的因子

时间:2015-04-28 19:03:32

标签: r class classification

在这项研究中,我们在 100 m² 的圆形样地内,记录了每棵高度 > 1.5 m 且直径 > 1.8 cm 的树的树种和胸径(dbh)。共采样了 100 多个圆形样地。

我想确定四个直径等级中各自最具代表性的树种。直径等级为幼树(2.5–8 厘米)、杆材(pole,>8–18 厘米)、成熟木(>18–28 厘米)和大树(>28 厘米)。

如果可能,我想要一个例子,说明如何分别根据密度(每个直径等级中每个物种的株数)和体积(胸高断面积,basal area)选出每个等级中最具代表性的物种。

我在crossvalidated.com上发布了一个关于哪种方法(密度或体积)更合适的问题......

https://stats.stackexchange.com/q/148734/57117

请注意,各样地的名称(location)不遵循任何规律;名称是根据样地在其抽样来源数据集中的位置分配的。此外,如果某个样地的某个直径等级中没有任何物种,用 NA 表示即可。

这是一个示例数据集,其中15个位置被采样,1200个树被测量。我的数据与以下示例数据非常相似。

# Species codes used to label the simulated trees.
tree.species <- c("PSME", "PIEN", "LAOC", "POTR", "SALIX")

# Simulated inventory of 1200 trees: plot label, dbh (cm), species, basal area.
# The result is sorted by location, then dbh, then species; row names keep the
# pre-sort row index.
tree.diameters <- local({
  # Random draws happen in the same sequence as the columns are listed:
  # location first, then dbh, then species.
  # Only four letters are drawn; paste0() recycles them across the 1200 numbers.
  plot.id <- paste0(
    sample(LETTERS[c(2, 4, 6, 8)], 4, replace = TRUE),
    sample(seq(6, 250, by = 57), 1200, replace = TRUE)
  )

  # Units in cm
  dbh.cm <- c(
    rep(3.81, 200),
    rnorm(350, mean = 6.32, sd = 1.5),
    rnorm(50, mean = 75, sd = 6),
    runif(550, min = 20, max = 100),
    rnorm(50, mean = 150, sd = 2.3)
  )

  species.id <- factor(sample(tree.species, 1200, replace = TRUE))

  trees <- data.frame(location = plot.id, dbh = dbh.cm, species = species.id)

  # Add basal area (m sq. per ha)
  # NOTE(review): (dbh / 2)^2 already halves the diameter, so the additional 4
  # in the denominator makes this a quarter of the usual pi * dbh^2 / 40000
  # (m^2 per tree) before the 100x scaling. Kept unchanged because previously
  # printed sample values were produced with it -- confirm the intended formula.
  trees <- transform(
    trees,
    basal.area = 100 * (pi * ((dbh / 2)^2) / (4 * 10000))
  )

  # Order the data
  trees[with(trees, order(location, dbh, species)), ]
})

> head(tree.diameters, n=15)
    location      dbh species basal.area
426     B120 3.303363    PSME 0.02142607
358     B120 3.657538   SALIX 0.02626682
450     B120 3.667190    PSME 0.02640565
150     B120 3.810000    PIEN 0.02850230
94      B120 3.810000    POTR 0.02850230
10      B120 3.810000    PSME 0.02850230
90      B120 3.810000    PSME 0.02850230
18      B120 3.810000   SALIX 0.02850230
134     B120 3.810000   SALIX 0.02850230
194     B120 3.810000   SALIX 0.02850230
274     B120 3.979974   SALIX 0.03110214
290     B120 5.345510   SALIX 0.05610586
310     B120 5.480217    POTR 0.05896921
254     B120 5.625061   SALIX 0.06212757
478     B120 5.852126    LAOC 0.06724456

我一直在尝试的按密度选择的一般做法如下(以 location=="B120" 和 location=="B177" 为例),但我不确定如何对所有样地进行迭代,并把所有结果放入一个列表或 data.frame 中。我也不确定如何对体积(basal.area)做同样的计算。

# Console transcript: density-based pick for location "B120".
# Each command tabulates species counts in a subset of tree.diameters and
# returns the name(s) whose count equals the maximum of that table.
# NOTE(review): in R `&` binds tighter than `|`, so the pole and mature filters
# below parse as `(location == "B120" & dbh >= 8) | dbh < 18` (resp.
# `... dbh >= 18) | dbh < 28`), which also keeps every smaller tree from every
# location. The intended range filter is presumably `dbh >= 8 & dbh < 18`
# (resp. `dbh >= 18 & dbh < 28`) -- confirm before trusting the "POTR" results
# printed for those two classes.
> names(which(table(subset(tree.diameters, 
+                          location == "B120" & dbh < 8, 
+                          select=species))
+             ==max(table(subset(tree.diameters, 
+                                location == "B120" & dbh < 8,
+                                select=species)))))
[1] "SALIX"
> 
> names(which(table(subset(tree.diameters, 
+                          location == "B120" & dbh >= 8 | dbh < 18, 
+                          select=species))
+             ==max(table(subset(tree.diameters, 
+                                location == "B120" & dbh >= 8 | dbh < 18,
+                                select=species)))))
[1] "POTR"
> 
> names(which(table(subset(tree.diameters, 
+                          location == "B120" & dbh >= 18 | dbh < 28, 
+                          select=species))
+             ==max(table(subset(tree.diameters, 
+                                location == "B120" & dbh >= 18 | dbh < 28,
+                                select=species)))))
[1] "POTR"
> names(which(table(subset(tree.diameters, 
+                          location == "B120" & dbh > 28, 
+                          select=species))
+             ==max(table(subset(tree.diameters, 
+                                location == "B120" & dbh > 28, 
+                                select=species)))))
[1] "PIEN"
# Console transcript: the same four density queries for location "B120",
# one per diameter filter; each returns the species name(s) with the highest
# count in the filtered subset.
# NOTE(review): `&` binds tighter than `|` in R, so the second and third
# filters parse as `(location == "B120" & dbh >= 8) | dbh < 18` (resp.
# `... dbh >= 18) | dbh < 28`) and pull in smaller trees from all locations.
# Presumably `dbh >= 8 & dbh < 18` / `dbh >= 18 & dbh < 28` was intended --
# confirm; the "POTR" outputs for those two queries depend on it.
> #Location "B120"
> names(which(table(subset(tree.diameters, 
+                          location == "B120" & dbh < 8, 
+                          select=species))
+             ==max(table(subset(tree.diameters, 
+                                location == "B120" & dbh < 8,
+                                select=species)))))
[1] "SALIX"
> 
> names(which(table(subset(tree.diameters, 
+                          location == "B120" & dbh >= 8 | dbh < 18, 
+                          select=species))
+             ==max(table(subset(tree.diameters, 
+                                location == "B120" & dbh >= 8 | dbh < 18,
+                                select=species)))))
[1] "POTR"
> 
> names(which(table(subset(tree.diameters, 
+                          location == "B120" & dbh >= 18 | dbh < 28, 
+                          select=species))
+             ==max(table(subset(tree.diameters, 
+                                location == "B120" & dbh >= 18 | dbh < 28,
+                                select=species)))))
[1] "POTR"
> 
> names(which(table(subset(tree.diameters, 
+                          location == "B120" & dbh > 28, 
+                          select=species))
+             ==max(table(subset(tree.diameters, 
+                                location == "B120" & dbh > 28, 
+                                select=species)))))
[1] "PIEN"
> 
# Console transcript: the four density queries repeated for location "B177";
# each returns the species name(s) with the highest count in the filtered
# subset of tree.diameters.
# NOTE(review): `&` binds tighter than `|` in R, so the second and third
# filters parse as `(location == "B177" & dbh >= 8) | dbh < 18` (resp.
# `... dbh >= 18) | dbh < 28`) and pull in smaller trees from all locations.
# Presumably `dbh >= 8 & dbh < 18` / `dbh >= 18 & dbh < 28` was intended --
# confirm; the "POTR" outputs for those two queries depend on it.
> #Location "B177"
> names(which(table(subset(tree.diameters, 
+                          location == "B177" & dbh < 8, 
+                          select=species))
+             ==max(table(subset(tree.diameters, 
+                                location == "B177" & dbh < 8,
+                                select=species)))))
[1] "POTR"
> 
> names(which(table(subset(tree.diameters, 
+                          location == "B177" & dbh >= 8 | dbh < 18, 
+                          select=species))
+             ==max(table(subset(tree.diameters, 
+                                location == "B177" & dbh >= 8 | dbh < 18,
+                                select=species)))))
[1] "POTR"
> 
> names(which(table(subset(tree.diameters, 
+                          location == "B177" & dbh >= 18 | dbh < 28, 
+                          select=species))
+             ==max(table(subset(tree.diameters, 
+                                location == "B177" & dbh >= 18 | dbh < 28,
+                                select=species)))))
[1] "POTR"
> 
> names(which(table(subset(tree.diameters, 
+                          location == "B177" & dbh > 28, 
+                          select=species))
+             ==max(table(subset(tree.diameters, 
+                                location == "B177" & dbh > 28, 
+                                select=species)))))
[1] "PSME"

我希望输出类似于这样(只是每个等级的 vol.* 列也要填上结果)。

location den.sap den.pole den.mat den.lrg vol.sap vol.pole vol.mat vol.lrg
B120     SALIX   POTR     POTR    PIEN    ?       ?        ?       ?
B177     POTR    POTR     POTR    PSME    ?       ?        ?       ?

1 个答案:

答案 0 :(得分:1)

这个问题问得很多——但它的说明很完整,也正好是展示 dplyr 和 tidyr 包的绝佳机会。下面开始。

首先按大小对树进行分类:

data flatMap Function.tupled(f)

计算每个位置/物种/大小组合的计数和总体积:

library(dplyr) ; library(tidyr)
# Label every tree with its diameter class. Bins are right-closed, so a dbh of
# exactly 8, 18 or 28 falls in the lower class: sapling (<= 8), pole (8, 18],
# mature (18, 28], large (> 28). The label is stored as a character column.
tree.diameters <- tree.diameters %>%
  mutate(
    size = as.character(
      cut(
        dbh,
        breaks = c(-Inf, 8, 18, 28, Inf),
        labels = c("sapling", "pole", "mature", "large"),
        right = TRUE
      )
    )
  )

最后,为每个位置/大小组合选出计数最大的物种,并把这个长表展开为宽格式。对体积重复同样的操作,再把两个结果表连接在一起。

# One row per location / species / size class, holding the summed basal area
# (`vol`) and the number of trees (`count`) in that combination.
treesummary <- summarise(
  group_by(tree.diameters, location, species, size),
  vol = sum(basal.area),
  count = n()
)

得到:

# Build one wide row per location naming the top-ranked species in each size
# class.
#
# summary_tbl: per location / species / size summary containing the ranking
#              column (here `treesummary`, with `count` and `vol`).
# metric:      name of the column to rank by, given as a string
#              ("count" or "vol").
# prefix:      text prepended to every size-class column name in the output.
#
# Returns an ungrouped table with `location` plus one `<prefix><size>` column
# per size class present in `summary_tbl`; a class with no trees at a location
# is left as NA by spread().
top_species_wide <- function(summary_tbl, metric, prefix) {
  wide <- summary_tbl %>%
    # Drop the grouping left over from summarise() so the sort is global.
    ungroup() %>%
    # Largest value first; slice(1) below then keeps the leader of each group.
    # On ties the species that comes first in `summary_tbl` is kept.
    arrange(desc(.[[metric]])) %>%
    group_by(location, size) %>%
    slice(1) %>%
    ungroup() %>%
    select(location, size, species) %>%
    spread(size, species)

  setNames(wide, c("location", paste0(prefix, names(wide)[-1])))
}

# Most represented species per location and size class, by density (`den.*`,
# number of trees) and by volume (`vol.*`, summed basal area), side by side.
# The result no longer carries a leftover group_by(location) grouping.
result <- inner_join(
  top_species_wide(treesummary, "count", "den."),
  top_species_wide(treesummary, "vol", "vol."),
  by = "location"
)