Question

我想运行树回归。数据是这种格式：

           L2         L3        L4        L5        L6 ele         ndvi    nd_var nd_ps ldclas
1 0.010814554 0.11304182 0.1360298 0.2098749 0.2437155 179  0.012483470  286688.2  7361   agri
2 0.010853562 0.10954640 0.1279681 0.1986370 0.2224236 183 -0.005020924  383210.9  7353   agri
3 0.011879258 0.12245614 0.1507865 0.2681184 0.2980641 184  0.005531083 1210329.6  7539   agri
4 0.009947186 0.09288491 0.1018834 0.2433811 0.2778357 193 -0.043884473  372672.2  7189   agri
5 0.010979766 0.10698310 0.1283619 0.2131286 0.2349639 193 -0.022636201  472360.7  7392   agri
6 0.011418039 0.11616439 0.1401070 0.2539036 0.3128864 195 -0.001042468  629364.2  7263   agri

ldclas是因变量。ldclas是一个有10个级别的因子，例如agri（农业）、tea（茶叶）、teak（柚木）、rubber（橡胶）等。

dput(tt)的输出

structure(list(L2 = c(0.00912571167754499, 0.00930928144178689, 
0.00934829001668829, 0.0088274108106519, 0.00936205774900643, 
0.00895361502356821, 0.00898573973231054, 0.00755389557122373, 
0.0075997880122842, 0.00758602027996606, 0.00788891039096519, 
0.00775582231188981, 0.00781777710732146, 0.00793250820997264, 
0.00815738117116897, 0.00817114890348711), L3 = c(0.0878981140668165, 
0.0923722488117655, 0.0880612335627261, 0.0763632354274946, 0.0775283746839917, 
0.082748198553099, 0.0864766441738899, 0.0545518285458678, 0.0588628437949073, 
0.0566956847778226, 0.0579540351748395, 0.0588628437949073, 0.0606105526796531, 
0.0575345850425006, 0.0649681734989524, 0.0623116559941389), 
    L4 = c(0.0848333226476736, 0.0903004613645694, 0.088516691528972, 
    0.073088240743156, 0.0761924635739359, 0.0779299017254917, 
    0.0815206072387071, 0.036532542034421, 0.0375518390833337, 
    0.0378298291875827, 0.0388722920785162, 0.0384089752381013, 
    0.0395672673391385, 0.0402622425997609, 0.0436212896927688, 
    0.0423240025396071), L5 = c(0.22561265031896, 0.236273695432274, 
    0.208398062322137, 0.17396888632849, 0.135616814946827, 0.208075000349006, 
    0.217836087108599, 0.118148392542544, 0.198013927471506, 
    0.166792295353943, 0.149716162488461, 0.183937655785095, 
    0.18880666123728, 0.129386334036449, 0.223697354335399, 0.193560287413347
    ), L6 = c(0.177203322015849, 0.200068266889341, 0.190253179119034, 
    0.163732501780303, 0.16849603196228, 0.173259562144258, 0.184647722672334, 
    0.0603306628998872, 0.0772578120116587, 0.0753302439845328, 
    0.0664678622506211, 0.0696583196748293, 0.0774350596463369, 
    0.0615492403883001, 0.0991922068030903, 0.0796728110341496
    ), ele = c(666, 773, 766, 678, 787, 809, 857, 738, 748, 855, 
    500, 612, 588, 397, 261, 258), ndvi = c(-0.0283995447391665, 
    -0.0135402419404802, -0.0395083528567925, -0.0819444409706586, 
    -0.103586067539291, -0.0490366118119649, -0.0288226681221347, 
    -0.17071641510358, -0.136545326259316, -0.154017449391041, 
    -0.16240155229558, -0.146503439773889, -0.136064892814646, 
    -0.168614157809797, -0.122837753698589, -0.144167470536185
    ), nd_var = c(131202.666666667, 433640.666666667, 461440.222222222, 
    210334.888888889, 79202, 4817.55555555556, 55640.6666666667, 
    105110.222222222, 263000.888888889, 63993.5555555556, 95738.8888888889, 
    29214, 34386.8888888889, 74852.6666666667, 63421.5555555556, 
    47259.5555555556), nd_ps = c(7836, 7407, 8644, 7460, 8731, 
    7675, 8202, 8457, 8160, 8152, 7705, 8108, 8016, 7898, 7901, 
    7946), ldclas = structure(c(4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 
    5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), .Label = c("agri", "coconut", 
    "DDF", "grass", "MDF", "rubber", "tea", "teak", "water", 
    "young rubber"), class = "factor")), .Names = c("L2", "L3", 
"L4", "L5", "L6", "ele", "ndvi", "nd_var", "nd_ps", "ldclas"), row.names = 95:110, class = "data.frame")

我使用了以下代码：

library(party)
ct <- ctree(ldclas ~ L2 + L3 + L4 + L5 + L6 + ele + ndvi + nd_var + nd_ps, data = tt)

我得到的结果如下：

1) ele <= 637; criterion = 1, statistic = 216.044
  2) L3 <= 0.09185959; criterion = 1, statistic = 187.431
    3) L5 <= 0.05141302; criterion = 1, statistic = 165.797
      4)*  weights = 12

但是我无法从结果中看出因变量的哪些类别在树的各个节点上被分开了。例如：哪些类别的响应变量被划分到ele > 637的分支？另外，用什么代码可以在图上显示这一点？

Answer 1

我不确定我是否完全理解了您的问题，而且您没有提供可重现的示例，所以我将用一个独立的示例来说明，并顺带回应您在评论中提出的问题。

因此，让我们运行一个带有3个不同级别的响应变量的分类树

library(party)
irisct <- ctree(Species ~ .,data = iris)
plot(irisct)

enter image description here

因此，该图显示了每个叶子（终端节点）中响应变量（被解释变量）各类别的分布（百分比）。例如，您可以看到在节点编号2中，我们有100％的setosa。n = 50（您在评论中问到过）表示该节点中共有50个观测值（是观测总数，而不是唯一值的个数）。现在，如果我们想要查看树结构，我们可以这样做：

irisct
##1) Petal.Length <= 1.9; criterion = 1, statistic = 140.264
##  2)*  weights = 50 
##1) Petal.Length > 1.9
##  3) Petal.Width <= 1.7; criterion = 1, statistic = 67.894
##    4) Petal.Length <= 4.8; criterion = 0.999, statistic = 13.865
##      5)*  weights = 46 
##    4) Petal.Length > 4.8
##      6)*  weights = 8 
##  3) Petal.Width > 1.7
##    7)*  weights = 46

您可以在2)* weights = 50中看到weights = 50，这意味着该节点中有50个观测值。由于我们未在ctree()中指定weights参数，默认情况下ctree会为每个观测设置权重1（您可以将weights参数设置为其他值，请参阅?ctree）。您还可以在某些节点上看到*，这意味着它们是终端节点。

现在回到您的主要问题：您可以使用以下代码获取每个节点中响应变量各级别的分布（无论该节点是否为终端节点）

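（注：原回答此处的代码在转载时丢失。作为示例，在party包中可以用 nodes(irisct, 2)[[1]]$prediction 查看节点2中各级别的比例，把2换成其他节点编号即可查看相应节点。）

输出为您提供该特定节点中每个级别的分布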

希望这就是你所需要的

具有因变量类的树回归图

1 个答案: