递归后为什么矩阵为空?

时间:2017-05-26 18:03:15

标签: r recursion

我刚开始使用R编程,我正在使用UCI的葡萄酒数据在R中编写CART算法。

正在使用递归来构建完整的CART树。问题是即使一切正常执行,存储树的矩阵()也会在CART函数中的以下行之后被完全删除:

tree <- CART(tree, data[indexL,], K, impurity, size_tree + 1, constants)

R是否有一种特定的方式来执行递归,或者在执行上一行之后只有一个缺失的步骤?

data <- read.csv(file = "wine.csv", header = FALSE)
names(data) <- list("U1", "U2", "U3", "U4", "U5", "U6", "U7", "U8", "U9", "U10", "U11", "U12", "U13", "U14")
data <- subset(data, select = c(U2:U14, U1))

constants <- list(1, 2, 3, 4, 5, 6)
names(constants) <- list("parent", "k", "v", "left", "right", "y")

yDistribution <- function(data, K){
  PY <- matrix(0, 1, K)
  for(i in c(1:K)){
    PY[i] <- length(which(data[, ncol(data)] == i))/nrow(data)
  }
  return(PY)
}

impurityGini <- function(PY){
  i <- 1 - sum(PY^2)
  return(i)
}


CART <- function(tree, data, K, impurity, index, constants){
  rows <- dim(data)[1]
  columns <- dim(data)[2]
  columns <- columns - 1

  if(length(tree) == 1){
    tree <- matrix(0, 1, 6)
  }

  PY <- yDistribution(data, K)
  y <- which.max(PY)

  tree[index, constants$y] <- y
  i <- impurityGini(PY)

  if(i == 0){
    return()
  }

  uniques <- integer(columns)
  for(j in c(1:columns)){
    uniques[j] <- length(unique(data[, j]))
  }

  S <- matrix(0, sum(uniques) - columns, 2)
  offs <- 1

  for(k in c(1:columns)){
    u <- sort(unique(data[, k]))

    S[offs:(offs + uniques[k] - 2), 1] <- k
    S[offs:(offs + uniques[k] - 2), 2] <- (u[1:length(u)-1]+u[2:length(u)])/2
    offs <- offs + uniques[k] - 1
  }

  E <- matrix(0, length(S[, 1]), 1)
  for(j in 1:length(S[, 1])){
    k <- S[j, 1]
    v <- S[j, 2]
    indexL <- which(data[, k] < v)
    indexR <- which(data[, k] >= v)
    impurityL <- impurityGini(yDistribution(matrix(data[indexL, length(data)], length(indexL), 1), k))
    impurityR <- impurityGini(yDistribution(matrix(data[indexR, length(data)], length(indexR), 1), k))
    E[j] <- length(indexL)/rows * impurityL + length(indexR)/rows * impurityR

  }
  j <- which.min(E)
  minE <- E[j]
  if(minE >= i){
    return()
  }

  k <- S[j, 1]
  v <- S[j, 2]
  indexL <- matrix(which(data[, k] < v), length(which(data[, k] < v)), 1)
  indexR <- matrix(which(data[, k] >= v), length(which(data[, k] >= v)), 1)

  tree[index, constants$k] <- k
  tree[index, constants$v] <- v

  size_tree <- dim(tree)[1]
  tree[index, constants$left] <- size_tree + 1
  tree[index, constants$right] <- size_tree + 2
  tree <- rbind(tree, c(index, 0, 0, 0, 0, 0))
  tree <- rbind(tree, c(index, 0, 0, 0, 0, 0))
  print(tree)
  tree <- CART(tree, data[indexL,], K, impurity, size_tree + 1, constants)
  tree <- CART(tree, data[indexR,], K, impurity, size_tree + 2, constants)
  return(tree)
}

0 个答案:

没有答案