在 R 中使用循环创建新变量

时间:2014-11-12 03:42:24

标签: r

在 R 中,是否可以用循环更简洁地完成以下操作?目前我是靠复制粘贴代码、每次手动修改所引用的列来实现的,因为我希望把每次的结果作为新变量追加到现有数据集中。提前谢谢。

# Text-mining dependencies: tm supplies Corpus/tm_map and the cleaning
# transformations; SnowballC is loaded alongside it (presumably as the
# stemming backend for stemDocument -- confirm against the tm version used).
library(tm)
library(SnowballC)
# Existing one-row data set that the new topic variable is appended to later.
# NOTE(review): `data` and `list` shadow base R functions of the same name.
data <- data.frame(c("asd"), c(3))
# Domain terms whose share of each document is to be estimated.
list <- c("eat", "drink")

# Single example document.
text <- c("I would like to eat and drink all day")

# Wrap the document in a tm corpus and clean it step by step: lower-case,
# drop punctuation and digits, drop English stop words, stem, and collapse
# runs of whitespace.
text <- Corpus(VectorSource(text))
text <- tm_map(text, content_transformer(tolower))
text <- tm_map(text, removePunctuation)
text <- tm_map(text, removeNumbers)
text <- tm_map(text, removeWords, stopwords('english'))
text <- tm_map(text, stemDocument)
text <- tm_map(text, stripWhitespace)
# NOTE(review): coercing a Corpus with as.data.frame() and reading a `text`
# column assumes a tm version that supports this -- TODO confirm.
text <- as.data.frame(text)
text <- text$text
# Split every document into tokens on one or more spaces.
text <- strsplit(as.matrix(text), ' +')
# Prefix every token with a single space, then flatten each document back
# into a plain character vector of tokens.
text <- lapply(text, lapply, function(z) paste0(' ', z))
text <- lapply(text, unlist)

# Join the supplied values into a single string.
#
# All arguments in `...` are forwarded to paste() with its default element
# separator; `sep` is used as paste()'s `collapse` string, i.e. it is placed
# between the resulting elements when they are concatenated into one string.
implode <- function(..., sep = "") {
  paste_args <- c(list(...), list(collapse = sep))
  do.call(paste, paste_args)
}

# Proportion of the elements of `x` that match a regular expression.
#
# x:      character vector of tokens (one document).
# corpus: regular expression to search for (despite its name, this is the
#         pattern, not the collection of documents).
#
# Returns a single number in [0, 1]. An empty `x` yields 0 rather than
# NaN (0 / 0), so that downstream numeric summaries of the proportions do
# not receive missing values.
wordProportion <- function(x, corpus) {
  if (length(x) == 0) {
    return(0)
  }
  sum(grepl(corpus, x)) / length(x)
}

# Score every document by how large a share of its tokens match the domain
# terms, expressed as a bin number from 0 to 10.
#
# corpus: list of character vectors, one vector of tokens per document.
# domain: passed to extractCorpus2(), whose result must be coercible to a
#         data frame with a `text` column holding the domain terms.
#
# The distinct terms are joined with " | " into one alternation pattern,
# each document's matching proportion is computed with wordProportion(),
# and the proportions are binned with findInterval() against the 0%..90%
# quantiles of the strictly positive proportions. A proportion of 0 is
# therefore always in bin 0.
#
# Returns an integer vector with one element per document. When no document
# has a positive proportion there are no quantiles to bin against, so every
# document is given 0.
estimateProportion <- function(corpus, domain) {
  terms <- as.data.frame(extractCorpus2(domain))[["text"]]
  if (is.null(terms)) {
    stop("extractCorpus2() result has no `text` column", call. = FALSE)
  }
  # Base unique() keeps first-occurrence order, so no pipe/dplyr is needed.
  pattern <- implode(unique(terms), sep = " | ")
  proportions <- unlist(lapply(corpus, wordProportion, pattern))
  # which() drops NA/NaN comparisons instead of turning them into NA entries.
  positive <- proportions[which(proportions > 0)]
  if (length(positive) == 0) {
    return(rep(0L, length(proportions)))
  }
  findInterval(proportions, quantile(positive, seq(0, .9, .1)))
}

# Score the cleaned documents against the domain terms in `list` and append
# the result to the existing data set as the new column `topic_1`.
data$topic_1 <- estimateProportion(text, list)

1 个答案:

答案 0 :(得分:1)

下面是一个循环;列名必须在循环之后另行添加,因为在循环中不能以这种方式使用 $ 来引用动态的列名。

# Append n new columns to `train`, one per column of `train_lda_term`.
# y is the number of columns already present in `train`; n is however many
# columns you want to add.
for (i in seq_len(n)) {
  train[, (i + y)] <- estimateProportion(train_corpus, train_lda_term[, i])
}
# Name the new columns afterwards. The replacement form is
# colnames(x)[idx] <- value; y + seq_len(n) stays empty when n is 0,
# whereas (1 + y):(n + y) would count backwards.
colnames(train)[y + seq_len(n)] <- paste0("TOPIC_0", seq_len(n))

编辑:以上写法假定 train 是数据框;如果 train 是列表,则将 train[,(i + y)] 替换为 train[(i + y)],并将 colnames 替换为 names。