我训练了一个模型来预测stackoverfow问题的标签。但是现在我想在系统中添加一个函数,以便当我在文本框中提供问题时,它应该自动预测该问题的标签。
我在下面提供了代码。请为我解决我的问题。
rm(list = ls())
# for installing the library run following codes
install.packages("readr")
install.packages("caret")
install.packages("tm")
install.packages("RTextTools")
library(readr)
library(caret)
library(tm)
library(RTextTools)
# set file path
setwd("C:/Users/topic modeling/topic modeling")
# read the questions file and tags file
train = read_csv("train.csv")
# selected only top 40000 records for memory constarint
train = train[1:40000,]
# check the tags
tags = data.frame(table(train$Tag))
names(tags) = c("Tags", "Count")
tags = tags[order(-tags$Count),]
print(head(tags))
# selected only top 15 tags and related data for processing
tags = tags$Tags[1:15]
train = subset(train, train$Tag %in% tags)
train$TagId = match(train$Tag,tags)
print(nrow(train))
# 90% we took for training
x = round(nrow(train) * 0.9)
# cleaning the data
train$Title = toupper(train$Title)
train$Title = gsub("[<].*[>]", "", as.character(train$Title))
train$Title = gsub("&", "", train$Title)
train$Title = gsub("(RT|via)((?:\\b\\W*@\\w+)+)", "", train$Title)
train$Title = gsub("@\\w+", "", train$Title)
train$Title = gsub("[[:punct:]]", "", train$Title)
train$Title = gsub("[[:digit:]]", "", train$Title)
train$Title = gsub("http\\w+", "", train$Title)
train$Title = gsub("[ \t]{2,}", "", train$Title)
train$Title = gsub("^\\s+|\\s+$", "", train$Title)
# create document matrix
matrix <- create_matrix(train["Title"], language="english", weighting=tm::weightTfIdf)
container <- create_container(matrix,train$TagId,trainSize=1:x, testSize=(x+1):nrow(train),virgin=FALSE)
# create maxent model using SVM
maxent_model <- train_models(container,algorithms=c("SVM"))
maxent_results <- classify_models(container,maxent_model)
# test the model on test data
maxenttestData = train[(x+1):nrow(train),]
maxenttestData = data.frame(maxenttestData, maxent_results)
write.csv(maxenttestData, "MAXENT.csv", row.names = FALSE)
save(maxent_model, file = "maxent.rda")
new = read_csv("new.csv")
newtestdata= new[1:nrow(new),]
#print(newtestdata)
load("maxent.rda")
predict(maxent_model, newdata = newtestdata)
此代码显示错误“ UseMethod(“ predict”)中的错误: 没有适用于“预测”的适用方法应用于“列表”类的对象