Question

我有一个包含句子和标签的训练集（1 et -1）。创建一个svm模型后。我想预测新数据的标签和分数。这是我的代码：

library(tm);
require(RcmdrPlugin.temis);
library(RTextTools);
require(e1071)
news=read.csv("C:..polarity.csv",header=F,sep=';') # the training data
traindata <- as.data.frame(news[1:196,]);
trainvector <- as.vector(traindata[,1]); # Choose the sentences without the labels
trainsource <- VectorSource(trainvector);
traincorpus <- Corpus(trainsource) # Create a training corpus
#Cleaning the training corpus
traincorpus <- tm_map(traincorpus,stripWhitespace)
traincorpus <- tm_map(traincorpus,tolower)
traincorpus <- tm_map(traincorpus, removeWords,stopwords("french"))
traincorpus <- tm_map(traincorpus,removeNumbers)
traincorpus <- tm_map(traincorpus, function(x)
gsub("(['’\n??]|[[:punct:]]|[[:space:]]|[[:cntrl:]])+", " ", x))
corpus1 <- Corpus(DirSource("C.../file", encoding="UTF-8"),
readerControl=list(language="fr"))  # Import the corpus of test
testcorpus=corpus1 # create a copy and cleaning it
testcorpus <- tm_map(testcorpus,stripWhitespace)
testcorpus <- tm_map(testcorpus,tolower)
testcorpus <- tm_map(testcorpus, removeWords,stopwords("french"))
testcorpus <- tm_map(testcorpus,removeNumbers)
testcorpus <- tm_map(testcorpus, function(x)
gsub("(['’\n??]|[[:punct:]]|[[:space:]]|[[:cntrl:]])+", " ", x))
#Creating the dtm of test and train corpus with words stemming 
tr_matrix <- create_matrix(traincorpus, language="french", stemWords=TRUE,
removeStopwords=TRUE,weighting=weightTf)
tr=as.matrix(tr_matrix)
ts_matrix <- create_matrix(testcorpus, language="french", stemWords=TRUE,
removeStopwords=TRUE,weighting=weightTf)
ts = as.matrix(ts_matrix)
y=traindata[,2]
model<-svm(y~tr)
pred=fitted(model)
pred <- predict(model, ts, decision.values = TRUE, probability = TRUE,na.action=TRUE)

我有这个错误： Erreur dans矩阵（ret $ dec，nrow = nrow（newdata），byrow = TRUE，dimnames = list（rowns，： la longueur de'dinnames'[1] n'estpaségaleàl'étenduedutableau 我想这是因为训练数据和新数据之间的结构差异。有谁可以帮助我吗？感谢

predict.svm函数R文本挖掘？

0 个答案: