如何将glmnet预测向量作为常规列添加到数据框

时间:2015-08-12 10:45:47

标签: r glmnet

我想用glmnet包做一个分类项目。我已经成功构建了模型并得到了每一行的预测值(prediction),但不知道如何把它当作普通变量来使用。如何把预测向量作为常规列添加到数据框中,以便后续处理(例如计算其平均值等)?下面是我的示例数据框和代码。另外,我注意到glmnet把预测向量的列命名为“1”,这是为什么?我该如何修改这个列名?

# Toy data read from inline text: a `target` column plus two predictor
# columns, `birds` and `wolfs` (16 rows, header on the first line).
df <- read.table(text = "target birds    wolfs     
                     1        9         7 
                     1        8         4 
                     0        2         8 
                     1        2         3 
                     1        8         3 
                     0        1         2 
                     1        7         1 
                     0        1         5 
                     1        9         7 
                     1        8         7 
                     0        2         7 
                     0        2         3 
                     1        6         3 
                     0        1         1 
                     0        3         9 
                     0        1         1  ",header = TRUE)
library(Matrix)
# Numeric matrix of columns 2:3 (the predictors). The Matrix() result below is
# not assigned to anything, and `some_matrix` is not referenced again in this
# script.
some_matrix <- data.matrix(df[,2:3])
Matrix(some_matrix, sparse=TRUE)
set.seed(2)
# `split` is assigned and then immediately reset to NULL; nothing below reads it.
split <- df[1:10,]
split <- NULL
# Rows 1-10 form the training set, rows 11-16 the test set.
train <- df[1:10,]
test<-df[11:16,]
# Design matrices built from the predictor columns (2:3) of each subset.
train_sparse <- sparse.model.matrix(~.,train[2:3])
test_sparse <- sparse.model.matrix(~.,test[2:3])
library(glmnet)  
# Model fitted on the training rows with column 1 (`target`) as the response.
fit <- glmnet(train_sparse,train[,1],family='binomial')
# NOTE(review): unlike the glmnet() call above, no `family` is passed here, so
# the cross-validation presumably runs with glmnet's default family - confirm.
cv <- cv.glmnet(train_sparse,train[,1],nfolds=3)
# Predictions for the test rows at the lambda stored in cv$lambda.min. The
# printed result further down shows a single column labelled "1".
pred <- predict(fit, test_sparse,type="response", s=cv$lambda.min)
pred
            1
11 0.09691107
12 0.82760730
13 1.00000000
14 0.26871408
15 0.58367863
16 0.26871408
c<-cbind(test,pred)
target birds wolfs          1
11      0     2     7 0.09691107
12      0     2     3 0.82760730
13      1     6     3 1.00000000
14      0     1     1 0.26871408
15      0     3     9 0.58367863
16      0     1     1 0.26871408
typeof(c$1)
Error: unexpected numeric constant in "typeof(c$1"
mean(c$1)
Error: unexpected numeric constant in "mean(c$1"

> str(c)
'data.frame':   6 obs. of  5 variables:
 $ target: int  0 0 1 0 0 0
 $ birds : int  2 2 6 1 3 1
 $ wolfs : int  7 3 3 1 9 1
 $ 1     : num  0.0969 0.8276 1 0.2687 0.5837 ...
 $ pred  : num [1:6, 1] 0.0969 0.8276 1 0.2687 0.5837 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr  "11" "12" "13" "14" ...
  .. ..$ : chr "1"

1 个答案:

答案 0 :(得分:1)

# Worked example: fit a cross-validated logistic glmnet model on a toy data
# set and store the test-set predictions as an ordinary numeric column, so
# that mean(), typeof(), etc. can be applied to it.

# Toy data: binary `target` plus two numeric predictors, `birds` and `wolfs`.
df <- read.table(text = "target birds    wolfs     
                     1        9         7 
                     1        8         4 
                     0        2         8 
                     1        2         3 
                     1        8         3 
                     0        1         2 
                     1        7         1 
                     0        1         5 
                     1        9         7 
                     1        8         7 
                     0        2         7 
                     0        2         3 
                     1        6         3 
                     0        1         1 
                     0        3         9 
                     0        1         1  ",header = TRUE)

library(Matrix)
library(glmnet)

# Seed the RNG once, right before the only random step (fold assignment).
set.seed(2)

# Rows 1-10 are used for training, rows 11-16 for testing.
train <- df[1:10, ]
test <- df[11:16, ]

# Dense design matrices built from the predictor columns (2:3). The `_sparse`
# names are kept from the question even though model.matrix() returns a dense
# matrix. With `~ .` an "(Intercept)" column is prepended; it is dropped
# further down before the matrix is turned back into a data frame.
train_sparse <- model.matrix(~ ., train[2:3])
test_sparse <- model.matrix(~ ., test[2:3])

# Assign cross-validation folds separately within each class. The training
# set holds only three `0` rows, so purely random folds could leave a training
# fold with at most one of them, which a binomial glmnet fit is expected to
# reject. Stratifying guarantees every fold holds out at most one `0` row.
n_folds <- 3L
fold_id <- integer(nrow(train))
for (cls in unique(train$target)) {
  rows <- which(train$target == cls)
  fold_id[rows] <- sample(rep_len(seq_len(n_folds), length(rows)))
}

# Cross-validate with the same family that is used for prediction. Without
# `family`, cv.glmnet() falls back to the gaussian default, and the selected
# lambda would be tuned for a different model than the one being predicted
# from.
cv <- cv.glmnet(
  train_sparse,
  train[, 1],
  family = "binomial",
  foldid = fold_id
)

# cv.glmnet() already fits the model on the full training data; reuse that
# fit instead of calling glmnet() a second time.
fit <- cv$glmnet.fit

# Predicted probabilities for the test rows at the lambda with the lowest
# cross-validated error.
pred <- predict(cv, newx = test_sparse, type = "response", s = "lambda.min")

# predict() returns a one-column matrix rather than a vector. Flattening it
# with as.numeric() gives a plain numeric vector that can be stored under any
# column name and used like every other column.
test_sparse <- as.data.frame(test_sparse[, -1]) # drop the intercept column
test_sparse$predictionColumn <- as.numeric(pred)