Question

我使用 24 个数据集在 R 中生成了一个名为 mat 的矩阵。每个数据集的维度为 [1, 1:10000]。我使用 rbind 将这 24 个数据集合并成一个矩阵，该矩阵现在的维度为 [1:24, 1:10000]。然后，我使用 cbind 在矩阵末尾添加了一列 Type，用来标明矩阵每一行数据集的类型（A、B、C）。现在我想对这 24 个分属 3 种主要类型（A、B、C）的数据集进行 LDA。为此，我想把整个矩阵转换为如下形式的数据框。

Datasets  V1  V2 V3  ......... V10000 Type
spectra1  ..  .. ..  ........   ..     A
spectra2  ..  .. ..  ........   ..     B
spectra3  ..  .. ..  ........   ..     B
spectra4  ..  .. ..  ........   ..     C

我尝试使用

as.data.frame(as.table(mat))

但这并没有得到想要的结果。有人可以帮我解决这个问题吗？

Answer 1

我认为补充一些细节可能会有所帮助——不过也建议你看一下 R 的入门教材。

 # ----------------------------------------------------------------
# Walk-through: converting a matrix (numeric values plus a character
# "type" column) into a data frame with correctly typed columns.
# All printed output is kept as comments so the script can be sourced.

# ----------------------------------------------------------------
# Create data - assume variables are defined column-wise

(mat1 <- matrix(1:20, nrow = 4))
#      [,1] [,2] [,3] [,4] [,5]
# [1,]    1    5    9   13   17
# [2,]    2    6   10   14   18
# [3,]    3    7   11   15   19
# [4,]    4    8   12   16   20

# letters[1:2] is recycled down the four rows of mat1.
(mat2 <- cbind(mat1, letters[1:2]))
#      [,1] [,2] [,3] [,4] [,5] [,6]
# [1,] "1"  "5"  "9"  "13" "17" "a"
# [2,] "2"  "6"  "10" "14" "18" "b"
# [3,] "3"  "7"  "11" "15" "19" "a"
# [4,] "4"  "8"  "12" "16" "20" "b"

# ----------------------------------------------------------------
# Look at variable class

str(mat1)
# int [1:4, 1:5] 1 2 3 4 5 6 7 8 9 10 ...

str(mat2)
# chr [1:4, 1:6] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" ...

# A matrix holds a single type, so adding a character column converts
# the integer values to character.

# ----------------------------------------------------------------
# Matrix names

colnames(mat1) # no names have been assigned
# NULL

# To assign names use
colnames(mat1) <- paste0("var", 1:5)

# ----------------------------------------------------------------
# Convert to data frame

(df1 <- data.frame(mat1))
#   var1 var2 var3 var4 var5
# 1    1    5    9   13   17
# 2    2    6   10   14   18
# 3    3    7   11   15   19
# 4    4    8   12   16   20

# Note the matrix names are carried over - if this didn't happen perhaps
# your matrix is not named.

(df2 <- data.frame(mat2))
#   X1 X2 X3 X4 X5 X6
# 1  1  5  9 13 17  a
# 2  2  6 10 14 18  b
# 3  3  7 11 15 19  a
# 4  4  8 12 16 20  b

# Note if the matrix does not have names R generates some when converting
# to a data frame.

# ----------------------------------------------------------------
# Look at variable class again

str(df1)
# 'data.frame':  4 obs. of  5 variables:
#  $ var1: int  1 2 3 4
#  $ var2: int  5 6 7 8
#  $ var3: int  9 10 11 12
#  $ var4: int  13 14 15 16
#  $ var5: int  17 18 19 20

# or
vapply(df1, class, character(1))
#      var1      var2      var3      var4      var5
# "integer" "integer" "integer" "integer" "integer"

# As mat1 was all integer the data frame inherits the variable class.

str(df2)
# Output under R < 4.0, where stringsAsFactors defaulted to TRUE:
# 'data.frame':  4 obs. of  6 variables:
#  $ X1: Factor w/ 4 levels "1","2","3","4": 1 2 3 4
#  $ X2: Factor w/ 4 levels "5","6","7","8": 1 2 3 4
#  $ X3: Factor w/ 4 levels "10","11","12",..: 4 1 2 3
#  $ X4: Factor w/ 4 levels "13","14","15",..: 1 2 3 4
#  $ X5: Factor w/ 4 levels "17","18","19",..: 1 2 3 4
#  $ X6: Factor w/ 2 levels "a","b": 1 2 1 2
# Since R 4.0 the default is stringsAsFactors = FALSE, so every column
# is reported as chr instead.

# As mat2 was character, converting it to a data frame (under the old
# default) coerces the columns to factors.

# ----------------------------------------------------------------
# What to do

# Leave them as character when converting to a data frame by setting the
# stringsAsFactors argument explicitly (works the same on any R version).
df2 <- data.frame(mat2, stringsAsFactors = FALSE)

# Then convert variables from character to numeric.
df2$X2 <- as.numeric(df2$X2)

# or convert all at once; lapply() returns one vector per column, so each
# column is replaced by its own numeric vector.
df2[, 1:5] <- lapply(df2[, 1:5], as.numeric)

str(df2)
# 'data.frame':   4 obs. of  6 variables:
#  $ X1: num  1 2 3 4
#  $ X2: num  5 6 7 8
#  $ X3: num  9 10 11 12
#  $ X4: num  13 14 15 16
#  $ X5: num  17 18 19 20
#  $ X6: chr  "a" "b" "a" "b"

# Note if your data frame has factors that should be numeric
# you can convert them with as.numeric(as.character(variable))

# ----------------------------------------------------------------
# Data frame names
names(df2) <- c(paste0("var", 1:5), "type")

将矩阵转换为数据框

1 个答案: