Question

我有一个包含600多列的大型数据集。其中一部分数据如下所示：

   ' Rebuild the active chart from two named ranges: an annual series plotted
   ' on the primary value axis and a monthly series on the secondary one.
   ' ARng_1 / MRng_1 are presumably declared As Range outside this excerpt.
   Set ARng_1 = Range("Annual_Series_1")
   Set MRng_1 = Range("Monthly_Series_1")



      With ActiveChart
        ' Remove every existing series. Counting down keeps the remaining
        ' indices valid while items are being deleted.
        For i = .SeriesCollection.Count To 1 Step -1
              .SeriesCollection(i).Delete
        Next i
        .ChartType = xlLineMarkers
        ' Add the annual range first (series 1), then the monthly range (series 2).
        .SeriesCollection.Add Source:=ARng_1
        .SeriesCollection.Add Source:=MRng_1
        ' Move the monthly series to axis group 2 (the secondary axis); this is
        ' done before any xlSecondary axis is referenced below.
        .SeriesCollection(2).AxisGroup = 2
        ' Enable each axis title before writing its text.
        .Axes(xlCategory, xlPrimary).HasTitle = True
        .Axes(xlCategory, xlPrimary).AxisTitle.Characters.Text = "Dates"
        .Axes(xlValue, xlPrimary).HasTitle = True
        .Axes(xlValue, xlSecondary).HasTitle = True
        .Axes(xlValue, xlPrimary).AxisTitle.Characters.Text = "Annual"
        .Axes(xlValue, xlSecondary).AxisTitle.Characters.Text = "Monthly"
        .Axes(xlCategory).HasMajorGridlines = True
        ' NOTE(review): both series get the same red line colour - confirm that
        ' this is intended, since it makes them hard to tell apart.
        .SeriesCollection(1).Format.Line.ForeColor.RGB = RGB(255, 0, 0)
        .SeriesCollection(2).Format.Line.ForeColor.RGB = RGB(255, 0, 0)
        ' date_rng is not defined in this excerpt; presumably a range of dates
        ' used as category labels - TODO confirm. Only series 1 receives it.
        .SeriesCollection(1).XValues = date_rng
      End With

现在我想将其转换/转置为以下结构：

  category q151_a34 q151_a35 q151_a36 q151_a37 q152_a34 q152_a35 q152_a36 q152_a37
1 men      2         12      37       24       0        7        19       23
2 women    3         10      23       19       2        4        29       16

我找到了两步解决方案： 第1步：拆分变量

  category  var     a34 a35 a36 a37
1 men       q151    2   12   37  24
2 men       q152    0   7    19  23
3 women     q151    3   10   23  19
4 women     q152    2   4    29  16

第2步：转置

# Helpers that split composite names such as "q151_a34" at the underscore.
#
# get.first(x):  character vector -> part before the first underscore
#                (e.g. "q151").
# get.second(x): character vector -> part after the first underscore
#                (e.g. "a34").
#
# vapply() guarantees a character vector result, including character(0) for
# empty input. An element with no underscore makes get.second() fail with a
# subscript error rather than silently returning a wrong value.
get.first <- function(x) {
  vapply(strsplit(x, "_", fixed = TRUE), `[[`, character(1), 1L)
}
get.second <- function(x) {
  vapply(strsplit(x, "_", fixed = TRUE), `[[`, character(1), 2L)
}

# `data` is assumed to hold the composite names in a column called `variable`
# (it is created outside this excerpt - confirm against the caller).
data <- within(data, var <- get.first(as.character(variable)))
# The answer id is the SECOND part of the name; using get.first() here would
# just duplicate `var`.
data <- within(data, answer <- get.second(as.character(variable)))

是否有更好/更高效的方式？

非常感谢

Answer 1

您可以使用tidyverse软件包组中的tidyr：

# install.packages('tidyverse')


library(tidyverse)

# Data

# Example data: one row per category and one integer column per
# question/answer combination, named <question>_<answer>.
dff <- data.frame(
  category = factor(c("men", "women")),
  q151_a34 = c(2L, 3L),
  q151_a35 = c(12L, 10L),
  q151_a36 = c(37L, 23L),
  q151_a37 = c(24L, 19L),
  q152_a34 = c(0L, 2L),
  q152_a35 = c(7L, 4L),
  q152_a36 = c(19L, 29L),
  q152_a37 = c(23L, 16L)
)

# Code

# Reshape so that each category/question pair becomes one row and each
# answer id becomes a column.
# Note: gather()/spread() are superseded in current tidyr by
# pivot_longer()/pivot_wider(); they still work and are kept here.
dff %>%
  # Stack the question/answer columns into name ("Qs") / value pairs.
  gather(key = "Qs", value = "values", q151_a34:q152_a37) %>%
  # Split names such as "q151_a34" into question id and answer id; the
  # default separator matches the underscore.
  separate(col = Qs, into = c("var", "A")) %>%
  # Turn each answer id back into a column of its own.
  spread(key = A, value = values)


# Output

  category  var a34 a35 a36 a37
1      men q151   2  12  37  24
2      men q152   0   7  19  23
3    women q151   3  10  23  19
4    women q152   2   4  29  16

基本上，首先需要借助gather函数将列名移动到行中，然后使用separate函数按下划线拆分这些名称。这样，以q开头的部分会放在一列中，以a开头的部分会放在另一列中。接下来，借助spread函数，将以a开头的值展开为列名。将这些步骤结合起来，就能得到您想要的输出。

编辑：

如果您不想逐一指定需要转换为行的列名，可以尝试以下写法：

# Same reshape as before, but instead of naming the columns to stack, name
# the single column to keep: everything except `category` is gathered.
dff %>%
  # Stack every column other than `category` into name ("Qs") / value pairs.
  gather(key = "Qs", value = "values", -category) %>%
  # Split names such as "q151_a34" into question id and answer id; the
  # default separator matches the underscore.
  separate(col = Qs, into = c("var", "A")) %>%
  # Turn each answer id back into a column of its own.
  spread(key = A, value = values)

这与之前的解决方案完全相同，唯一的区别是您只需指明希望保留为列的那一列。从本质上讲，除了category列之外，其余所有列都会被转换为行。

我希望这会有所帮助。

将列组转换为行

1 个答案:

编辑：