Question

我有一个数据框dfSub，里面有许多参数。这是基于每小时的能源使用数据。我需要按小时对数据进行排序，例如每小时从数据框中获取所有能量值。因此，我希望每小时有24列数据框，行充满能量值。

小时的取值为 1:24，对应数据框中的 dfSub$hr 列；热量对应 dfSub$heat 列。

我构造了一个for循环并尝试使用cbind保存，但它不起作用，错误消息是关于行和列的不同大小。

我可以把结果打印出来并在屏幕上看到，但无法把它们保存到 d（数据框）中。

这是代码：

  # Attempt to collect the heat values of each hour (1..24) as columns of d.
  d = NULL
    for (i in 1:24) {
      # Heat readings from the rows whose hour equals i.
      subh= subset(dfSub$heat, dfSub$hr == i)

      print(subh)

    # NOTE(review): cbind() on data frames needs equal row counts, so this
    # presumably fails as soon as two hours have different numbers of
    # readings (the row/column size error described above) -- confirm.
    d = cbind(d, as.data.frame(subh))
    }

append 函数不适用，因为我事先不知道每个小时的热量值有多少个。

感谢任何帮助。

dfSub的一部分

    hr wk month dyid wend     t heat
    1  2     1    1    0  -9.00   81
    2  2     1    1    0  -8.30   61
    3  2     1    1    0  -7.80   53
    4  2     1    1    0  -7.00   51
    5  2     1    1    0  -7.00   30
    6  2     1    1    0  -6.90   31
    7  2     1    1    0  -7.10   51
    8  2     1    1    0  -6.50   90
    9  2     1    1    0  -8.90  114
   10  2     1    1    0  -9.90  110
   11  2     1    1    0 -11.70  126
   12  2     1    1    0  -9.70  113
   13  2     1    1    0 -11.60  104
   14  2     1    1    0 -10.00  107
   15  2     1    1    0 -10.20  117
   16  2     1    1    0  -9.00   90
   17  2     1    1    0  -8.00  114
   18  2     1    1    0  -7.80   83
   19  2     1    1    0  -8.10   82
   20  2     1    1    0  -8.20   61
   21  2     1    1    0  -8.80   34
   22  2     1    1    0  -9.10   52
   23  2     1    1    0 -10.10   41
   24  2     1    1    0  -8.80   52
    1  2     1    2    0  -8.70   44
    2  2     1    2    0  -8.40   50
    3  2     1    2    0  -8.10   33
    4  2     1    2    0  -7.70   41
    5  2     1    2    0  -7.80   33
    6  2     1    2    0  -7.50   43
    7  2     1    2    0  -7.30   40
    8  2     1    2    0  -7.10   8

预期输出为：

hr1 hr2 hr3 hr4..... hr24
81  61  53  51 ..... 52
44  50  33  41

Answer 1

在这种情况下，可以避免使用for-loop。一个选项是使用tidyr::spread以宽格式转换每小时数据。

library(tidyverse)
# Reshape the hourly readings to wide format, written as explicit nested calls:
#   1. select(): drop the t and wend columns, which are not wanted in the result;
#   2. mutate(): turn hr into a zero-padded label such as "hr01";
#   3. spread(): create one column per hr label, filled with heat.
# Note: tidyr::spread() is superseded; pivot_wider() is its current replacement.
spread(
  mutate(
    select(df, -t, -wend),
    hr = sprintf("hr%02d", hr)
  ),
  key = hr,
  value = heat
)

结果：

#   wk month dyid hr01 hr02 hr03 hr04 hr05 hr06 hr07 hr08 hr09 hr10 hr11 hr12 hr13 hr14 hr15 hr16 hr17 hr18 hr19 hr20 hr21 hr22 hr23 hr24
# 1  2     1    1   81   61   53   51   30   31   51   90  114  110  126  113  104  107  117   90  114   83   82   61   34   52   41   52
# 2  2     1    2   44   50   33   41   33   43   40    8   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA

数据：

# Rebuild the sample data frame from the whitespace-delimited text below;
# the first line of the text supplies the column names.
df <- read.table(
  header = TRUE,
  stringsAsFactors = FALSE,
  text = "hr wk month dyid wend     t heat
1  2     1    1    0  -9.00   81
2  2     1    1    0  -8.30   61
3  2     1    1    0  -7.80   53
4  2     1    1    0  -7.00   51
5  2     1    1    0  -7.00   30
6  2     1    1    0  -6.90   31
7  2     1    1    0  -7.10   51
8  2     1    1    0  -6.50   90
9  2     1    1    0  -8.90  114
10  2     1    1    0  -9.90  110
11  2     1    1    0 -11.70  126
12  2     1    1    0  -9.70  113
13  2     1    1    0 -11.60  104
14  2     1    1    0 -10.00  107
15  2     1    1    0 -10.20  117
16  2     1    1    0  -9.00   90
17  2     1    1    0  -8.00  114
18  2     1    1    0  -7.80   83
19  2     1    1    0  -8.10   82
20  2     1    1    0  -8.20   61
21  2     1    1    0  -8.80   34
22  2     1    1    0  -9.10   52
23  2     1    1    0 -10.10   41
24  2     1    1    0  -8.80   52
1  2     1    2    0  -8.70   44
2  2     1    2    0  -8.40   50
3  2     1    2    0  -8.10   33
4  2     1    2    0  -7.70   41
5  2     1    2    0  -7.80   33
6  2     1    2    0  -7.50   43
7  2     1    2    0  -7.30   40
8  2     1    2    0  -7.10   8"
)

Answer 2

用tidyr：

> # Read the sample data as a fixed-width file: `widths` gives the character
> # width of each of the 7 columns. With header=TRUE the first line holds the
> # column names, separated by `sep` (a comma here) rather than by width.
> df<-read.fwf(textConnection(
+ "hr,wk,month,dyid,wend,t,heat
+     1  2     1    1    0  -9.00   81
+     2  2     1    1    0  -8.30   61
+     3  2     1    1    0  -7.80   53
+     4  2     1    1    0  -7.00   51
+     5  2     1    1    0  -7.00   30
+     6  2     1    1    0  -6.90   31
+     7  2     1    1    0  -7.10   51
+     8  2     1    1    0  -6.50   90
+     9  2     1    1    0  -8.90  114
+    10  2     1    1    0  -9.90  110  
+    11  2     1    1    0 -11.70  126
+    12  2     1    1    0  -9.70  113
+    13  2     1    1    0 -11.60  104
+    14  2     1    1    0 -10.00  107
+    15  2     1    1    0 -10.20  117
+    16  2     1    1    0  -9.00   90
+    17  2     1    1    0  -8.00  114
+    18  2     1    1    0  -7.80   83
+    19  2     1    1    0  -8.10   82
+    20  2     1    1    0  -8.20   61
+    21  2     1    1    0  -8.80   34
+    22  2     1    1    0  -9.10   52
+    23  2     1    1    0 -10.10   41
+    24  2     1    1    0  -8.80   52
+     1  2     1    2    0  -8.70   44
+     2  2     1    2    0  -8.40   50
+     3  2     1    2    0  -8.10   33
+     4  2     1    2    0  -7.70   41
+     5  2     1    2    0  -7.80   33
+     6  2     1    2    0  -7.50   43
+     7  2     1    2    0  -7.30   40
+     8  2     1    2    0  -7.10   8"
+ ),header=TRUE,sep=",",widths=c(5,3,6,5,5,7,5))
> 
> library(dplyr)  # select() is a dplyr function; tidyr alone does not provide it
> library(tidyr)
> # Keep only the day id, the hour and the heat reading.
> df1 <- select(df,dyid,hr,heat)
> # Wide format: one row per dyid, one column per distinct hr value.
> df2 <- spread(df1,hr,heat)
> # Prefix every hour column (all columns after dyid) with "hr".
> colnames(df2)[2:ncol(df2)] <- paste0("hr",colnames(df2)[2:ncol(df2)])
> df2
  dyid hr1 hr2 hr3 hr4 hr5 hr6 hr7 hr8 hr9 hr10 hr11 hr12 hr13 hr14 hr15 hr16 hr17 hr18 hr19 hr20 hr21 hr22 hr23 hr24
1    1  81  61  53  51  30  31  51  90 114  110  126  113  104  107  117   90  114   83   82   61   34   52   41   52
2    2  44  50  33  41  33  43  40   8  NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA

>

Answer 3

我找到了帮助我在这里解决任务的解决方案：Append data frames together in a for loop

使用空列表并稍后在数据框中组合

# Collect the heat readings for each hour of the day (1..24), one list element
# per hour. which() skips rows whose hr is NA, matching what subset() did.
datalist <- lapply(1:24, function(i) dfSub$heat[which(dfSub$hr == i)])

# Hours may have different numbers of readings. rbind() would silently recycle
# the shorter vectors and so fabricate values; pad every hour with NA up to
# the longest one before binding.
n_max <- max(lengths(datalist))
padded <- lapply(datalist, function(v) c(v, rep(NA, n_max - length(v))))

# Matrix with one row per hour and one column per reading;
# t(big_data) gives the hours-as-columns layout.
big_data <- do.call(rbind, padded)

cbind和rbind都工作。谢谢大家的帮助：）

如何使用cbind

3 个答案: