Data.Table:重新排列数据时代码出错--R

时间:2017-05-29 19:24:01

标签: r data.table

我正在尝试重新安排我的数据:

 Type Student Rt1 Rt2 Rt3 Rt4 Rt5 Rt6 Rt7 Rt8 Rt9 Rt10 Rt11
1:  SNR  789331 3.6 3.8 4.0 4.2 3.4 2.4 3.0 3.2 3.2  3.6  4.0
2:  SNR  805933 4.8 4.0 4.0 3.6 3.2 3.2 3.2 3.2 NaN  NaN  3.2
3:  SNR  826523 4.4 4.2 4.2 4.4 4.6 4.6 NaN 4.6 NaN  4.2  4.2
4:  SNR  832929 3.8 3.8 3.8 4.0 3.6 NaN NaN NaN NaN  NaN  NaN
5:  SNR  838607 5.0 5.0 5.0 5.0 5.0 4.4 4.2 4.4 3.8  NaN  3.6
6:  SNR  841903 3.2 4.2 4.2 NaN 3.6 NaN 4.0 3.4 4.2  NaN  4.6

到此:

  Student Type timePeriod  week Rating
1  789331  SNR        Rt1 Jan11    3.6
2  805933  SNR        Rt1 Jan11    4.8
3  826523  SNR        Rt1 Jan11    4.4
4  832929  SNR        Rt1 Jan11    3.8
5  838607  SNR        Rt1 Jan11    5.0
6  841903  SNR        Rt1 Jan11    3.2

以下是我一直在尝试使用的代码。它生成的各列本身都没有问题,但赋给 Rating 列的值不正确。我不确定自己哪里做错了。

  # Reshape `pulse` from wide (one Rt column per period) to long form by
  # building one data.frame per rating column and stacking them in pulse1.
  # NOTE(review): this is the code as posted in the question; it does not
  # produce the intended Rating values (see the note inside the loop).
  pulse1<-NULL
  # Rating column names, and the week label paired with each one by position.
  timePeriods<-c("Rt1", "Rt2", "Rt3", "Rt4", "Rt5", "Rt6", "Rt7", "Rt8", "Rt9", "Rt10", "Rt11")
  weeks<-c("Jan11","Jan25","Feb1","Feb8", "Feb15", "Mar1", "Mar8", "Mar15","Mar22", "Mar29", "Apr5")
  # NOTE(review): measureType is never used in the visible code.
  measureType<-c("Time", 11)

  for (columnNumber in 1:11)
  {
    # The rating columns follow Type and Student, hence the offset of 2.
    # NOTE(review): `pulse` has class data.table (per its dput). In a
    # data.table, `pulse[, columnNumber+2]` evaluates the j expression and
    # yields the number itself (3 on the first pass) rather than the column,
    # and data.frame() recycles that scalar down every row -- consistent with
    # the reported Rating of 3 for every Rt1 row. `pulse[[columnNumber+2]]`
    # would extract the column vector instead.
    temp.data<-data.frame(Student=pulse$Student, Type=pulse$Type, 
                          timePeriod=timePeriods[columnNumber], week=weeks[columnNumber], 
                          Rating=pulse[, columnNumber+2])
    # Append this period's rows to the accumulated result (the object is
    # regrown on every iteration).
    pulse1<-rbind(pulse1, temp.data)  
  }

此代码产生的输出是:

 Student Type timePeriod  week Rating
1  789331  SNR        Rt1 Jan11      3
2  805933  SNR        Rt1 Jan11      3
3  826523  SNR        Rt1 Jan11      3
4  832929  SNR        Rt1 Jan11      3
5  838607  SNR        Rt1 Jan11      3
6  841903  SNR        Rt1 Jan11      3

谢谢!!! 我在下面附上了我的数据:

> dput (pulse)
structure(list(Type = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L), .Label = c("FYS", "SNR"), class = "factor"), Student = c(789331L, 
805933L, 826523L, 832929L, 838607L, 841903L, 843618L, 852125L, 
876406L, 879972L, 885650L, 888712L, 903303L, 796882L, 827911L, 
830271L, 831487L, 834598L, 836364L, 839802L, 855524L, 873527L, 
885409L, 894218L, 928026L, 932196L, 955389L, 956952L, 957206L, 
957759L, 959200L, 962490L, 968728L, 969005L, 971179L, 976863L, 
981621L, 952797L, 965873L, 967416L, 975424L), Rt1 = c(3.6, 4.8, 
4.4, 3.8, 5, 3.2, 4.4, 3.2, 3.6, 3.8, 4, 4.4, 3.6, NaN, NaN, 
NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, NaN, 
4, 3.8, 3, NaN, 3.6, NaN, 4.4, NaN, NaN, 3.6, 3.4, 4.2, NaN), 
    Rt2 = c(3.8, 4, 4.2, 3.8, 5, 4.2, 4.4, NaN, 4, 3.8, 4.4, 
    4, 3.8, 4.4, 4.2, 4.6, 4.4, 5, 4, 3.4, 5, 3.8, 4.8, 4.4, 
    4.6, 3.2, 5, 4.2, 4.4, 4.4, 3.4, 3.8, 3.8, 3.6, 4.8, 4.4, 
    4.8, NaN, 4.75, NaN, 4), Rt3 = c(4, 4, 4.2, 3.8, 5, 4.2, 
    4.6, 3.8, 4.2, 3.8, 4, NaN, 4.6, 4, 3.6, 4.8, 4.2, 3.8, 4, 
    2, 4.6, 3.8, 4.6, 4.4, 4.8, NaN, 4.6, NaN, 4, 4.4, NaN, 4.2, 
    3.6, 4.6, 4.4, 5, 4.6, NaN, 5, 4.2, 3.4), Rt4 = c(4.2, 3.6, 
    4.4, 4, 5, NaN, 4.4, 4, 4, NaN, 4, 4.2, 4, 4, NaN, 5, 4.6, 
    4, 4, 1.8, 4.6, 4.2, 4.8, 4.6, NaN, NaN, NaN, NaN, NaN, 4.4, 
    NaN, 4.2, 3.4, 4.4, NaN, 3.8, NaN, 4, 5, NaN, NaN), Rt5 = c(3.4, 
    3.2, 4.6, 3.6, 5, 3.6, 4.4, 3.8, 4, 4, 4.2, 4.4, NaN, 2.8, 
    3.4, 5, 4.4, 4.2, 3.6, 4.2, 4.2, 4, 4.4, 5, NaN, NaN, 4, 
    NaN, 4, 3.8, 3.2, 4.2, 3.4, NaN, 4.4, NaN, 5, 4.4, 4, 4.2, 
    NaN), Rt6 = c(2.4, 3.2, 4.6, NaN, 4.4, NaN, 4.4, 3.6, 2.4, 
    4.2, 4, 4.4, 3.4, 3.6, 3.4, 4.4, NaN, 4, 3.2, 2.2, 4.4, NaN, 
    4.4, 5, NaN, NaN, NaN, 3.2, 4.4, 4, 3, 4.6, 3, NaN, 4.25, 
    NaN, 4.2, 3.6, 3.8, 4.4, NaN), Rt7 = c(3, 3.2, NaN, NaN, 
    4.2, 4, 4.4, 3.6, 2.8, 4, 4.4, 4.6, 3.8, 2.8, NaN, 4.8, 4.2, 
    4, 3.6, 3, 4.8, 4.2, 4.2, 5, NaN, NaN, 4.4, 4.4, 4, 3.2, 
    NaN, NaN, 1, 4.4, 4.2, 3.6, 3.8, 4, 1.4, 4.6, 2.8), Rt8 = c(3.2, 
    3.2, 4.6, NaN, 4.4, 3.4, 4.2, 4, 3.8, 4, 4.2, 3.8, 3.6, 1.4, 
    NaN, NaN, 4.6, NaN, 3.6, 4.2, 4, 4.4, 4.4, NaN, NaN, NaN, 
    4.6, 4.2, 4.2, 3.2, 4, 3.6, 3, 4.6, 4.8, 3.6, 4.2, 4.2, 2.2, 
    5, NaN), Rt9 = c(3.2, NaN, NaN, NaN, 3.8, 4.2, 3.6, NaN, 
    3, 4, 3.8, 4.2, 3.8, 2.2, NaN, 5, 4.8, NaN, 3.4, 2.8, 5, 
    NaN, NaN, NaN, NaN, NaN, 4.4, NaN, 4, 3, NaN, 1, 3, NaN, 
    NaN, NaN, NaN, NaN, 1.6, NaN, NaN), Rt10 = c(3.6, NaN, 4.2, 
    NaN, NaN, NaN, 4, 3.4, 3.2, 4, 4, 4, 3.6, 2, NaN, NaN, 4.4, 
    4, 3.4, 1.8, 4.2, 3.8, 3.8, 4, NaN, NaN, NaN, 4.2, 3.8, 4.2, 
    4.2, 3.2, 1.6, 4.6, NaN, 4, 5, 4, 3.4, NaN, 3.6), Rt11 = c(4, 
    3.2, 4.2, NaN, 3.6, 4.6, 4.4, 4.6, 4.2, NaN, NaN, 4.6, 4.6, 
    4.2, NaN, 5, 4.6, 4.2, 4, 4, 4.6, 4.4, 3.6, 5, NaN, NaN, 
    NaN, NaN, 4.4, 4.6, NaN, NaN, 1.6, 4.6, 5, NaN, 5, 4, NaN, 
    NaN, NaN)), row.names = c(NA, -41L), class = c("data.table", 
"data.frame"), .Names = c("Type", "Student", "Rt1", "Rt2", "Rt3", 
"Rt4", "Rt5", "Rt6", "Rt7", "Rt8", "Rt9", "Rt10", "Rt11"), .internal.selfref = <pointer: 0x0000000007b30788>)

2 个答案:

答案 0 :(得分:3)

一种可能的解决方案是先用 melt 把数据集从宽格式融合为长格式,然后再与 weeks 对照表合并(merge):

# Lookup table pairing each rating column name with its week label.
weeks_time <- data.frame(
  timePeriod = c(
    "Rt1", "Rt2", "Rt3", "Rt4", "Rt5", "Rt6",
    "Rt7", "Rt8", "Rt9", "Rt10", "Rt11"
  ),
  weeks = c(
    "Jan11", "Jan25", "Feb1", "Feb8", "Feb15", "Mar1",
    "Mar8", "Mar15", "Mar22", "Mar29", "Apr5"
  )
)

# Melt to long form: Student and Type identify each row, every remaining
# column becomes a (timePeriod, Rating) pair.
pulse_m <- melt(
  pulse,
  id.vars = c("Student", "Type"),
  variable.name = "timePeriod",
  value.name = "Rating"
)

# Attach the week labels through the one column the two tables share.
# The merged table is printed rather than stored.
merge(pulse_m, weeks_time, by = "timePeriod")

  #   timePeriod Student Type Rating weeks
  #1:        Rt1  789331  SNR    3.6 Jan11
  #2:        Rt1  805933  SNR    4.8 Jan11
  #3:        Rt1  826523  SNR    4.4 Jan11
  #4:        Rt1  832929  SNR    3.8 Jan11
  #5:        Rt1  838607  SNR    5.0 Jan11
 #---                                     
#447:       Rt11  981621  FYS    5.0  Apr5
#448:       Rt11  952797  FYS    4.0  Apr5
#449:       Rt11  965873  FYS    NaN  Apr5
#450:       Rt11  967416  FYS    NaN  Apr5
#451:       Rt11  975424  FYS    NaN  Apr5

这可以避免任何循环。

答案 1 :(得分:1)

以下是使用 dplyr 和 tidyr 的方法:

library(dplyr)
library(tidyr)

# Lookup table pairing each rating column name with its week label.
# stringsAsFactors = FALSE keeps timePeriod as character on every R version,
# so it has the same type as the key column produced by gather() below.
df.weeks <- data.frame(
  timePeriod = c(
    "Rt1", "Rt2", "Rt3", "Rt4", "Rt5", "Rt6",
    "Rt7", "Rt8", "Rt9", "Rt10", "Rt11"
  ),
  week = c(
    "Jan11", "Jan25", "Feb1", "Feb8", "Feb15", "Mar1",
    "Mar8", "Mar15", "Mar22", "Mar29", "Apr5"
  ),
  stringsAsFactors = FALSE
)

# Reshape pulse from wide to long and attach the week labels.
# gather() stacks the Rt columns in order (all Rt1 rows, then all Rt2 rows, ...).
# left_join() is used instead of merge() because merge() sorts on the join key,
# which would put the character timePeriod values in lexical order
# (Rt1, Rt10, Rt11, Rt2, ...); left_join() keeps the row order of the left table.
# select() then puts the columns in the requested order.
# Note: this overwrites the original wide `pulse` with the long result.
pulse <- pulse %>%
  gather(timePeriod, Rating, Rt1:Rt11) %>%
  left_join(df.weeks, by = "timePeriod") %>%
  select(Student, Type, timePeriod, week, Rating)