拼接加权指数并获得平滑值

时间:2012-05-10 22:48:18

标签: r time-series

我刚才问过this问题,所提供的答案对我来说是令人满意的。但是,现在我重新编写代码,我有一个新问题。

以下是数据:

a=1:50
b=14:63
c=rep(1,50)
wa=c(rep(2,5),rep(6,5),rep(5,5),rep(2,15),rep(0,10),rep(1,10))
wb=c(rep(5,5),rep(2,5),rep(1,5),rep(6,15),rep(5,10),rep(0,10))
wc=c(rep(3,5),rep(2,5),rep(4,5),rep(2,15),rep(5,10),rep(9,10))
z=data.frame(a,b,c,wa,wb,wc)
z$ind=rowSums(z[,1:3]*z[,4:6])/rowSums(z[,4:6])

然后我们运行这两个函数:

changeWeight<-function(x){
  test <- NA
  for(i in 2:NROW(z)) {
    test[i] <- if(z$wa[i]-z$wa[i-1]==0) 0 else 1
  }
  return(test)
}

z$test<-changeWeight()

spliceValue <- function(x) {
  splice <- 0
  for(i in 2:NROW(z)) {
    splice[i] <- if(z$test[i]==1) (rowSums(z[i,1:3]*z[i-1,4:6])/rowSums(z[i-1,4:6]))/z$ind[i] else splice[i-1]
  }
  return(splice)
}
z$splice<-spliceValue()

所以这就是问题所在。我跑这个:

z$spind=ifelse(z$splice==0,z$ind,z$splice*z$ind)

并获取以下数据框:

    a  b c wa wb wc  ind test    splice    spind
1   1 14 1  2  5  3  7.5   NA 0.0000000  7.50000
2   2 15 1  2  5  3  8.2    0 0.0000000  8.20000
3   3 16 1  2  5  3  8.9    0 0.0000000  8.90000
4   4 17 1  2  5  3  9.6    0 0.0000000  9.60000
5   5 18 1  2  5  3 10.3    0 0.0000000 10.30000
6   6 19 1  6  2  2  7.6    1 1.4473684 11.00000
7   7 20 1  6  2  2  8.4    0 1.4473684 12.15789
8   8 21 1  6  2  2  9.2    0 1.4473684 13.31579
9   9 22 1  6  2  2 10.0    0 1.4473684 14.47368
10 10 23 1  6  2  2 10.8    0 1.4473684 15.63158
11 11 24 1  5  1  4  8.3    1 1.3975904 11.60000
12 12 25 1  5  1  4  8.9    0 1.3975904 12.43855
13 13 26 1  5  1  4  9.5    0 1.3975904 13.27711
14 14 27 1  5  1  4 10.1    0 1.3975904 14.11566
15 15 28 1  5  1  4 10.7    0 1.3975904 14.95422
16 16 29 1  2  6  2 20.8    1 0.5432692 11.30000
17 17 30 1  2  6  2 21.6    0 0.5432692 11.73462
18 18 31 1  2  6  2 22.4    0 0.5432692 12.16923
19 19 32 1  2  6  2 23.2    0 0.5432692 12.60385
20 20 33 1  2  6  2 24.0    0 0.5432692 13.03846
21 21 34 1  2  6  2 24.8    0 0.5432692 13.47308
22 22 35 1  2  6  2 25.6    0 0.5432692 13.90769
23 23 36 1  2  6  2 26.4    0 0.5432692 14.34231
24 24 37 1  2  6  2 27.2    0 0.5432692 14.77692
25 25 38 1  2  6  2 28.0    0 0.5432692 15.21154
26 26 39 1  2  6  2 28.8    0 0.5432692 15.64615
27 27 40 1  2  6  2 29.6    0 0.5432692 16.08077
28 28 41 1  2  6  2 30.4    0 0.5432692 16.51538
29 29 42 1  2  6  2 31.2    0 0.5432692 16.95000
30 30 43 1  2  6  2 32.0    0 0.5432692 17.38462
31 31 44 1  0  5  5 22.5    1 1.4577778 32.80000
32 32 45 1  0  5  5 23.0    0 1.4577778 33.52889
33 33 46 1  0  5  5 23.5    0 1.4577778 34.25778
34 34 47 1  0  5  5 24.0    0 1.4577778 34.98667
35 35 48 1  0  5  5 24.5    0 1.4577778 35.71556
36 36 49 1  0  5  5 25.0    0 1.4577778 36.44444
37 37 50 1  0  5  5 25.5    0 1.4577778 37.17333
38 38 51 1  0  5  5 26.0    0 1.4577778 37.90222
39 39 52 1  0  5  5 26.5    0 1.4577778 38.63111
40 40 53 1  0  5  5 27.0    0 1.4577778 39.36000
41 41 54 1  1  0  9  5.0    1 5.5000000 27.50000
42 42 55 1  1  0  9  5.1    0 5.5000000 28.05000
43 43 56 1  1  0  9  5.2    0 5.5000000 28.60000
44 44 57 1  1  0  9  5.3    0 5.5000000 29.15000
45 45 58 1  1  0  9  5.4    0 5.5000000 29.70000
46 46 59 1  1  0  9  5.5    0 5.5000000 30.25000
47 47 60 1  1  0  9  5.6    0 5.5000000 30.80000
48 48 61 1  1  0  9  5.7    0 5.5000000 31.35000
49 49 62 1  1  0  9  5.8    0 5.5000000 31.90000
50 50 63 1  1  0  9  5.9    0 5.5000000 32.45000

所以z$spind可以达到第10条记录,但是在记录11中没有平滑过渡,其中权重(wa,wb,wc)发生变化。这是因为我的spliceValue函数除以z$ind,其在记录6的第一次拼接后与z$spind的平滑索引不匹配。因此,spliceValue函数的任何后续运行都将返回错误的数字。我为权重的第二次和所有后续更改创建了一种解决方法。

splicetValue <- function(x) {
  splicet <- 0
  for(i in 2:NROW(z)) {
    splicet[i] <- if(z$test[i]==1&z$splice[i-1]!=0) (rowSums(z[i,1:3]*z[i-1,4:6])/rowSums(z[i-1,4:6]))*z$splice[i-1] else if(z$test[i]==1&z$splice[i-1]==0) (rowSums(z[i,1:3]*z[i-1,4:6])/rowSums(z[i-1,4:6])) else z$spind[i]
  }
  return(splicet)
}
z$splicet<-splicetValue()

spliceiValue <- function(x) {
  splice <- 0
  for(i in 2:NROW(z)) {
    splice[i] <- if(z$test[i]==1) (z$splicet[i])/z$ind[i] else splice[i-1]
  }
  return(splice)
}
z$splice<-spliceiValue()

z$spind=ifelse(z$splice==0,z$ind,z$splice*z$ind)

我现在唯一的问题是整个代码需要运行的时间少于z$test==1次。

通过输入z$test==1,我得到xx=sum(z$test,na.rm=T)次。现在我不知道如何在迭代循环中包装上面的代码,以便我的最终结果如下所示:

    a  b c wa wb wc  ind test   splice    spind  splicet
1   1 14 1  2  5  3  7.5   NA 0.000000  7.50000  0.00000
2   2 15 1  2  5  3  8.2    0 0.000000  8.20000  8.20000
3   3 16 1  2  5  3  8.9    0 0.000000  8.90000  8.90000
4   4 17 1  2  5  3  9.6    0 0.000000  9.60000  9.60000
5   5 18 1  2  5  3 10.3    0 0.000000 10.30000 10.30000
6   6 19 1  6  2  2  7.6    1 1.447368 11.00000 11.00000
7   7 20 1  6  2  2  8.4    0 1.447368 12.15789 12.15789
8   8 21 1  6  2  2  9.2    0 1.447368 13.31579 13.31579
9   9 22 1  6  2  2 10.0    0 1.447368 14.47368 14.47368
10 10 23 1  6  2  2 10.8    0 1.447368 15.63158 15.63158
11 11 24 1  5  1  4  8.3    1 2.022828 16.78947 16.78947
12 12 25 1  5  1  4  8.9    0 2.022828 18.00317 18.00317
13 13 26 1  5  1  4  9.5    0 2.022828 19.21687 19.21687
14 14 27 1  5  1  4 10.1    0 2.022828 20.43056 20.43056
15 15 28 1  5  1  4 10.7    0 2.022828 21.64426 21.64426
16 16 29 1  2  6  2 20.8    1 1.098940 22.85796 22.85796
17 17 30 1  2  6  2 21.6    0 1.098940 23.73711 23.73711
18 18 31 1  2  6  2 22.4    0 1.098940 24.61626 24.61626
19 19 32 1  2  6  2 23.2    0 1.098940 25.49541 25.49541
20 20 33 1  2  6  2 24.0    0 1.098940 26.37457 26.37457
21 21 34 1  2  6  2 24.8    0 1.098940 27.25372 27.25372
22 22 35 1  2  6  2 25.6    0 1.098940 28.13287 28.13287
23 23 36 1  2  6  2 26.4    0 1.098940 29.01202 29.01202
24 24 37 1  2  6  2 27.2    0 1.098940 29.89118 29.89118
25 25 38 1  2  6  2 28.0    0 1.098940 30.77033 30.77033
26 26 39 1  2  6  2 28.8    0 1.098940 31.64948 31.64948
27 27 40 1  2  6  2 29.6    0 1.098940 32.52863 32.52863
28 28 41 1  2  6  2 30.4    0 1.098940 33.40778 33.40778
29 29 42 1  2  6  2 31.2    0 1.098940 34.28694 34.28694
30 30 43 1  2  6  2 32.0    0 1.098940 35.16609 35.16609
31 31 44 1  0  5  5 22.5    1 1.602011 36.04524 36.04524
32 32 45 1  0  5  5 23.0    0 1.602011 36.84625 36.84625
33 33 46 1  0  5  5 23.5    0 1.602011 37.64725 37.64725
34 34 47 1  0  5  5 24.0    0 1.602011 38.44826 38.44826
35 35 48 1  0  5  5 24.5    0 1.602011 39.24926 39.24926
36 36 49 1  0  5  5 25.0    0 1.602011 40.05027 40.05027
37 37 50 1  0  5  5 25.5    0 1.602011 40.85127 40.85127
38 38 51 1  0  5  5 26.0    0 1.602011 41.65228 41.65228
39 39 52 1  0  5  5 26.5    0 1.602011 42.45328 42.45328
40 40 53 1  0  5  5 27.0    0 1.602011 43.25429 43.25429
41 41 54 1  1  0  9  5.0    1 8.811059 44.05530 44.05530
42 42 55 1  1  0  9  5.1    0 8.811059 44.93640 31.04697
43 43 56 1  1  0  9  5.2    0 8.811059 45.81751 31.65573
44 44 57 1  1  0  9  5.3    0 8.811059 46.69861 32.26450
45 45 58 1  1  0  9  5.4    0 8.811059 47.57972 32.87326
46 46 59 1  1  0  9  5.5    0 8.811059 48.46082 33.48202
47 47 60 1  1  0  9  5.6    0 8.811059 49.34193 34.09079
48 48 61 1  1  0  9  5.7    0 8.811059 50.22304 34.69955
49 49 62 1  1  0  9  5.8    0 8.811059 51.10414 35.30832
50 50 63 1  1  0  9  5.9    0 8.811059 51.98525 35.91708

请注意,每当权重发生变化时,z $ spind都没有巨大的峰值。所以这是一个正确拼接的索引。由于权重改变了5次,因此需要上述代码的4次迭代。我想将这些代码包装在某种迭代循环中,这就是我想出来的,但由于我的R技能有限,这是徒劳的尝试:

xx=sum(z$test,na.rm=T)

fixValue = function(y) {
  for(q in 1:xx-1) {

    splicetValue <- function(x) {
      splicet <- 0
      for(i in 2:NROW(z)) {
        splicet[i] <- if(z$test[i]==1&z$splice[i-1]!=0) (rowSums(z[i,1:3]*z[i-1,4:6])/rowSums(z[i-1,4:6]))*z$splice[i-1] else if(z$test[i]==1&z$splice[i-1]==0) (rowSums(z[i,1:3]*z[i-1,4:6])/rowSums(z[i-1,4:6])) else z$spind[i]
      }
      return(splicet)
    }
    z$splicet<-splicetValue()

    spliceiValue <- function(x) {
      splice <- 0
      for(i in 2:NROW(z)) {
        splice[i] <- if(z$test[i]==1) (z$splicet[i])/z$ind[i] else splice[i-1]
      }
      return(splice)
    }
    z$splice<-spliceiValue()

    z$spind=ifelse(z$splice==0,z$ind,z$splice*z$ind)

  }
}

我做错了什么?

1 个答案:

答案 0 :(得分:1)

您的代码可以进一步改进,但是您唯一错过的就是让它成功:

xx=sum(z$test,na.rm=T)

fixValue = function() { ## argument is not needed
  for(q in 1:xx-1) {

    splicetValue <- function(x) {
      splicet <- 0
      for(i in 2:NROW(z)) {
        splicet[i] <- if(z$test[i]==1&z$splice[i-1]!=0) (rowSums(z[i,1:3]*z[i-1,4:6])/rowSums(z[i-1,4:6]))*z$splice[i-1] else if(z$test[i]==1&z$splice[i-1]==0) (rowSums(z[i,1:3]*z[i-1,4:6])/rowSums(z[i-1,4:6])) else z$spind[i]
      }
      return(splicet)
    }
    z$splicet<-splicetValue()

    spliceiValue <- function(x) {
      splice <- 0
      for(i in 2:NROW(z)) {
        splice[i] <- if(z$test[i]==1) (z$splicet[i])/z$ind[i] else splice[i-1]
      }
      return(splice)
    }
    z$splice<-spliceiValue()

    z$spind=ifelse(z$splice==0,z$ind,z$splice*z$ind)

  }
return(z) ## this one
}
fixValue()