在数据框中插补额外的行,作为相邻现有行的值之间的中间点

时间:2019-05-31 09:00:57

标签: r dataframe time-series missing-data

假设您在r中具有以下时间序列数据集:

# Build the small example time series used throughout the question.
n <- 3
set.seed(1)  # make the rnorm() draw reproducible
# The result must be assigned to `df`: without the assignment `df` still
# refers to stats::df (the F density function), and print(df) would show that
# function instead of the data.
df <- data.frame(Day = rep("Mon", n),
                 Time = 1:n,
                 Temper = round(rnorm(n, 4, 2), 0))
print(df)

  Day Time Temper
  Mon    1      3
  Mon    2      4
  Mon    3      2

现在说您希望在相邻现有值之间添加中点作为数据框中的额外行。例如,假设您希望在每对相邻值之间添加确切的中点,以产生以下新数据框:

  Day Time Temper
  Mon    1      3
  Mon  1.5    3.5
  Mon    2      4
  Mon  2.5      3
  Mon    3      2

有什么有效的R代码可以在更大的数据集上实现这一目标?

如果此代码还可以使用不只是精确中点的值填充数据框,那就太好了,例如“三分之一”数据点:

  Day Time Temper
  Mon    1      3
  Mon 1.33   3.33
  Mon    2      4
  Mon 2.33   3.33
  Mon    3      2

3 个答案:

答案 0 :(得分:1)

这是一个使用dplyr和purrr的想法。我们首先用rbind插入值为NA的行,然后填写这些NA,即

purrr

给出,

rbind

答案 1 :(得分:1)

另一种解决方案:

library(tidyverse)

# Duplicate every row, overwrite each duplicate (the even positions) with a
# point one third of the way towards the next original row, round to two
# decimals, and finally drop the trailing duplicate, which has no successor.
df %>%
  slice(rep(1:n(), each = 2)) %>%
  mutate_at(
    c("Time", "Temper"),
    function(v) {
      dup_pos <- seq(2, n(), by = 2)
      # At a duplicate position lag(v) equals v itself and lead(v) is the next
      # original value, so this is v + (next - v) / 3.
      shifted <- v + (lead(v) - lag(v)) * (1 / 3)
      replace(v, dup_pos, shifted[dup_pos])
    }
  ) %>%
  mutate_at(c("Time", "Temper"), round, digits = 2) %>%
  slice(-n())
#>   Day Time Temper
#> 1 Mon 1.00   3.00
#> 2 Mon 1.33   3.33
#> 3 Mon 2.00   4.00
#> 4 Mon 2.33   3.33
#> 5 Mon 3.00   2.00

答案 2 :(得分:0)

您可能需要对这3个解决方案进行基准测试(请参见microbenchmark包),具体取决于因子变量和数值变量的数量。

使用基础R(base R)

# Example data. `Day` must be a factor: the code below converts every column
# to numeric and later restores the factor columns from their levels.
# Since R 4.0.0 data.frame() no longer turns strings into factors by default,
# so request it explicitly; otherwise as.numeric("Mon") yields NA (with a
# warning) and the is.factor() loops further down never run.
n <- 3
set.seed(1)
dframe <- data.frame(Day = rep("Mon", n),
                     Time = 1:n,
                     Temper = round(rnorm(n, 4, 2), 0),
                     stringsAsFactors = TRUE)

# --- convert factor to numeric
# lapply() keeps one list element per column, so the result stays a proper
# data frame even for a single-row input (sapply() would simplify that case
# to a plain vector).
mframe <- as.data.frame(lapply(dframe, as.numeric))

# --- function to use on variables 
#' Insert one interpolated point between every pair of adjacent values.
#'
#' For each adjacent pair (x[i], x[i + 1]) the new point is
#' x[i] + coef * (x[i + 1] - x[i]), so coef = 1/2 gives exact midpoints and
#' coef = 1/3 gives "one third" points.
#'
#' @param x Numeric vector of values in row order.
#' @param coef Fraction of the step from x[i] towards x[i + 1] at which the
#'   new point is placed.
#' @return Numeric vector of length 2 * length(x) - 1 with original and new
#'   values interleaved. Input shorter than two elements has no adjacent pair
#'   and is returned as is (converted to numeric).
pfun <- function(x, coef = 1/4) {
  n <- length(x)
  # 1:(n - 1) counts backwards for n < 2 (1:0 is c(1, 0)), which would
  # duplicate a single value or fail on empty input, so handle that up front.
  if (n < 2L) {
    return(as.numeric(x))
  }

  starts <- x[-n]
  newp <- starts + diff(x, lag = 1) * coef

  # rbind() stacks originals over new points; c() reads the matrix column by
  # column, which interleaves them. The last original value is appended.
  c(rbind(starts, newp), x[n])
}

# --- base R way: run pfun (default coef) down every column of mframe

# Other ways to call it:
#   pfun(mframe$Time)
#   sapply(mframe, pfun, .5)
apply(mframe, MARGIN = 2, FUN = pfun)

dframe_final <- as.data.frame(apply(mframe, MARGIN = 2, FUN = pfun))
# str(dframe_final)

# --- turn the numeric codes of Day (or any other factor column of dframe)
# --- back into a factor that carries the original level labels
for (col in names(dframe)[sapply(dframe, is.factor)]) {
  dframe_final[[col]] <- factor(dframe_final[[col]])
  levels(dframe_final[[col]]) <- levels(dframe[[col]])
}

dplyr

# --- dplyr way
library(dplyr)
library(purrr)

# Apply `levels` to the factor columns of dframe via map_if; the other
# columns are passed through. `lfactors` is not referenced again in the
# visible code.
lfactors <- dframe %>% 
        map_if(is.factor, levels)

# Pipeline: convert dframe to a tibble, turn every column into numeric, run
# pfun (with its default coef) on every column, then convert the columns that
# are factors in dframe back with factor().
# NOTE(review): this relies on map_dfr() reassembling the per-column vectors
# into columns of one data frame -- confirm with the installed purrr/dplyr.
dframe2 <- dframe %>% 
         as_tibble %>%
        map_dfr(as.numeric) %>% 
        map_dfr(pfun) %>% 
        mutate_at(.vars = names(dframe)[sapply(dframe, is.factor)], .funs = factor)

# --- get Day's or other factors back
# For each column that is a factor in dframe: rebuild the factor from the
# values in dframe2, then overwrite its levels with the original labels.
for(col in names(dframe)[sapply(dframe, is.factor)]){
        dframe2[[col]] <- factor(dframe2[[col]])
        levels( dframe2[[col]] ) <- levels(dframe[[col]])
}

data.table

# --- data.table way
library(data.table)

# Copy dframe into a data.table, make every column numeric, then run pfun
# (default coef) on every column; each step is one chained `[` call.
dframe3 <- data.table(dframe)[, lapply(.SD, as.numeric)][, lapply(.SD, pfun)]

# --- restore Day (or any other factor column of dframe): rebuild the factor
# --- by reference, then put the original level labels back
for (col in names(dframe)[sapply(dframe, is.factor)]) {
  dframe3[, (col) := factor(get(col))]
  levels(dframe3[[col]]) <- levels(dframe[[col]])
}