假设您在 R 中有以下时间序列数据集:
# Build a small reproducible example data set.
n <- 3
set.seed(1)
# The result must be assigned to `df`; without the assignment, `print(df)`
# would print the `stats::df` function (F-distribution density) instead of
# the data.
df <- data.frame(
  Day = rep("Mon", n),
  Time = seq_len(n),
  Temper = round(rnorm(n, 4, 2), 0)
)
print(df)
Day Time Temper
Mon 1 3
Mon 2 4
Mon 3 2
现在假设您希望在相邻的现有值之间添加中点,作为数据框中的额外行。例如,假设您希望在每对相邻值之间添加精确的中点,以得到以下新数据框:
Day Time Temper
Mon 1 3
Mon 1.5 3.5
Mon 2 4
Mon 2.5 3
Mon 3 2
有什么有效的R代码可以在更大的数据集上实现这一目标?
如果此代码还可以使用不只是精确中点的值填充数据框,那就太好了,例如“三分之一”数据点:
Day Time Temper
Mon 1 3
Mon 1.33 3.33
Mon 2 4
Mon 2.33 3.33
Mon 3 2
答案 0 :(得分:1)
这是使用 `dplyr` 和 `purrr` 的一个思路。我们首先用 `rbind` 在每一行之后插入一行 `NA`,然后填充这些 NA。
(注:该答案的原始代码及其输出在提取过程中丢失,此处原有的内容是与本问题无关的 JavaScript 片段和堆栈跟踪。)
答案 1 :(得分:1)
另一种解决方案:
library(tidyverse)
# Duplicate every row, then overwrite each duplicate (the even-numbered rows)
# with a point one third of the way from the current original row to the
# next one. The final duplicate has no successor, so it is dropped at the end.
df %>%
  slice(rep(1:n(), each = 2)) %>%
  mutate_at(c("Time", "Temper"), function(x) {
    even_rows <- seq(2, n(), 2)
    # On an even row, lag(x) is the original value and lead(x) the next one.
    shifted <- x + (1 / 3) * (lead(x) - lag(x))
    replace(x, even_rows, shifted[even_rows])
  }) %>%
  mutate_at(c("Time", "Temper"), round, 2) %>%
  slice(-n())
#> Day Time Temper
#> 1 Mon 1.00 3.00
#> 2 Mon 1.33 3.33
#> 3 Mon 2.00 4.00
#> 4 Mon 2.33 3.33
#> 5 Mon 3.00 2.00
答案 2 :(得分:0)
您可能需要对这 3 种解决方案进行基准测试(请参见 microbenchmark 包),哪种更快取决于因子变量和数值变量的数量。
n <- 3
set.seed(1)
# stringsAsFactors = TRUE keeps `Day` a factor on R >= 4.0.0, where the
# default became FALSE. The numeric conversion below and the is.factor()
# checks used later to restore factor columns both rely on it; with a
# character column, as.numeric() would yield NA.
dframe <- data.frame(Day = rep("Mon", n),
                     Time = seq_len(n),
                     Temper = round(rnorm(n, 4, 2), 0),
                     stringsAsFactors = TRUE)
# --- convert factor to numeric
# Factors become their integer level codes. lapply() converts column by
# column instead of going through an intermediate matrix.
mframe <- as.data.frame(lapply(dframe, as.numeric))
# --- function to use on variables
# Insert one interpolated point between each pair of adjacent values.
#
# x    : numeric vector of observations, in order.
# coef : fraction of the way from each value to the next at which the new
#        point is placed (0.5 gives the midpoint, 1/3 the "third" point).
#
# Returns a vector of length 2 * length(x) - 1 in which original and
# interpolated values alternate. Inputs with fewer than two elements have no
# adjacent pair and are returned unchanged.
#
# Example: pfun(c(1, 2, 3), coef = 0.5) gives 1.0 1.5 2.0 2.5 3.0
pfun <- function(x, coef = 1/4) {
  n_obs <- length(x)
  # Guard: 1:(n_obs - 1) would count backwards here, duplicating a single
  # value or failing on an empty vector.
  if (n_obs < 2L) {
    return(x)
  }
  left <- x[-n_obs]
  newp <- left + diff(x, lag = 1) * coef
  # rbind() stacks originals over the new points; c() then reads the matrix
  # column by column, which interleaves them.
  c(rbind(left, newp), x[n_obs])
}
# --- base R way
# Alternatives for a single column or a different coefficient:
#   pfun(mframe$Time)
#   sapply(mframe, pfun, .5)
# Show the interpolated columns, then keep them as a data frame.
apply(mframe, 2, pfun)
dframe_final <- as.data.frame(apply(mframe, 2, pfun))
# str(dframe_final)
# --- get Day's or other factors back
# Each column that was a factor in `dframe` is converted back to a factor
# and given the original level labels.
for (col in names(Filter(is.factor, dframe))) {
  dframe_final[[col]] <- factor(dframe_final[[col]])
  levels(dframe_final[[col]]) <- levels(dframe[[col]])
}
# --- dplyr way
library(dplyr)
library(purrr)
# Levels of each factor column; non-factor columns are passed through as-is.
lfactors <- map_if(dframe, is.factor, levels)
# Convert every column to numeric, interpolate each one with pfun(), then
# turn the columns that were factors in `dframe` back into factors.
dframe2 <- as_tibble(dframe) %>%
  map_dfr(as.numeric) %>%
  map_dfr(pfun) %>%
  mutate_at(.vars = names(Filter(is.factor, dframe)), .funs = factor)
# --- get Day's or other factors back
# Re-apply the original level labels to each restored factor column.
for (col in names(Filter(is.factor, dframe))) {
  dframe2[[col]] <- factor(dframe2[[col]])
  levels(dframe2[[col]]) <- levels(dframe[[col]])
}
# --- data.table way
library(data.table)
# Numeric conversion followed by interpolation, each applied to every column
# through .SD.
dframe3 <- data.table(dframe)[, lapply(.SD, as.numeric)][, lapply(.SD, pfun)]
# --- get Day's or other factors back
# Convert each former factor column back to a factor, then re-apply the
# original level labels.
for (col in names(Filter(is.factor, dframe))) {
  dframe3[, (col) := factor(get(col))]
  levels(dframe3[[col]]) <- levels(dframe[[col]])
}