您好,我有一个输入(i/p)数据框,如下所示:
# Input data: each row is a comma-separated list of "(flag_mark)" blocks,
# where flag is 0/1 and mark is a zero-padded number.
df <- data.frame(
  value = c(
    "(1_00),(0_04),(1_08),(0_12),(1_14)",
    "(1_15),(0_22),(1_29)",
    "(0_30),(1_38),(0_40),(1_44)",
    "(0_45),(1_57),(0_59)",
    "(0_15),(1_21),(0_26),(0_29)"
  ),
  stringsAsFactors = FALSE
)
我无法从上面的数据框计算出由“sample_calc”列得到的“output”列。“sample_calc”的计算方式如下:
For row 1, i.e. `(1_00),(0_04),(1_08),(0_12),(1_14)`: take the number after "_" in the
second block (04), subtract the number after "_" in the first block (00), and multiply
the difference by the number before "_" in the first block, i.e. (4-0)*1.
Do the same for the 3rd block against the 2nd block, and so on. Finally, add up all the terms: `(4-0)*1 + (8-4)*0 + (12-8)*1 + (14-12)*0`
期望的输出(o/p)数据框 df1:
# Expected result: the input strings, the written-out calculation for each
# row, and the numeric value that calculation evaluates to.
df1 <- data.frame(
  value = c(
    "(1_00),(0_04),(1_08),(0_12),(1_14)",
    "(1_15),(0_22),(1_29)",
    "(0_30),(1_38),(0_40),(1_44)",
    "(0_45),(1_57),(0_59)",
    "(1_00),(0_07),(1_14)",
    "(0_15),(1_21),(0_26),(0_29)"
  ),
  sample_calc = c(
    "(4-0)*1 + (8-4)*0 +(12-8)*1 + (14-12)*0",
    "(22-15)*1 + (29-22)*0",
    "(38-30)*0 + (40-38)*1 + (44-40)*0",
    "(57-45)*0 + (59-57)*1",
    "(7-0)*1 + (14-7)*0",
    "(21-15)*0 + (26-21)*1 + (29-26)*0"
  ),
  output = c(8, 7, 2, 2, 7, 5),
  stringsAsFactors = FALSE
)
答案 0 :(得分:1)
首先,我将字符串转换为数值向量,代码如下:
# Turn every row of df$value into one flat numeric vector
# (flag1, mark1, flag2, mark2, ...): drop the parentheses, split the row
# into blocks on ",", then split each block on "_".
foo <- lapply(
  strsplit(gsub("\\(|\\)", "", df$value), ","),
  function(blocks) {
    pieces <- strsplit(blocks, "_")
    as.numeric(unlist(pieces))
  }
)
foo 包含相同的信息,但以数值形式存储:
foo[1:2]
[[1]]
[1] 1 0 0 4 1 8 0 12 1 14
[[2]]
[1] 1 15 0 22 1 29
接下来,我们只需将您的计算应用于 foo 的每个子列表(取“第二组”元素的索引 y,并应用所需逻辑:sum((x[y] - x[y - 2]) * x[y - 3])):
# For each parsed row x = (flag1, mark1, flag2, mark2, ...), sum
# (mark[i + 1] - mark[i]) * flag[i] over consecutive blocks.
# vapply() guarantees a numeric result even when foo is empty.
vapply(foo, function(x) {
  # Positions of the marks from the second block onward (4, 6, 8, ...).
  # Built with seq_len() so a row with fewer than two blocks yields no
  # positions (sum is 0) instead of seq(4, length(x), 2) raising an error.
  y <- 2 * seq_len(max(length(x) %/% 2 - 1, 0)) + 2
  sum((x[y] - x[y - 2]) * x[y - 3])
}, numeric(1))
[1] 8 7 2 2 7 5
答案 1 :(得分:0)
#' Sum the flag-weighted gaps between consecutive "(flag_mark)" blocks.
#'
#' Each cell of the chosen column is a comma-separated list of blocks such as
#' "(1_00),(0_04),(1_08)". For every pair of consecutive blocks the difference
#' of their marks (the number after "_") is multiplied by the flag (the number
#' before "_") of the earlier block, and the products are summed per row.
#'
#' @param df A data frame.
#' @param col Name (character scalar) of the column of `df` to process.
#' @return An unnamed numeric vector with one value per row of `df`. A row
#'   with fewer than two blocks yields 0.
process.df.column <- function(df, col) {
  # The column is taken from `col`; it is no longer hard-coded to "value".
  if (!is.character(col) || length(col) != 1L || !(col %in% names(df))) {
    stop("`col` must be the name of a single column of `df`", call. = FALSE)
  }

  process.row <- function(s) {
    # "(1_00),(0_04)" -> c("1_00", "0_04") -> list(c("1", "00"), c("0", "04"))
    bs <- gsub("[()]", "", unlist(strsplit(s, ",", fixed = TRUE)))
    parts <- strsplit(bs, "_", fixed = TRUE)
    firsts <- as.numeric(vapply(parts, function(p) p[1], character(1)))
    seconds <- as.numeric(vapply(parts, function(p) p[2], character(1)))
    # diff() and negative indexing are both empty for a single block, so the
    # sum is 0 there rather than the NA produced by 2:l / 1:(l - 1).
    sum(diff(seconds) * firsts[-length(firsts)])
  }

  # as.character() so factor columns are read by label, not by integer code.
  vapply(as.character(df[[col]]), process.row, numeric(1), USE.NAMES = FALSE)
}
process.df.column(df1, "value")
[1] 8 7 2 2 7 5
答案 2 :(得分:0)
这是基础R解决方案:
# Base R: strip the parentheses, split each row on "_" and ",", giving the
# interleaved vector (flag1, mark1, flag2, mark2, ...). The result per row is
# the sum over consecutive blocks of flag[i] * (mark[i + 1] - mark[i]).
# vapply() keeps the result numeric even for a zero-row data frame.
df$output <- vapply(
  strsplit(gsub("[()]", "", df$value), "[_,]"),
  function(x) {
    x <- as.numeric(x)
    is_odd <- seq_along(x) %% 2 == 1
    flags <- x[is_odd]   # numbers before "_"
    marks <- x[!is_odd]  # numbers after "_"
    sum(head(flags, -1) * diff(marks))
  },
  numeric(1)
)
# df
# value output
# 1 (1_00),(0_04),(1_08),(0_12),(1_14) 8
# 2 (1_15),(0_22),(1_29) 7
# 3 (0_30),(1_38),(0_40),(1_44) 2
# 4 (0_45),(1_57),(0_59) 2
# 5 (0_15),(1_21),(0_26),(0_29) 5
tidyverse 的等价写法:
library(tidyverse)
# tidyverse: one row per block, split each block into flag `a` and mark `b`,
# then per original row sum a[i] * (b[i + 1] - b[i]) over consecutive blocks.
# mutate() replaces the superseded mutate_at(); TRUE replaces T.
df %>%
  rowid_to_column() %>%
  separate_rows(value, sep = ",") %>%
  mutate(value = gsub("[()]", "", value)) %>%
  separate(value, c("a", "b"), sep = "_", convert = TRUE) %>%
  group_by(rowid) %>%
  summarize(output = sum(head(a, -1) * diff(b))) %>%
  select(-rowid) %>%
  cbind(df, .)
# value output
# 1 (1_00),(0_04),(1_08),(0_12),(1_14) 8
# 2 (1_15),(0_22),(1_29) 7
# 3 (0_30),(1_38),(0_40),(1_44) 2
# 4 (0_45),(1_57),(0_59) 2
# 5 (0_15),(1_21),(0_26),(0_29) 5