我有这个数据框:
# Example data as dput() output: a tibble (classes tbl_df/tbl/data.frame) with
# 29 rows and three columns -- Name, StimulusName and integer FixationSeq.
# NA entries in FixationSeq are the gaps between numbered fixations that the
# question wants to label and measure.
# NOTE(review): the `spec` attribute (collector_* classes) is presumably left
# over from a readr import and is not needed to reproduce the problem -- confirm.
df<-structure(list(Name = c("sub1", "sub1", "sub1", "sub1", "sub1",
"sub1", "sub1", "sub1", "sub1", "sub1", "sub1", "sub1", "sub1",
"sub1", "sub1", "sub1", "sub1", "sub1", "sub1", "sub1", "sub1",
"sub1", "sub1", "sub2", "sub2", "sub2", "sub2", "sub2", "sub2"
), StimulusName = c("Alpha11", "Alpha11", "Alpha11", "Alpha11",
"Alpha11", "Alpha11", "Alpha11", "Alpha11", "Alpha11", "Alpha11",
"Alpha11", "Alpha11", "Alpha11", "Alpha11", "Alpha11", "Alpha11",
"Alpha11", "Alpha11", "Alpha12", "Alpha12", "Alpha12", "Alpha12",
"Alpha12", "Alpha11", "Alpha11", "Alpha11", "Alpha11", "Alpha11",
"Alpha11"), FixationSeq = c(2L, 2L, 2L, 2L, NA, NA, NA, NA, 3L,
3L, 3L, 3L, 3L, NA, NA, NA, NA, NA, 1L, NA, NA, 2L, NA, NA, NA,
NA, NA, 2L, 2L)), row.names = c(NA, -29L), class = c("tbl_df",
"tbl", "data.frame"), spec = structure(list(cols = list(Name = structure(list(), class = c("collector_character",
"collector")), StimulusName = structure(list(), class = c("collector_character",
"collector")), FixationSeq = structure(list(), class = c("collector_integer",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector"))), class = "col_spec"))
在 FixationSeq 列中有一些编号(在我的示例中,当 Name = sub1 且 StimulusName = Alpha11 时为 2 和 3)。在这些编号之间,存在由 NA 填充的段;在 3 之后还有一个由 NA 填充的段。
我希望能够创建一个新列 SaccadeCount,并为每一个 NA 段(作为一个整体,可能跨越多行)在 SaccadeCount 的对应行中填入一个递增的数字标签。
此外,我还想再添加一列 SaccadeDuration,列出每个 NA 段所占的行数。因此,在示例 df 中,位于 2 和 3 之间的 NA 段所对应的行将填入 '3',因为这是 2 和 3 之间的总行数。
我想使用 dplyr 完成此操作,并按 Name 和 StimulusName 列对操作进行分组。
输出可能看起来像这样:
Name StimulusName FixationSeq SaccadeCount SaccadeDuration
sub1 Alpha11 2
sub1 Alpha11 2
sub1 Alpha11 2
sub1 Alpha11 2
sub1 Alpha11 NA 1 3
sub1 Alpha11 NA 1 3
sub1 Alpha11 NA 1 3
sub1 Alpha11 3
sub1 Alpha11 3
sub1 Alpha11 3
sub1 Alpha11 3
sub1 Alpha11 3
sub1 Alpha11 3
sub1 Alpha11 NA 2 5
sub1 Alpha11 NA 2 5
sub1 Alpha11 NA 2 5
sub1 Alpha11 NA 2 5
sub1 Alpha11 NA 2 5
sub1 Alpha12 1
sub1 Alpha12 NA 1 2
sub1 Alpha12 NA 1 2
sub1 Alpha12 2
sub1 Alpha12 NA 2 1
sub2 Alpha11 NA 1 4
sub2 Alpha11 NA 1 4
sub2 Alpha11 NA 1 4
sub2 Alpha11 NA 1 4
sub2 Alpha11 2
sub2 Alpha11 2
非常感谢您的时间和帮助。
答案 0 :(得分:2)
// Builds a URI whose `content` query parameter is a local file path, sends an
// HTTP GET to that URI with HttpClient, and reads the response body as a string.
// NOTE(review): this C# fragment looks unrelated to the surrounding R
// discussion (possibly a paste/extraction artifact) -- confirm before relying on it.
// NOTE(review): xmlFile is interpolated into the query string without any
// escaping, and the response text is never used -- verify the intent.
使用var xmlFile = @"C:\Users\lvrabel\Desktop\Crajsons\finals\Output.xml";
var uri = new Uri($"http://192.168.254.20:5050/token?content={xmlFile}", UriKind.Absolute);
using (var client = new HttpClient())
{
var result = await client.GetAsync(uri);
var respnseText = await result.Content.ReadAsStringAsync();
}
代码:
data.table
您可以以dplyr方式使用library(data.table)
# Label the runs of missing fixations inside one group of rows.
#
# x: a data frame (or a data.table subset such as .SD) that has a
#    FixationSeq column.
# Returns x with two integer columns added:
#   SaccadeCount    - 1, 2, ... for each consecutive run of NA in
#                     FixationSeq; NA on rows where FixationSeq is present.
#   SaccadeDuration - number of rows in that NA run; NA elsewhere.
fun1 <- function(x) {
  na.ind <- is.na(x$FixationSeq)
  # Run ids over the whole column, restricted to the NA rows, then renumbered
  # so the NA runs are labelled 1..k in order of appearance.
  na.vals <- rleidv(rleidv(na.ind)[na.ind])
  # Pre-fill with integer NA (not logical NA) so the column type is integer
  # even for a group without any NA rows; mixed logical/integer results can
  # violate data.table's requirement that column types match across groups.
  # rep(..., nrow(x)) also keeps a zero-row input from erroring.
  x$SaccadeCount <- rep(NA_integer_, nrow(x))
  x$SaccadeCount[na.ind] <- na.vals
  # Each run's length, repeated once per row of that run.
  na.rle <- rle(na.vals)
  x$SaccadeDuration <- rep(NA_integer_, nrow(x))
  x$SaccadeDuration[na.ind] <- rep(na.rle$lengths, na.rle$lengths)
  x
}
# Convert df to a data.table by reference, then run fun1 on each
# (Name, StimulusName) subset and bind the per-group results together.
setDT(df)[, fun1(.SD), by = c("Name", "StimulusName")]
:
fun1
结果:
# Same per-group computation through dplyr: do() calls fun1 once per
# (Name, StimulusName) group, with `.` standing for that group's rows.
ans <- dplyr::do(group_by(df, Name, StimulusName), fun1(.))
# Name StimulusName FixationSeq SaccadeCount SaccadeDuration
#1: sub1 Alpha11 2 NA NA
#2: sub1 Alpha11 2 NA NA
#3: sub1 Alpha11 2 NA NA
#4: sub1 Alpha11 2 NA NA
#5: sub1 Alpha11 2 NA NA
#6: sub1 Alpha11 2 NA NA
#7: sub1 Alpha11 2 NA NA
#8: sub1 Alpha11 2 NA NA
#9: sub1 Alpha11 2 NA NA
#10: sub1 Alpha11 2 NA NA
#11: sub1 Alpha11 2 NA NA
#12: sub1 Alpha11 2 NA NA
#13: sub1 Alpha11 2 NA NA
#14: sub1 Alpha11 2 NA NA
#15: sub1 Alpha11 2 NA NA
#16: sub1 Alpha11 2 NA NA
#17: sub1 Alpha11 2 NA NA
#18: sub1 Alpha11 2 NA NA
#19: sub1 Alpha11 2 NA NA
#20: sub1 Alpha11 2 NA NA
#21: sub1 Alpha11 2 NA NA
#22: sub1 Alpha11 NA 1 5
#23: sub1 Alpha11 NA 1 5
#24: sub1 Alpha11 NA 1 5
#25: sub1 Alpha11 NA 1 5
#26: sub1 Alpha11 NA 1 5
#27: sub1 Alpha1 9 NA NA
#28: sub1 Alpha1 9 NA NA
#29: sub1 Alpha1 9 NA NA
#30: sub1 Alpha1 9 NA NA
#31: sub1 Alpha1 9 NA NA
#32: sub1 Alpha1 9 NA NA
#33: sub1 Alpha1 9 NA NA
# Name StimulusName FixationSeq SaccadeCount SaccadeDuration
fun1 是对每个组完成该任务的函数。组由 Name 和 StimulusName 定义。参见 ?rle 和 ?rleidv。先用 NA 值预填充新列,然后在需要的位置填入新值。
答案 1 :(得分:1)
这应该可以做到,不过也许有更简单的方法。第一个 mutate 标记每个 NA 段的起点;group_by 和第二个 mutate 计算每个段中 NA 的行数。
library(dplyr)
# Number the NA runs of FixationSeq and record each run's length, separately
# for every (Name, StimulusName) group as the question asks.
# Rows where FixationSeq is present get 0 in both new columns.
df %>%
  # Group first so run numbering restarts in each group and an NA run never
  # spills across a group boundary.
  group_by(Name, StimulusName) %>%
  mutate(
    # A run starts on an NA row whose predecessor is not NA. default = FALSE
    # treats the first row of a group as preceded by a non-NA row, so a run
    # that opens the group is detected too.
    SaccadeCount = cumsum(
      is.na(FixationSeq) & !lag(is.na(FixationSeq), default = FALSE)
    ) * is.na(FixationSeq)
  ) %>%
  # Within a group, every run has its own SaccadeCount, so n() is its length.
  group_by(Name, StimulusName, SaccadeCount) %>%
  mutate(SaccadeDuration = n() * is.na(FixationSeq)) %>%
  ungroup()
答案 2 :(得分:1)
使用dplyr
:
# Per (Name, StimulusName) group: x flags the NA rows, count numbers the NA
# runs (0 on non-NA rows), and dur holds the run length (NA on non-NA rows).
df %>%
  group_by(Name, StimulusName) %>%
  mutate(
    x = is.na(FixationSeq),
    # A run starts where x switches value (or at row 1) and the row is NA.
    count = x * cumsum(x & c(TRUE, diff(x) != 0L))
  ) %>%
  group_by(Name, StimulusName, count) %>%
  # Start from integer NA and fill in the group size only for real runs.
  mutate(dur = replace(rep(NA_integer_, n()), count != 0L, n()))
对应的(更简洁的)data.table
版本:
library(data.table)
# Convert df to a data.table by reference.
setDT(df)
# count: number the NA runs inside each (Name, StimulusName) group; non-NA
# rows get 0.
df[, count := {
  gap <- is.na(FixationSeq)
  gap * cumsum(gap & c(TRUE, diff(gap) != 0L))
}, by = .(Name, StimulusName)]
# dur: run length, written only on rows that belong to a run (count != 0);
# the remaining rows stay NA.
df[count != 0L, dur := .N, by = .(Name, StimulusName, count)]
    Name StimulusName FixationSeq count dur
 1: sub1      Alpha11           2     0  NA
 2: sub1      Alpha11           2     0  NA
 3: sub1      Alpha11           2     0  NA
 4: sub1      Alpha11           2     0  NA
 5: sub1      Alpha11          NA     1   4
 6: sub1      Alpha11          NA     1   4
 7: sub1      Alpha11          NA     1   4
 8: sub1      Alpha11          NA     1   4
 9: sub1      Alpha11           3     0  NA
10: sub1      Alpha11           3     0  NA
11: sub1      Alpha11           3     0  NA
12: sub1      Alpha11           3     0  NA
13: sub1      Alpha11           3     0  NA
14: sub1      Alpha11          NA     2   5
15: sub1      Alpha11          NA     2   5
16: sub1      Alpha11          NA     2   5
17: sub1      Alpha11          NA     2   5
18: sub1      Alpha11          NA     2   5
19: sub1      Alpha12           1     0  NA
20: sub1      Alpha12          NA     1   2
21: sub1      Alpha12          NA     1   2
22: sub1      Alpha12           2     0  NA
23: sub1      Alpha12          NA     2   1
24: sub2      Alpha11          NA     1   4
25: sub2      Alpha11          NA     1   4
26: sub2      Alpha11          NA     1   4
27: sub2      Alpha11          NA     1   4
28: sub2      Alpha11           2     0  NA
29: sub2      Alpha11           2     0  NA
    Name StimulusName FixationSeq count dur
如果需要,将count == 0
更改为NA
:
# Replace the 0 placeholder on non-NA rows with a missing value, in place.
df[count == 0L, count := NA_integer_]
如问题所示,我不会将其更改为'blank'(""
),因为这会将列强制为character
,并使数字无用,无法进行进一步的分析。
对 cumsum(c(TRUE, diff(x) != 0L) & x) * x 这一部分的逐步解释:
# Toy vector with two runs of NA separated by a non-NA value.
v <- c(1, 1, NA, NA, 1, NA, NA, NA)
# x is TRUE exactly where v is missing.
x <- is.na(v)
x
# [1] FALSE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE
# diff() on the logical vector: 1 where an NA run begins, -1 where one ends,
# 0 where nothing changes. The result is one element shorter than x.
diff(x)
# [1]  0  1  0 -1  1  0  0
# Any non-zero difference marks a switch between NA and non-NA.
diff(x) != 0L
# [1] FALSE  TRUE FALSE  TRUE  TRUE FALSE FALSE
# Prepend TRUE so position 1 counts as a switch (restoring the length of x),
# then keep only the switches that land on an NA row: the first row of each run.
c(TRUE, diff(x) != 0L) & x
# [1] FALSE FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE
# The running total of run starts numbers the runs; non-NA rows still carry
# the number of the preceding run at this point.
cumsum(c(TRUE, diff(x) != 0L) & x)
# [1] 0 0 1 1 1 2 2 2
# Multiplying by x resets the non-NA rows to 0.
cumsum(c(TRUE, diff(x) != 0L) & x) * x
# [1] 0 0 1 1 0 2 2 2
希望其余部分相当简单。