我有一个数据框如下
SPI_ICONVERTICALSPACING
对于每一列,只有当某个值为 1 或 -1,并且与同一 chr 中上一行的值一致时,我才想保留该值;如果不一致,我想把该值转换为零。
例如(不使用上面的dput)
structure(list(chr = 1, leftPos = 240000, OC_AH_026C.res = 0,
OC_AH_026C.1.res = 0, OC_AH_026C.2.res = 0, OC_AH_026T.res = 0,
OC_AH_058T.res = 0, OC_AH_084C.res = 0, OC_AH_084T.res = 0,
OC_AH_086C.res = 0, OC_AH_086C.1.res = 0, OC_AH_086C.2.res = 0,
OC_AH_086C.3.res = 0, OC_AH_086T.res = 0, OC_AH_088C.res = 0,
OC_AH_088T.res = 0, OC_AH_094C.res = 0, OC_AH_094C.1.res = 0,
OC_AH_094C.2.res = 0, OC_AH_094C.3.res = 0, OC_AH_094C.4.res = 0,
OC_AH_094C.5.res = 0, OC_AH_094C.6.res = 0, OC_AH_094C.7.res = 0,
OC_AH_094T.res = 0, OC_AH_096C.res = 0, OC_AH_096T.res = 0,
OC_AH_100C.res = 0, OC_AH_100C.1.res = 0, OC_AH_100T.res = 0,
OC_AH_127C.res = 0, OC_AH_127T.res = 0, OC_AH_133C.res = 0,
OC_AH_133T.res = 0, OC_ED_008C.res = 0, OC_ED_008C.1.res = 0,
OC_ED_008C.2.res = 0, OC_ED_008C.3.res = 0, OC_ED_008T.res = 0,
OC_ED_016C.res = 0, OC_ED_016T.res = 0, OC_ED_031C.res = 0,
OC_ED_031T.res = 0, OC_ED_036C.res = 0, OC_ED_036T.res = 0,
OC_GS_001C.res = 0, OC_GS_001T.res = 0, OC_QE_062C.res = 0,
OC_QE_062T.res = 0, OC_RS_010C.res = 0, OC_RS_010T.res = 0,
OC_RS_027C.res = 0, OC_RS_027C.1.res = 0, OC_RS_027C.2.res = 0,
OC_RS_027T.res = 0, OC_SH_051C.res = 0, OC_SH_051T.res = 0,
OC_ST_014C.res = 0, OC_ST_014C.1.res = 0, OC_ST_014T.res = 0,
OC_ST_016T.res = 0, OC_ST_020C.res = 0, OC_ST_020T.res = 0,
OC_ST_024C.res = 0, OC_ST_024T.res = 0, OC_ST_033C.res = 0,
OC_ST_033T.res = 0, OC_ST_034C.res = 0, OC_ST_034C.1.res = 0,
OC_ST_034C.2.res = 0, OC_ST_034T.res = 0, OC_ST_035C.res = 0,
OC_ST_035T.res = 0, OC_ST_036C.res = 0, OC_ST_036T.res = 0,
OC_ST_037T.res = 0, OC_ST_040C.res = 0, OC_ST_040T.res = 0,
OC_WG_001T.res = 0, OC_WG_002C.res = 0, OC_WG_002T.res = 0,
OC_WG_005C.res = 0, OC_WG_005T.res = 0, OC_WG_006C.res = 0,
OC_WG_006T.res = 0, OC_WG_009T.res = 0, OC_WG_019C.res = 0,
OC_WG_019T.res = 0, Means.res = 0, sd.res = 0, ind = 1L), .Names = c("chr",
"leftPos", "OC_AH_026C.res", "OC_AH_026C.1.res", "OC_AH_026C.2.res",
"OC_AH_026T.res", "OC_AH_058T.res", "OC_AH_084C.res", "OC_AH_084T.res",
"OC_AH_086C.res", "OC_AH_086C.1.res", "OC_AH_086C.2.res", "OC_AH_086C.3.res",
"OC_AH_086T.res", "OC_AH_088C.res", "OC_AH_088T.res", "OC_AH_094C.res",
"OC_AH_094C.1.res", "OC_AH_094C.2.res", "OC_AH_094C.3.res", "OC_AH_094C.4.res",
"OC_AH_094C.5.res", "OC_AH_094C.6.res", "OC_AH_094C.7.res", "OC_AH_094T.res",
"OC_AH_096C.res", "OC_AH_096T.res", "OC_AH_100C.res", "OC_AH_100C.1.res",
"OC_AH_100T.res", "OC_AH_127C.res", "OC_AH_127T.res", "OC_AH_133C.res",
"OC_AH_133T.res", "OC_ED_008C.res", "OC_ED_008C.1.res", "OC_ED_008C.2.res",
"OC_ED_008C.3.res", "OC_ED_008T.res", "OC_ED_016C.res", "OC_ED_016T.res",
"OC_ED_031C.res", "OC_ED_031T.res", "OC_ED_036C.res", "OC_ED_036T.res",
"OC_GS_001C.res", "OC_GS_001T.res", "OC_QE_062C.res", "OC_QE_062T.res",
"OC_RS_010C.res", "OC_RS_010T.res", "OC_RS_027C.res", "OC_RS_027C.1.res",
"OC_RS_027C.2.res", "OC_RS_027T.res", "OC_SH_051C.res", "OC_SH_051T.res",
"OC_ST_014C.res", "OC_ST_014C.1.res", "OC_ST_014T.res", "OC_ST_016T.res",
"OC_ST_020C.res", "OC_ST_020T.res", "OC_ST_024C.res", "OC_ST_024T.res",
"OC_ST_033C.res", "OC_ST_033T.res", "OC_ST_034C.res", "OC_ST_034C.1.res",
"OC_ST_034C.2.res", "OC_ST_034T.res", "OC_ST_035C.res", "OC_ST_035T.res",
"OC_ST_036C.res", "OC_ST_036T.res", "OC_ST_037T.res", "OC_ST_040C.res",
"OC_ST_040T.res", "OC_WG_001T.res", "OC_WG_002C.res", "OC_WG_002T.res",
"OC_WG_005C.res", "OC_WG_005T.res", "OC_WG_006C.res", "OC_WG_006T.res",
"OC_WG_009T.res", "OC_WG_019C.res", "OC_WG_019T.res", "Means.res",
"sd.res", "ind"), class = c("data.table", "data.frame"), row.names = c(NA,
-1L), .internal.selfref = <pointer: 0x103006f78>)
应该成为
chr leftPos OC_030_ST.res
1 4324 0
1 23433 1
1 34436 1
1 64755 1
3 234 1
3 354 0
4 1666 0
4 4565 0
5 34777 1
7 2345 1
7 4567 1
我曾经有一个数据框(称为 Final),其中有一个名为 Def 的列,所有 res 值都放在这一列里,因此我可以这样做:
chr leftPos OC_030_ST.res
1 4324 0
1 23433 1
1 34436 1
1 64755 1
3 234 0
3 354 0
4 1666 0
4 4565 0
5 34777 0
7 2345 1
7 4567 1
但我想我需要使用 apply 系列函数,而我不知道该如何使用它。
我试过了:
# Single-column version: label consecutive runs of equal Def values with
# rleid() (stored in the helper column ind), keep only the (chr, ind) groups
# that contain more than one row, then drop ind again.
# Note: groups of size 1 are removed as rows, not replaced with another value.
ContZ<-setDT(Final)[,ind:=rleid(Def)][, if(.N>1) .SD, .(chr, ind)][, ind:=NULL][]
但是当我尝试:
MeOut<-lapply(df_list2res,function (col){
ContZ<-setDT(df_list2res)[,ind:=rleid(col)][, if(.N>1) .SD, .(chr, ind)][, ind:=NULL][]
})
View(MeOut)
我怀疑这是因为我删除了行,而不是替换了值,不过我无法确定……
答案 0 :(得分:0)
这是dplyr中的简单示例。
library(dplyr)

# Minimal stand-in for the real data: a chromosome column and one result column.
df <- data.frame(
  chr = c(1, 1, 1, 1, 3, 3, 4, 4, 5, 7, 7),
  OC_030_ST.res = c(0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1)
)

df2 <- df %>%
  mutate(
    # chr of the preceding row; NA on the first row.
    last = lag(chr),
    # A value survives only if it is 1 or -1 AND the row is either the first
    # row or shares its chr with the row above; everything else becomes 0.
    OC_030_ST.res = ifelse(
      is.na(last) | chr == last,
      ifelse(abs(OC_030_ST.res) == 1, OC_030_ST.res, 0),
      0
    )
  ) %>%
  select(-last)

df2
这里的逻辑是:如果当前行的 chr 值等于上一行的 chr 值,并且“OC_030_ST.res”的值为 1 或 -1,则保留该值。在所有其他情况下,该值将被重置为 0。如果这不是您想要的逻辑,请告诉我。
注意,第一行是特殊情况(因为第一行没有上一行,lag 会返回 NA),因此用 is.na(last) 检查来处理这种情况。
编辑:我意识到您可能希望将其应用于数据框中的多个列。以下将允许您这样做
library(dplyr)

# Same toy data as before, now with two result columns to show the
# multi-column case.
df <- data.frame(
  chr = c(1, 1, 1, 1, 3, 3, 4, 4, 5, 7, 7),
  OC_030_ST.res = c(0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1),
  OC_031_ST.res = c(0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1)
)
# Row id used later to write the modified rows back into df.
# seq_len() is safe for a zero-row data frame, unlike 1:nrow(df).
df$count <- seq_len(nrow(df))

# Replace every 1 / -1 in x by 0 and leave all other values untouched.
# The branches must be plain values: assigning to x inside ifelse() would
# overwrite x before the "no" branch is evaluated and zero out everything.
make0 <- function(x) {
  ifelse(x == 1 | x == -1, 0, x)
}

# Rows whose chr differs from the row above (flag == 1) get their 1 / -1
# values reset; the first row has no predecessor and is not flagged.
dftemp <- df %>%
  mutate(last = lag(chr)) %>%
  mutate(flag = ifelse(chr == last | is.na(last), 0, 1)) %>%
  dplyr::filter(flag == 1) %>%
  # the below column range will need to be changed to the columns you actually need to change
  mutate(across(2:3, make0)) %>%
  select(-c(flag, last))
dftemp

# update the original dataframe with the modified values
df[match(dftemp$count, df$count), ] <- dftemp
df <- subset(df, select = -count)
df