在对数据框应用函数时,如果某行的值与上一行不匹配,如何更改该行的值

时间:2015-11-17 16:37:57

标签: r

我有一个数据框如下

SPI_ICONVERTICALSPACING

对于每一列,如果某个值与同一 chr 中上一行的值一致,并且该值为 1 或 -1,我希望保留该值;如果不一致,我希望将该值改为 0。

例如(不使用上面的dput)

structure(list(chr = 1, leftPos = 240000, OC_AH_026C.res = 0, 
    OC_AH_026C.1.res = 0, OC_AH_026C.2.res = 0, OC_AH_026T.res = 0, 
    OC_AH_058T.res = 0, OC_AH_084C.res = 0, OC_AH_084T.res = 0, 
    OC_AH_086C.res = 0, OC_AH_086C.1.res = 0, OC_AH_086C.2.res = 0, 
    OC_AH_086C.3.res = 0, OC_AH_086T.res = 0, OC_AH_088C.res = 0, 
    OC_AH_088T.res = 0, OC_AH_094C.res = 0, OC_AH_094C.1.res = 0, 
    OC_AH_094C.2.res = 0, OC_AH_094C.3.res = 0, OC_AH_094C.4.res = 0, 
    OC_AH_094C.5.res = 0, OC_AH_094C.6.res = 0, OC_AH_094C.7.res = 0, 
    OC_AH_094T.res = 0, OC_AH_096C.res = 0, OC_AH_096T.res = 0, 
    OC_AH_100C.res = 0, OC_AH_100C.1.res = 0, OC_AH_100T.res = 0, 
    OC_AH_127C.res = 0, OC_AH_127T.res = 0, OC_AH_133C.res = 0, 
    OC_AH_133T.res = 0, OC_ED_008C.res = 0, OC_ED_008C.1.res = 0, 
    OC_ED_008C.2.res = 0, OC_ED_008C.3.res = 0, OC_ED_008T.res = 0, 
    OC_ED_016C.res = 0, OC_ED_016T.res = 0, OC_ED_031C.res = 0, 
    OC_ED_031T.res = 0, OC_ED_036C.res = 0, OC_ED_036T.res = 0, 
    OC_GS_001C.res = 0, OC_GS_001T.res = 0, OC_QE_062C.res = 0, 
    OC_QE_062T.res = 0, OC_RS_010C.res = 0, OC_RS_010T.res = 0, 
    OC_RS_027C.res = 0, OC_RS_027C.1.res = 0, OC_RS_027C.2.res = 0, 
    OC_RS_027T.res = 0, OC_SH_051C.res = 0, OC_SH_051T.res = 0, 
    OC_ST_014C.res = 0, OC_ST_014C.1.res = 0, OC_ST_014T.res = 0, 
    OC_ST_016T.res = 0, OC_ST_020C.res = 0, OC_ST_020T.res = 0, 
    OC_ST_024C.res = 0, OC_ST_024T.res = 0, OC_ST_033C.res = 0, 
    OC_ST_033T.res = 0, OC_ST_034C.res = 0, OC_ST_034C.1.res = 0, 
    OC_ST_034C.2.res = 0, OC_ST_034T.res = 0, OC_ST_035C.res = 0, 
    OC_ST_035T.res = 0, OC_ST_036C.res = 0, OC_ST_036T.res = 0, 
    OC_ST_037T.res = 0, OC_ST_040C.res = 0, OC_ST_040T.res = 0, 
    OC_WG_001T.res = 0, OC_WG_002C.res = 0, OC_WG_002T.res = 0, 
    OC_WG_005C.res = 0, OC_WG_005T.res = 0, OC_WG_006C.res = 0, 
    OC_WG_006T.res = 0, OC_WG_009T.res = 0, OC_WG_019C.res = 0, 
    OC_WG_019T.res = 0, Means.res = 0, sd.res = 0, ind = 1L), .Names = c("chr", 
"leftPos", "OC_AH_026C.res", "OC_AH_026C.1.res", "OC_AH_026C.2.res", 
"OC_AH_026T.res", "OC_AH_058T.res", "OC_AH_084C.res", "OC_AH_084T.res", 
"OC_AH_086C.res", "OC_AH_086C.1.res", "OC_AH_086C.2.res", "OC_AH_086C.3.res", 
"OC_AH_086T.res", "OC_AH_088C.res", "OC_AH_088T.res", "OC_AH_094C.res", 
"OC_AH_094C.1.res", "OC_AH_094C.2.res", "OC_AH_094C.3.res", "OC_AH_094C.4.res", 
"OC_AH_094C.5.res", "OC_AH_094C.6.res", "OC_AH_094C.7.res", "OC_AH_094T.res", 
"OC_AH_096C.res", "OC_AH_096T.res", "OC_AH_100C.res", "OC_AH_100C.1.res", 
"OC_AH_100T.res", "OC_AH_127C.res", "OC_AH_127T.res", "OC_AH_133C.res", 
"OC_AH_133T.res", "OC_ED_008C.res", "OC_ED_008C.1.res", "OC_ED_008C.2.res", 
"OC_ED_008C.3.res", "OC_ED_008T.res", "OC_ED_016C.res", "OC_ED_016T.res", 
"OC_ED_031C.res", "OC_ED_031T.res", "OC_ED_036C.res", "OC_ED_036T.res", 
"OC_GS_001C.res", "OC_GS_001T.res", "OC_QE_062C.res", "OC_QE_062T.res", 
"OC_RS_010C.res", "OC_RS_010T.res", "OC_RS_027C.res", "OC_RS_027C.1.res", 
"OC_RS_027C.2.res", "OC_RS_027T.res", "OC_SH_051C.res", "OC_SH_051T.res", 
"OC_ST_014C.res", "OC_ST_014C.1.res", "OC_ST_014T.res", "OC_ST_016T.res", 
"OC_ST_020C.res", "OC_ST_020T.res", "OC_ST_024C.res", "OC_ST_024T.res", 
"OC_ST_033C.res", "OC_ST_033T.res", "OC_ST_034C.res", "OC_ST_034C.1.res", 
"OC_ST_034C.2.res", "OC_ST_034T.res", "OC_ST_035C.res", "OC_ST_035T.res", 
"OC_ST_036C.res", "OC_ST_036T.res", "OC_ST_037T.res", "OC_ST_040C.res", 
"OC_ST_040T.res", "OC_WG_001T.res", "OC_WG_002C.res", "OC_WG_002T.res", 
"OC_WG_005C.res", "OC_WG_005T.res", "OC_WG_006C.res", "OC_WG_006T.res", 
"OC_WG_009T.res", "OC_WG_019C.res", "OC_WG_019T.res", "Means.res", 
"sd.res", "ind"), class = c("data.table", "data.frame"), row.names = c(NA, 
-1L), .internal.selfref = <pointer: 0x103006f78>)

应该成为

chr       leftPos     OC_030_ST.res
1           4324            0
1           23433           1
1           34436           1
1           64755           1
3           234             1
3           354             0
4           1666            0
4           4565            0
5           34777           1
7           2345            1
7           4567            1

我有一个数据框(称为Final)曾经有一个名为Def的列,其中包含一列中的所有res值,所以我可以做类似的事情

 chr       leftPos     OC_030_ST.res
1           4324            0
1           23433           1
1           34436           1
1           64755           1
3           234             0
3           354             0
4           1666            0
4           4565            0
5           34777           0
7           2345            1
7           4567            1

但我想我需要使用 apply,而我不知道该如何使用它。

我试过了:

# Label consecutive runs of identical Def values with rleid() (stored in the
# helper column ind), keep only the (chr, ind) groups that contain more than
# one row, then drop the helper column again.
# NOTE(review): setDT() and := work by reference, so Final itself is presumably
# converted to a data.table and gains the ind column as a side effect - confirm.
ContZ<-setDT(Final)[,ind:=rleid(Def)][, if(.N>1) .SD, .(chr, ind)][, ind:=NULL][]

但是当我尝试MeOut<-lapply(df_list2res,function (col){ ContZ<-setDT(df_list2res)[,ind:=rleid(col)][, if(.N>1) .SD, .(chr, ind)][, ind:=NULL][] })

时,我收到了错误消息
View(MeOut)

我怀疑这是因为我删除了行,而不是替换了值,不过我不能确定……

1 个答案:

答案 0 :(得分:0)

这是dplyr中的简单示例。

library(dplyr)

# Toy stand-in for the real data: a grouping column (chr) and one result column.
df <- data.frame(
  chr = c(1, 1, 1, 1, 3, 3, 4, 4, 5, 7, 7),
  OC_030_ST.res = c(0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1)
)

# A value survives only when the row has the same chr as the row above (or is
# the very first row, where lag() yields NA) and the value is 1 or -1.
# Everything else becomes 0.
df2 <- df %>%
  mutate(
    last = lag(chr),
    same_as_above = chr == last | is.na(last),
    is_call = OC_030_ST.res == 1 | OC_030_ST.res == -1,
    OC_030_ST.res = ifelse(same_as_above, ifelse(is_call, OC_030_ST.res, 0), 0)
  ) %>%
  # remove the helper columns so only the original columns remain
  select(-last, -same_as_above, -is_call)
df2

这里的逻辑是:如果当前行的 chr 值与上一行的 chr 值相同,并且“OC_030_ST.res”的值为 1 或 -1,则保留该值。在所有其他情况下,该值将重置为 0。如果这不是您想要的逻辑,请告诉我。

注意,第一行是特殊情况(因为第一行没有上一行,lag 会返回 NA),因此用 is.na(last) 检查来处理这种情况。

编辑:我意识到您可能希望将其应用于数据框中的多个列。以下将允许您这样做

# Toy data: one grouping column (chr) and two result columns to process.
df <- data.frame(
  chr = c(1, 1, 1, 1, 3, 3, 4, 4, 5, 7, 7),
  OC_030_ST.res = c(0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1),
  OC_031_ST.res = c(0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1)
)

# Row id used to match modified rows back to their original positions.
# seq_len() is used instead of 1:nrow(df) so that an empty data frame would
# get an empty id vector rather than c(1, 0).
df$count <- seq_len(nrow(df))


# Replace every 1 or -1 in x with 0 and leave all other values unchanged.
#
# x: a numeric vector.
# Returns a vector of the same length as x.
#
# The branches of ifelse() must be plain values: assigning to x inside them
# (x <- 0) overwrites x before the "no" branch is evaluated, which would turn
# every element into 0 whenever at least one element is 1 or -1.
make0 <- function(x) {
  ifelse(x == 1 | x == -1, 0, x)
}
# Collect the rows whose chr differs from the row above (the first row of each
# new chr run, excluding the very first row of the table) and zero out their
# 1 / -1 values with make0().
dftemp <- df %>%
  mutate(last = lag(chr)) %>%
  # flag is 0 when chr matches the previous row or there is no previous row, 1 otherwise
  mutate(flag = ifelse(chr == last | is.na(last), 0, 1)) %>%
  dplyr::filter(flag == 1) %>%
  # the below column range will need to be changed to the columns you actually need to change
  # across() replaces the deprecated mutate_each(funs(...)) interface
  mutate(across(2:3, make0)) %>%
  select(-c(flag, last))
dftemp

# update the original dataframe with the modified values
df[match(dftemp$count, df$count), ] <- dftemp
# count was only needed for the row matching above, so drop it again
df <- subset(df, select = -count)
df