如何找到连续数字串的索引?

时间:2016-11-01 20:59:02

标签: r

我有一个数据框,我想找到其中每一段连续5个或更多零值的起始和结束索引。

基本上,如果字符串是

1 2 0 0 0 0 0 0 4 2 22 41 0 0 0 0 0 5 6 0 0 0 4

我希望:

startindex endindex
   3           8
   13          17

使用rle

library(bio3d)
# Run-length encode the "precip is exactly zero" mask, once with base rle()
# and once with rle2() (presumably the bio3d variant -- confirm).
runs <- rle(t[["precip"]] == 0)
runs2 <- rle2(t[["precip"]] == 0)

# Positions *within the run-length encoding* (not row numbers) of the runs
# that are zero-valued and at least 5 elements long.
myruns <- which(runs2$lengths >= 5 & runs2$values == TRUE)

但我不知道如何获得这些索引。实际数据 -

    structure(list(time = structure(c(1147579200, 1147579500, 1147579800, 
1147580100, 1147580400, 1147580700, 1147581000, 1147581300, 1147581600, 
1147581900, 1147582200, 1147582500, 1147582800, 1147583100, 1147583400, 
1147583700, 1147584000, 1147584300, 1147584600, 1147584900, 1147585200, 
1147585500, 1147585800, 1147586100, 1147586400, 1147586700, 1147587000, 
1147587300, 1147587600, 1147587900, 1147588200, 1147588500, 1147588800, 
1147589100, 1147589400, 1147589700, 1147590000, 1147590300, 1147590600, 
1147590900, 1147591200, 1147591500, 1147591800, 1147592100, 1147592400, 
1147592700, 1147593000, 1147593300, 1147593600, 1147593900, 1147594200, 
1147594500, 1147594800, 1147595100, 1147595400, 1147595700, 1147596000, 
1147596300, 1147596600, 1147596900, 1147597200, 1147597500, 1147597800, 
1147598100, 1147598400, 1147598700, 1147599000, 1147599300, 1147599600, 
1147599900, 1147600200, 1147600500, 1147600800, 1147601100, 1147601400, 
1147601700, 1147602000, 1147602300, 1147602600, 1147602900, 1147603200, 
1147603500, 1147603800, 1147604100, 1147604400, 1147604700, 1147605000, 
1147605300, 1147605600, 1147605900, 1147606200, 1147606500, 1147606800, 
1147607100, 1147607400, 1147607700, 1147608000, 1147608300, 1147608600, 
1147608900, 1147609200, 1147609500, 1147609800, 1147610100, 1147610400, 
1147610700, 1147611000, 1147611300, 1147611600, 1147611900, 1147612200, 
1147612500, 1147612800, 1147613100, 1147613400, 1147613700, 1147614000, 
1147614300, 1147614600, 1147614900, 1147615200, 1147615500, 1147615800, 
1147616100, 1147616400, 1147616700, 1147617000, 1147617300, 1147617600, 
1147617900, 1147618200, 1147618500, 1147618800, 1147619100, 1147619400, 
1147619700, 1147620000, 1147620300, 1147620600, 1147620900, 1147621200, 
1147621500, 1147621800, 1147622100, 1147622400, 1147622700, 1147623000, 
1147623300, 1147623600, 1147623900, 1147624200, 1147624500, 1147624800, 
1147625100, 1147625400, 1147625700, 1147626000, 1147626300, 1147626600, 
1147626900, 1147627200, 1147627500, 1147627800, 1147628100, 1147628400, 
1147628700, 1147629000, 1147629300, 1147629600, 1147629900, 1147630200, 
1147630500, 1147630800, 1147631100, 1147631400, 1147631700, 1147632000, 
1147632300, 1147632600, 1147632900, 1147633200, 1147633500, 1147633800, 
1147634100, 1147634400, 1147634700, 1147635000, 1147635300, 1147635600, 
1147635900, 1147636200, 1147636500, 1147636800, 1147637100, 1147637400, 
1147637700, 1147638000, 1147638300, 1147638600, 1147638900, 1147639200, 
1147639500, 1147639800, 1147640100, 1147640400, 1147640700, 1147641000, 
1147641300, 1147641600, 1147641900, 1147642200, 1147642500, 1147642800, 
1147643100, 1147643400, 1147643700, 1147644000, 1147644300, 1147644600, 
1147644900, 1147645200, 1147645500, 1147645800, 1147646100, 1147646400, 
1147646700, 1147647000, 1147647300, 1147647600, 1147647900, 1147648200, 
1147648500, 1147648800, 1147649100, 1147649400, 1147649700, 1147650000, 
1147650300, 1147650600, 1147650900, 1147651200, 1147651500, 1147651800, 
1147652100, 1147652400, 1147652700, 1147653000, 1147653300, 1147653600, 
1147653900, 1147654200, 1147654500, 1147654800, 1147655100, 1147655400, 
1147655700, 1147656000, 1147656300, 1147656600, 1147656900, 1147657200, 
1147657500, 1147657800, 1147658100, 1147658400, 1147658700, 1147659000, 
1147659300, 1147659600, 1147659900, 1147660200, 1147660500, 1147660800, 
1147661100, 1147661400, 1147661700, 1147662000, 1147662300, 1147662600, 
1147662900, 1147663200, 1147663500, 1147663800, 1147664100, 1147664400, 
1147664700, 1147665000, 1147665300, 1147665600, 1147665900, 1147666200, 
1147666500, 1147666800, 1147667100, 1147667400, 1147667700, 1147668000, 
1147668300, 1147668600, 1147668900, 1147669200, 1147669500, 1147669800, 
1147670100, 1147670400, 1147670700, 1147671000, 1147671300, 1147671600, 
1147671900, 1147672200, 1147672500, 1147672800, 1147673100, 1147673400, 
1147673700, 1147674000, 1147674300, 1147674600, 1147674900, 1147675200, 
1147675500, 1147675800, 1147676100, 1147676400, 1147676700, 1147677000, 
1147677300, 1147677600, 1147677900, 1147678200, 1147678500, 1147678800, 
1147679100, 1147679400, 1147679700, 1147680000, 1147680300, 1147680600, 
1147680900, 1147681200, 1147681500, 1147681800, 1147682100, 1147682400, 
1147682700, 1147683000, 1147683300, 1147683600, 1147683900, 1147684200, 
1147684500, 1147684800, 1147685100, 1147685400, 1147685700, 1147686000, 
1147686300, 1147686600, 1147686900, 1147687200, 1147687500, 1147687800, 
1147688100, 1147688400, 1147688700, 1147689000, 1147689300, 1147689600, 
1147689900, 1147690200, 1147690500, 1147690800, 1147691100, 1147691400, 
1147691700, 1147692000, 1147692300, 1147692600, 1147692900, 1147693200, 
1147693500, 1147693800, 1147694100, 1147694400, 1147694700, 1147695000, 
1147695300, 1147695600, 1147695900, 1147696200, 1147696500, 1147696800, 
1147697100, 1147697400, 1147697700, 1147698000, 1147698300, 1147698600, 
1147698900, 1147699200, 1147699500, 1147699800, 1147700100, 1147700400, 
1147700700, 1147701000, 1147701300, 1147701600, 1147701900, 1147702200, 
1147702500, 1147702800, 1147703100, 1147703400, 1147703700, 1147704000, 
1147704300, 1147704600, 1147704900, 1147705200, 1147705500, 1147705800, 
1147706100, 1147706400, 1147706700, 1147707000, 1147707300, 1147707600, 
1147707900, 1147708200, 1147708500, 1147708800, 1147709100, 1147709400, 
1147709700, 1147710000, 1147710300, 1147710600, 1147710900, 1147711200, 
1147711500, 1147711800, 1147712100, 1147712400, 1147712700, 1147713000, 
1147713300, 1147713600, 1147713900, 1147714200, 1147714500, 1147714800, 
1147715100, 1147715400, 1147715700, 1147716000, 1147716300, 1147716600, 
1147716900, 1147717200, 1147717500, 1147717800, 1147718100, 1147718400, 
1147718700, 1147719000, 1147719300, 1147719600, 1147719900, 1147720200, 
1147720500, 1147720800, 1147721100, 1147721400, 1147721700, 1147722000, 
1147722300, 1147722600, 1147722900, 1147723200, 1147723500, 1147723800, 
1147724100, 1147724400, 1147724700, 1147725000, 1147725300, 1147725600, 
1147725900, 1147726200, 1147726500, 1147726800, 1147727100, 1147727400, 
1147727700, 1147728000, 1147728300, 1147728600, 1147728900, 1147729200, 
1147729500, 1147729800, 1147730100, 1147730400, 1147730700, 1147731000, 
1147731300, 1147731600, 1147731900, 1147732200, 1147732500, 1147732800, 
1147733100, 1147733400, 1147733700, 1147734000, 1147734300, 1147734600, 
1147734900, 1147735200, 1147735500, 1147735800, 1147736100, 1147736400, 
1147736700, 1147737000, 1147737300, 1147737600, 1147737900, 1147738200, 
1147738500, 1147738800, 1147739100, 1147739400, 1147739700, 1147740000, 
1147740300, 1147740600, 1147740900, 1147741200, 1147741500, 1147741800, 
1147742100, 1147742400, 1147742700, 1147743000, 1147743300, 1147743600, 
1147743900, 1147744200, 1147744500, 1147744800, 1147745100, 1147745400, 
1147745700, 1147746000, 1147746300, 1147746600, 1147746900, 1147747200, 
1147747500, 1147747800, 1147748100, 1147748400, 1147748700, 1147749000, 
1147749300, 1147749600, 1147749900, 1147750200, 1147750500, 1147750800, 
1147751100, 1147751400, 1147751700, 1147752000, 1147752300, 1147752600, 
1147752900, 1147753200, 1147753500, 1147753800, 1147754100, 1147754400, 
1147754700, 1147755000, 1147755300, 1147755600, 1147755900, 1147756200, 
1147756500, 1147756800, 1147757100, 1147757400, 1147757700, 1147758000, 
1147758300, 1147758600, 1147758900, 1147759200, 1147759500, 1147759800, 
1147760100, 1147760400, 1147760700, 1147761000, 1147761300, 1147761600, 
1147761900, 1147762200, 1147762500, 1147762800, 1147763100, 1147763400, 
1147763700, 1147764000, 1147764300, 1147764600, 1147764900, 1147765200, 
1147765500, 1147765800, 1147766100, 1147766400, 1147766700, 1147767000, 
1147767300, 1147767600, 1147767900, 1147768200, 1147768500, 1147768800, 
1147769100, 1147769400, 1147769700, 1147770000, 1147770300, 1147770600, 
1147770900, 1147771200, 1147771500, 1147771800, 1147772100, 1147772400, 
1147772700, 1147773000, 1147773300, 1147773600, 1147773900, 1147774200, 
1147774500, 1147774800, 1147775100, 1147775400, 1147775700, 1147776000, 
1147776300, 1147776600, 1147776900, 1147777200, 1147777500, 1147777800, 
1147778100, 1147778400, 1147778700, 1147779000, 1147779300, 1147779600, 
1147779900, 1147780200, 1147780500, 1147780800, 1147781100, 1147781400, 
1147781700, 1147782000, 1147782300, 1147782600, 1147782900, 1147783200, 
1147783500, 1147783800, 1147784100, 1147784400, 1147784700, 1147785000, 
1147785300, 1147785600, 1147785900, 1147786200, 1147786500, 1147786800, 
1147787100, 1147787400, 1147787700, 1147788000, 1147788300, 1147788600, 
1147788900, 1147789200, 1147789500, 1147789800, 1147790100, 1147790400, 
1147790700, 1147791000, 1147791300, 1147791600, 1147791900, 1147792200, 
1147792500, 1147792800, 1147793100, 1147793400, 1147793700, 1147794000, 
1147794300, 1147794600, 1147794900, 1147795200, 1147795500, 1147795800, 
1147796100, 1147796400, 1147796700, 1147797000, 1147797300, 1147797600, 
1147797900, 1147798200, 1147798500, 1147798800, 1147799100, 1147799400, 
1147799700, 1147800000, 1147800300, 1147800600, 1147800900, 1147801200, 
1147801500), class = c("POSIXct", "POSIXt"), tzone = "UTC"), 
    precip = c(2.02, 2.18, 1.09, 0.29, 0.28, 0.03, 0.04, 0.07, 
    0.23, 0.43, 0.65, 0.69, 0.68, 0.73, 1.2, 1.5, 1.89, 1.5, 
    1.12, 1.12, 0.52, 0.69, 1.16, 1.36, 1.22, 1.59, 1.58, 1.46, 
    1.42, 0.84, 0.28, 0.18, 0.28, 0.5, 0.5, 0.43, 0.28, 0.1, 
    0.01, 0, 0.06, 0.06, 0.02, 0.02, 0.08, 0.07, 0.07, 0.04, 
    0.04, 0.12, 0.31, 0.35, 0.41, 0.3, 0.3, 0.38, 0.53, 0.41, 
    0.2, 0.26, 0.26, 0.27, 0.27, 0.37, 0.35, 0.55, 0.32, 0.31, 
    0.67, 0.82, 1.08, 1.35, 1.78, 2.35, 2.31, 2.09, 2.1, 1.92, 
    1.89, 1.57, 1.3, 0.99, 1.04, 1.69, 1.69, 2.67, 3.4, 4.8, 
    4.93, 5.7, 6.66, 6.67, 6.83, 6.18, 5.62, 5.05, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 1.64, 1.69, 1.83, 1.96, 2.04, 2.05, 
    2.06, 1.88, 1.94, 2.07, 2.35, 2.64, 2.68, 3.09, 3.26, 2.32, 
    1.9, 1.72, 1.76, 3.45, 3.46, 2.65, 3.36, 2.75, 2.38, 1.67, 
    1.4, 1.38, 1.04, 0.81, 0.64, 0.79, 0.62, 0.42, 0.43, 0.38, 
    0.31, 0.24, 0.09, 0.07, 0.07, 0.1, 0.16, 0.17, 0.17, 0.14, 
    0.06, 0.06, 0.13, 0.32, 0.45, 0.49, 0.37, 0.28, 0.21, 0.21, 
    0.12, 0.11, 0.06, 0.03, 0.06, 0.06, 0.03, 0.05, 0, 0.09, 
    0.05, 0.03, 0.04, 0.04, 0.24, 0.33, 0.54, 0.74, 1.69, 2.21, 
    2.36, 2.38, 3.23, 2.05, 2.1, 1.88, 2.77, 2.47, 2.45, 2.72, 
    2.72, 2.79, 3.08, 3.19, 2.91, 2.94, 1.63, 1.55, 2.22, 1.43, 
    1.58, 1.13, 0.83, 0.81, 1.07, 1.63, 2.26, 2.32, 1.45, 0.84, 
    0.81, 0.8, 0.93, 0.73, 1.09, 1.13, 0.7, 0.69, 0.65, 0.61, 
    0.56, 0.4, 0.23, 0.05, 0.01, 0.01, 0.04, 0.09, 0.12, 0.18, 
    0.48, 0.58, 0.61, 0.78, 0.56, 0.32, 0.1, 0.1, 0.09, 0.18, 
    0.44, 0.8, 0.94, 0.73, 0.51, 0.5, 0.2, 0.25, 0.28, 0.23, 
    0.13, 0.13, 0.21, 0.22, 0.25, 0.25, 0.15, 0.01, 0.01, 0.02, 
    0.02, 0.04, 0.02, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.01, 0, 0.01, 
    0.01, 0.02, 0.02, 0.03, 0.03, 0.05, 0.03, 0.01, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.03, 
    0.08, 0.17, 0.15, 0.07, 0.03, 0.03, 0.03, 0.04, 0.14, 0.29, 
    0.42, 0.62, 0.51, 0.38, 0.35, 0.26, 0.14, 0.14, 0.18, 0.19, 
    0.26, 0.18, 0.2, 0.21, 0.11, 0.14, 0.24, 0.26, 0.39, 0.44, 
    0.43, 0.48, 0.51, 0.53, 0.51, 0.94, 1.25, 1.1, 1.05, 0.56, 
    0.13, 0.02, 0.03, 0.13, 0.36, 0.36, 0.4, 0.46, 0.57, 0.31, 
    0.09, 0.09, 0.05, 0.05, 0.19, 0.09, 0.05, 0.23, 0.25, 0.16, 
    0.18, 0.17, 0.31, 0.57, 0.64, 0.77, 1.03, 1.05, 1.18, 0.98, 
    0.75, 0.36, 0.2, 0.12, 0.15, 0.15, 0.16, 0.15, 0.08, 0.03, 
    0.03, 0.02, 0.02, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0.01, 0.03, 0.03, 0.01, 0.01, 0, 0.01, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.01, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0.03, 0.07, 0.14, 0.21, 0.31, 
    0.39, 0.6, 0.6, 0.56, 0.48, 0.5, 0.51, 0.52, 0.74, 0.55, 
    0.47, 0.38, 0.39, 0.54, 0.58, 0.65, 0.85, 0.98, 0.85, 0.65, 
    0.62, 0.48, 0.39, 0.33, 0.31, 0.58, 0.59, 0.77, 1.07, 2.57, 
    3.06, 2.94, 1.32, 1.24, 1.13, 0.91, 1.11, 0.4, 0.05, 0.02, 
    0.01, 0, 0, 0, 0.07, 0.09, 0.01, 0, 0, 0, 0, 0, 0.01, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0.01, 0, 0, 0, 0, 0, 0, 0, 0.3, 0.44, 0.41, 0.2, 0.19, 
    0.08, 0.03, 0.05, 0.05, 0.02, 0.04, 0.01, 0.02, 0, 0.01, 
    0.02, 0.01, 0.04, 0.06, 0.06, 0.13, 0.12, 0.15, 0.23, 0.18, 
    0.24, 0.36, 0.41, 0.4, 0.21, 0.19, 0.23, 0.05, 0.35, 0.35, 
    0.78, 0.88, 0.77, 0.88, 0.9, 0.89, 0.79, 0.55, 0.41, 0.37, 
    0.25, 0.06, 0.06, 0, 0.03, 0.03, 0.14, 0.16, 0.34, 0.43, 
    0.54, 0.61, 0.57, 0.63, 0.59, 0.37, 0.38, 0.41, 0.55, 1.05, 
    2.38, 2.33, 2.46, 3, 2.99, 3.54, 4, 4.23, 4.25, 4.51, 4.44, 
    4.39, 3.93, 3.5, 3.2, 3.1, 2.66, 1.93, 0.83, 0.83, 0.77, 
    1.07, 1.89, 1.77, 1.04, 0.87, 0.86, 0.52, 0.44, 0.35, 0.3, 
    0.29, 0.23, 0.23, 0.15, 0.11, 0.15, 0.18, 0.22, 0.2, 0.21, 
    0.22, 0.34, 0.55, 0.69, 1.01, 1.3, 1.47, 0.63, 1.48, 2.05, 
    2.96, 3.77, 3.7, 3.18, 3.12, 1.67, 1.29, 1.22, 1.33, 1.55, 
    1.55, 1.88, 1.89, 1.98, 2.28, 2.53, 2.38, 2.78, 2.79, 3.11, 
    3.56, 3.2, 3.36, 2.84, 2.44, 2.48, 2.37, 2.37, 1.76, 2.36, 
    2.69, 2.53, 2.51, 1.86, 1.26, 1.27, 1.65, 1.62)), .Names = c("time", 
"precip"), row.names = 459121:459862, class = "data.frame")

4 个答案:

答案 0 :(得分:7)

使用data.table。对浮点数做相等性测试很麻烦,所以我只是使用了一个阈值:

library(data.table)
setDT(DF) # convert DF to a data.table in place

# Split the rows into runs of consecutive "zero" / "non-zero" precip values
# (rleid() gives every run its own id). For each run that is zero-valued and
# at least 5 rows long, return the row number of its first and last row.
# A threshold is used instead of `== 0` to avoid exact floating-point tests.
res <- DF[,
  # Both operands are length-1, so use the scalar, short-circuiting `&&`.
  if (precip[1] < .001 && .N >= 5)
    .(
      start = .I[1], # .I holds the row numbers of the current group
      end = .I[.N]   # .N is the number of rows in the current group
    ),
  by = rleid(precip < .001)
]


   rleid start end
1:     4    97 108
2:     8   268 295
3:    12   307 326
4:    14   411 429
5:    18   437 450
6:    20   452 463
7:    24   516 520
8:    26   522 580
9:    28   582 588

它是如何工作的。来自data.table包的rleid承担了主要工作;见?rleid。.I是行号;.N是by=组中的行数。有关常规的data.table入门材料,请参阅该包的vignettes。

dplyr 的类似写法。在dplyr中使用rleid的常规方法就是直接借用它:

# Requires the data.table package to be installed: only rleid() is borrowed
# from it, via the data.table:: prefix.
library(dplyr)
DF %>%
  # remember each row's position before grouping
  mutate(rn = row_number()) %>%
  # one group per run of consecutive below-threshold / above-threshold values
  group_by(g = data.table::rleid(precip < .001)) %>%
  # keep only the zero runs that are at least 5 rows long
  filter(first(precip) < .001, n() >= 5) %>%
  # first and last row number of every remaining run
  summarise(start = rn[1], end = rn[n()])


# A tibble: 9 x 3
      g start   end
  <int> <int> <int>
1     4    97   108
2     8   268   295
3    12   307   326
4    14   411   429
5    18   437   450
6    20   452   463
7    24   516   520
8    26   522   580
9    28   582   588

答案 1 :(得分:3)

使用dplyr,

library(dplyr)

# Run-length encode the "precip is exactly 0" mask (swap in a tolerance if
# exact comparison is too strict), then derive the run boundaries.
rle(df$precip == 0) %>%
  unclass() %>% # drop the "rle" class so a plain list remains...
  as.data.frame() %>% # ...which converts to columns `lengths` and `values`
  mutate(
    stop = cumsum(lengths), # a run ends at the running total of lengths
    start = stop - (lengths - 1L) # and starts (length - 1) rows earlier
  ) %>%
  filter(values & lengths >= 5) # TRUE (zero) runs of 5 or more rows

##   lengths values stop start
## 1      12   TRUE  108    97
## 2      28   TRUE  295   268
## 3      20   TRUE  326   307
## 4      19   TRUE  429   411
## 5      14   TRUE  450   437
## 6      12   TRUE  463   452
## 7       5   TRUE  520   516
## 8      59   TRUE  580   522
## 9       7   TRUE  588   582

由于先计算stop再计算start似乎更简单,因此列是按这个顺序排列的。如果您想让它们按start、stop的顺序排列,只需在末尾加上 %>% select(start, stop)

答案 2 :(得分:0)

这不是dplyr解决方案,并且不如@Frank的优雅,但这里有一个使用base和regex的方法:

(原答案的代码在此处缺失。)

答案 3 :(得分:0)

在base R(其中d是您的数据)中:

# Run-length encode the "precip is zero" mask rather than the raw values, so
# that only runs of zeros qualify (a non-zero value repeated 5+ times must
# not be reported).
rl <- rle(d$precip == 0)
# Last row of every run is the running total of the run lengths; the first
# row follows from the run's own length. Computing the start this way also
# works when the very first run qualifies (no lookup at index 0).
run_end <- cumsum(rl$lengths)
run_start <- run_end - rl$lengths + 1L
# Runs that are zero-valued and at least 5 rows long (NA runs are dropped).
x <- which(rl$values & rl$lengths >= 5)
r <- data.frame(start = run_start[x], end = run_end[x])

# start end
# 1    97 108
# 2   268 295
# 3   307 326
# 4   411 429
# 5   437 450
# 6   452 463
# 7   516 520
# 8   522 580
# 9   582 588