使用R

时间:2019-04-05 15:46:49

标签: r dataframe dplyr

我有这个样本数据集(`df`):

structure(list(from = c("(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452", "(122) 212-3452", 
"(122) 212-3452", "(122) 212-3452", "(122) 212-3452"), to = c("(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542", "(700) 890-6542", "(700) 890-6542", "(700) 890-6542", 
"(700) 890-6542"), extension = c("9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls", "9205 - Raw IRS Calls", "9205 - Raw IRS Calls", 
"9205 - Raw IRS Calls"), forwarded_to = c(NA, "(323) 457-2039", 
"(310) 598-1753", "(818) 900-0706", "(818) 539-7811", "(213) 344-4965", 
"(213) 458-7662", "(818) 208-3012", "(818) 293-0175", "(818) 284-4238", 
"(818) 206-9506", "(310) 299-7340", "(310) 405-0875", "(213) 260-9113", 
"(213) 805-5208", "(818) 887-3058", "(424) 271-2141", "(213) 218-6579", 
"(818) 638-9466", "(213) 784-7164", "(323) 457-2038", "(213) 805-6959", 
"(228) 285-7898", "(213) 341-1055", "(213) 568-0979", "(213) 344-4905", 
"(818) 459-3811", NA, "(323) 457-2039", "(310) 598-1753", "(818) 900-0706", 
"(818) 539-7811", "(213) 344-4965", "(213) 458-7662", "(818) 208-3012", 
"(818) 293-0175", "(818) 284-4238", "(818) 206-9506", "(310) 299-7340", 
"(310) 405-0875", "(213) 260-9113", "(213) 805-5208", "(818) 887-3058", 
"(424) 271-2141", "(213) 218-6579", "(818) 638-9466", "(213) 784-7164", 
"(323) 457-2038", "(213) 805-6959", "(228) 285-7898", "(213) 341-1055", 
"(213) 568-0979", "(213) 344-4905", "(818) 459-3811"), date = c("Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", "Mon 12/31/2018", 
"Mon 12/31/2018"), time = structure(c(55440, 55440, 55440, 55440, 
55440, 55440, 55440, 55440, 55440, 55440, 55440, 55440, 55440, 
55440, 55440, 55440, 55440, 55440, 55440, 55440, 55500, 55500, 
55500, 55500, 55500, 55500, 55500, 55320, 55320, 55320, 55320, 
55320, 55320, 55320, 55320, 55320, 55320, 55320, 55320, 55320, 
55320, 55320, 55320, 55320, 55320, 55320, 55320, 55380, 55380, 
55380, 55380, 55380, 55380, 55380), class = c("hms", "difftime"
), units = "secs"), action = c("Phone Call", "FindMe", "FindMe", 
"FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", 
"FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", 
"FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", 
"FindMe", "FindMe", "FindMe", "Phone Call", "FindMe", "FindMe", 
"FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", 
"FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", 
"FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", "FindMe", 
"FindMe", "FindMe", "FindMe"), action_result = c("Accepted", 
"No Answer", "No Answer", "No Answer", "No Answer", "No Answer", 
"No Answer", "No Answer", "No Answer", "IP Phone Offline", "No Answer", 
"No Answer", "No Answer", "No Answer", "No Answer", "No Answer", 
"No Answer", "No Answer", "No Answer", "No Answer", "Stopped", 
"Stopped", "IP Phone Offline", "Stopped", "Stopped", "Call connected", 
"Stopped", "Accepted", "No Answer", "No Answer", "No Answer", 
"No Answer", "No Answer", "No Answer", "No Answer", "No Answer", 
"IP Phone Offline", "No Answer", "No Answer", "No Answer", "No Answer", 
"No Answer", "No Answer", "No Answer", "No Answer", "No Answer", 
"No Answer", "Stopped", "Stopped", "IP Phone Offline", "Call connected", 
"Stopped", "Stopped", "Stopped"), result_description = c("The call connected to and was accepted by this number.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"This DigitalLine was either not plugged in or did not have an internet connection.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
NA, NA, "This DigitalLine was either not plugged in or did not have an internet connection.", 
NA, NA, "The call connected to and was accepted by this number.", 
NA, "The call connected to and was accepted by this number.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"This DigitalLine was either not plugged in or did not have an internet connection.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
"The phone number you dialed was not answered.", "The phone number you dialed was not answered.", 
NA, NA, "This DigitalLine was either not plugged in or did not have an internet connection.", 
"The call connected to and was accepted by this number.", NA, 
NA, NA), duration = structure(c(297, 52, 52, 51, 51, 51, 51, 
51, 51, 0, 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, 9, 9, 0, 9, 
9, 236, 9, 71, 52, 52, 52, 51, 51, 51, 51, 51, 0, 51, 51, 51, 
51, 51, 51, 51, 51, 51, 51, 7, 7, 0, 13, 7, 7, 7), class = c("hms", 
"difftime"), units = "secs"), ID = c(19L, 19L, 19L, 19L, 19L, 
19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 
19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 
19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 
19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L, 19L), CallConnected = c(FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, 
FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE, FALSE), who_answered = c("NA", 
"NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", 
"NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", 
"NA", "NA", "(213) 344-4905", "NA", "NA", "NA", "NA", "NA", "NA", 
"NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", "NA", 
"NA", "NA", "NA", "NA", "NA", "NA", "NA", "(213) 341-1055", "NA", 
"NA", "NA")), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-54L))

这显示了拨打电话和接听电话的电话号码的通话数据。 完整的数据集包含更多的数字对,但是为了简洁起见,我只发布了一对数字

我想在每对号码中找到`CallConnected`为`TRUE`的数量、`who_answered`中的第一个电话号码,以及与第一个`Call Connected`相关联的`duration`。我曾考虑过使用`group_by`和`summarise`,但我想不出办法。

所需的输出如下:

summarise

4 个答案:

答案 0 :(得分:2)

我认为您已经接近解决方案。这是我的版本:

# Summarise the connected calls of every (from, to) number pair.
# Only rows where CallConnected is TRUE are kept, so a pair that never
# connected does not appear in the result at all.
df %>%
  group_by(from, to) %>%
  filter(CallConnected) %>%
  summarise(
    CallConnected = n(),                # number of connected calls
    WhoAnswered = first(who_answered),  # answering number, first in row order
    Duration = first(duration)          # duration of that same first row
  ) %>%
  # summarise() removes only the last grouping level; drop the remaining
  # one so later verbs do not silently keep operating per `from`.
  ungroup()

答案 1 :(得分:2)

使用`dplyr`,我们可以对`CallConnected`取`sum`来计算`TRUE`的数量,取`who_answered`列中第一个不为"NA"的值,以及`CallConnected`为`TRUE`的第一行所对应的`duration`值。

library(dplyr)

# Per (from, to) pair: count the connected calls, and report who answered
# the first one and how long it lasted.
df %>%
  group_by(from, to) %>%
  summarise(
    # CallConnected is logical, so its sum is the number of TRUE values.
    count_call = sum(CallConnected),
    # match(TRUE, x) is NA when x has no TRUE, so a pair without any
    # answered call yields NA here. which.max() would return 1 for an
    # all-FALSE vector and silently report the first row instead.
    # who_answered stores "missing" as the literal string "NA".
    who_answered = who_answered[match(TRUE, who_answered != "NA")],
    Duration = duration[match(TRUE, CallConnected)]
  ) %>%
  # summarise() removes only the last grouping level; drop the rest too.
  ungroup()


#  from           to             count_call who_answered   Duration
#  <chr>          <chr>               <int> <chr>          <time>  
#1 (122) 212-3452 (700) 890-6542          2 (213) 344-4905 03'56" 

答案 2 :(得分:1)

也许是这样吗?

# Aggregate the connected calls of every (from, to) number pair.
# Pairs without any connected call are removed by the filter and therefore
# do not appear in `output`.
output <- df %>%
  filter(CallConnected) %>%  # logical column, so no `== TRUE` is needed
  group_by(from, to) %>%
  summarise(
    # NOTE(review): this is the total over all connected calls of the pair;
    # use first(duration) if only the first connected call is wanted.
    duration = sum(duration),
    firstWhoAnswered = first(who_answered),
    CallConnected = n()  # number of connected calls
  ) %>%
  # summarise() removes only the last grouping level; drop the rest too.
  ungroup()

答案 3 :(得分:0)

您也可以这样做-

// NOTE(review): this is a Gradle (Groovy DSL) build script; it appears
// unrelated to the R/dplyr question surrounding it — confirm it belongs here.

// Build-script classpath: makes the Spring Boot Gradle plugin available so it
// can be applied below. The plugin version is taken from springBootVersion.
buildscript {
    ext {
        springBootVersion = '2.1.3.RELEASE'
    }
    repositories {
        mavenCentral()
    }
    dependencies {
        classpath("org.springframework.boot:spring-boot-gradle-plugin:${springBootVersion}")
    }
}

// Shared script plugins loaded from a `buildscripts` directory that sits next
// to the root project (parent of rootDir). Their contents are not visible
// here; presumably publishing and coverage configuration — TODO confirm.
apply from: new File(rootDir.parentFile, 'buildscripts/publish.gradle')
apply from: new File(rootDir.parentFile, 'buildscripts/coverage.gradle')

apply plugin: 'java'
apply plugin: 'org.springframework.boot'
apply plugin: 'io.spring.dependency-management'

// Project coordinates and Java language level.
group = 'com.app.libs.messaging'
version = '0.0.3'
sourceCompatibility = 1.8
targetCompatibility = 1.8

// Dependency repositories: Maven Central plus the Spring snapshot and
// milestone repositories.
repositories {
    mavenCentral()
    maven { url "https://repo.spring.io/snapshot" }
    maven { url "https://repo.spring.io/milestone" }
}

// Versions referenced by the dependency declarations and the BOM import below.
ext {
    springCloudVersion = 'Greenwich.SR1'
    jacksonVersion = '2.9.6'
    lombokVersion = '1.16.18'
    jooqVersion = '3.11.2'
    flywayVersion = '5.2.1'
}

// Dependencies declared without a version presumably get one from the
// dependency-management plugin / imported BOM — TODO confirm.
dependencies {

    compile 'org.aspectj:aspectjweaver'
    compile 'org.springframework:spring-tx'
    compile 'org.springframework.cloud:spring-cloud-stream'
    compile 'org.springframework.cloud:spring-cloud-stream-binder-kafka'
    compile 'org.apache.commons:commons-lang3:3.5'
    compile 'org.javassist:javassist:3.22.0-GA'
    compile "org.flywaydb:flyway-core:${flywayVersion}"

    // Needed to compile only; not added to the runtime classpath by this script.
    compileOnly "org.jooq:jooq:${jooqVersion}"
    compileOnly "org.projectlombok:lombok:${lombokVersion}"

    testCompileOnly "org.projectlombok:lombok:${lombokVersion}"

    testCompile 'org.springframework.boot:spring-boot-starter-test'
    testCompile 'org.springframework.boot:spring-boot-starter-json'
    testCompile 'org.springframework.cloud:spring-cloud-stream-test-support'

    testCompile "com.fasterxml.jackson.core:jackson-core:${jacksonVersion}"
    testCompile "com.fasterxml.jackson.core:jackson-databind:${jacksonVersion}"

    // jOOQ is compileOnly for main code, so tests declare it explicitly.
    // flyway-core is already declared as a `compile` dependency above.
    testCompile 'junit:junit:4.12'
    testCompile "org.jooq:jooq:${jooqVersion}"
    testCompile "org.flywaydb:flyway-core:${flywayVersion}"

    testCompile 'org.postgresql:postgresql:42.2.2'
    testCompile 'com.opentable.components:otj-pg-embedded:0.13.0'
    testCompile 'org.springframework:spring-jdbc'
}

// Imports the Spring Cloud BOM for the release train named in
// springCloudVersion.
dependencyManagement {
    // disable maven exclusion to enhance gradle import performance
    // https://github.com/spring-gradle-plugins/dependency-management-plugin/issues/153
    applyMavenExclusions = false

    imports {
        mavenBom "org.springframework.cloud:spring-cloud-dependencies:${springCloudVersion}"
    }
}

// Enable the plain `jar` task and disable `bootJar`; presumably because this
// project is consumed as a library rather than run as an application —
// TODO confirm.
jar.enabled = true
bootJar.enabled = false