关于特定条件下group_by的总和

时间:2019-03-02 15:08:21

标签: r dataframe dplyr

我想为每个组添加一列，用来存放该组在特定条件下的总和。

# Sample data: 14 rows spread over four ids, each row carrying a 0/1
# `condition` flag and a `time` value (plus count/firstexosure/outcome).
data <- data.frame(id=c(rep(1, 4), rep(2, 3), rep(3, 3), rep(4,4)), 
               condition=c(1, 1, 0, 0, 1, 1, 0, 1, 1, 0,1,1,0,0),
               count=c(1, 2, 0, 0, 1, 2, 0, 1, 2, 0,1,2,0,0), 
               firstexosure=c(1, 0, 0, 0, 1, 0, 0, 1, 0, 0,1,0,0,0), 
               outcome=c(0, 0, 0, 1, 0, 0, 1, 0, 0, 1,0,0,1,0), 
               time=c(100, 250, 220, 300, 240, 380, 150, 200, 320, 360,100,210,220,235) ) 




# Attempt using dplyr (%>%, group_by and summarise come from that package):
# sums `time` within each (id, condition) group and overwrites `data` with
# the summarised result, so the original rows are no longer available.
data<-data%>%group_by(id,condition)%>%summarise(sum= sum(time))

我想要的是在原始数据中再增加一列 sum，如下所示。

# Desired result: the same 14 rows as the sample data plus a `sum` column
# that repeats, on every row, the total `time` of that row's
# (id, condition) group (e.g. id 1 / condition 1: 100 + 250 = 350).
data <- data.frame(id=c(rep(1, 4), rep(2, 3), rep(3, 3), rep(4,4)), 
               condition=c(1, 1, 0, 0, 1, 1, 0, 1, 1, 0,1,1,0,0),
               count=c(1, 2, 0, 0, 1, 2, 0, 1, 2, 0,1,2,0,0), 
               firstexosure=c(1, 0, 0, 0, 1, 0, 0, 1, 0, 0,1,0,0,0), 
               outcome=c(0, 0, 0, 1, 0, 0, 1, 0, 0, 1,0,0,1,0), 
               time=c(100, 250, 220, 300, 240, 380, 150, 200, 320, 360,100,210,220,235),
               sum=c(350, 350,520,520,620,620,150,520,520,360,310,310,455,455)) 

我怎么用R写这个?

1 个答案:

答案 0 :(得分:1)

有许多方法可以做到这一点。

我们可以使用基础 R（base R）。

#include <iostream>
#include <math.h>
#include <typeinfo>

/// Leaf of the expression tree: the identity function f(x) = x.
struct X {
    /// Returns its argument unchanged; callable in constant expressions.
    static constexpr double eval(double value) {
        return value;
    }
};

/// Product node of the expression tree.
///
/// @tparam Lhs  expression type exposing `static constexpr double eval(double)`
/// @tparam Rhs  expression type exposing `static constexpr double eval(double)`
template <class Lhs, class Rhs>
struct MULT {
    /// Evaluates both operands at `point` and returns their product.
    static constexpr double eval(double point) {
        return Lhs::eval(point) * Rhs::eval(point);
    }
};

/// Quotient node of the expression tree.
///
/// @tparam Num  numerator expression exposing `static constexpr double eval(double)`
/// @tparam Den  denominator expression exposing `static constexpr double eval(double)`
template <class Num, class Den>
struct DIV {
    /// Evaluates both operands at `point` and returns numerator / denominator.
    /// No zero check is performed: if the denominator evaluates to zero, the
    /// call cannot be used where a constant expression is required.
    static constexpr double eval(double point) {
        return Num::eval(point) / Den::eval(point);
    }
};

/// Compile-time type switch keyed on a boolean.
/// Primary template (used for `false`): `type` is `long`.
template <bool fits_int>
struct SELECT {
    using type = long;
};

/// Specialization for `true`: `type` is `int`.
template <>
struct SELECT<true> {
    using type = int;
};

/// Picks an integer type for the value of expression `F` at the point `Point`.
///
/// `result` is `SELECT<true>::type` when -32768 < F::eval(Point) < 32767
/// (both bounds exclusive) and `SELECT<false>::type` otherwise.
/// F::eval(Point) appears inside a template argument, so it has to be a
/// constant expression for this template to instantiate.
///
/// @tparam F      expression type exposing `static constexpr double eval(double)`
/// @tparam Point  integer point at which F is evaluated
template <class F, int Point>
struct DEDUCTOR {
    using result =
        typename SELECT<(F::eval(Point) > -32768 && 32767 > F::eval(Point))>::type;
};

// Demonstration driver: selects an expression type F, asks DEDUCTOR for the
// integer type matching F evaluated at 0, and prints that type's name.
int main() {
    // Alternative expression x*x*x. Evaluating it at 0 is an ordinary constant
    // expression, so DEDUCTOR<F, 0> instantiates with this definition of F.
//    typedef MULT<MULT<X, X>, X> F; // works
    // Expression (x/x)/x. At x == 0 the innermost step is 0.0 / 0.0; a division
    // by zero is not allowed in a constant expression, so using F::eval(0) as a
    // template argument inside DEDUCTOR<F, 0>::result is rejected at compile time.
    typedef DIV<DIV<X, X>, X> F; // doesn't work
    typedef DEDUCTOR<F, 0> deductor;

    // typeid(...).name() yields an implementation-defined spelling of the type.
    std::cout << typeid(deductor::result).name() << std::endl;
    return 0;
}

或者我们可以使用基础 R 的 `ave` 函数：

data$sum <- ave(data$time, data$id, data$condition, FUN = sum)
data
#    id condition count firstexosure outcome time sum
# 1   1         1     1            1       0  100 350
# 2   1         1     2            0       0  250 350
# 3   1         0     0            0       0  220 520
# 4   1         0     0            0       1  300 520
# 5   2         1     1            1       0  240 620
# 6   2         1     2            0       0  380 620
# 7   2         0     0            0       1  150 150
# 8   3         1     1            1       0  200 520
# 9   3         1     2            0       0  320 520
# 10  3         0     0            0       1  360 360
# 11  4         1     1            1       0  100 310
# 12  4         1     2            0       0  210 310
# 13  4         0     0            0       1  220 455
# 14  4         0     0            0       0  235 455

也可以使用下面的包：

data.table