在R中按组统计缺失值(0值)的数量

时间:2017-07-27 08:37:33

标签: r group-by missing-data

如果这个问题过于简单,请见谅。

我是R的新手。我希望按组统计some_column列中缺失值的数量(在我的数据集中,缺失值已被替换为0),然后找出0值最多的组。我是这样做的(使用dplyr包):

# NOTE: this is the call that produces the unexpected result described below.
# group_by() treats `count = sum(some_column == 0)` as an additional grouping
# expression and evaluates it on the ungrouped data, so every row receives the
# same overall total instead of a per-group count.
missing_data <- group_by(some_data,some_group, count=sum(some_column==0))

但奇怪的是,count列在整个数据集中都是同一个数字,就好像数据集根本没有被分组一样。有人知道这是为什么吗?

好的,我明白了

# Group first, then let summarise() compute the number of zeros within each group.
missing_data %>% group_by(some_group) %>% summarise(count = sum(some_column == 0))

3 个答案:

答案 0 :(得分:1)

保持dplyr动词:

# Count the rows whose some_column is 0 in each group, largest count first.
missing_data <- some_data %>%
  filter(some_column == 0) %>%     # keep only the rows holding a 0 (missing) value
  group_by(some_group) %>%         # one group per distinct some_group
  summarise(count = n()) %>%       # number of remaining rows in each group
  arrange(desc(count))             # groups with the most zeros come first

答案 1 :(得分:0)

这是一个使用mtcars数据框的示例
expected output:
1   0   2   2   0   0   3   3   3   3   3   3   3   3   0
0   0   2   0   0   4   0   0   3   3   3   0   0   0   5
0   2   2   2   2   0   0   0   0   3   0   5   5   5   5
6   0   2   2   0   7   0   0   0   0   5   5   0   0   5
0   0   2   2   0   0   0   0   5   0   0   0   8   0   5
9   0   2   0   10  0   0   0   5   0   0   5   0   5   5
9   9   0   0   10  0   5   5   5   5   5   5   5   5   0
9   0   0   10  10  10  0   0   5   5   5   0   0   5   5
9   9   0   10  10  10  0   11  0   5   0   0   12  0   0
0   0   13  0   0   10  10  0   5   5   0   12  12  0   0

actual output:
1   0   2   2   0   0   3   3   3   3   3   3   3   3   0   
0   0   2   0   0   4   0   0   3   3   3   0   0   0   5   
0   2   2   2   2   0   0   0   0   3   0   5   5   5   5   
6   0   2   2   0   7   0   0   0   0   5   5   0   0   5   
0   0   2   2   0   0   0   0   5   0   0   0   9   0   5   
10   0   2   0   11   0   0   0   5   0   0   5   0   5   5   
10   10   0   0   11   0   5   5   5   5   5   5   5   5   0   
10   0   0   11   11   11   0   0   5   5   5   0   0   5   5   
10   10   0   11   11   11   0   12   0   5   0   0   13   0   0   
0   0   14   0   0   11   11   0   5   5   0   13   13   0   0   
package percolate;

/**
 * Labels the 4-connected regions of non-zero cells in a grid.
 *
 * <p>Every non-zero cell of {@link #matrix} receives the number of the region it
 * belongs to; zero cells keep the label 0. Regions are numbered 1, 2, 3, ...
 * without gaps, in the order in which their first cell is met when the grid is
 * read row by row, left to right.
 *
 * <p>The labelling is the classic two-pass algorithm: the first pass hands out
 * provisional labels and records in a union-find forest which of them turned
 * out to belong to the same region; the second pass replaces each provisional
 * label by the final number of its region. Recording equivalences instead of
 * decrementing a running counter on every merge is what keeps the final
 * numbering contiguous and prevents a label from being issued twice.
 */
public class Count
{
    // Not used by the labelling; retained so that same-package code that
    // references these fields keeps compiling.
    int i, j;

    // Number of regions found by the most recent call to check().
    int count = 0;

    /* Alternative 9x9 sample grid:
    int[][] matrix = {  {1,1,1,0,0,0,0,1,1},
                        {1,1,1,1,0,1,1,1,1},
                        {0,1,1,0,0,1,1,0,0},
                        {1,1,0,1,0,0,1,1,0},
                        {1,1,1,0,1,0,1,1,0},
                        {0,0,1,1,0,0,1,1,1},
                        {0,1,1,1,1,1,0,0,0},
                        {0,0,0,1,0,0,1,1,1},
                        {1,0,0,1,0,1,1,1,0}};*/

    // Input grid: 0 is background, any other value is foreground.
    int[][] matrix = {
        {1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0},
        {0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1},
        {0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1},
        {1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1},
        {0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1},
        {1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1},
        {1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0},
        {1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1},
        {1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0},
        {0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0},
    };

    // Grid dimensions, derived from the data so that swapping in another
    // matrix does not require editing them by hand.
    int row = matrix.length, col = (matrix.length == 0) ? 0 : matrix[0].length;

    // Region label of every cell; all zeros until check() has run.
    int[][] label = new int[row][col];

    /**
     * Computes the region labels for an arbitrary grid.
     *
     * @param grid input cells; 0 is background, anything else is foreground.
     *             Rows may have different lengths. The grid is not modified.
     * @return a new array of the same shape holding 0 for background cells and
     *         the 1-based region number for foreground cells
     */
    public static int[][] labelComponents(int[][] grid)
    {
        int cellTotal = 0;
        for (int[] line : grid)
        {
            cellTotal += line.length;
        }

        // parent[x] == x marks the representative of a set of equivalent
        // provisional labels. Index 0 is reserved for "background".
        int[] parent = new int[cellTotal + 1];
        int provisional = 0;
        int[][] result = new int[grid.length][];

        // Pass 1: provisional labels plus equivalences between them.
        for (int r = 0; r < grid.length; r++)
        {
            result[r] = new int[grid[r].length];
            for (int c = 0; c < grid[r].length; c++)
            {
                if (grid[r][c] == 0)
                {
                    continue;
                }
                // A neighbour outside the grid counts as background.
                int up = (r > 0 && c < result[r - 1].length) ? result[r - 1][c] : 0;
                int left = (c > 0) ? result[r][c - 1] : 0;

                if (up == 0 && left == 0)
                {
                    provisional++;
                    parent[provisional] = provisional;
                    result[r][c] = provisional;
                }
                else if (up == 0)
                {
                    result[r][c] = left;
                }
                else if (left == 0)
                {
                    result[r][c] = up;
                }
                else
                {
                    // Both neighbours are foreground: they are one region.
                    int rootUp = find(parent, up);
                    int rootLeft = find(parent, left);
                    int keep = Math.min(rootUp, rootLeft);
                    parent[Math.max(rootUp, rootLeft)] = keep;
                    result[r][c] = keep;
                }
            }
        }

        // Pass 2: map every set of provisional labels to the next free final
        // number the first time one of its cells is encountered.
        int[] finalOf = new int[provisional + 1];
        int assigned = 0;
        for (int r = 0; r < result.length; r++)
        {
            for (int c = 0; c < result[r].length; c++)
            {
                if (result[r][c] == 0)
                {
                    continue;
                }
                int root = find(parent, result[r][c]);
                if (finalOf[root] == 0)
                {
                    finalOf[root] = ++assigned;
                }
                result[r][c] = finalOf[root];
            }
        }
        return result;
    }

    /** Returns the representative of x's set, shortening the path on the way. */
    private static int find(int[] parent, int x)
    {
        while (parent[x] != x)
        {
            parent[x] = parent[parent[x]];
            x = parent[x];
        }
        return x;
    }

    /** Labels {@link #matrix} into {@link #label} and stores the region total in {@link #count}. */
    private void check()
    {
        label = labelComponents(matrix);

        // Final labels are 1..k without gaps, so the largest one is the total.
        int highest = 0;
        for (int[] line : label)
        {
            for (int value : line)
            {
                highest = Math.max(highest, value);
            }
        }
        count = highest;
    }

    /** Prints the label grid, one row per line, each value followed by three spaces. */
    private void output()
    {
        for (int[] line : label)
        {
            for (int value : line)
            {
                System.out.print(value + "   ");
            }
            System.out.println();
        }
    }

    /** Labels the built-in sample grid and prints the result. */
    public static void main(String[] args)
    {
        Count a = new Count();
        a.check();
        a.output();
    }
}

答案 2 :(得分:0)

最终得到的答案如下:

# Per-group number of zeros in some_column, ordered from most to fewest.
missing_data %>%
  group_by(some_group) %>%
  summarise(count = sum(some_column == 0)) %>%
  arrange(desc(count))