我想统计每个ID对应的0和1各出现了多少次。我有一列数据,其中包含500多个唯一ID,每个ID对应的0和1的出现次数各不相同。谢谢!
我在R中使用for循环
答案 0 :(得分:2)
我想这样的事情可以帮到你:
# Toy data frame holding one numeric ID per row
dummy <- data.frame(ID = c(10101, 11110101, 11111))

# Split every ID into a vector of single characters (one list entry per ID)
Sepdummy <- strsplit(as.character(dummy$ID), split = "")

# Helper: for each ID, count the characters whose numeric value is `digit`
count_digit <- function(digit) {
  vapply(
    Sepdummy,
    function(chars) sum(as.numeric(chars) == digit),
    integer(1)
  )
}

# Store the tallies next to the IDs they belong to
dummy$Zeroes <- count_digit(0)
dummy$Ones <- count_digit(1)
输出如下:
ID Zeroes Ones
10101 2 3
11110101 2 6
11111 0 5
如果您的ID不是纯数字,上述方法将无法使用。这种情况下,您可以使用stringr包中的str_count()函数(本页其他回答中也提到了它):
library(stringr)
# Sample data: including one string makes c() coerce every ID to character
dummy <- data.frame(ID = c(10101, 11110101, 11111, "asd0110001df"))
# Count with str_count() and attach the results to the same data frame,
# so that IDs and counts can be read from a single table
dummy[["Zeroes"]] <- str_count(dummy[["ID"]], pattern = "0")
dummy[["Ones"]] <- str_count(dummy[["ID"]], pattern = "1")
答案 1 :(得分:2)
在库 stringr 中,您可以使用函数 str_count(),它可以计算字符串中字符的出现次数。
library(stringr)
# str_count(string, pattern) returns, for each element of `string`,
# the number of matches of `pattern`.
str_count("abracadabra", "a") # returns 5
str_count("0010110", "0") # returns 4
# "0|1" is a regular expression that matches either digit
str_count("001d021", "0|1") # returns 5
# Vectorised over strings. The pattern must be passed explicitly: without
# it the default pattern "" counts characters and would give 3, 3, 5.
str_count(c("001", "123", "salut"), "0|1") # returns 3, 1, 0
答案 2 :(得分:1)
换一种猜测:也许您的数据框是下面这个样子?
library(dplyr)
# Fix the RNG seed so the simulated 0/1 values are reproducible
set.seed(1)

# Two ids with ten random binary observations each
data.df <- data.frame(
  id = rep(c(1, 2), each = 10),
  value = rbinom(20, 1, .5)
)

# Per id, count how many observations equal 1 and how many equal 0,
# then return the summary as a plain data.frame
count.df <- data.df %>%
  group_by(id) %>%
  summarize(ones = sum(value == 1), zeros = sum(value == 0)) %>%
  ungroup() %>%
  as.data.frame()
答案 3 :(得分:1)
使用上面给出的向量:
先将vect转换为可用的数据框:
# Reshape the flat vector `vect` (defined elsewhere, not shown here) into a
# two-column data frame, filling the matrix row by row.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
# NOTE(review): data.frame() on a matrix without dimnames names the columns
# X1/X2, while the table() call below needs columns ID and Treatment (as in
# the read.table() data further down) -- confirm which `data` is intended.
data <- data.frame(matrix(vect, , 2, byrow = TRUE))
# Cross-tabulate: one row per ID, one column per Treatment value
with(data, table(ID, Treatment))
Treatment
ID 0 1
100a002 16 8
100a003 18 6
数据:
# Example data: one row per observation, with the subject ID and a binary
# Treatment flag. The first line of the text supplies the column names.
# Arguments are spelled out in full (`header`, TRUE/FALSE) rather than
# relying on partial matching of `h` and on the reassignable `T`/`F`.
data <- read.table(text = " ID Treatment
100a002 1
100a002 0
100a002 0
100a002 0
100a002 1
100a002 1
100a002 1
100a002 0
100a002 0
100a002 0
100a002 0
100a002 0
100a002 0
100a002 0
100a002 0
100a002 0
100a002 0
100a002 0
100a002 0
100a002 0
100a002 1
100a002 1
100a002 1
100a002 1
100a003 0
100a003 0
100a003 0
100a003 0
100a003 0
100a003 0
100a003 0
100a003 0
100a003 0
100a003 0
100a003 0
100a003 0
100a003 0
100a003 0
100a003 0
100a003 0
100a003 0
100a003 0
100a003 1
100a003 1
100a003 1
100a003 1
100a003 1
100a003 1", header = TRUE, stringsAsFactors = FALSE)
答案 4 :(得分:0)
最有效的方法
# Two ids with ten random 0/1 draws each (no seed is set, so the counts
# differ from run to run)
dummy <- data.frame(
  id = rep(c(1, 2), each = 10),
  value = rbinom(20, 1, .5)
)
library(data.table)
# Turn `dummy` into a data.table, then count the ones and zeroes per id
setDT(dummy)[
  ,
  .(
    count_of_one = sum(value == 1, na.rm = TRUE),
    count_of_zeroes = sum(value == 0, na.rm = TRUE)
  ),
  by = id
]
输出
id count_of_one count_of_zeroes
1: 1 5 5
2: 2 6 4