R唯一值计数

时间:2014-08-05 14:46:07

标签: r count unique

我有一个包含 4 列的 data.frame。第 2 列("Cofecha")是每个个体的唯一 ID,第 4 列("Plot")是该个体所属的样地。每个个体在 data.frame 中出现多次。我想做两件事:(1)得到不重复个体的总数;(2)找出每个样地中有多少个不重复的个体。我已经能求出不重复个体的总数(下面的摘录中有 4 个),但不知道如何统计每个样地中的个体数量。任何帮助都将不胜感激!

摘自原始data.frame:

  

# Reproducible excerpt (dput output) of the original data frame: 25 rows and
# 4 columns. Cofecha is a factor that keeps all 313 levels of the full data
# set, although only four of them (codes 50, 69, 166 and 232) occur in these
# rows. Plot is a character plot ID; for every row it equals the first three
# characters of the Cofecha ID.
dx1 <- structure(
  list(
    Year = c(
      1920L, 1921L, 1921L, 1922L, 1922L, 1923L, 1923L, 1924L, 1924L, 1924L,
      1925L, 1925L, 1925L, 1926L, 1926L, 1926L, 1927L, 1927L, 1927L, 1927L,
      1928L, 1928L, 1928L, 1928L, 1929L
    ),
    Cofecha = structure(
      c(
        69L, 166L, 69L, 166L, 69L, 166L, 69L, 166L, 69L, 50L, 166L, 69L, 50L,
        166L, 69L, 50L, 166L, 232L, 69L, 50L, 166L, 232L, 69L, 50L, 166L
      ),
      .Label = c(
        "LB1A002", "LB1A003", "LB1A101", "LB1A102", "LB1A103", "LB1A212",
        "LB1A228", "LB1A231", "LB1A233", "LB1B001", "LB1B002", "LB1B003",
        "LB1B210", "LB1B216", "LB2A001", "LB2A002", "LB2A003", "LB2A004",
        "LB2A008", "LB2A009", "LB2A011", "LB2B001", "LB2B005", "LB2B008",
        "LB2B101", "LB2B102", "LB2B103", "LB2C003", "LB2C004", "LB2C008",
        "LB2C009", "LB2C010", "LB2C001", "LB2D005", "LB2D006", "LB2D007",
        "LB2D008", "LB2D009", "LB2D010", "LB2D101", "SM1A005", "SM1A101",
        "SM1A301", "SM1A302", "SM1B003", "SM1C005", "SM1C302", "SM1D006",
        "SM2A004", "SM2A005", "SM2A007", "SM2A210", "SM2A301", "SM2B001",
        "SM2B005", "SM2B006", "SM2B101", "SM2C005", "SM2C101", "SM2C301",
        "SM2D006", "SM2D101", "SM2D221", "IR1A004", "IR1A009", "IR1A206",
        "IR1B001", "IR1B004", "IR1B005", "IR1B301", "IR1B302", "IR1C005",
        "IR1C006", "IR1C007", "IR1C008", "IR1C204", "IR1C205", "IR1D002",
        "IR1D101", "IR2A003", "IR2A101", "IR2A211", "IR2A234", "IR2B002",
        "IR2B005", "IR2B101", "IR2B201", "IR2B210", "IR2B229", "IR2C230",
        "IR2C256", "IR2C301", "IR2C302", "IR2C002", "IR2C009", "IR2C101",
        "IR2C204", "IR2C215", "IR2D227", "IR2D228", "IR2D237", "IR2D254",
        "IR2D301", "IR2D302", "IR2D003", "IR2D006", "IR2D009", "IR2D011",
        "IR2D207", "IR2D216", "JA1A101", "JA1A224", "JA1A301", "JA1B004",
        "JA1B101", "JA1B102", "JA1B219", "JA1B233", "JA1C002", "JA1C232",
        "JA1D001", "JA1D101", "JA2A101", "JA2A102", "JA2A206", "JA2A209",
        "JA2A210", "JA2A004", "JA2A005", "JA2A006", "JA2A007", "JA2A008",
        "JA2B005", "JA2B206", "JA2C001", "JA2C002", "JA2C007", "JA2C101",
        "JA2C202", "JA3N007", "JA3N008", "JA3N009", "JA3N010", "JA3N011",
        "JA3N012", "JA3N001", "JA3N002", "JA3N003", "JA3N004", "JA3N005",
        "JA3N006", "SF5A007", "SF5B223", "SF5B227", "SF5B228", "SF5B301",
        "SF5B302", "SF5C201", "SF5C214", "SF5C216", "SF5C301", "SF5C303",
        "SF5D004", "SF5D101", "SF5D207", "AP1A001", "AP1A004", "AP1A005",
        "AP1A006", "AP1A008", "AP1A009", "AP1A010", "AP1A101", "AP1B005",
        "AP1B007", "AP1B011", "AP1B101", "AP1B102", "AP1C006", "AP1C007",
        "AP1C010", "AP1C011", "AP1C001", "AP1C002", "AP1D001", "AP1D005",
        "AP1D007", "AP1D008", "AP1D009", "AP1D010", "AP1D011", "AP1D012",
        "AP1D013", "AP1D101", "AP1D102", "AP1D103", "AP1D104", "AP1C004",
        "AP1C005", "AP2A001", "AP2A002", "AP2A003", "AP2B001", "AP2B003",
        "AP2B004", "AP2B101", "AP2B102", "AP2C001", "AP2C002", "AP2C003",
        "AP2C004", "AP2C005", "AP2C007", "AP2C008", "AP2C102", "AP2C103",
        "AP2C104", "AP2D001", "AP2D002", "AP2D005", "AP2D006", "AP2D009",
        "AP2D101", "AP2D102", "AP2D103", "AP3A003", "AP3A005", "AP3A008",
        "AP3A014", "AP3A015", "AP3A101", "AP3A102", "AP3B101", "AP3B102",
        "AP3B103", "AP3B104", "AP3B003", "AP3B007", "AP3B010", "AP3B012",
        "AP3C003", "AP3C004", "AP3C006", "AP3C007", "AP3C009", "AP3C011",
        "AP3C101", "AP3C102", "AP3C103", "AP3C104", "AP3C105", "AP3D006",
        "AP3D011", "AP3D101", "AP3D102", "BF1A101", "BF1A102", "BF1A103",
        "BF1A104", "BF1B003", "BF1B005", "BF1B006", "BF1B007", "BF1B101",
        "BF1C007", "BF1C101", "BF1C102", "BF1D003", "BF1D007", "BF1D010",
        "BF1D101", "BF1D102", "BF1D103", "BF1D210", "BF2A001", "BF2A002",
        "BF2B001", "BF2B214", "BF2B219", "BF2C001", "BF2C004", "BF2C008",
        "BF2C101", "BF2C102", "BF2C201", "BF2C205", "BF2C213", "BF2C219",
        "BF2C301", "BF2D004", "BF2D013", "BF2D014", "BF2D015", "BF3A001",
        "BF3A002", "BF3A004", "BF3A005", "BF3A007", "BF3A008", "BF3A009",
        "BF3A101", "BF3B003", "BF3B101", "BF3C002", "BF3C003", "BF3C007",
        "BF3C009", "BF3C010", "BF3D002", "BF3D003", "BF3D004", "BF3D009",
        "BF3D010"
      ),
      class = "factor"
    ),
    AvgBaiTenyr = c(
      3.1292, 2.3011, 3.07395, 2.374, 3.4236, 2.34095, 3.50005, 2.3903,
      3.68825, 2.2265, 2.3547575, 3.69255, 2.3487, 2.417, 3.57705, 2.32715,
      2.39665, 2.6338, 3.433, 2.2573, 2.37235, 2.6384, 3.496335, 2.28685,
      2.26055
    ),
    Plot = c(
      "IR1", "AP1", "IR1", "AP1", "IR1", "AP1", "IR1", "AP1", "IR1", "SM2",
      "AP1", "IR1", "SM2", "AP1", "IR1", "SM2", "AP1", "AP3", "IR1", "SM2",
      "AP1", "AP3", "IR1", "SM2", "AP1"
    )
  ),
  .Names = c("Year", "Cofecha", "AvgBaiTenyr", "Plot"),
  # Row names are the row positions these records had in the full data set.
  row.names = c(
    323L, 326L, 331L, 335L, 341L, 345L, 351L, 355L, 361L, 365L, 366L, 372L,
    376L, 377L, 383L, 387L, 388L, 391L, 396L, 400L, 401L, 404L, 409L, 413L,
    414L
  ),
  class = "data.frame"
)

用于查找独特个体的代码:

  

# Distinct individual IDs found in the Cofecha column; length(dx2) is the
# total number of distinct individuals.
dx2 <- unique(dx1$Cofecha)

我也试过 table(),但它只给出每个个体出现的次数,而不是每个样地中有多少个不重复的个体:

  

# Tabulates how many rows each individual ID occurs in (one cell per factor
# level of Cofecha); this counts occurrences, not distinct IDs per plot.
table(dx1$Cofecha)

有没有办法根据 ID 的前 3 个字符来统计个体?前 3 个字符就是样地(Plot)的 ID。

3 个答案:

答案 0 :(得分:2)

你可以尝试

# For each plot, count the distinct individual IDs; the result is a
# one-dimensional array named by plot.
tapply(
  as.character(dx1$Cofecha),
  list(dx1$Plot),
  FUN = function(ids) length(unique(ids))
)
#    AP1 AP3 IR1 SM2 
#     1   1   1   1 

或者

library(data.table)
# setDT() converts dx1 to a data.table by reference (dx1 itself changes
# class); the j-expression then counts distinct Cofecha values per Plot.
setDT(dx1)[, .(UniqueIDs = uniqueN(Cofecha)), by = Plot]
#      Plot UniqueIDs
#   1:  IR1         1
#   2:  AP1         1
#   3:  SM2         1
#   4:  AP3         1

答案 1 :(得分:1)

使用子字符串:

# Tabulate rows by the first three characters of the ID (the plot prefix);
# this counts every occurrence, not distinct individuals.
table(substr(as.character(dx1$Cofecha), start = 1, stop = 3))

给了我

AP1 AP3 IR1 SM2 
  9   2   9   5

先用 unique() 去重,使每个个体只计一次:

# De-duplicate the IDs first so each individual is counted once, then
# tabulate by the three-character plot prefix.
table(substr(as.character(unique(dx1$Cofecha)), start = 1, stop = 3))
# AP1 AP3 IR1 SM2 
#   1   1   1   1

答案 2 :(得分:1)

这是另一种方法:

library(dplyr)
# Group the rows by plot, then report the number of distinct Cofecha IDs
# found in each group.
dx1 %>%
  group_by(Plot) %>%
  summarise(distint_IDs = length(unique(Cofecha)))

#Source: local data frame [4 x 2]
#
#  Plot distint_IDs
#1  AP1           1
#2  AP3           1
#3  IR1           1
#4  SM2           1

还有一种使用基础R的可能性:

# Split only the ID column by plot (no need to copy whole data-frame rows),
# then count distinct IDs per plot. vapply() guarantees a named integer
# vector, even when there are no groups.
vapply(
  split(dx1$Cofecha, dx1$Plot),
  function(ids) length(unique(ids)),
  integer(1)
)
#AP1 AP3 IR1 SM2 
#  1   1   1   1