如何从行数据计算组均值?

时间:2019-03-27 03:17:36

标签: r

我必须使用以下数据解决一些问题:

c(ages)
  [1] 29 29 19 25 22 29 24 23 28 33 30 21 22 27 32 25 25 23 33 22 31 32 26 27 23 27 16 21 18 17 27 23 29 26 25 27 26 29
 [39] 25 26 22 31 21 22 19 25 29 21 21 25 24 33 25 28 23 26 23 23 28 26 22 26 26 28 23 29 31 28 23 23 21 27 20 24 27 20
 [77] 30 27 21 29 21 24 27 23 30 24 26 29 24 30 24 23 28 25 33 26 24 25 26 32

c(genders)
  [1] "male"   "female" "female" "female" "male"   "male"   "male"   "male"   "female" "female" "female" "male"  
 [13] "male"   "male"   "male"   "male"   "female" "female" "male"   "male"   "female" "female" "female" "female"
 [25] "female" "female" "female" "female" "female" "female" "female" "female" "female" "female" "male"   "male"  
 [37] "female" "female" "male"   "male"   "female" "male"   "female" "male"   "male"   "male"   "male"   "male"  
 [49] "female" "male"   "male"   "male"   "male"   "female" "male"   "male"   "male"   "male"   "male"   "male"  
 [61] "female" "male"   "male"   "female" "male"   "female" "male"   "male"   "female" "female" "male"   "male"  
 [73] "female" "male"   "female" "male"   "male"   "female" "male"   "female" "female" "female" "female" "male"  
 [85] "male"   "male"   "female" "female" "male"   "male"   "female" "male"   "female" "male"   "female" "female"
 [97] "male"   "female" "male"   "male" 

我以为如果使用mean(ages[genders=male]),我可以获得男性的平均年龄,但是却收到以下错误消息

  

Error in mean(ages[genders = male]) : object 'male' not found(找不到对象 'male')

我还应该执行其他步骤吗?我在做什么错??

供参考:我要解决的问题的完整列表如下:

  1. 男人平均比女人年轻吗?
  2. 样本中有多少女性?
  3. 样本中有多少人?
  4. 样本中最小的男人几岁?
  5. 样本中最年轻的女人多大?

1 个答案:

答案 0 :(得分:0)

欢迎来到本站。

首先,您的数据似乎是按行排列的(每一行是一个变量),我对这种结构不太熟悉。因此,先按如下方式把数据原样读入:

# Read the row-oriented file as-is. header = FALSE because the first line of
# the file is a data record ("ages", ...), not a row of column names.
# FALSE is spelled out: the shorthand F is an ordinary variable that can be
# reassigned, which would silently change the meaning of this call.
df <- read.csv("give the complete path of your file", header = FALSE)

dput(df)是:

structure(list(V1 = structure(1:2, .Label = c("ages", "genders"), class = "factor"), V2 = structure(1:2, .Label = c("29", "male"), class = "factor"), V3 = structure(1:2, .Label = c("29", "female"), class = "factor"), V4 = structure(1:2, .Label = c("19", "female"), class = "factor"), V5 = structure(1:2, .Label = c("25", "female"), class = "factor"), V6 = structure(1:2, .Label = c("22", "male"), class = "factor"), V7 = structure(1:2, .Label = c("29", "male"), class = "factor"), V8 = structure(1:2, .Label = c("24", "female"), class = "factor"), V9 = structure(1:2, .Label = c("23", "female"), class = "factor"), V10 = structure(1:2, .Label = c("28", "female"), class = "factor"), V11 = structure(1:2, .Label = c("33", "male"), class = "factor"), V12 = structure(1:2, .Label = c("30", "male"), class = "factor"), V13 = structure(1:2, .Label = c("21", "male"), class = "factor"), V14 = structure(1:2, .Label = c("22", "male"), class = "factor"), V15 = structure(1:2, .Label = c("27", "female"), class = "factor"), V16 = structure(1:2, .Label = c("32", "female"), class = "factor"), V17 = structure(1:2, .Label = c("25", "female"), class = "factor"), V18 = structure(1:2, .Label = c("25", "male"), class = "factor"), V19 = structure(1:2, .Label = c("23", "male"), class = "factor"), V20 = structure(1:2, .Label = c("33", "male"), class = "factor"), V21 = structure(1:2, .Label = c("22", "male"), class = "factor"), V22 = structure(1:2, .Label = c("31", "male"), class = "factor"), V23 = structure(1:2, .Label = c("32", "female"), class = "factor"), V24 = structure(1:2, .Label = c("26", "female"), class = "factor"), V25 = structure(1:2, .Label = c("27", "male"), class = "factor"), V26 = structure(1:2, .Label = c("23", "male"), class = "factor"), V27 = structure(1:2, .Label = c("27", "female"), class = "factor"), V28 = structure(1:2, .Label = c("16", "female"), class = "factor"), V29 = structure(1:2, .Label = c("21", "female"), class = "factor"), V30 = structure(1:2, .Label = c("18", 
"female"), class = "factor"), V31 = structure(1:2, .Label = c("17", "female"), class = "factor"), V32 = structure(1:2, .Label = c("27", "female"), class = "factor"), V33 = structure(1:2, .Label = c("23", "female"), class = "factor"), V34 = structure(1:2, .Label = c("29", "female"), class = "factor"), V35 = structure(1:2, .Label = c("26", "female"), class = "factor"), V36 = structure(1:2, .Label = c("25", "female"), class = "factor"), V37 = structure(1:2, .Label = c("27", "female"), class = "factor"), V38 = structure(1:2, .Label = c("26", "female"), class = "factor"), V39 = structure(1:2, .Label = c("29", "female"), class = "factor"), V40 = structure(1:2, .Label = c("25", "female"), class = "factor"), V41 = structure(1:2, .Label = c("26", "male"), class = "factor"), V42 = structure(1:2, .Label = c("22", "male"), class = "factor"), V43 = structure(1:2, .Label = c("31", "female"), class = "factor"), V44 = structure(1:2, .Label = c("21", "female"), class = "factor"), V45 = structure(1:2, .Label = c("22", "male"), class = "factor"), V46 = structure(1:2, .Label = c("19", "male"), class = "factor"), V47 = structure(1:2, .Label = c("25", "female"), class = "factor"), V48 = structure(1:2, .Label = c("29", "male"), class = "factor"), V49 = structure(1:2, .Label = c("21", "female"), class = "factor"), V50 = structure(1:2, .Label = c("21", "male"), class = "factor"), V51 = structure(1:2, .Label = c("25", "male"), class = "factor"), V52 = structure(1:2, .Label = c("24", "male"), class = "factor"), V53 = structure(1:2, .Label = c("33", "male"), class = "factor"), V54 = structure(1:2, .Label = c("25", "male"), class = "factor"), V55 = structure(1:2, .Label = c("28", "female"), class = "factor"), V56 = structure(1:2, .Label = c("23", "male"), class = "factor"), V57 = structure(1:2, .Label = c("26", "male"), class = "factor"), V58 = structure(1:2, .Label = c("23", "male"), class = "factor"), V59 = structure(1:2, .Label = c("23", "male"), class = "factor"), V60 = structure(1:2, 
.Label = c("28", "female"), class = "factor"), V61 = structure(1:2, .Label = c("26", "male"), class = "factor"), V62 = structure(1:2, .Label = c("22", "male"), class = "factor"), V63 = structure(1:2, .Label = c("26", "male"), class = "factor"), V64 = structure(1:2, .Label = c("26", "male"), class = "factor"), V65 = structure(1:2, .Label = c("28", "male"), class = "factor"), V66 = structure(1:2, .Label = c("23", "male"), class = "factor"), V67 = structure(1:2, .Label = c("29", "female"), class = "factor"), V68 = structure(1:2, .Label = c("31", "male"), class = "factor"), V69 = structure(1:2, .Label = c("28", "male"), class = "factor"), V70 = structure(1:2, .Label = c("23", "female"), class = "factor"), V71 = structure(1:2, .Label = c("23", "male"), class = "factor"), V72 = structure(1:2, .Label = c("21", "female"), class = "factor"), V73 = structure(1:2, .Label = c("27", "male"), class = "factor"), V74 = structure(1:2, .Label = c("20", "male"), class = "factor"), V75 = structure(1:2, .Label = c("24", "female"), class = "factor"), V76 = structure(1:2, .Label = c("27", "female"), class = "factor"), V77 = structure(1:2, .Label = c("20", "male"), class = "factor"), V78 = structure(1:2, .Label = c("30", "male"), class = "factor"), V79 = structure(1:2, .Label = c("27", "female"), class = "factor"), V80 = structure(1:2, .Label = c("21", "male"), class = "factor"), V81 = structure(1:2, .Label = c("29", "female"), class = "factor"), V82 = structure(1:2, .Label = c("21", "male"), class = "factor"), V83 = structure(1:2, .Label = c("24", "male"), class = "factor"), V84 = structure(1:2, .Label = c("27", "female"), class = "factor"), V85 = structure(1:2, .Label = c("23", "male"), class = "factor"), V86 = structure(1:2, .Label = c("30", "female"), class = "factor"), V87 = structure(1:2, .Label = c("24", "female"), class = "factor"), V88 = structure(1:2, .Label = c("26", "female"), class = "factor"), V89 = structure(1:2, .Label = c("29", "female"), class = "factor"), V90 = 
structure(1:2, .Label = c("24", "male"), class = "factor"), V91 = structure(1:2, .Label = c("30", "male"), class = "factor"), V92 = structure(1:2, .Label = c("24", "male"), class = "factor"), V93 = structure(1:2, .Label = c("23", "female"), class = "factor"), V94 = structure(1:2, .Label = c("28", "female"), class = "factor"), V95 = structure(1:2, .Label = c("25", "male"), class = "factor"), V96 = structure(1:2, .Label = c("33", "male"), class = "factor"), V97 = structure(1:2, .Label = c("26", "female"), class = "factor"), V98 = structure(1:2, .Label = c("24", "male"), class = "factor"), V99 = structure(1:2, .Label = c("25", "female"), class = "factor"), V100 = structure(1:2, .Label = c("26", "male"), class = "factor"), V101 = structure(1:2, .Label = c("32", "female"), class = "factor")), class = "data.frame", row.names = c(NA, -2L))

我试图通过将行数据转换为列数据帧来使事情变得更容易。此转换基于@Ricardo Oliveros-Ramos在此处共享的代码:Reading a CSV file organized horizontally

为方便起见,我将其复制到此处:

# Read a delimited text file that is organised horizontally (one variable per
# line, values running left to right) and return it as a regular data frame
# with one variable per column.
#
# Arguments:
#   file    Path of the file to read.
#   header  Passed to read.csv(): TRUE when the first field of every line is
#           the variable name.
#   sep     Single-character field separator. It is matched literally.
#   ...     Further arguments forwarded to read.csv().
#
# Returns the data frame produced by read.csv() on the transposed text.
#
# NOTE(review): fields are split with strsplit(), which does not understand
# quoting, so quoted fields that contain the separator are not supported.
read.tcsv <- function(file, header = TRUE, sep = ",", ...) {
  # The widest line determines how many rows the transposed table has.
  n <- max(count.fields(file, sep = sep), na.rm = TRUE)
  lines <- readLines(file)

  # Split one line into its fields and pad it with NA up to length n, so that
  # ragged lines can still be bound into a rectangular matrix.
  .splitvar <- function(x, sep, n) {
    # fixed = TRUE: treat sep as a literal string. Without it, separators that
    # are regex metacharacters (e.g. "|" or ".") split the line incorrectly,
    # while count.fields() and read.csv() interpret sep literally.
    var <- unlist(strsplit(x, split = sep, fixed = TRUE))
    length(var) <- n
    var
  }

  # One column per input line, one row per field position.
  fields <- do.call(cbind, lapply(lines, .splitvar, sep = sep, n = n))
  # Re-join each row into a delimited line so read.csv() can do the type
  # conversion and header handling.
  transposed <- apply(fields, 1, paste, collapse = sep)
  read.csv(text = transposed, sep = sep, header = header, ...)
}

然后,您只需对按行排列的 csv 文件运行上述函数即可:

# Transpose-read the file so that each input line becomes a column of df2.
df2 <- read.tcsv(file = "give the complete path of your file")

作为参考,其结果如下:

dput(df2)

structure(list(ages = c(29L, 29L, 19L, 25L, 22L, 29L, 24L, 23L, 28L, 33L, 30L, 21L, 22L, 27L, 32L, 25L, 25L, 23L, 33L, 22L, 31L, 32L, 26L, 27L, 23L, 27L, 16L, 21L, 18L, 17L, 27L, 23L, 29L, 26L, 25L, 27L, 26L, 29L, 25L, 26L, 22L, 31L, 21L, 22L, 19L, 25L, 29L, 21L, 21L, 25L, 24L, 33L, 25L, 28L, 23L, 26L, 23L, 23L, 28L, 26L, 22L, 26L, 26L, 28L, 23L, 29L, 31L, 28L, 23L, 23L, 21L, 27L, 20L, 24L, 27L, 20L, 30L, 27L, 21L, 29L, 21L, 24L, 27L, 23L, 30L, 24L, 26L, 29L, 24L, 30L, 24L, 23L, 28L, 25L, 33L, 26L, 24L, 25L, 26L, 32L), genders = structure(c(2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L), .Label = c("female", "male"), class = "factor")), class = "data.frame", row.names = c(NA, -100L))

下一步,也是更重要的一步,是要实现的功能:

# Mean age for each gender, using base R only.
# which() turns the comparison into row positions before subsetting ages.
with(df2, mean(ages[which(genders == 'male')]))
with(df2, mean(ages[which(genders == 'female')]))

请注意此处的 == 符号,它表示比较(而不是赋值);另外,male 两侧的引号 '' 用于指明该值是字符类型。如果不加引号,R 会去查找名为 male 的对象,从而出现“找不到对象”的错误。

# Age of the youngest person within each gender.
with(df2, min(ages[which(genders == 'male')]))
with(df2, min(ages[which(genders == 'female')]))

最后,要计算小组中有多少男人或多少女人:

# Head-count per gender: summing a logical vector counts its TRUE entries.
sum(df2[["genders"]] == 'male')
sum(df2[["genders"]] == 'female')

这将逐个元素检查是否相等,并统计结果为 TRUE 的个数(求和时 TRUE 计为 1,FALSE 计为 0)。