我必须使用以下数据解决一些问题:
c(ages)
[1] 29 29 19 25 22 29 24 23 28 33 30 21 22 27 32 25 25 23 33 22 31 32 26 27 23 27 16 21 18 17 27 23 29 26 25 27 26 29
[39] 25 26 22 31 21 22 19 25 29 21 21 25 24 33 25 28 23 26 23 23 28 26 22 26 26 28 23 29 31 28 23 23 21 27 20 24 27 20
[77] 30 27 21 29 21 24 27 23 30 24 26 29 24 30 24 23 28 25 33 26 24 25 26 32
c(genders)
[1] "male" "female" "female" "female" "male" "male" "male" "male" "female" "female" "female" "male"
[13] "male" "male" "male" "male" "female" "female" "male" "male" "female" "female" "female" "female"
[25] "female" "female" "female" "female" "female" "female" "female" "female" "female" "female" "male" "male"
[37] "female" "female" "male" "male" "female" "male" "female" "male" "male" "male" "male" "male"
[49] "female" "male" "male" "male" "male" "female" "male" "male" "male" "male" "male" "male"
[61] "female" "male" "male" "female" "male" "female" "male" "male" "female" "female" "male" "male"
[73] "female" "male" "female" "male" "male" "female" "male" "female" "female" "female" "female" "male"
[85] "male" "male" "female" "female" "male" "male" "female" "male" "female" "male" "female" "female"
[97] "male" "female" "male" "male"
我以为使用 mean(ages[genders=male])
可以得到男性的平均年龄,但却收到以下错误消息:
Error in mean(ages[genders = male]) : 找不到对象 'male'
我还需要执行其他步骤吗?我做错了什么?
供参考:我要解决的问题的完整列表如下:
答案 0 :(得分:0)
欢迎!
首先,您的数据似乎是按行排列的,我对这种结构不太熟悉。因此,我先按如下方式把数据读入:
# Read the row-oriented file as-is. header = FALSE because the first line is
# a data record, not a row of column names. FALSE is spelled out because `F`
# is an ordinary variable that can be reassigned.
df <- read.csv("give the complete path of your file", header = FALSE)
dput(df) 的输出是:
structure(list(V1 = structure(1:2, .Label = c("ages", "genders"), class = "factor"), V2 = structure(1:2, .Label = c("29", "male"), class = "factor"), V3 = structure(1:2, .Label = c("29", "female"), class = "factor"), V4 = structure(1:2, .Label = c("19", "female"), class = "factor"), V5 = structure(1:2, .Label = c("25", "female"), class = "factor"), V6 = structure(1:2, .Label = c("22", "male"), class = "factor"), V7 = structure(1:2, .Label = c("29", "male"), class = "factor"), V8 = structure(1:2, .Label = c("24", "female"), class = "factor"), V9 = structure(1:2, .Label = c("23", "female"), class = "factor"), V10 = structure(1:2, .Label = c("28", "female"), class = "factor"), V11 = structure(1:2, .Label = c("33", "male"), class = "factor"), V12 = structure(1:2, .Label = c("30", "male"), class = "factor"), V13 = structure(1:2, .Label = c("21", "male"), class = "factor"), V14 = structure(1:2, .Label = c("22", "male"), class = "factor"), V15 = structure(1:2, .Label = c("27", "female"), class = "factor"), V16 = structure(1:2, .Label = c("32", "female"), class = "factor"), V17 = structure(1:2, .Label = c("25", "female"), class = "factor"), V18 = structure(1:2, .Label = c("25", "male"), class = "factor"), V19 = structure(1:2, .Label = c("23", "male"), class = "factor"), V20 = structure(1:2, .Label = c("33", "male"), class = "factor"), V21 = structure(1:2, .Label = c("22", "male"), class = "factor"), V22 = structure(1:2, .Label = c("31", "male"), class = "factor"), V23 = structure(1:2, .Label = c("32", "female"), class = "factor"), V24 = structure(1:2, .Label = c("26", "female"), class = "factor"), V25 = structure(1:2, .Label = c("27", "male"), class = "factor"), V26 = structure(1:2, .Label = c("23", "male"), class = "factor"), V27 = structure(1:2, .Label = c("27", "female"), class = "factor"), V28 = structure(1:2, .Label = c("16", "female"), class = "factor"), V29 = structure(1:2, .Label = c("21", "female"), class = "factor"), V30 = structure(1:2, .Label = c("18", 
"female"), class = "factor"), V31 = structure(1:2, .Label = c("17", "female"), class = "factor"), V32 = structure(1:2, .Label = c("27", "female"), class = "factor"), V33 = structure(1:2, .Label = c("23", "female"), class = "factor"), V34 = structure(1:2, .Label = c("29", "female"), class = "factor"), V35 = structure(1:2, .Label = c("26", "female"), class = "factor"), V36 = structure(1:2, .Label = c("25", "female"), class = "factor"), V37 = structure(1:2, .Label = c("27", "female"), class = "factor"), V38 = structure(1:2, .Label = c("26", "female"), class = "factor"), V39 = structure(1:2, .Label = c("29", "female"), class = "factor"), V40 = structure(1:2, .Label = c("25", "female"), class = "factor"), V41 = structure(1:2, .Label = c("26", "male"), class = "factor"), V42 = structure(1:2, .Label = c("22", "male"), class = "factor"), V43 = structure(1:2, .Label = c("31", "female"), class = "factor"), V44 = structure(1:2, .Label = c("21", "female"), class = "factor"), V45 = structure(1:2, .Label = c("22", "male"), class = "factor"), V46 = structure(1:2, .Label = c("19", "male"), class = "factor"), V47 = structure(1:2, .Label = c("25", "female"), class = "factor"), V48 = structure(1:2, .Label = c("29", "male"), class = "factor"), V49 = structure(1:2, .Label = c("21", "female"), class = "factor"), V50 = structure(1:2, .Label = c("21", "male"), class = "factor"), V51 = structure(1:2, .Label = c("25", "male"), class = "factor"), V52 = structure(1:2, .Label = c("24", "male"), class = "factor"), V53 = structure(1:2, .Label = c("33", "male"), class = "factor"), V54 = structure(1:2, .Label = c("25", "male"), class = "factor"), V55 = structure(1:2, .Label = c("28", "female"), class = "factor"), V56 = structure(1:2, .Label = c("23", "male"), class = "factor"), V57 = structure(1:2, .Label = c("26", "male"), class = "factor"), V58 = structure(1:2, .Label = c("23", "male"), class = "factor"), V59 = structure(1:2, .Label = c("23", "male"), class = "factor"), V60 = structure(1:2, 
.Label = c("28", "female"), class = "factor"), V61 = structure(1:2, .Label = c("26", "male"), class = "factor"), V62 = structure(1:2, .Label = c("22", "male"), class = "factor"), V63 = structure(1:2, .Label = c("26", "male"), class = "factor"), V64 = structure(1:2, .Label = c("26", "male"), class = "factor"), V65 = structure(1:2, .Label = c("28", "male"), class = "factor"), V66 = structure(1:2, .Label = c("23", "male"), class = "factor"), V67 = structure(1:2, .Label = c("29", "female"), class = "factor"), V68 = structure(1:2, .Label = c("31", "male"), class = "factor"), V69 = structure(1:2, .Label = c("28", "male"), class = "factor"), V70 = structure(1:2, .Label = c("23", "female"), class = "factor"), V71 = structure(1:2, .Label = c("23", "male"), class = "factor"), V72 = structure(1:2, .Label = c("21", "female"), class = "factor"), V73 = structure(1:2, .Label = c("27", "male"), class = "factor"), V74 = structure(1:2, .Label = c("20", "male"), class = "factor"), V75 = structure(1:2, .Label = c("24", "female"), class = "factor"), V76 = structure(1:2, .Label = c("27", "female"), class = "factor"), V77 = structure(1:2, .Label = c("20", "male"), class = "factor"), V78 = structure(1:2, .Label = c("30", "male"), class = "factor"), V79 = structure(1:2, .Label = c("27", "female"), class = "factor"), V80 = structure(1:2, .Label = c("21", "male"), class = "factor"), V81 = structure(1:2, .Label = c("29", "female"), class = "factor"), V82 = structure(1:2, .Label = c("21", "male"), class = "factor"), V83 = structure(1:2, .Label = c("24", "male"), class = "factor"), V84 = structure(1:2, .Label = c("27", "female"), class = "factor"), V85 = structure(1:2, .Label = c("23", "male"), class = "factor"), V86 = structure(1:2, .Label = c("30", "female"), class = "factor"), V87 = structure(1:2, .Label = c("24", "female"), class = "factor"), V88 = structure(1:2, .Label = c("26", "female"), class = "factor"), V89 = structure(1:2, .Label = c("29", "female"), class = "factor"), V90 = 
structure(1:2, .Label = c("24", "male"), class = "factor"), V91 = structure(1:2, .Label = c("30", "male"), class = "factor"), V92 = structure(1:2, .Label = c("24", "male"), class = "factor"), V93 = structure(1:2, .Label = c("23", "female"), class = "factor"), V94 = structure(1:2, .Label = c("28", "female"), class = "factor"), V95 = structure(1:2, .Label = c("25", "male"), class = "factor"), V96 = structure(1:2, .Label = c("33", "male"), class = "factor"), V97 = structure(1:2, .Label = c("26", "female"), class = "factor"), V98 = structure(1:2, .Label = c("24", "male"), class = "factor"), V99 = structure(1:2, .Label = c("25", "female"), class = "factor"), V100 = structure(1:2, .Label = c("26", "male"), class = "factor"), V101 = structure(1:2, .Label = c("32", "female"), class = "factor")), class = "data.frame", row.names = c(NA, -2L))
我试图通过将行数据转换为列数据帧来使事情变得更容易。此转换基于@Ricardo Oliveros-Ramos在此处共享的代码:Reading a CSV file organized horizontally
为方便起见,我将其复制到此处:
# Read a "transposed" delimited file, i.e. one where every line holds one
# variable (its name first, then its values), and return it as a regular
# data frame with one column per input line.
#
# Arguments:
#   file    Path of the file to read.
#   header  Passed to read.csv(); TRUE means the first field of every input
#           line becomes the column name.
#   sep     Field separator, taken literally (not as a regular expression).
#   ...     Further arguments forwarded to read.csv().
#
# Returns: the data frame produced by read.csv() on the transposed text.
#
# NOTE(review): lines are split on every occurrence of `sep`, so quoted
# fields that contain the separator are not supported.
read.tcsv <- function(file, header = TRUE, sep = ",", ...) {
  # The widest record decides how many rows the transposed table has.
  n <- max(count.fields(file, sep = sep), na.rm = TRUE)
  lines <- readLines(file)

  # Split one line into its fields and pad (with NA) or truncate to n entries
  # so that all lines can be bound into a rectangular matrix.
  split_line <- function(line, sep, n) {
    # fixed = TRUE: without it `sep` is interpreted as a regex, so separators
    # such as "|" or "." would split the line at the wrong places.
    fields <- unlist(strsplit(line, split = sep, fixed = TRUE))
    length(fields) <- n
    fields
  }

  # One matrix column per input line; each matrix row is then one record of
  # the transposed table, re-joined with the same separator for read.csv().
  cells <- do.call(cbind, lapply(lines, split_line, sep = sep, n = n))
  transposed <- apply(cells, 1, paste, collapse = sep)
  read.csv(text = transposed, sep = sep, header = header, ...)
}
然后,您只需对按行排列的 csv 文件运行上述函数即可:
# Load the row-oriented file as a data frame with one column per variable.
df2 <- read.tcsv("give the complete path of your file")
作为参考,其结果如下:
dput(df2)
structure(list(ages = c(29L, 29L, 19L, 25L, 22L, 29L, 24L, 23L, 28L, 33L, 30L, 21L, 22L, 27L, 32L, 25L, 25L, 23L, 33L, 22L, 31L, 32L, 26L, 27L, 23L, 27L, 16L, 21L, 18L, 17L, 27L, 23L, 29L, 26L, 25L, 27L, 26L, 29L, 25L, 26L, 22L, 31L, 21L, 22L, 19L, 25L, 29L, 21L, 21L, 25L, 24L, 33L, 25L, 28L, 23L, 26L, 23L, 23L, 28L, 26L, 22L, 26L, 26L, 28L, 23L, 29L, 31L, 28L, 23L, 23L, 21L, 27L, 20L, 24L, 27L, 20L, 30L, 27L, 21L, 29L, 21L, 24L, 27L, 23L, 30L, 24L, 26L, 29L, 24L, 30L, 24L, 23L, 28L, 25L, 33L, 26L, 24L, 25L, 26L, 32L), genders = structure(c(2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L), .Label = c("female", "male"), class = "factor")), class = "data.frame", row.names = c(NA, -100L))
下一步,也是更重要的一步,是实现您想要的计算:
# Average age within each gender, base R only.
# %in% gives FALSE (never NA) for a missing gender, so such rows are skipped.
with(df2, mean(ages[genders %in% 'male']))
with(df2, mean(ages[genders %in% 'female']))
请注意此处的 == 符号,它表示比较而不是赋值;male 两侧的引号('')则表明该数据是字符类型。
# Youngest age within each gender.
# %in% gives FALSE (never NA) for a missing gender, so such rows are skipped.
with(df2, min(ages[genders %in% 'male']))
with(df2, min(ages[genders %in% 'female']))
最后,要计算小组中有多少男人或多少女人:
# Head count per gender: the comparison yields a logical vector and sum()
# counts its TRUE entries.
with(df2, sum(genders == 'male'))
with(df2, sum(genders == 'female'))
这将逐个元素检查是否相等,并统计结果为 TRUE 的个数。