我正在尝试针对 df 中的每个物种,计算 5 个变量(WING、WINGPRI、WEIGHT、BEAK 和 TARSUS)在两性之间的差异。我已经尝试了几十段代码(我是初学者),但要么不断收到错误消息,要么代码没有给出我想要的输出结果。
这是我目前最接近的尝试:
library(tidyverse)
library(broom)
# NOTE(review): this is the asker's non-working attempt, kept verbatim.
# Intent: reshape the five measurements to long format, collect the values per
# SEX / SPECIES / variable, then run a t-test comparing F and M.
df %>%
select(SPECIES, SEX, WING, WINGPRI, WEIGHT, BEAK, TARSUS) %>%
# Long format: one row per original row and measured variable.
gather(key = variable, value=value, -SEX, -SPECIES) %>%
group_by(SEX, SPECIES, variable) %>%
# Collapse each group's measurements into a single list-column cell.
summarise(value = list(value)) %>%
# NOTE(review): `na.rm=TRUE` is not a pipeline step. `%>%` binds tighter than
# `=`, so the chain is split here and the bare TRUE is what gets piped into
# rowid_to_column() -- presumably the source of the reported
# "is.data.frame(df) is not TRUE" error.
na.rm=TRUE %>%
tibble::rowid_to_column() %>%
# NOTE(review): this spreads on SPECIES, so the new columns would be species
# names, not "F" / "M".
spread(SPECIES, value) %>%
group_by(variable) %>%
# NOTE(review): "F" and "M" below are string literals, not column references,
# so t.test() is handed the characters "F" and "M" rather than measurements.
mutate(p_value = t.test(unlist("F"), unlist("M"))$p.value,
t_value = t.test(unlist("F"), unlist("M"))$statistic)
但是在解决了许多错误之后,我始终无法解决下面这个错误:Error: is.data.frame(df) is not TRUE
任何关于如何修复这段代码、或者改用完全不同的代码的想法,都会对我有所帮助。
> dput(sample)
structure(list(RING = c("A264874", "A432586", "O92477", "B9124",
"C95571", "A395011", "C88213", "C58443", "A95422", "C58409"),
SPECIES = c("CARDUELIS CARDUELIS", "SYLVIA ATRICAPILLA",
"ESTRILDA ASTRILD", "ALCEDO ATTHIS", "CHLORIS CHLORIS", "FRINGILLA COELEBS",
"SYLVIA ATRICAPILLA", "CHLORIS CHLORIS", "SYLVIA ATRICAPILLA",
"PARUS MAJOR"), SEX = c("U", "M", "F", "F", "F", "F", "F",
"M", "F", "M"), AGE = c(2L, 3L, 3L, 3L, 4L, 2L, 4L, 4L, 6L,
3L), FAT = c(0L, 0L, 0L, NA, 0L, 0L, 0L, 0L, 1L, 0L), WEIGHT = c(148,
185, 85, 32, 225, 20, 245, 22, 197, 19), WING = c(775, 69,
45, 76, 82, 84, 77, 83, 69, 72), WINGPRI = c(58L, NA, 32L,
NA, NA, NA, 57L, 64L, 52L, 54L), BEAK = c(156, 132, 86, NA,
NA, 138, 125, 13, 145, 125), TARSUS = c(148, 199, 146, NA,
NA, 178, 18, 177, 207, 205), BROODPATCH = c(0L, NA, 0L, 0L,
0L, NA, 0L, 0L, 0L, 0L), MUSCLE = c(2L, 3L, 2L, 2L, 2L, 2L,
2L, 1L, 2L, 2L), PROGRAM = c("MAI", "MAI", "MAI", "MIGRA<c7><c3>O",
"PEEC", "MAI", "MAI", "PEEC", "MAI", "MIGRA<c7><c3>O")), .Names = c("RING",
"SPECIES", "SEX", "AGE", "FAT", "WEIGHT", "WING", "WINGPRI",
"BEAK", "TARSUS", "BROODPATCH", "MUSCLE", "PROGRAM"), row.names = c(NA,
-10L), class = c("tbl_df", "tbl", "data.frame"))
答案 0 :(得分:1)
请考虑使用 base 软件包:它随 R 的每次安装一起提供,并在 R 的每个会话中自动加载,无需调用 library()。
具体来说,使用其中的 by(tapply 的面向对象包装器)按 SPECIES 对数据框取子集,并使用 sapply 对各个变量列运行 t.test。
由于 OP 的数据观测值不足,无法运行 t.test,因此下面先生成一个模拟数据集。
输出
# Simulated stand-in for the real data: 500 birds with a random species (drawn
# from the species present in `df`), a random sex, and five measurements drawn
# uniformly from [0, 100). The seed makes the draws reproducible.
set.seed(10102018)
species_df <- local({
  n_obs <- 500
  measure_cols <- c("WING", "WINGPRI", "WEIGHT", "BEAK", "TARSUS")

  # The draw order (SPECIES, SEX, then each measurement in turn) determines
  # the values produced under the seed, so it must not be rearranged.
  species_draw <- sample(unique(df$SPECIES), n_obs, replace = TRUE)
  sex_draw <- sample(c("F", "M"), n_obs, replace = TRUE)
  measurements <- lapply(
    setNames(measure_cols, measure_cols),
    function(col) runif(n_obs) * 100
  )

  data.frame(
    SPECIES = species_draw,
    SEX = sex_draw,
    measurements,
    stringsAsFactors = FALSE
  )
})
# NAMED LIST OF MATRICES
# For every species, run a two-sample t-test (females vs. males) on each of the
# five measurements. The result is a `by` object with one element per species;
# each element is a 2 x 5 numeric matrix with rows "p_value" / "t_value" and
# one column per measurement.
mat_list <- by(species_df, species_df$SPECIES, function(sub) {
  is_female <- sub$SEX == "F"
  is_male <- sub$SEX == "M"

  # vapply() pins the shape of every per-column result to a named numeric
  # pair, so the matrix layout does not depend on what the first column
  # happened to return.
  vapply(
    c("WING", "WINGPRI", "WEIGHT", "BEAK", "TARSUS"),
    function(col) {
      tryCatch(
        {
          output <- t.test(sub[is_female, col], sub[is_male, col])
          # unname(): `statistic` carries its own name "t", which would
          # otherwise turn the element name into "t_value.t" and disagree
          # with the fallback names below.
          c(p_value = output$p.value, t_value = unname(output$statistic))
        },
        # Best effort: a species with too few observations of either sex
        # yields NA for that measurement instead of aborting the whole run.
        warning = function(w) c(p_value = NA_real_, t_value = NA_real_),
        error = function(e) c(p_value = NA_real_, t_value = NA_real_)
      )
    },
    FUN.VALUE = c(p_value = 0, t_value = 0)
  )
})
答案 1 :(得分:0)
以下是基于您发布的数据的示例
library(tidyverse)
library(broom)
# One two-sample t-test (F vs. M) per measured variable, returned as a tibble
# with one row per variable and the tidy() summary columns of each test.
df %>%
  select(SPECIES, SEX, WING, WINGPRI, WEIGHT, BEAK, TARSUS) %>%
  # Long format: one row per original row and measured variable.
  gather(key = variable, value = value, -SEX, -SPECIES) %>%
  # Drop birds of unknown sex so that SEX has exactly the two groups the
  # formula interface of t.test() requires.
  filter(SEX != "U") %>%
  group_by(variable) %>%
  # Inside summarise(), `value` and `SEX` refer to the current group only.
  summarise(tt = list(tidy(t.test(value ~ SEX)))) %>%
  # Name the list-column explicitly: calling unnest() without columns is
  # deprecated since tidyr 1.0.0 and emits a warning.
  unnest(tt)
# # A tibble: 5 x 11
# variable estimate estimate1 estimate2 statistic p.value parameter conf.low conf.high method alternative
# <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
# 1 BEAK 33.5 124. 90 0.822 0.483 2.47 -113. 180. Welch Two Sample t-test two.sided
# 2 TARSUS -56.4 137. 194. -1.33 0.270 3.25 -186. 73.2 Welch Two Sample t-test two.sided
# 3 WEIGHT 58.7 134 75.3 0.857 0.436 4.32 -126. 243. Welch Two Sample t-test two.sided
# 4 WING -2.5 72.2 74.7 -0.346 0.740 6.87 -19.6 14.6 Welch Two Sample t-test two.sided
# 5 WINGPRI -12 47 59 -1.31 0.281 2.99 -41.1 17.1 Welch Two Sample t-test two.sided
我没有再按 SPECIES 分组,因为示例数据中每个物种的观测值太少,t.test 无法运行。
请注意,t.test
会自动忽略NA
的值,但是您可以在执行测试之前在代码中添加... %>% na.omit() %>% ...
。
注意,tt = list(tidy(t.test(value ~ SEX)))
将根据您的分组自动使用适当的子数据集。但是,如果使用tt = list(tidy(t.test(value ~ SEX, data= .)))
,则每次都会使用整个数据集。
答案 2 :(得分:0)
你好,同胞,
您正在对一个列表运行代码,却假设它是数据框。我会先尝试将 df 转换为数据框,然后再运行代码。我建议您试一下下面这个示例,看看效果如何:
# Build one plain vector per column, using values copied from the question's
# sample data.
RING <- c(
  "A264874", "A432586", "O92477", "B9124", "C95571",
  "A395011", "C88213", "C58443", "A95422", "C58409"
)
SPECIES <- c(
  "CARDUELIS CARDUELIS", "SYLVIA ATRICAPILLA", "ESTRILDA ASTRILD",
  "ALCEDO ATTHIS", "CHLORIS CHLORIS", "FRINGILLA COELEBS",
  "SYLVIA ATRICAPILLA", "CHLORIS CHLORIS", "SYLVIA ATRICAPILLA",
  "PARUS MAJOR"
)
SEX <- c("U", "M", "F", "F", "F", "F", "F", "M", "F", "M")
AGE <- c(2L, 3L, 3L, 3L, 4L, 2L, 4L, 4L, 6L, 3L)
FAT <- c(0L, 0L, 0L, NA, 0L, 0L, 0L, 0L, 1L, 0L)
WEIGHT <- c(148, 185, 85, 32, 225, 20, 245, 22, 197, 19)
WING <- c(775, 69, 45, 76, 82, 84, 77, 83, 69, 72)

# Collect the vectors in an unnamed list; the order here fixes the column
# order of the data frame built below.
df <- list(WEIGHT, FAT, AGE, SEX, SPECIES, RING, WING)

# Bind the list elements side by side into one data frame, then label the
# columns in the same order as the list above.
# Note: the result is stored in a variable that is itself called `data.frame`.
data.frame <- do.call(cbind.data.frame, df)
names(data.frame) <- c(
  "WEIGHT", "FAT", "AGE", "SEX", "SPECIES", "RING", "WING"
)
看看您的代码现在是否可以使用
答案 3 :(得分:0)
使用另一个库:
library(matrixTests)
# Measurement columns to compare between the sexes.
vars <- c("WING", "WINGPRI", "WEIGHT", "BEAK", "TARSUS")
# One Welch t-test per column: females as the first sample (x), males as the
# second (y). The result is printed with one row per variable.
col_t_welch(
  x = sample[sample$SEX == "F", vars],
  y = sample[sample$SEX == "M", vars]
)
obs.x obs.y obs.tot mean.x mean.y mean.diff var.x var.y stderr df statistic pvalue conf.low conf.high alternative mean.null conf.level
WING 6 3 9 72.16667 74.66667 -2.50000 204.5667 54.33333 7.225341 6.873827 -0.3460044 0.7396874 -19.64900 14.64900 two.sided 0 0.95
WINGPRI 3 2 5 47.00000 59.00000 -12.00000 175.0000 50.00000 9.128709 2.985075 -1.3145341 0.2805428 -41.13398 17.13398 two.sided 0 0.95
WEIGHT 6 3 9 134.00000 75.33333 58.66667 10074.4000 9022.33333 68.458097 4.318219 0.8569719 0.4364231 -126.00493 243.33827 two.sided 0 0.95
BEAK 4 3 7 123.50000 90.00000 33.50000 693.6667 4459.00000 40.740029 2.471493 0.8222871 0.4826893 -113.34042 180.34042 two.sided 0 0.95
TARSUS 4 3 7 137.25000 193.66667 -56.41667 6940.9167 217.33333 42.516745 3.247234 -1.3269282 0.2701275 -186.07859 73.24525 two.sided 0 0.95
正如 @AntoniosK 所提到的,使用您提供的数据集同样无法按物种分别进行检验。