library(Lahman)
library(tidyverse)
data("Fielding")
data(Salaries)
# Goal: mean 2002 salary for each fielding position.
#
# Join Fielding and Salaries on every column they share. Joining on playerID
# alone pairs each fielding row with every salary row of that player across
# all seasons (and splits the year into yearID.x / yearID.y), so salaries from
# other years leak into the 2002 subset. merge() has no `na.rm` argument, so
# none is passed here.
# NOTE(review): the shared columns are expected to be playerID, yearID, teamID
# and lgID -- confirm against the installed Lahman version.
join.keys <- intersect(names(Fielding), names(Salaries))
merged.df <- merge(Fielding, Salaries, by = join.keys)
# Keep only the 2002 season. Compare against a number: the string "2002" only
# matches through implicit coercion.
merged.df.2002 <- merged.df[merged.df$yearID == 2002, ]
# Check the column types before aggregating: POS holds position labels
# (e.g. "1B", "OF") and salary is the quantity to average.
class(merged.df.2002$POS)
class(merged.df.2002$salary)
# tapply(X, INDEX, FUN): X is the value being summarised and INDEX is the
# grouping variable. The first attempt had them swapped (POS as X, salary as
# INDEX), which is why mean() warned "argument is not numeric or logical:
# returning NA". Coercing POS with as.numeric(as.character(...)) does not help:
# it turns every position label into NA ("NAs introduced by coercion"), so no
# coercion is done here.
mean.salary <- tapply(
  merged.df.2002$salary,
  merged.df.2002$POS,
  mean,
  na.rm = TRUE
)
mean.salary
# For reference, the swapped call (after coercing POS to numeric) printed one
# NaN per distinct salary level instead of one mean per position:
# 60000 62500 63500 65000 67000 67500 68000 68750 70000
# 71000 72500 77500 78000 80000 82000 82500
# NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
# NaN NaN NaN NaN NaN NaN
有什么想法吗?非常感谢!
答案 0 :(得分:0)
嗯,这很简单,但我搞砸了:
# Join the two tables on all of their shared columns (merge()'s default `by`).
# The earlier mistake was joining on playerID alone.
merged.df <- merge(x = Fielding, y = Salaries)
# Restrict the joined data to the rows of the 2002 season.
merged.df.2002 <- merged.df[merged.df[["yearID"]] == 2002, ]
# Average the salary within each fielding position.
merged.df.mean <- tapply(
  X = merged.df.2002[["salary"]],
  INDEX = merged.df.2002[["POS"]],
  FUN = mean,
  na.rm = TRUE
)
# Result as reported by the original author:
# 1B 2B 3B C OF P SS
# 2543845 1400543 1547836 1787933 2659230 2150887 1979732