我试图将+解释为All Pro,*作为Pro Bowl,然后缺少这两个中的任何一个默认为名称。
我的问题似乎与我的ifelse的“其他”部分有关。它不是插入字符串而是插入数字。
# libraries
library(plyr)
library(dplyr)
library(XML)
library(stringr)
# file names
model_no <- "pfr_003"
model_name <- "fantasy_football_"
image_name <- paste(model_name, model_no, ".RData", sep="")
# parameters
first_season <- 2011
last_season <- 2013
# seasons
num_seasons <- as.numeric(last_season - first_season + 1)
seasons <- seq(first_season, last_season, by=1)
pfr <- data.frame()
for (i in 1:num_seasons) {
url <- paste("http://www.pro-football-reference.com/years/", seasons[i],"/fantasy.htm", sep = "")
df <- readHTMLTable(url,which=1, header=FALSE)
df$season = seasons[i]
df <- df[c(2, 3, 4, 5, 6, 20, 25)]
pfr <- rbind(pfr, df)
rm(df)
print(seasons[i])
}
names(pfr) <- c("NameInfo", "Team", "Age", "G", "GS", "Pos", "Year")
pfr <- pfr[pfr$Team != "Tm", ]
pfr <- pfr[pfr$Name != "Passing", ]
pfr$AllPro <- ifelse(is.na(str_locate(string=pfr$NameInfo, '[+]')[,1]) == TRUE, 0, 1)
pfr$ProBowl <- ifelse(is.na(str_locate(string=pfr$NameInfo, '[*]')[,1]) == TRUE, 0, 1)
# Everything above is cool
# This ifelse works just fine
pfr$test1 <- ifelse(pfr$AllPro == 1, "AP", ifelse(pfr$ProBowl == 1, "PB", "None"))
# but when I want to strip the + an * from the NameInfo field I come across an issue
# it works fine for anyone that is AP or PB, but instead of the "else" portion
# defaulting to the NameInfo field it inserts a (seemingly random) number
pfr$test2 <- ifelse(pfr$AllPro == 1, str_sub(pfr$Name, end=str_locate(string=pfr$Name, '[+]')[,1]-2),
ifelse(pfr$ProBowl == 1, str_sub(pfr$Name, end=str_locate(string=pfr$Name, '[*]')[,1]-1),
pfr$NameInfo))
非常感谢任何帮助。
感谢。
答案 0 :(得分:1)
不是随机数,而是因子级别。您的pfr $ NameInfo是一个因素。如果您想要一个字符串,请将最后一个更改为as.character(pfr$NameInfo)
。