我正在尝试找到一种有效的方法来替换代码中的 for 循环。我找到了一种解决方法,但我确信有一种更“R 友好”的方式来做到这一点。
df<-structure(list(YY = c(2020, 2020, 2021, 2021, 2021), DD = c(6,
13, 19, 28, 3), MM = c("Fev", "Fev", "Fev", "Fev", "Mar"), Date = structure(c(18664,
18671, 18677, 18686, 18689), class = "Date"), `ID (FIFA)` = c("FRA D1",
"FRA D1", "FRA D1", "FRA D1", "FRA D1"), Country = c("France",
"France", "France", "France", "France"), League = c("Ligue 1",
"Ligue 1", "Ligue 1", "Ligue 1", "Ligue 1"), Season = c("2020/2021",
"2020/2021", "2020/2021", "2020/2021", "2020/2021"), HOME = c("Lyon",
"Lyon", "Brest", "Marseille", "Lyon"), AWAY = c("Strasbourg",
"Montpellier", "Lyon", "Lyon", "Rennes"), `Final Scores` = c(3,
1, 2, 1, 1), ...12 = c(0, 2, 3, 1, 0), ...13 = c("H", "A", "A",
"D", "H"), ...14 = c(NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_), `ET/Pen/Awd` = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_),
`1st Half Scores` = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), ...17 = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), ...18 = c(NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_), ...19 = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), `2nd Half Scores` = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), ...21 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), ...22 = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_
), ...23 = c(NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_), FTMoneyline...24 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), ...25 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), ...26 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), `Payout, %...27` = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), FTMoneyline...28 = c(1.45,
1.38, 6.5, 5, 1.61), ...29 = c(5.25, 5.5, 4.85, 4.2, 4.4),
...30 = c(8, 8, 1.5, 1.7, 5.9), `Payout, %...31` = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), `FT TG 2.5...32` = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), ...33 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), `FT TG 2.5...34` = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), ...35 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_), OUTCOME = c(1, 3,
3, 2, 1), REFGAME = c("G236038", "G236098", "G236155", "G236247",
"G236276"), m1 = c(4.9, 4.96, 4.28333333333333, 3.63333333333333,
3.97), sd = c(3.28899680753874, 3.34287301583533, 2.54771139129481,
1.72143351115671, 2.17708520733572), indxcol = c(1L, 1L,
3L, 3L, 1L), sp1 = c(-3.8, -4.12, 1.65, 0.8, -2.79), sp2 = c(2.75,
2.5, -3.35, -2.5, 1.5), ovr = c(0.513136288998362, 3.14558629776023,
2.66983875231297, 2.63305322128851, 1.78822651188164), wavg = c(0.357142857142857,
0.369623655913978, 0.377431906614786, 0.385321100917431,
0.36943744752309), ospr = c(5.51724137931035, 5.79710144927536,
4.33333333333333, 2.94117647058824, 3.66459627329193)), row.names = c(NA,
-5L), class = c("tbl_df", "tbl", "data.frame"))
我的脚本的目标是计算每个主队在之前的比赛中有多少次是博彩公司的最低小数赔率(赔率在 col df[28:30]),以及它实际对应的次数游戏的结果。 这样我就可以得到一个准确率(r/t*100)。
这是我要替换的 for 循环部分的代码:
if(is.null(nrow(df))|nrow(df)==0){
r<-0
return(r)
}
r<- 0
t<- 0
for(i in nrow(df):1){
if(grep(team,df[i,])==9 & which.min(df[i,28:30])[[1]]==1){
t<-t+1
if(df[i,"OUTCOME"]==1){
r<-r+1
}
} else if(grep(team,df[i,])==10 & which.min(df[i,28:30])[[1]]==3){
t<-t+1
if(df[i,"OUTCOME"]==3){
r<-r+1
}
}
}
if(r==0 | t==0){
return(0)
}
return(r/t*100)
这是我找到的解决方法,但它看起来并不理想:
r<- 0
t<- 0
df1<- df[grepl(team,df$HOME) & apply(df[28:30],1,which.min)==1,]
if(is.null(nrow(df1))|nrow(df1)==0){
r<-0
return(r)
}
r1 <-df1[df1["OUTCOME"]==1,]
df2<- df[grepl(team,df$AWAY) & apply(df[28:30],1,which.min)==3,]
if(is.null(nrow(df2))|nrow(df2)==0){
r<-0
return(r)
}
r2<- df2[df2["OUTCOME"]==3,]
t<- sum(nrow(df1),nrow(df2))
r<-sum(nrow(r1),nrow(r2))
if(r==0 | t==0){
return(0)
}
return(r/t*100)
我怎样才能以“R 友好”的方式更新它?
根据@DaveArmstrong 的评论,下面是最后一个 group_by
之后的输出:
structure(list(obs = c(1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L
), place = c("home", "away", "home", "away", "home", "away",
"home", "away", "home", "away"), team = c("Lyon", "Strasbourg",
"Lyon", "Montpellier", "Brest", "Lyon", "Marseille", "Lyon",
"Lyon", "Rennes"), r = c(1, 0, 0, 0, 0, 1, 0, 0, 1, 0)), row.names = c(NA,
-10L), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), groups = structure(list(
team = c("Brest", "Lyon", "Marseille", "Montpellier", "Rennes",
"Strasbourg"), .rows = list(5L, c(1L, 3L, 6L, 8L, 9L), 7L,
4L, 10L, 2L)), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE))
sessionInfo()
R version 3.6.1 (2019-07-05)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS 10.16
Matrix products: default
LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] neuralnet_1.44.2 class_7.3-15 gmodels_2.18.1 viridis_0.5.1 viridisLite_0.3.0
[6] plotly_4.9.1 gridExtra_2.3 slackr_1.4.2 qqplotr_0.0.4 ggplot2_3.3.0
[11] odds.converter_1.4.8 e1071_1.7-2 tibble_2.1.3 readxl_1.3.1 dplyr_0.8.5
[16] plyr_1.8.4 tidyr_1.1.3
loaded via a namespace (and not attached):
[1] Rcpp_1.0.3 cellranger_1.1.0 pillar_1.4.3 compiler_3.6.1 DEoptimR_1.0-8 tools_3.6.1
[7] digest_0.6.25 jsonlite_1.6.1 lifecycle_0.2.0 gtable_0.3.0 pkgconfig_2.0.3 rlang_0.4.10
[13] cli_2.0.2 rstudioapi_0.11 yaml_2.2.0 withr_2.1.2 httr_1.4.0 gtools_3.8.2
[19] htmlwidgets_1.5.1 vctrs_0.3.6 grid_3.6.1 tidyselect_1.1.0 data.table_1.12.6 glue_1.3.1
[25] robustbase_0.93-7 R6_2.4.1 fansi_0.4.1 gdata_2.18.0 purrr_0.3.3 magrittr_1.5
[31] ellipsis_0.3.0 htmltools_0.4.0 scales_1.1.0 MASS_7.3-51.4 assertthat_0.2.1 colorspace_1.4-1
[37] utf8_1.1.4 lazyeval_0.2.2 munsell_0.5.0 crayon_1.3.4
答案 0 :(得分:2)
这是一个选项:
names(df)[c(28:30)] <- c("odds1", "odds2", "odds3")
df2 <- df %>%
rowwise %>%
mutate(min_2830 = which.min(c(odds1, odds2, odds3))) %>%
ungroup %>%
group_by(HOME) %>%
mutate(r_home = as.numeric(min_2830 == 1 & OUTCOME == 1)) %>%
ungroup %>%
group_by(AWAY) %>%
mutate(r_away = as.numeric(min_2830 == 3 & OUTCOME == 3)) %>%
select(HOME, AWAY, r_home, r_away) %>%
set_names(c("team_home", "team_away", "r_home", "r_away")) %>%
ungroup %>%
mutate(obs=1:n()) %>%
pivot_longer(-obs, names_pattern="(.*)_(.*)",
names_to = c(".value", "place")) %>%
group_by(team) %>%
summarise(r = cumsum(r)) %>%
mutate(game = seq_along(r),
pct = (r/game)*100)
df2
# # A tibble: 10 x 4
# # Groups: team [6]
# team r game pct
# <chr> <dbl> <int> <dbl>
# 1 Brest 0 1 0
# 2 Lyon 1 1 100
# 3 Lyon 1 2 50
# 4 Lyon 2 3 66.7
# 5 Lyon 2 4 50
# 6 Lyon 3 5 60
# 7 Marseille 0 1 0
# 8 Montpellier 0 1 0
# 9 Rennes 0 1 0
# 10 Strasbourg 0 1 0
sessionInfo()
R version 4.0.3 (2020-10-10)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Catalina 10.15.7
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] modelr_0.1.8 forcats_0.5.0 stringr_1.4.0 dplyr_1.0.3 purrr_0.3.4 readr_1.4.0 tidyr_1.1.2
[8] tibble_3.0.5 ggplot2_3.3.2 tidyverse_1.3.0
loaded via a namespace (and not attached):
[1] Rcpp_1.0.6 cellranger_1.1.0 pillar_1.4.7 compiler_4.0.3 dbplyr_1.4.3 tools_4.0.3
[7] jsonlite_1.7.1 lubridate_1.7.9 lifecycle_0.2.0 nlme_3.1-149 gtable_0.3.0 lattice_0.20-41
[13] pkgconfig_2.0.3 rlang_0.4.10 reprex_0.3.0 cli_2.2.0 DBI_1.1.0 rstudioapi_0.11
[19] haven_2.3.1 withr_2.3.0 xml2_1.3.2 httr_1.4.2 fs_1.4.1 generics_0.1.0
[25] vctrs_0.3.6 hms_0.5.3 grid_4.0.3 tidyselect_1.1.0 glue_1.4.2 R6_2.5.0
[31] fansi_0.4.2 readxl_1.3.1 magrittr_2.0.1 backports_1.1.10 scales_1.1.1 ellipsis_0.3.1
[37] rvest_0.3.6 assertthat_0.2.1 colorspace_1.4-1 utf8_1.1.4 stringi_1.5.3 munsell_0.5.0
[43] broom_0.5.6 crayon_1.3.4