Question

我正在做一个信用卡潜在客户识别的案例研究。我需要把所有列的值替换为各自对应的 WOE 值。我可以用 2-3 步完成，但是我想知道是否有办法一步到位（one shot）地完成。

Answer 1

您可以看看 woe 包（如果 WOE 指的是证据权重，即 Weight of Evidence）。

以下是文档中的相关代码段：

# Call woe() on mtcars with "cyl" as the independent variable and "am" as the
# dependent variable; "cyl" is declared non-continuous.
library(woe)
res_woe <- woe(
  Data = mtcars,
  Independent = "cyl",
  Continuous = FALSE,
  Dependent = "am",
  C_Bin = 10,
  Bad = 0,
  Good = 1
)

Answer 2

使用 scorecard 包会很简单，用到 woebin()、woebin_plot()、woebin_ply() 和 iv() 这几个函数。

# Load scorecard before any of its functions are called.
library(scorecard)

temp <- credit_data

# Name of the outcome column. Defined once so that woebin() and iv() are
# guaranteed to refer to the same column.
target_col <- "targetvariable"

# Bin every predictor against the outcome column.
bins <- woebin(dt = temp, y = target_col)

# Plot the binning of the Income column.
woebin_plot(bins$Income)

# Apply the bins to the data.
WOE_temp <- woebin_ply(temp, bins)

View(WOE_temp)

# Inspect the rows whose No.of.dependents value is missing.
View(temp[is.na(temp$No.of.dependents), ])

IV_values <- iv(dt = temp, y = target_col)
# print() also shows the result when the script is run through source().
print(IV_values)

Answer 3

嗨，请按照以下步骤操作：-

第1步：使用 Information 包计算 WOE 和 IV：

library(fuzzyjoin)

library(Information)

# Build the Information tables for test_df, using "label_column" as y.
IV <- Information::create_infotables(
  data = test_df,
  y = "label_column",
  parallel = TRUE
)

在“ y”中，我们需要分配标签，在“ data”中，我们需要分配数据框。

第2步：使用下面的函数。这是我自己编写的自定义函数，用 Information 包计算出的 WOE 值替换数据框中的实际值：

# Locate, for each numeric value, the first "[lower,upper]" bin containing it.
#
# `labels` are bin labels such as "[18,30]"; both bounds are inclusive.
# Returns an integer vector of indices into `labels`, with NA where no bin
# matches (NA values, values outside every bin, or labels that are not of the
# "[lower,upper]" form, e.g. an "NA" bin).
.woe_bin_index <- function(values, labels) {
  labels <- as.character(labels)
  # Text between "[" and the first "," is the lower bound; text between that
  # "," and the first "]" is the upper bound.
  lower <- as.numeric(sub("^\\[([^,]*),.*$", "\\1", labels))
  upper <- as.numeric(sub("^\\[[^,]*,([^]]*)\\].*$", "\\1", labels))
  vapply(
    values,
    function(v) {
      # which() ignores NA comparisons, so NA values/bounds never match.
      idx <- which(v >= lower & v <= upper)
      if (length(idx) > 0L) idx[1L] else NA_integer_
    },
    integer(1),
    USE.NAMES = FALSE
  )
}

#' Replace column values with their WOE values
#'
#' For every column of `df_orig` that has an entry of the same name in
#' `IV$Tables`, the values are replaced by the matching `WOE` value:
#' factor/character columns are matched on the level text, numeric/integer
#' columns on the inclusive "[lower,upper]" bin label. Columns of any other
#' type, and columns without a table, are left untouched.
#'
#' Implemented with base R only, so no join or string package has to be
#' attached for the function to work.
#'
#' @param df_orig A data frame.
#' @param IV A list with a named list element `Tables`; each table must have a
#'   column named after the variable (the bin/level labels) and a `WOE` column.
#' @param drop_unmatched If `TRUE` (default, the historical inner-join
#'   behaviour) rows whose value matches no bin are removed and row names are
#'   reset. If `FALSE` all rows are kept and unmatched values become `NA`.
#' @return A data frame in which each replaced column holds WOE values.
#'   Replaced columns are moved to the end, in processing order.
woe_replace <- function(df_orig, IV, drop_unmatched = TRUE) {
  df <- as.data.frame(df_orig)
  # Fixed up front: the column order of `df` changes while we iterate.
  woe_columns <- intersect(names(df), names(IV$Tables))

  for (col_name in woe_columns) {
    woe_table <- as.data.frame(IV$Tables[[col_name]])
    if (!all(c(col_name, "WOE") %in% names(woe_table))) {
      stop(
        "WOE table for '", col_name, "' must contain columns '",
        col_name, "' and 'WOE'",
        call. = FALSE
      )
    }

    values <- df[[col_name]]
    if (is.factor(values) || is.character(values)) {
      hit <- match(as.character(values), as.character(woe_table[[col_name]]))
    } else if (is.numeric(values)) {
      hit <- .woe_bin_index(values, woe_table[[col_name]])
    } else {
      next
    }

    if (drop_unmatched) {
      matched <- !is.na(hit)
      df <- df[matched, , drop = FALSE]
      rownames(df) <- NULL
      hit <- hit[matched]
    }

    # Remove then re-add so the replaced column ends up last.
    df[[col_name]] <- NULL
    df[[col_name]] <- woe_table[["WOE"]][hit]
  }

  df
}

函数调用：-

# Replace the columns of test_df that have an entry in IV$Tables.
test_df_woe <- woe_replace(df_orig = test_df, IV = IV)

或者真正一步到位：

# Compute the Information tables and apply them in a single expression.
test_df_woe <- woe_replace(
  df_orig = test_df,
  IV = Information::create_infotables(
    data = test_df,
    y = "label_column",
    parallel = TRUE
  )
)

如何在 R 中一步到位地把列的值替换为 WOE 值

3 个答案: