如何在R中一步将列的值替换为对应的WOE值

时间:2018-02-19 06:31:15

标签: r

我正在进行一个信用卡潜在客户识别的案例研究。我需要把所有列的值替换为各自对应的WOE(证据权重)值。目前我可以分2-3步完成,但我想知道是否有办法一步到位。

3 个答案:

答案 0 :(得分:1)

您可能需要查看woe package(如果WOE代表证据权重)。

以下是文档中的相关代码段:

# Load the woe package, then compute the WOE table for the predictor "cyl"
# (treated as non-continuous) against the 0/1 outcome "am" in mtcars.
library(woe)
res_woe <- woe(
  Data = mtcars,
  Independent = "cyl",
  Continuous = FALSE,
  Dependent = "am",
  C_Bin = 10,
  Bad = 0,
  Good = 1
)

答案 1 :(得分:1)

使用 scorecard 包中的 woebin()、woebin_plot()、woebin_ply() 和 iv() 函数,可以很简单地完成。

library(scorecard)

# Name of the target column. It is defined once so that woebin() and iv()
# are guaranteed to refer to the same column (the two calls previously used
# different spellings, which made iv() look for a non-existent column).
target_col <- "targetvariable"

temp <- credit_data

# Bin the predictors of `temp` against the target column.
bins <- woebin(dt = temp, y = target_col)

# Inspect the binning of a single variable.
woebin_plot(bins$Income)

# Apply the bins to the data, replacing original values by their WOE.
WOE_temp <- woebin_ply(temp, bins)

View(WOE_temp)

# Show the rows whose No.of.dependents value is missing.
View(temp[is.na(temp$No.of.dependents), ])

# Information values with respect to the same target column.
IV_values <- iv(dt = temp, y = target_col)
print(IV_values)

答案 2 :(得分:0)

嗨,请按照以下步骤操作:-

第1步:使用 Information 包计算 WOE 和 IV:

library(fuzzyjoin)

library(Information)

# Build the information tables for test_df; `y` names the label column.
IV <-
  Information::create_infotables(
    data = test_df,
    y = "label_column",
    parallel = TRUE
  )

参数 "y" 用于指定标签列的列名,参数 "data" 用于指定数据框。

第2步:使用下面的函数。这是我自己编写的自定义函数,用 Information 包计算出的 WOE 值替换数据框中的实际值:

#' Replace column values with their Weight of Evidence (WOE)
#'
#' For every column of `df_orig` that has an entry in `IV$Tables`, each value
#' is replaced by the WOE of the bin it belongs to. Factor/character columns
#' are matched on the bin label; numeric/integer columns are matched against
#' bin labels of the form `"[lower,upper]"` (both ends inclusive). Columns of
#' any other type, and columns without a table, are left untouched.
#'
#' The lookup is done with base R only, so row count, row order and column
#' order of the input are preserved. Missing values are mapped to the bin
#' labelled `"NA"` when the table has one. Values that fall into no bin
#' become `NA` and a warning is raised instead of the row being dropped.
#'
#' @param df_orig A data frame whose columns should be WOE-encoded.
#' @param IV A list with a `Tables` element (e.g. the result of
#'   `Information::create_infotables()`). Each table must contain a column
#'   named after the variable (the bin labels) and a `WOE` column.
#' @return A copy of `df_orig` with the matched columns replaced by numeric
#'   WOE values.
woe_replace <- function(df_orig, IV) {
  stopifnot(is.data.frame(df_orig))
  df <- df_orig
  woe_tables <- IV[["Tables"]]

  for (col_nm in intersect(colnames(df), names(woe_tables))) {
    woe_tbl <- as.data.frame(woe_tables[[col_nm]])
    if (!all(c(col_nm, "WOE") %in% colnames(woe_tbl))) {
      stop(
        "WOE table for column '", col_nm,
        "' must contain the columns '", col_nm, "' and 'WOE'",
        call. = FALSE
      )
    }

    values <- df[[col_nm]]
    bin_labels <- as.character(woe_tbl[[col_nm]])

    # is.factor()/is.numeric() are used instead of comparing class() strings
    # so that multi-class columns (e.g. ordered factors) are handled too.
    if (is.factor(values) || is.character(values)) {
      bin_idx <- .woe_match_category(values, bin_labels)
    } else if (is.numeric(values)) {
      bin_idx <- .woe_match_interval(values, bin_labels)
    } else {
      next
    }

    n_unmatched <- sum(is.na(bin_idx))
    if (n_unmatched > 0) {
      warning(
        sprintf(
          "%d value(s) in column '%s' matched no WOE bin; set to NA",
          n_unmatched, col_nm
        ),
        call. = FALSE
      )
    }

    df[[col_nm]] <- woe_tbl[["WOE"]][bin_idx]
  }

  df
}

# Index of the bin whose label equals each categorical value (NA if none).
.woe_match_category <- function(values, bin_labels) {
  keys <- as.character(values)
  bin_idx <- match(keys, bin_labels)
  # Missing values go to the bin labelled "NA" when such a bin exists.
  unmatched_na <- is.na(keys) & is.na(bin_idx)
  bin_idx[unmatched_na] <- match("NA", bin_labels)
  bin_idx
}

# Index of the "[lower,upper]" bin containing each numeric value (NA if none).
.woe_match_interval <- function(values, bin_labels) {
  is_range <- grepl("^\\[[^,]+,[^]]+\\]$", bin_labels)
  lower <- rep(NA_real_, length(bin_labels))
  upper <- rep(NA_real_, length(bin_labels))
  lower[is_range] <-
    as.numeric(sub("^\\[([^,]+),.*$", "\\1", bin_labels[is_range]))
  upper[is_range] <-
    as.numeric(sub("^\\[[^,]+,([^]]+)\\]$", "\\1", bin_labels[is_range]))

  bin_idx <- rep(NA_integer_, length(values))
  # The number of bins is small, so loop over bins and vectorise over rows.
  # A value is assigned to the first bin that contains it.
  for (b in which(is_range)) {
    hit <- which(
      is.na(bin_idx) & !is.na(values) &
        values >= lower[b] & values <= upper[b]
    )
    bin_idx[hit] <- b
  }
  # Missing values go to the bin labelled "NA" when such a bin exists.
  bin_idx[is.na(values)] <- match("NA", bin_labels)
  bin_idx
}

函数调用:-

# WOE-encode test_df using the previously computed information tables.
test_df_woe <- woe_replace(df_orig = test_df, IV = IV)

或者一步到位:

# One-shot variant: build the information tables inline and hand them
# straight to woe_replace().
test_df_woe <- woe_replace(
  df_orig = test_df,
  IV = Information::create_infotables(
    data = test_df,
    y = "label_column",
    parallel = TRUE
  )
)