尝试添加新列时的SettingWithCopyWarning

时间:2020-10-16 19:26:42

标签: python pandas sklearn-pandas

我收到一个SettingWithCopyWarning @:

pred_data[pred_col_name] = model_rf.predict(pred_data[features])

每当我尝试在以下函数中添加列时。我尝试使用loc [:,“ TEXT]分配列,但是没有运气。我假设有某种方法可以自动创建和分配列名。我缺少什么?

def create_train_predict(train_data, pred_data, target, features, model_type):
    y = train_data[target]
    X = train_data[features]
    train_X, val_X, train_y, val_y = train_test_split(X, y)

    if model_type == "dt":
        model_dt = DecisionTreeRegressor(random_state=1)
        model_dt.fit(train_X, train_y)
        pred_col_name = "dt_" + str(pred_data) + "_preds"
        pred_data[pred_col_name] = model_dt.predict(pred_data[features])
        print(mean_absolute_error(val_y, model_dt.predict(val_X)))
        return pred_data

    elif model_type == "rf":
        model_rf = DecisionTreeRegressor(random_state=1)
        model_rf.fit(X, y)
        pred_col_name = "rf_" + str(pred_data) + "_preds"
        pred_data[pred_col_name] = model_rf.predict(pred_data[features])
        print(mean_absolute_error(val_y, model_rf.predict(val_X)))
        return pred_data

完整错误:

/Users/jackmaling/PycharmProjects/AlphaDNA/digital_financial_tree_model/dt_and_rf_mod.py:47: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pred_data[str(pred_col_name)] = model_rf.predict(pred_data[features])

完整代码:

# Data Import
Fin_Actual_URLs_to2018 = pd.read_csv("/Users/.../Fin.Actual.URLs.to2018.csv")
URL_2019_Test_Data = pd.read_csv("/Users/.../URL.2019.Test.Data.csv")


# Cleaning Data
def drop_clean(df):
    df.dropna(axis=0)
    df = df[~df.isin([np.nan, np.inf, -np.inf]).any(1)]
    return df

# Predicrtion-Generating Function
def create_train_predict(train_data, pred_data, target, features, model_type):
    y = train_data[target]
    X = train_data[features]
    train_X, val_X, train_y, val_y = train_test_split(X, y)

    if model_type == "dt":
        model_dt = DecisionTreeRegressor(random_state=1)
        model_dt.fit(train_X, train_y)
        pred_col_name = "dt_" + str(pred_data) + "_preds"
        pred_data[pred_col_name] = model_dt.predict(pred_data[features])
        print(mean_absolute_error(val_y, model_dt.predict(val_X)))
        return pred_data

    elif model_type == "rf":
        model_rf = DecisionTreeRegressor(random_state=1)
        model_rf.fit(X, y)
        pred_col_name = "rf_" + str(pred_data) + "_preds"
        pred_data[pred_col_name] = model_rf.predict(pred_data[features])
        print(mean_absolute_error(val_y, model_rf.predict(val_X)))
        return pred_data


Fin_Actual_URLs_to2018 = drop_clean(Fin_Actual_URLs_to2018)
URL_2019_Pred_Data = drop_clean(URL_2019_Test_Data)

features = ["QoQ.Sum.Total.Page.Visit.Delta.Prop", "QoQ.Mean.Weighted.Pageviews.Delta.Prop", "QoQ.Mean.Weighted.Time.on.Site.Delta.Prop", "QoQ.Weighted.Bounce.Rate.Delta.Prop", "QoQ.Sum.Unique.Visitors.Delta.Prop"]

Fin_Actual_URL_Preds = create_train_predict(Fin_Actual_URLs_to2018, URL_2019_Pred_Data, "rev_beat_miss", features, "rf")
pd.DataFrame.to_csv(Fin_Actual_URL_Preds, "/Users/.../Fin_Actual_URL_Preds.csv")

0 个答案:

没有答案