我收到一个SettingWithCopyWarning @:
pred_data[pred_col_name] = model_rf.predict(pred_data[features])
每当我尝试在以下函数中添加列时。我尝试使用loc [:,“ TEXT]分配列,但是没有运气。我假设有某种方法可以自动创建和分配列名。我缺少什么?
def create_train_predict(train_data, pred_data, target, features, model_type):
y = train_data[target]
X = train_data[features]
train_X, val_X, train_y, val_y = train_test_split(X, y)
if model_type == "dt":
model_dt = DecisionTreeRegressor(random_state=1)
model_dt.fit(train_X, train_y)
pred_col_name = "dt_" + str(pred_data) + "_preds"
pred_data[pred_col_name] = model_dt.predict(pred_data[features])
print(mean_absolute_error(val_y, model_dt.predict(val_X)))
return pred_data
elif model_type == "rf":
model_rf = DecisionTreeRegressor(random_state=1)
model_rf.fit(X, y)
pred_col_name = "rf_" + str(pred_data) + "_preds"
pred_data[pred_col_name] = model_rf.predict(pred_data[features])
print(mean_absolute_error(val_y, model_rf.predict(val_X)))
return pred_data
完整错误:
/Users/jackmaling/PycharmProjects/AlphaDNA/digital_financial_tree_model/dt_and_rf_mod.py:47: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
pred_data[str(pred_col_name)] = model_rf.predict(pred_data[features])
完整代码:
# Data Import
Fin_Actual_URLs_to2018 = pd.read_csv("/Users/.../Fin.Actual.URLs.to2018.csv")
URL_2019_Test_Data = pd.read_csv("/Users/.../URL.2019.Test.Data.csv")
# Cleaning Data
def drop_clean(df):
df.dropna(axis=0)
df = df[~df.isin([np.nan, np.inf, -np.inf]).any(1)]
return df
# Predicrtion-Generating Function
def create_train_predict(train_data, pred_data, target, features, model_type):
y = train_data[target]
X = train_data[features]
train_X, val_X, train_y, val_y = train_test_split(X, y)
if model_type == "dt":
model_dt = DecisionTreeRegressor(random_state=1)
model_dt.fit(train_X, train_y)
pred_col_name = "dt_" + str(pred_data) + "_preds"
pred_data[pred_col_name] = model_dt.predict(pred_data[features])
print(mean_absolute_error(val_y, model_dt.predict(val_X)))
return pred_data
elif model_type == "rf":
model_rf = DecisionTreeRegressor(random_state=1)
model_rf.fit(X, y)
pred_col_name = "rf_" + str(pred_data) + "_preds"
pred_data[pred_col_name] = model_rf.predict(pred_data[features])
print(mean_absolute_error(val_y, model_rf.predict(val_X)))
return pred_data
Fin_Actual_URLs_to2018 = drop_clean(Fin_Actual_URLs_to2018)
URL_2019_Pred_Data = drop_clean(URL_2019_Test_Data)
features = ["QoQ.Sum.Total.Page.Visit.Delta.Prop", "QoQ.Mean.Weighted.Pageviews.Delta.Prop", "QoQ.Mean.Weighted.Time.on.Site.Delta.Prop", "QoQ.Weighted.Bounce.Rate.Delta.Prop", "QoQ.Sum.Unique.Visitors.Delta.Prop"]
Fin_Actual_URL_Preds = create_train_predict(Fin_Actual_URLs_to2018, URL_2019_Pred_Data, "rev_beat_miss", features, "rf")
pd.DataFrame.to_csv(Fin_Actual_URL_Preds, "/Users/.../Fin_Actual_URL_Preds.csv")