我正在运行一个while循环以实现向后消除,这将删除/迭代删除具有最大值的属性(建议:使用p值<0.05的while循环)
thefile=input("Enter filename:")
separator=input("Enter symbol of separator (comma, semicolon etc):")
targetvar=input("Enter name of target variable for prediction "
"(note: this is case sensitive): ") # MEDV for Boston.csv
# Test with boston.csv and other CSVs
# Load Dataset
dataframe=pd.read_csv(thefile,sep=separator)
# Show first few data points of the dataset
print("This is the preview of the loaded data:")
df = pd.read_csv(thefile,sep=separator)
X = dataframe.loc[:,dataframe.columns != targetvar].values
y = dataframe.iloc[:,targetvarloc].values
df.head()
#Backward Elimination
cols = list(X.columns)
pmax = 1
while (len(cols)>0):
p= []
X_1 = X[cols]
X_1 = sm.add_constant(X_1)
model = sm.OLS(y,X_1).fit()
p = pd.Series(model.pvalues.values[1:],index = cols)
pmax = max(p)
feature_with_p_max = p.idxmax()
if(pmax>0.05):
cols.remove(feature_with_p_max)
else:
break
selected_features_BE = cols
print(selected_features_BE)
Traceback (most recent call last)
<ipython-input-96-3f41adb6e314> in <module>()
----> 1 cols = list(X.columns)
2 pmax = 1
3 while (len(cols)>0):
4 p= []
5 X_1 = X[cols]
AttributeError: 'numpy.ndarray' object has no attribute 'columns'