GitLab link: the script and the CSV file are at https://gitlab.com/pbmbjoshi/lineargradient

I am writing a Python script for a house price prediction example.
The house price is computed at Rs. 4000 per square foot (the assumed base price). Each extra bedroom (beyond the first) adds a 1% premium, each floor above the first adds a 3% premium, the age of the building gives a 2% discount per year, and the distance from the nearest railway station gives a 6% discount per kilometre.
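To make the rule concrete, here is a minimal sketch of how I read it (the function name `expected_price` is just for illustration, and I am assuming every percentage adjustment applies to the Rs. 4000/sq ft base price):

```python
def expected_price(total_area, bedrooms, floors, age_years, distance_km):
    # Assumed interpretation of the pricing rule described above:
    # a base of Rs. 4000 per square foot, with the premiums and
    # discounts applied as percentages of that base amount.
    base = 4000 * total_area
    premium = 0.01 * (bedrooms - 1) + 0.03 * (floors - 1)
    discount = 0.02 * age_years + 0.06 * distance_km
    return base * (1 + premium - discount)
```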
When I run gradient descent on this data, only the first parameter (the base price) comes out close to the correct value; the other parameters are completely wrong. Can anyone help me?
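For reference, the loop below implements the standard batch gradient-descent update for a hypothesis with no intercept term:

$$h_\theta(x) = \theta_1 x_1 + \theta_2 x_2 + \theta_3 x_3 + \theta_4 x_4 + \theta_5 x_5$$

$$\theta_j \leftarrow \theta_j - \frac{\alpha}{m}\sum_{i=1}^{m}\bigl(h_\theta(x^{(i)}) - y^{(i)}\bigr)\,x_j^{(i)}, \qquad j = 1,\dots,5$$

where $\alpha$ is the learning rate and $m$ is the number of training rows.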
```python
import numpy as np
import pandas as pd


def linear_regression_with_multiple_variables():
    print("linear_regression_with_multiple_variables")
    filePath = r'LinearGradient_Example.csv'
    data = pd.read_csv(filePath)

    # Features and target
    x1 = np.asarray(data['Total Area'])
    x2 = np.asarray(data['# of bed rooms'])
    x3 = np.asarray(data['Floor Count'])
    x4 = np.asarray(data['Age Of The Building'])
    x5 = np.asarray(data['Distance from nearest railway station'])
    y = np.asarray(data['Final Price'])

    # Scale every feature into [0, 1] by dividing by its maximum
    x1_max = max(x1)
    x2_max = max(x2)
    x3_max = max(x3)
    x4_max = max(x4)
    x5_max = max(x5)
    x1 = x1 / x1_max
    x2 = x2 / x2_max
    x3 = x3 / x3_max
    x4 = x4 / x4_max
    x5 = x5 / x5_max

    theta_1 = 0
    theta_2 = 0
    theta_3 = 0
    theta_4 = 0
    theta_5 = 0

    number_of_training_sets = len(x1)
    max_number_of_iterations = 1000000
    learning_rate = 0.001
    iteration_counter = 1
    precision = 0.001

    while iteration_counter <= max_number_of_iterations:
        old_theta_1 = theta_1
        old_theta_2 = theta_2
        old_theta_3 = theta_3
        old_theta_4 = theta_4
        old_theta_5 = theta_5

        # Accumulate the gradient of the squared-error cost over all rows
        der_x1 = 0
        der_x2 = 0
        der_x3 = 0
        der_x4 = 0
        der_x5 = 0
        for i in range(0, number_of_training_sets):
            diff_val = (theta_1 * x1[i] + theta_2 * x2[i] + theta_3 * x3[i] +
                        theta_4 * x4[i] + theta_5 * x5[i] - y[i])
            der_x1 = der_x1 + diff_val * x1[i]
            der_x2 = der_x2 + diff_val * x2[i]
            der_x3 = der_x3 + diff_val * x3[i]
            der_x4 = der_x4 + diff_val * x4[i]
            der_x5 = der_x5 + diff_val * x5[i]

        avg_der_x1 = der_x1 / number_of_training_sets
        avg_der_x2 = der_x2 / number_of_training_sets
        avg_der_x3 = der_x3 / number_of_training_sets
        avg_der_x4 = der_x4 / number_of_training_sets
        avg_der_x5 = der_x5 / number_of_training_sets

        # Simultaneous gradient-descent update of all five parameters
        theta_1 = theta_1 - learning_rate * avg_der_x1
        theta_2 = theta_2 - learning_rate * avg_der_x2
        theta_3 = theta_3 - learning_rate * avg_der_x3
        theta_4 = theta_4 - learning_rate * avg_der_x4
        theta_5 = theta_5 - learning_rate * avg_der_x5

        print(iteration_counter, theta_1, theta_2, theta_3, theta_4, theta_5)
        iteration_counter = iteration_counter + 1

        # Stop once every parameter changed by less than the precision threshold
        if (abs(theta_1 - old_theta_1) <= precision and
                abs(theta_2 - old_theta_2) <= precision and
                abs(theta_3 - old_theta_3) <= precision and
                abs(theta_4 - old_theta_4) <= precision and
                abs(theta_5 - old_theta_5) <= precision):
            break

    # Undo the feature scaling so the thetas apply to the original units
    theta_1 = theta_1 / x1_max
    theta_2 = theta_2 / x2_max
    theta_3 = theta_3 / x3_max
    theta_4 = theta_4 / x4_max
    theta_5 = theta_5 / x5_max
    print('Final values : ', theta_1, theta_2, theta_3, theta_4, theta_5)
```