coordinates = [(259,168),(62,133),(143,163),(174,270),(321,385)]
斜率= 0.76083799
截距 = 77.87127406
用棕色标记的坐标在我看来是一个潜在的异常值,因此需要将其删除。到目前为止,我尝试使用学生化残差(studentized residuals)和刀切法残差(jackknife residuals)来移除这些异常值。但是,对于我手头的数据集,我不知道该如何计算这些残差。
如果您能帮我说明如何计算这些残差,以及如何在上述数据集上进行该操作,那将非常有帮助。
CODE
import numpy as np
import matplotlib.pyplot as plt
# Sample (x, y) points to fit a straight line through.
coordinates = [(259, 168), (62, 133), (143, 163), (174, 270), (321, 385)]
x = [x1[0] for x1 in coordinates]
y = [x1[1] for x1 in coordinates]

# First figure: scatter of the raw points, one marker per point.
for x1, y1 in coordinates:
    plt.plot(x1, y1, marker="o", color="brown")
plt.show()

# using numpy polyfit method to find regression line slope and intercept
z = np.polyfit(x, y, 1)  # degree-1 fit -> [slope, intercept]
print(z)
slope = z[0]
intercept = z[1]

# Evaluate the fitted line over the observed x-range (bounds taken from the
# data rather than hard-coded, so the plot follows changes to `coordinates`).
newx = np.linspace(min(x), max(x), 200)
newy = np.poly1d(z)

# Second figure: data points and fitted line in black, with the point
# (259, 168) re-drawn in brown so it stands out.
plt.plot(x, y, 'o', newx, newy(newx), color="black")
plt.plot(259, 168, marker="o", color="brown")
plt.show()

# TODO
# remove the outliers and then display
答案 0 :(得分:1)
在一开始就将 x 和 y 放入 np.ndarray 中。
输入:
import numpy as np
import matplotlib.pyplot as plt
# Sample (x, y) points to fit a straight line through.
coordinates = [(259, 168), (62, 133), (143, 163), (174, 270), (321, 385)]
# Hold x and y as ndarrays so element-wise arithmetic on them works.
x = np.array([x1[0] for x1 in coordinates])  # Placed into array
y = np.array([x1[1] for x1 in coordinates])  # Placed into array

# First figure: scatter of the raw points, one marker per point.
for x1, y1 in coordinates:
    plt.plot(x1, y1, marker="o", color="brown")
plt.show()

# using numpy polyfit method to find regression line slope and intercept
z = np.polyfit(x, y, 1)  # degree-1 fit -> [slope, intercept]
print(z)
slope = z[0]
intercept = z[1]

# Evaluate the fitted line over the observed x-range (bounds taken from the
# data rather than hard-coded, so the plot follows changes to `coordinates`).
newx = np.linspace(x.min(), x.max(), 200)
newy = np.poly1d(z)

# Second figure: data points and fitted line in black, with the point
# (259, 168) re-drawn in brown so it stands out.
plt.plot(x, y, 'o', newx, newy(newx), color="black")
plt.plot(259, 168, marker="o", color="brown")
plt.show()
附加代码:
# Filter out points whose absolute residual from the fitted line is too large.
# Relies on `x`, `y` (ndarrays), `slope` and `intercept` already being defined.
print("old y: " + repr(y))  # Display original array of y values
print("old x: " + repr(x))

# Absolute vertical distance of every point from the fitted line.
residual_array = np.abs(y - (intercept + slope * x))

max_accept_deviation = 100  # An arbitrary value of "acceptable deviation"

# True where the residual reaches or exceeds the accepted deviation.
mask = residual_array >= max_accept_deviation

# Boolean indexing keeps only the rows that are NOT flagged; this replaces the
# np.where -> tuple -> np.delete round trip with a single, direct selection.
cleaned_y = y[~mask]
cleaned_x = x[~mask]

print("new y: " + repr(cleaned_y))  # Print the cleaned values
print("new x: " + repr(cleaned_x))
输出:
[ 0.76083799 77.87127406]
old y: array([168, 133, 163, 270, 385])
old x: array([259, 62, 143, 174, 321])
new y: array([133, 163, 270, 385])
new x: array([ 62, 143, 174, 321])