我是Python新手,有一个我不知道如何解决的问题。
我有以下代码:
# Set-up for the closest-distance calculation: flatten the IPO sample into one
# dict per row and start from an empty result frame.
records = df_ipos.to_dict(orient='records')
df_final = pd.DataFrame()
def return_closest(df, inp_record):
    """Return the row of ``df`` closest to ``inp_record`` by Euclidean distance.

    The distance is computed over the z-score parameters listed in ``params``.
    A parameter that is NaN on either side is left out of the distance for
    that candidate instead of being treated as 0, so only the parameters both
    companies actually have contribute.

    Args:
        df: DataFrame of candidate rows; converted with ``to_dict('records')``.
        inp_record: dict-like record to compare every candidate against.

    Returns:
        The candidate's record dict, with an added ``'Euclidean distance'``
        key (rounded to 6 decimals), having the smallest non-zero distance,
        or ``None`` when no candidate has a non-zero distance.
    """
    # Parameters used for the distance; hoisted because the list never changes.
    params = ['z_LogSales', 'z_Leverage', 'z_LevR', 'z_ATR',
              'z_ProfitMargin', 'z_RevenueGrowth']
    filtered_records = df.to_dict('records')  # one dict per candidate row
    for record in filtered_records:
        squared_diffs = []
        for param in params:
            # A key missing from a record defaults to 0.
            d1, d2 = record.get(param, 0), inp_record.get(param, 0)
            # NaN is the only value that differs from itself; skip the
            # parameter when either side is NaN.
            if d1 != d1 or d2 != d2:
                continue
            squared_diffs.append((d1 - d2) ** 2)
        # NOTE: candidates with skipped parameters are measured over fewer
        # dimensions, so their distances tend to be smaller. A candidate with
        # no comparable parameter gets 0 and is dropped by the filter below.
        euclidean = math.sqrt(sum(squared_diffs))
        record['Euclidean distance'] = round(euclidean, 6)
    # Sort ascending so the first truthy (non-zero) distance is the closest.
    distance_records = sorted(filtered_records,
                              key=lambda x: x['Euclidean distance'])
    return next(filter(lambda x: x['Euclidean distance'], distance_records),
                None)
# Pair every IPO record with its closest peer from the same year and industry.
# NOTE(review): the original indentation was lost; this assumes a row is only
# added to the result when a closest company was found -- confirm.
matched_frames = []
for record in records:
    ipo_year = record.get('IPO Year')
    MainSICCode_industry48 = record.get('MainSICCode_industry48')
    df = df_fundamentals[df_fundamentals['Year'] == ipo_year]
    if df.shape[0] == 0:
        continue  # no fundamentals for that year
    df = df[df['MainSICCode_industry48_y'] == MainSICCode_industry48]
    # Exclude the company itself. Filtering with a mask instead of
    # drop(..., inplace=True) avoids mutating a slice of df_fundamentals.
    df = df[df['Name'] != record.get('Name')]
    closest_record = return_closest(df, record)
    if not closest_record:
        continue
    record['Closest Company'] = closest_record.get('Name')
    record['Actual Distance'] = closest_record.get('Euclidean distance')
    df_dist = pd.DataFrame([record])  # single-row frame for this match
    # replace() returns a new frame; the result has to be assigned.
    df_dist = df_dist.replace(-np.inf, np.nan)
    matched_frames.append(df_dist)
# DataFrame.append was removed in pandas 2.0; concatenate once instead, and
# keep the de-duplicated result (drop_duplicates() is not in-place).
df_final = pd.concat([df_final, *matched_frames]).drop_duplicates()
df_final2 = df_final
该代码计算样本公司与一组可比公司之间的欧氏距离,并最终在同一行业中选出距离最近的一家。唯一的问题是我不知道在计算过程中如何处理NaN值。如果某个参数(param)为NaN,我想跳过该公司的这一参数,只在其余参数上计算距离。目前,NaN被当作0处理。有人知道该如何实现吗?
答案 0 :(得分:0)
如果某个param的值为NaN,可以用break直接退出for循环,也可以用continue跳过它、继续处理下一个参数。
for param in params:
    # Read the parameter from both companies; a missing key defaults to 0.
    candidate_value = record.get(param, 0)
    input_value = inp_record.get(param, 0)
    # NaN never equals itself, so a failed self-comparison marks a NaN value;
    # move on to the next parameter when either side is NaN.
    if candidate_value != candidate_value or input_value != input_value:
        continue
    distance.append((candidate_value - input_value) ** 2)
euclidean = math.sqrt(sum(distance))