使用的数据来自 Gapminder：employment_above_15.csv
# defining functions for finding correlation
def correlation(x, y):
    """Return the Pearson correlation coefficient of ``x`` and ``y``.

    Both inputs are standardized (mean removed, divided by the population
    standard deviation) and the mean of the element-wise product of the
    standardized values is returned.

    Args:
        x: Array-like object exposing ``mean()`` and ``std(ddof=...)``,
            e.g. a NumPy array or a pandas Series.
        y: Same kind of object; must be multipliable element-wise with ``x``.

    Returns:
        The mean of the products of the standardized values.
    """
    # ddof=0 is passed explicitly so the population standard deviation is
    # used whether the inputs are NumPy arrays (default ddof=0) or pandas
    # objects (default ddof=1).
    std_x = (x - x.mean()) / x.std(ddof=0)
    std_y = (y - y.mean()) / y.std(ddof=0)
    return (std_x * std_y).mean()
#function for calculating correlation for each country with time
def emp_country(country, df):
    """Correlate one row of ``df`` with the column labels read as integers.

    Args:
        country: Index label of the row to select with ``df.loc``.
        df: DataFrame whose column labels can be converted to ``int``
            (presumably years -- confirm against the CSV header).

    Returns:
        The value returned by ``correlation`` for the row's values and the
        integer column labels.
    """
    row_values = df.loc[country].values
    # Column labels go through str first so that both string and numeric
    # labels end up as integers.
    years = df.columns.values.astype(str).astype(int)
    return correlation(row_values, years)


def determing_cor_level(data):
    """Classify the magnitude of a correlation value.

    Args:
        data: A numeric correlation value; only its absolute value is used.

    Returns:
        ``"Low"`` when ``abs(data) < 0.4``, ``"Medium"`` when
        ``abs(data) < 0.7``, otherwise ``"High"``.
    """
    magnitude = abs(data)
    if magnitude < 0.4:
        return "Low"
    if magnitude < 0.7:
        return "Medium"
    # NOTE: a NaN input fails both comparisons above and therefore also
    # ends up here.
    return "High"
def corr_type(data):
    """Return the sign label of a correlation value.

    Args:
        data: A numeric correlation value.

    Returns:
        ``"(+)ve"`` for a value greater than zero, ``"Null"`` for exactly
        zero, and ``"(-)ve"`` otherwise.
    """
    # The labels were previously swapped: positive values were reported as
    # "(-)ve" and negative values as "(+)ve".
    if data > 0:
        return "(+)ve"
    elif data == 0:
        return "Null"
    else:
        return "(-)ve"
def finding_corr(df):
    """Build a per-row correlation summary table for ``df``.

    For every index label of ``df`` the correlation returned by
    ``emp_country`` is computed, then classified with
    ``determing_cor_level`` and ``corr_type``.

    Args:
        df: DataFrame accepted by ``emp_country`` (one row per index label).

    Returns:
        A DataFrame indexed by the string form of ``df``'s index labels with
        the columns ``'Correlation'``, ``'Correlation Strength'`` and
        ``'Correlation Type'``.
    """
    # Convert the index labels once; they are used both for the row lookups
    # and as the index of the result.
    countries = df.index.values.astype('str')
    corr_values = [emp_country(country, df) for country in countries]
    return pd.DataFrame(
        {
            'Correlation': corr_values,
            'Correlation Strength': [
                determing_cor_level(value) for value in corr_values
            ],
            'Correlation Type': [corr_type(value) for value in corr_values],
        },
        index=countries,
    )
# Build the summary table once and reuse it, instead of recomputing every
# correlation for each expression below.
corr_table = finding_corr(employment_rate)
# Preview of the first rows of the summary table.
corr_table.head()
# Number of rows per correlation strength (non-null 'Correlation' values).
corr_table.groupby('Correlation Strength').count()['Correlation']
我得到以下输出：Correlation Strength — High 99，Low 29，Medium 50（Name: Correlation, dtype: int64）
但现在我想进一步分类，统计每种相关强度下分别有多少个国家是 (+)ve 相关、多少个国家是 (-)ve 相关。例如：在高相关的国家中，有多少是 (+)ve 相关，有多少是 (-)ve 相关？该怎么做？
答案 0 :(得分:0)
finding_corr(employment_rate).groupby(['Correlation Strength', 'Country']).count()['Correlation']
- 这可能会有所帮助