我有一个数据框
col1 col2 col 3 …col n
我必须在所有列之间进行所有可能的组合,并进行卡方独立性检验。
import researchpy
# Run a chi-square test of independence for every ordered pair of columns in
# corr_data. A column is also paired with itself so that the full n x n grid
# of results is available afterwards.
#
# NOTE(review): the original snippet opened a `try:` here but showed no
# `except`/`finally` clause, which is a syntax error. The dangling `try` is
# removed; if some column pairs are expected to fail, wrap the crosstab call
# in a handler for that specific exception type.
n_cols = len(corr_data.columns)
pair_results = {}  # (i, j) -> rows of the result table as nested lists
for i in range(n_cols):
    for j in range(n_cols):
        col1 = corr_data.iloc[:, i]
        col2 = corr_data.iloc[:, j]
        crosstab, res = researchpy.crosstab(col1, col2, test="chi-square")
        # r1 keeps the most recent result under its original name; the
        # dict keeps every pair so earlier results are not overwritten.
        r1 = res.values.tolist()
        pair_results[(i, j)] = r1
结果是这样的
` 0 1
0 Pearson Chi-square ( 9.0) = 20322.0
1 p-value = 0.0
2 Cramer's V = 1.0`
我想创建一个数据框,其中每个单元格包含对应两列的卡方值(chi-square)、p 值和 Cramér's V 值,如下所示
col 1 col 2 col 3 ....... col n
col 1 20322.0
pvalue:0
cramer's v:1.0
col 2
col 3
....
col n
我尝试了以下代码,但卡住了。任何帮助将不胜感激。
# The printed result table has two columns (0: label, 1: value) and three
# rows (chi-square, p-value, Cramer's V). The statistics therefore live in
# column 1; indexing g1[2] raises KeyError because there is no third column.
m = np.asarray(r1)
g1 = pd.DataFrame(r1)  # built from the list so the values keep their type
Chi_square = g1[1][0]
P_value = g1[1][1]
Cramers_V = g1[1][2]
print(Cramers_V)

# Build an n x n frame labelled by the column names of corr_data. Each cell
# holds the three statistics for the (row column, column column) pair.
# r1 only holds the result of one pair, so the test is run per cell here.
n_cols = len(corr_data.columns)
corr_mat = pd.DataFrame(index=corr_data.columns, columns=corr_data.columns)
for k in range(n_cols):
    for l in range(n_cols):
        _, pair_res = researchpy.crosstab(
            corr_data.iloc[:, k], corr_data.iloc[:, l], test="chi-square"
        )
        chi_square = pair_res.iloc[0, 1]
        p_value = pair_res.iloc[1, 1]
        cramers_v = pair_res.iloc[2, 1]
        # Positional .iloc assignment avoids the chained-indexing form
        # corr_mat[col][row] = ..., which may write to a temporary copy.
        corr_mat.iloc[k, l] = (
            f"{chi_square}\npvalue:{p_value}\ncramer's v:{cramers_v}"
        )
答案 0 :(得分:0)
尝试此代码
import pandas as pd
import researchpy as rp
import numpy as np
import itertools
# Seed NumPy's global random generator so the sample data is reproducible.
np.random.seed(922020)
# 101 rows x 4 columns of random integers drawn from {0, 1, 2}.
sample_values = np.random.randint(3, size=(101, 4))
df = pd.DataFrame(
    sample_values,
    columns=['disease', 'severity', 'alive', 'status'],
)
def it_chi(data):
    """Run rp.crosstab with test='chi-square' on every pair of columns.

    Every unordered pair of columns in ``data`` is passed to
    ``rp.crosstab``. For each pair, the first three rows of the returned
    result table are stored as a dict mapping the 'Chi-square test' entry
    to the matching 'results' entry.

    Returns a DataFrame with one row and one column per pair: the row is
    labelled with the pair's first column name, the column with its second.
    The dict for each pair sits on the diagonal and every other cell is 0.
    """
    # Every unordered pair of column names, in itertools.combinations order.
    pairs = list(itertools.combinations(data.columns.to_list(), 2))
    row_labels = [first for first, _ in pairs]
    col_labels = [second for _, second in pairs]

    outcomes = []
    for first, second in pairs:
        _, stats = rp.crosstab(data[first], data[second], test='chi-square')
        names = stats['Chi-square test']
        values = stats['results']
        outcomes.append({names[pos]: values[pos] for pos in range(3)})

    # np.diag() places the dicts on the diagonal and fills the rest with 0.
    diagonal = np.diag(outcomes)
    return pd.DataFrame(diagonal, columns=col_labels, index=row_labels)
# Run the pairwise test on the sample frame; the returned frame is not
# bound to a name here.
it_chi(df)