我使用scipy库进行了t检验分析,并想与自己制作的t检验函数进行交叉检查。令我惊讶的是,当我的系列不包含任何Nan值时,我的函数和scipy库给出了相同的t值和p值。如果该系列具有任何nan值,尽管我已删除了nan值,但仍存在一些差异。 有谁知道可能导致此问题的问题?
from math import sqrt
from numpy import mean
from scipy.stats import t
import numpy as np
import pandas as pd
from scipy import stats
# function for calculating the t-test for two independent samples
def independent_ttest(data1, data2, alpha):
# calculate means
mean1, mean2 = mean(data1), mean(data2)
# calculate standard errors
se1, se2 = sem(data1), sem(data2)
# standard error on the difference between the samples
sed = sqrt(se1**2.0 + se2**2.0)
# calculate the t statistic
t_stat = (mean1 - mean2) / sed
# degrees of freedom
df = len(data1) + len(data2) - 2
# calculate the critical value
cv = t.ppf(1.0 - alpha, df)
# calculate the p-value
p = (1.0 - t.cdf(abs(t_stat), df)) * 2.0
# return everything
return t_stat, df, cv, p
# calculate the t test
alpha = 0.05
x = np.arange(10.)
b = x*1.1
df_x = pd.Series(x)
df_b = pd.Series(b)
df_x_nan = df_x.replace(7.0, np.nan)
df_x_nan = df_x.replace(4.0, np.nan)
print('Whithout NaN')
t_stat, df, cv, p = independent_ttest(df_x, df_b, alpha)
t_stat_scipy, p_scipy = stats.ttest_ind(df_x,df_b, nan_policy = 'omit')
print("t-test function, t_Stat: {}".format(t_stat))
print("t-test scipy, t_Stat: {}".format(t_stat_scipy))
print("t-test function, p: {}".format(p))
print("t-test scipy, p: {}".format(p_scipy))
print('===================')
print('Whith NaN')
t_stat, df, cv, p = independent_ttest(df_x_nan.dropna(), df_b, alpha)
t_stat_scipy, p_scipy = stats.ttest_ind(df_x_nan,df_b, nan_policy = 'omit')
print("t-test function, t_Stat: {}".format(t_stat))
print("t-test scipy, t_Stat: {}".format(t_stat_scipy))
print("t-test function,p: {}".format(p))
print("t-test scipy, p: {}".format(p_scipy))
以下是输出:
Whithout NaN
t-test function, t_Stat: -0.3161627186509306
t-test scipy, t_Stat: -0.31616271865093054
t-test function, p: 0.7555158566691087
t-test scipy, p: 0.7555158566691088
===================
Whith NaN
t-test function, t_Stat: -0.2628962556410858
t-test scipy, t_Stat: -0.2623389223791333
t-test function,p: 0.7957901706958825
t-test scipy, p: 0.7962126903526476