我有一个数据框,其中有20列/项,593行(行数并不重要),如下所示:
使用这个,在心理包psych::alpha
的alpha的帮助下,测试的可靠性为0.94。如果我丢弃其中一个项目,输出也会给我cronbach的新alpha值。但是,我想知道我可以删除多少项来保留至少为0.8的alpha我使用暴力方法来创建我的数据框中存在的所有项目的组合并检查它们的alpha在(0.7,0.9)范围内。有没有更好的方法来执行此操作,因为这需要永远运行,因为项目数量太大,无法检查所有项目组合。以下是我目前的一段代码:
numberOfItems <- 20
for(i in 2:(2^numberOfItems)-1){
# ignoring the first case i.e. i=1, as it doesn't represent any model
# convert the value of i to binary, e.g. i=5 will give combination = 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1
# using the binaryLogic package
combination <- as.binary(i, n=numberOfItems)
model <- c()
for(j in 1:length(combination)){
# choose which columns to consider depending on the combination
if(combination[j])
model <- c(model, j)
}
itemsToUse <- itemResponses[, c(model)]
#cat(model)
if(length(model) > 13){
alphaVal <- psych::alpha(itemsToUse)$total$raw_alpha
if(alphaVal > 0.7 && alphaVal < 0.9){
cat(alphaVal)
print(model)
}
}
}
此代码的示例输出如下:
0.8989831 1 4 5 7 8 9 10 11 13 14 15 16 17 19 20
0.899768 1 4 5 7 8 9 10 11 12 13 15 17 18 19 20
0.899937 1 4 5 7 8 9 10 11 12 13 15 16 17 19 20
0.8980605 1 4 5 7 8 9 10 11 12 13 14 15 17 19 20
以下是前10行数据:
dput(itemResponses) 结构(列表(CESD1 = c(1,2,2,0,1,0,0,0,0,1),CESD2 = c(2, 3,1,0,0,1,1,1,0,1),CESD3 = c(0,3,0,1,1,0,0,0, 0,0),CESD4 = c(1,2,0,1,0,1,1,1,0,0),CESD5 = c(0, 1,0,2,1,2,2,0,0,0),CESD6 = c(0,3,0,1,0,0,2,0, 0,0),CESD7 = c(1,2,1,1,2,0,1,0,1,0),CESD8 = c(1, 3,1,1,0,1,0,0,1,0),CESD9 = c(0,1,0,2,0,0,1,1, 0,1),CESD10 = c(0,1,0,2,0,0,1,1,0,1),CESD11 = c(0, 2,1,1,1,1,2,3,0,0),CESD12 = c(0,3,1,1,1,0,2,0, 0,0),CESD13 = c(0,3,0,2,1,2,1,0,1,0),CESD14 = c(0, 3,1,2,1,1,1,0,1,1),CESD15 = c(0,2,0,1,0,1,0,1, 1,0),CESD16 = c(0,2,2,0,0,1,1,0,0,0),CESD17 = c(0, 0,0,0,0,1,1,0,0,0),CESD18 = c(0,2,0,0,0,0,0,0, 0,1),CESD19 = c(0,3,0,0,0,0,0,1,1,0),CESD20 = c(0, 3,0,1,0,0,0,0,0,0)),. Name = c(&#34; CESD1&#34;,&#34; CESD2&#34;,&#34; CESD3&#34 ;, &#34; CESD4&#34;,&#34; CESD5&#34;,&#34; CESD6&#34;,&#34; CESD7&#34;,&#34; CESD8&#34;,&#34; CESD9& #34;,&#34; CESD10&#34;, &#34; CESD11&#34;,&#34; CESD12&#34;,&#34; CESD13&#34;,&#34; CESD14&#34;,&#34; CESD15&#34;,&#34; CESD16& #34;,&#34; CESD17&#34;, &#34; CESD18&#34;,&#34; CESD19&#34;,&#34; CESD20&#34;),row.names = c(NA,-10L),class = c(&#34; tbl_df&# 34 ;, &#34; tbl&#34;,&#34; data.frame&#34;))
答案 0 :(得分:0)
我按如下方式更改了代码,现在我正在删除固定数量的项目,并手动将numberOfItemsToDrop
的值从1更改为20。虽然它更好,但它仍然需要太长时间才能运行:(
我希望有更好的方法可以做到这一点。
numberOfItemsToDrop <- 13
combinations <- combinat::combn(20, numberOfItemsToDrop)
timesToIterate <- length(combinations)/numberOfItemsToDrop
for(i in 1:timesToIterate){
model <- combinations[,i]
itemsToUse <- itemResponses[, -c(model)]
alphaVal <- psych::alpha(itemsToUse)$total$raw_alpha
if(alphaVal < 0.82){
cat("Cronbach's alpha =",alphaVal, ", number of items dropped = ", length(model), " :: ")
print(model)
}
}
答案 1 :(得分:0)
我们的想法是用经典测试理论(CTT)中的每个项目替换所谓的歧视的alpha
计算。歧视是项目分数与“真实分数”(我们假设为行总和)的相关性。
让数据
dat <- structure(list(CESD1 = c(1, 2, 2, 0, 1, 0, 0, 0, 0, 1), CESD2 = c(2, 3, 1, 0, 0, 1, 1, 1, 0, 1),
CESD3 = c(0, 3, 0, 1, 1, 0, 0, 0, 0, 0), CESD4 = c(1, 2, 0, 1, 0, 1, 1, 1, 0, 0),
CESD5 = c(0, 1, 0, 2, 1, 2, 2, 0, 0, 0), CESD6 = c(0, 3, 0, 1, 0, 0, 2, 0, 0, 0),
CESD7 = c(1, 2, 1, 1, 2, 0, 1, 0, 1, 0), CESD8 = c(1, 3, 1, 1, 0, 1, 0, 0, 1, 0),
CESD9 = c(0, 1, 0, 2, 0, 0, 1, 1, 0, 1), CESD10 = c(0, 1, 0, 2, 0, 0, 1, 1, 0, 1),
CESD11 = c(0, 2, 1, 1, 1, 1, 2, 3, 0, 0), CESD12 = c(0, 3, 1, 1, 1, 0, 2, 0, 0, 0),
CESD13 = c(0, 3, 0, 2, 1, 2, 1, 0, 1, 0), CESD14 = c(0, 3, 1, 2, 1, 1, 1, 0, 1, 1),
CESD15 = c(0, 2, 0, 1, 0, 1, 0, 1, 1, 0), CESD16 = c(0, 2, 2, 0, 0, 1, 1, 0, 0, 0),
CESD17 = c(0, 0, 0, 0, 0, 1, 1, 0, 0, 0), CESD18 = c(0, 2, 0, 0, 0, 0, 0, 0, 0, 1),
CESD19 = c(0, 3, 0, 0, 0, 0, 0, 1, 1, 0), CESD20 = c(0, 3, 0, 1, 0, 0, 0, 0, 0, 0)),
.Names = c("CESD1", "CESD2", "CESD3", "CESD4", "CESD5", "CESD6", "CESD7", "CESD8", "CESD9",
"CESD10", "CESD11", "CESD12", "CESD13", "CESD14", "CESD15", "CESD16", "CESD17",
"CESD18", "CESD19", "CESD20"), row.names = c(NA, -10L),
class = c("tbl_df", "tbl", "data.frame"))
我们计算(1)歧视和(2)α系数。
stat <- t(sapply(1:ncol(dat), function(ii){
dd <- dat[, ii]
# discrimination is the correlation of the item to the rowsum
disc <- if(var(dd, na.rm = TRUE) > 0) cor(dd, rowSums(dat[, -ii]), use = "pairwise")
# alpha that would be obtained when we skip this item
alpha <- psych::alpha(dat[, -ii])$total$raw_alpha
c(disc, alpha)
}))
dimnames(stat) <- list(colnames(dat), c("disc", "alpha^I"))
stat <- data.frame(stat)
观察到歧视(计算效率更高)与删除此项目时获得的alpha
成反比。换句话说,当有许多高“歧视”项目(相互关联)时,alpha
最高。
plot(stat, pch = 19)
使用此信息选择要删除项目的顺序,使其低于基准(例如.9,因为玩具数据不允许标记较低):
1)删除尽可能多的项目以保持在基准之上;也就是说,从最不具有歧视性的项目开始。
stat <- stat[order(stat$disc), ]
this <- sapply(1:(nrow(stat)-2), function(ii){
ind <- match(rownames(stat)[1:ii], colnames(dat))
alpha <- psych::alpha(dat[, -ind, drop = FALSE])$total$raw_alpha
})
delete_these <- rownames(stat)[which(this > .9)]
psych::alpha(dat[, -match(delete_these, colnames(dat)), drop = FALSE])$total$raw_alpha
length(delete_these)
2)删除尽可能少的项目以保持在基准之上;也就是说,从最高分辨率的项目开始。
stat <- stat[order(stat$disc, decreasing = TRUE), ]
this <- sapply(1:(nrow(stat)-2), function(ii){
ind <- match(rownames(stat)[1:ii], colnames(dat))
alpha <- psych::alpha(dat[, -ind, drop = FALSE])$total$raw_alpha
})
delete_these <- rownames(stat)[which(this > .9)]
psych::alpha(dat[, -match(delete_these, colnames(dat)), drop = FALSE])$total$raw_alpha
length(delete_these)
请注意, 1)与(心理/教育)诊断/评估中的经典项目选择程序一致:从评估中删除项目,这些项目在歧视能力方面低于基准。 / p>