我正在为以下问题寻找更优雅的解决方案:我有一个 80 列的数据框,前 40 列是 40 个分类变量(因子),每个变量有 6 个水平;后 40 列是各个变量对应的权重。
我希望得到一个结果矩阵,其中每一行对应一个变量,给出该变量在各个水平上的权重之和。
我找到了一个循环遍历第一个矩阵的解决方案:
# Fill row i of WKC with the weighted totals of factor column i, taking the
# weights from the column 40 positions to its right.
# NOTE(review): assumes WKC was preallocated with one row per variable and
# one column per factor level -- confirm against the asker's setup.
for (i in 1:40) {
  WKC[i, ] <- xtabs(VaW[, i + 40] ~ VaW[, i])
}
这确实有效,但它并不是最优雅的解决方案;而且,如果权重列的顺序与变量列的顺序不一致,它将无法正常工作。
还有其他办法吗?
dput(head(VaW))
structure(list(V1 = structure(c(4L, NA, NA, NA, NA, NA), .Label = c("A",
"I", "M", "P", "Q", "R"), class = "factor"), V2 = structure(c(NA,
NA, NA, NA, 5L, NA), .Label = c("A", "I", "M", "P", "Q", "R"), class = "factor"),
V3 = structure(c(NA, NA, NA, 2L, NA, NA), .Label = c("A",
"I", "M", "P", "Q", "R"), class = "factor"), V4 = structure(c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), .Label = c("A", "I", "M", "P", "Q", "R"), class = "factor"),
V5 = structure(c(NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_), .Label = c("A", "I", "M", "P",
"Q", "R"), class = "factor"), V6 = structure(c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), .Label = c("A", "I", "M", "P", "Q", "R"), class = "factor"),
V7 = structure(c(NA, NA, 5L, NA, NA, NA), .Label = c("A",
"I", "M", "P", "Q", "R"), class = "factor"), V8 = structure(c(NA,
NA, NA, NA, NA, 2L), .Label = c("A", "I", "M", "P", "Q",
"R"), class = "factor"), V9 = structure(c(2L, 1L, NA, NA,
NA, 5L), .Label = c("A", "I", "M", "P", "Q", "R"), class = "factor"),
V10 = structure(c(NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_), .Label = c("A", "I",
"M", "P", "Q", "R"), class = "factor"), V11 = structure(c(NA,
1L, NA, NA, NA, NA), .Label = c("A", "I", "M", "P", "Q",
"R"), class = "factor"), V12 = structure(c(NA, 2L, NA, NA,
NA, NA), .Label = c("A", "I", "M", "P", "Q", "R"), class = "factor"),
V13 = structure(c(NA, NA, NA, NA, 5L, NA), .Label = c("A",
"I", "M", "P", "Q", "R"), class = "factor"), V14 = structure(c(NA,
NA, NA, NA, 5L, NA), .Label = c("A", "I", "M", "P", "Q",
"R"), class = "factor"), V15 = structure(c(NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_), .Label = c("A",
"I", "M", "P", "Q", "R"), class = "factor"), V16 = structure(c(NA,
4L, NA, NA, NA, 5L), .Label = c("A", "I", "M", "P", "Q",
"R"), class = "factor"), V17 = structure(c(NA, NA, 4L, NA,
NA, NA), .Label = c("A", "I", "M", "P", "Q", "R"), class = "factor"),
V18 = structure(c(NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_), .Label = c("A", "I",
"M", "P", "Q", "R"), class = "factor"), V19 = structure(c(NA,
NA, NA, NA, NA, 5L), .Label = c("A", "I", "M", "P", "Q",
"R"), class = "factor"), V20 = structure(c(NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_), .Label = c("A",
"I", "M", "P", "Q", "R"), class = "factor"), V21 = structure(c(NA,
1L, 4L, NA, NA, NA), .Label = c("A", "I", "M", "P", "Q",
"R"), class = "factor"), V22 = structure(c(NA, NA, NA, 2L,
1L, NA), .Label = c("A", "I", "M", "P", "Q", "R"), class = "factor"),
V23 = structure(c(NA, NA, NA, NA, 5L, NA), .Label = c("A",
"I", "M", "P", "Q", "R"), class = "factor"), V24 = structure(c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), .Label = c("A", "I", "M", "P", "Q", "R"), class = "factor"),
V25 = structure(c(4L, NA, NA, NA, NA, NA), .Label = c("A",
"I", "M", "P", "Q", "R"), class = "factor"), V26 = structure(c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), .Label = c("A", "I", "M", "P", "Q", "R"), class = "factor"),
V27 = structure(c(NA, NA, NA, 1L, NA, NA), .Label = c("A",
"I", "M", "P", "Q", "R"), class = "factor"), V28 = structure(c(4L,
NA, NA, NA, NA, NA), .Label = c("A", "I", "M", "P", "Q",
"R"), class = "factor"), V29 = structure(c(4L, NA, 4L, NA,
NA, NA), .Label = c("A", "I", "M", "P", "Q", "R"), class = "factor"),
V30 = structure(c(NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_), .Label = c("A", "I",
"M", "P", "Q", "R"), class = "factor"), V31 = structure(c(NA,
NA, NA, 5L, NA, NA), .Label = c("A", "I", "M", "P", "Q",
"R"), class = "factor"), V32 = structure(c(NA, NA, NA, 3L,
NA, NA), .Label = c("A", "I", "M", "P", "Q", "R"), class = "factor"),
V33 = structure(c(NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_), .Label = c("A", "I",
"M", "P", "Q", "R"), class = "factor"), V34 = structure(c(NA,
NA, NA, 1L, 5L, NA), .Label = c("A", "I", "M", "P", "Q",
"R"), class = "factor"), V35 = structure(c(NA, NA, NA, NA,
NA, 2L), .Label = c("A", "I", "M", "P", "Q", "R"), class = "factor"),
V36 = structure(c(NA, 4L, NA, 1L, NA, 5L), .Label = c("A",
"I", "M", "P", "Q", "R"), class = "factor"), V37 = structure(c(NA,
1L, 4L, NA, NA, NA), .Label = c("A", "I", "M", "P", "Q",
"R"), class = "factor"), V38 = structure(c(NA, NA, 4L, NA,
6L, NA), .Label = c("A", "I", "M", "P", "Q", "R"), class = "factor"),
V39 = structure(c(6L, NA, NA, NA, NA, NA), .Label = c("A",
"I", "M", "P", "Q", "R"), class = "factor"), V40 = structure(c(4L,
NA, NA, NA, NA, NA), .Label = c("A", "I", "M", "P", "Q",
"R"), class = "factor"), V41 = structure(c(NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_), .Label = c("A",
"I", "M", "P", "Q", "R"), class = "factor"), V42 = structure(c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_
), .Label = c("A", "I", "M", "P", "Q", "R"), class = "factor"),
V43 = structure(c(NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_), .Label = c("A", "I",
"M", "P", "Q", "R"), class = "factor"), V1.freq = c(1.03191489361702,
1.03191489361702, 1.03191489361702, 1.03191489361702, 1.03191489361702,
1.03191489361702), V2.freq = c(0.908805031446541, 0.908805031446541,
0.908805031446541, 0.908805031446541, 0.908805031446541,
0.908805031446541), V3.freq = c(1.1231884057971, 1.1231884057971,
1.1231884057971, 1.1231884057971, 1.1231884057971, 1.1231884057971
), V4.freq = c(1.12454212454212, 1.12454212454212, 1.12454212454212,
1.12454212454212, 1.12454212454212, 1.12454212454212), V5.freq = c(1,
1, 1, 1, 1, 1), V6.freq = c(0.866043613707165, 0.866043613707165,
0.866043613707165, 0.866043613707165, 0.866043613707165,
0.866043613707165), V7.freq = c(0.961904761904762, 0.961904761904762,
0.961904761904762, 0.961904761904762, 0.961904761904762,
0.961904761904762), V8.freq = c(1.07801418439716, 1.07801418439716,
1.07801418439716, 1.07801418439716, 1.07801418439716, 1.07801418439716
), V9.freq = c(0.966996699669967, 0.966996699669967, 0.966996699669967,
0.966996699669967, 0.966996699669967, 0.966996699669967),
V10.freq = c(1.01941747572816, 1.01941747572816, 1.01941747572816,
1.01941747572816, 1.01941747572816, 1.01941747572816), V11.freq = c(1.12222222222222,
1.12222222222222, 1.12222222222222, 1.12222222222222, 1.12222222222222,
1.12222222222222), V12.freq = c(1.1268115942029, 1.1268115942029,
1.1268115942029, 1.1268115942029, 1.1268115942029, 1.1268115942029
), V13.freq = c(1.00990099009901, 1.00990099009901, 1.00990099009901,
1.00990099009901, 1.00990099009901, 1.00990099009901), V14.freq = c(0.973333333333333,
0.973333333333333, 0.973333333333333, 0.973333333333333,
0.973333333333333, 0.973333333333333), V15.freq = c(0.87202380952381,
0.87202380952381, 0.87202380952381, 0.87202380952381, 0.87202380952381,
0.87202380952381), V16.freq = c(1.08988764044944, 1.08988764044944,
1.08988764044944, 1.08988764044944, 1.08988764044944, 1.08988764044944
), V17.freq = c(1.02333333333333, 1.02333333333333, 1.02333333333333,
1.02333333333333, 1.02333333333333, 1.02333333333333), V18.freq = c(0.983974358974359,
0.983974358974359, 0.983974358974359, 0.983974358974359,
0.983974358974359, 0.983974358974359), V19.freq = c(0.993265993265993,
0.993265993265993, 0.993265993265993, 0.993265993265993,
0.993265993265993, 0.993265993265993), V20.freq = c(1.03883495145631,
1.03883495145631, 1.03883495145631, 1.03883495145631, 1.03883495145631,
1.03883495145631), V21.freq = c(1.07070707070707, 1.07070707070707,
1.07070707070707, 1.07070707070707, 1.07070707070707, 1.07070707070707
), V22.freq = c(1.20689655172414, 1.20689655172414, 1.20689655172414,
1.20689655172414, 1.20689655172414, 1.20689655172414), V23.freq = c(1.05925925925926,
1.05925925925926, 1.05925925925926, 1.05925925925926, 1.05925925925926,
1.05925925925926), V24.freq = c(1.08070175438596, 1.08070175438596,
1.08070175438596, 1.08070175438596, 1.08070175438596, 1.08070175438596
), V25.freq = c(1.17602996254682, 1.17602996254682, 1.17602996254682,
1.17602996254682, 1.17602996254682, 1.17602996254682), V26.freq = c(1.00333333333333,
1.00333333333333, 1.00333333333333, 1.00333333333333, 1.00333333333333,
1.00333333333333), V27.freq = c(0.970873786407767, 0.970873786407767,
0.970873786407767, 0.970873786407767, 0.970873786407767,
0.970873786407767), V28.freq = c(0.986798679867987, 0.986798679867987,
0.986798679867987, 0.986798679867987, 0.986798679867987,
0.986798679867987), V29.freq = c(1.17894736842105, 1.17894736842105,
1.17894736842105, 1.17894736842105, 1.17894736842105, 1.17894736842105
), V30.freq = c(0.993710691823899, 0.993710691823899, 0.993710691823899,
0.993710691823899, 0.993710691823899, 0.993710691823899),
V31.freq = c(1.003367003367, 1.003367003367, 1.003367003367,
1.003367003367, 1.003367003367, 1.003367003367), V32.freq = c(0.986531986531987,
0.986531986531987, 0.986531986531987, 0.986531986531987,
0.986531986531987, 0.986531986531987), V33.freq = c(1.02456140350877,
1.02456140350877, 1.02456140350877, 1.02456140350877, 1.02456140350877,
1.02456140350877), V34.freq = c(1.01923076923077, 1.01923076923077,
1.01923076923077, 1.01923076923077, 1.01923076923077, 1.01923076923077
), V35.freq = c(1, 1, 1, 1, 1, 1), V36.freq = c(0.933333333333333,
0.933333333333333, 0.933333333333333, 0.933333333333333,
0.933333333333333, 0.933333333333333), V37.freq = c(1.10112359550562,
1.10112359550562, 1.10112359550562, 1.10112359550562, 1.10112359550562,
1.10112359550562), V38.freq = c(0.971428571428571, 0.971428571428571,
0.971428571428571, 0.971428571428571, 0.971428571428571,
0.971428571428571), V39.freq = c(1.08960573476702, 1.08960573476702,
1.08960573476702, 1.08960573476702, 1.08960573476702, 1.08960573476702
), V40.freq = c(1.02777777777778, 1.02777777777778, 1.02777777777778,
1.02777777777778, 1.02777777777778, 1.02777777777778), V41.freq = c(1.03225806451613,
1.03225806451613, 1.03225806451613, 1.03225806451613, 1.03225806451613,
1.03225806451613), V42.freq = c(0.962962962962963, 0.962962962962963,
0.962962962962963, 0.962962962962963, 0.962962962962963,
0.962962962962963), V43.freq = c(1.04040404040404, 1.04040404040404,
1.04040404040404, 1.04040404040404, 1.04040404040404, 1.04040404040404
)), row.names = c(NA, 6L), class = "data.frame")
答案 0 :(得分:1)
您的 `VaW` 与屏幕截图中的表格不同(例如,没有 `Subject` 列,有 43 个变量,权重列的名称为 `Vxx.freq`),因此我将使用您通过 `dput` 输出提供的 `VaW`。
您的解决方案运行正常!如果您担心列的排序,可以使用列名而不是列索引。下面,我使用 `sprintf` 函数:`sprintf("V%d.freq", i)` 会将 `%d` 替换为 `i` 的值。我还使用了 `lapply`,并用 `rbind` 合并结果。
# Build a matrix of weighted totals: one row per variable, one column per
# factor level.
# Count the weight columns instead of hard-coding how many variables exist.
n_vars <- sum(grepl("\\.freq$", names(VaW)))
stopifnot(n_vars > 0)
out <- lapply(seq_len(n_vars), function(i) {
  # Look columns up by name so the result does not depend on column order;
  # `[[` always extracts a plain vector, even if VaW is a tibble.
  xtabs(VaW[[sprintf("V%d.freq", i)]] ~ VaW[[sprintf("V%d", i)]])
})
# Each xtabs result is a named vector over the same levels; stack them by row.
WKC <- do.call(rbind, out)
rownames(WKC) <- paste0("V", seq_len(n_vars))
这会给你
WKC
# A I M P Q R
# V1 0.0000000 0.0000000 0.000000 1.0319149 0.0000000 0.0000000
# V2 0.0000000 0.0000000 0.000000 0.0000000 0.9088050 0.0000000
# V3 0.0000000 1.1231884 0.000000 0.0000000 0.0000000 0.0000000
# V4 0.0000000 0.0000000 0.000000 0.0000000 0.0000000 0.0000000
# V5 0.0000000 0.0000000 0.000000 0.0000000 0.0000000 0.0000000
# <snip>