我正在尝试使用 R 中 "fscaret" 包的特征选择功能,但它抛出了一个我找不到任何相关信息的错误。错误信息如下:
Error in if (mean(x[i, -i]) > mean(x[-j, j])) { :
missing value where TRUE/FALSE needed
dput(head(data, 2)) 的输出如下:
structure(list(lvvalue = c(65, 55), lvtest_ECHO = c(1, 0), lvtest_MUGA = c(0,
1), lvtest_Not_Done = c(0, 0), bilirubin = c(0.4, 11), alat = c(26,
34), asat = c(7, 18), alkaline_phosphatase = c(61, 58), creatinine = c(0.8,
52), age = c(43L, 39L), sex = c(1, 1), lumpectomy = c(1L, 0L),
mastectomy = 0:1, other_surg = c(0L, 0L), quad_seg = c(1L,
0L), resaln = c(21L, 25L), posaln = c(3L, 18L), histype_idc = c(1,
1), histype_ilc = c(0, 0), histype_other = c(0, 0), ncgr_cba = c(0,
0), ncgr_md = c(0, 0), ncgr_pd = c(1, 1), ncgr_un = c(0,
0), ncgr_wd = c(0, 0), ptsize = c(3, 4), tnm_val = c(54L,
54L), ptsite_Left = c(0, 0), ptsite_Right = c(1, 1), evidis_no = c(NA_real_,
NA_real_), evidis_yes = c(NA_real_, NA_real_), fishresNegative = c(1,
1), fishresPositive = c(0, 0), wbc = c(8.6, 7.2), platelets = c(340,
261), hemoglobin = c(13.7, 12.6), neutophils = c(4.988, 4.76
), penabnAbnormal = c(0, 0), penabnNormal = c(1, 1), penabnNot_Done = c(0,
0), q1 = c(NA, 2), q2 = c(NA, 1), q3 = c(NA, 1), q4 = c(NA,
1), q5 = c(NA, 1), q6 = c(NA, 3), q7 = c(NA, 3), q8 = c(NA,
3), q9 = c(NA, 2), q10 = c(NA, 2), q11 = c(NA, 3), q12 = c(NA,
2), q13 = c(NA, 2), q14 = c(NA, 2), q15 = c(NA, 1), q16 = c(NA,
1), q17 = c(NA, 2), q18 = c(NA, 2), q19 = c(NA, 2), q20 = c(NA,
2), q21 = c(NA, 3), q22 = c(NA, 3), q23 = c(NA, 2), q24 = c(NA,
2), q25 = c(NA, 2), q26 = c(NA, 2), q27 = c(NA, 2), q28 = c(NA,
2), q29 = c(NA, 5), q30 = c(NA, 5), q31 = c(NA, 3), q32 = c(NA,
2), q33 = c(NA, 1), q34 = c(NA, 1), q35 = c(NA, 5), q36 = c(NA,
2), q37 = c(NA, 1), q38 = c(NA, 2), q39 = c(NA, 2), q40 = c(NA,
2), q41 = c(NA, 2), q42 = c(NA, 2), q43 = c(NA, 3), q44 = c(NA,
1), q45 = c(NA, 2), q46 = c(NA, 3), q47 = c(NA, 3), q48 = c(NA,
1), q49 = c(NA, 3), q50 = c(NA, 1), q51 = c(NA, 1), q52 = c(NA,
1), q53 = c(NA, 1), abdominal_ct_scan = 0:1, abdominal_mri = c(0L,
0L), abdominal_ultrasound = c(1L, 1L), bone_scan = c(1L,
1L), bone_x_ray = c(0L, 0L), chest_ct_scan = c(0L, 0L), chest_mri = c(0L,
0L), chest_x_ray = c(1L, 1L), left_breast_mammography = c(1L,
1L), not_applicable_due_to_prior_procedure = c(0L, 0L), right_breast_mammography = c(0L,
0L), ultrasound_left_breast = c(1L, 1L), ultrasound_right_breast = c(0L,
0L), ht = c(NA, 147), wt = c(NA, 7.42), eintna_Non_significant_abnormalities = c(0,
0), eintna_Not_Done = c(0, 0), eintna_Significant_abnormalities = c(0,
0), eintna_Within_Normal_Limits = c(1, 1), class = structure(c(2L,
1L), .Label = c("no", "yes"), class = "factor")), .Names = c("lvvalue",
"lvtest_ECHO", "lvtest_MUGA", "lvtest_Not_Done", "bilirubin",
"alat", "asat", "alkaline_phosphatase", "creatinine", "age",
"sex", "lumpectomy", "mastectomy", "other_surg", "quad_seg",
"resaln", "posaln", "histype_idc", "histype_ilc", "histype_other",
"ncgr_cba", "ncgr_md", "ncgr_pd", "ncgr_un", "ncgr_wd", "ptsize",
"tnm_val", "ptsite_Left", "ptsite_Right", "evidis_no", "evidis_yes",
"fishresNegative", "fishresPositive", "wbc", "platelets", "hemoglobin",
"neutophils", "penabnAbnormal", "penabnNormal", "penabnNot_Done",
"q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10",
"q11", "q12", "q13", "q14", "q15", "q16", "q17", "q18", "q19",
"q20", "q21", "q22", "q23", "q24", "q25", "q26", "q27", "q28",
"q29", "q30", "q31", "q32", "q33", "q34", "q35", "q36", "q37",
"q38", "q39", "q40", "q41", "q42", "q43", "q44", "q45", "q46",
"q47", "q48", "q49", "q50", "q51", "q52", "q53", "abdominal_ct_scan",
"abdominal_mri", "abdominal_ultrasound", "bone_scan", "bone_x_ray",
"chest_ct_scan", "chest_mri", "chest_x_ray", "left_breast_mammography",
"not_applicable_due_to_prior_procedure", "right_breast_mammography",
"ultrasound_left_breast", "ultrasound_right_breast", "ht", "wt",
"eintna_Non_significant_abnormalities", "eintna_Not_Done", "eintna_Significant_abnormalities",
"eintna_Within_Normal_Limits", "class"), row.names = 1:2, class = "data.frame")
我尝试的脚本是:
# Split the rows of `test` into a 75% training set and a 25% hold-out set,
# partitioned on the outcome column `class`. `list = FALSE` makes
# createDataPartition() return the row indices as a matrix instead of a list.
splitindex <- createDataPartition(test$class, p = 0.75, list = FALSE, times = 1)
traindf <- test[splitindex, ]
testdf <- test[-splitindex, ]

# Run fscaret feature ranking with the "gbm" model.
# NOTE(review): the reported error ("missing value where TRUE/FALSE needed" in
# `if (mean(x[i, -i]) > mean(x[-j, j]))`) looks like it comes from
# caret::findCorrelation(), presumably reached because preprocessData = TRUE
# computes a correlation matrix that contains NA when columns have missing
# values -- confirm, and consider imputing or dropping such columns before
# this call, or setting preprocessData = FALSE.
# NOTE(review): `class` is a factor, but the model is passed through
# Used.funcRegPred (the regression model list) -- verify this is intended.
myfs <- fscaret(
  traindf, testdf,
  myTimeLimit = 40, preprocessData = TRUE,
  Used.funcRegPred = "gbm", with.labels = TRUE,
  supress.output = FALSE, no.cores = 2, saveModel = TRUE
)
数据确实包含很多 NA,而我之所以使用 "gbm",是因为它可以处理缺失值。
"traindf" 包含随机选择的 75% 的数据,"testdf" 包含其余的 25%。
非常感谢任何帮助。
此致