我将数据转换为长格式以剔除异常值(按 grade 和 condition 分组,然后按 1.5 * IQR 规则删除),但在将其转换回宽格式时遇到了问题。最后两列(条件和变量,即第 36、37 列)是我想用来展开(spread)数据的列(以便 BOXED_Conjunction_12 等条件各自成为一个变量)。变量 1:35 应保持原样。(由于异常值已被删除,展开后会引入 NA;不过,异常值只是在对应条件下被删除,而不是被完全删除。)我认为问题是由删除异常值引起的,本以为 fill = NA 可以解决这个问题,但并没有解决。
我尝试过
# Attempt 1: spread `condition` into one column per level, taking cell values
# from `pid.avg_rw` and filling missing combinations with NA.
# NOTE(review): spread() presumably treats every remaining column as a row
# identifier; in the sample data below, columns such as `median_grade` and
# `z_rw` differ between conditions, so rows may not collapse -- confirm.
dat%>%spread(condition,pid.avg_rw, fill = NA)
我也尝试过使用reshape2:
# Attempt 2: cast to wide with `condition` levels as columns and `pid.avg_rw`
# as the value.
# NOTE(review): `(1:35)` looks intended to select columns 1-35, but a formula
# term is presumably evaluated as the integer vector 1:35 rather than as a
# column selection -- verify against the dcast documentation.
dat%>%dcast((1:35)~ condition, value.var = "pid.avg_rw")
我收到错误消息
number of rows of result is not a multiple of vector length (arg 1)
Aggregation function missing: defaulting to length
这是前十行的内容。
非常感谢
詹姆斯
structure(list(pid = c("ADMIN-UCSF-bo002", "ADMIN-UCSF-bo002",
"ADMIN-UCSF-bo002", "ADMIN-UCSF-bo002", "ADMIN-UCSF-bo002", "ADMIN-UCSF-bo002",
"ADMIN-UCSF-bo002", "ADMIN-UCSF-bo002", "ADMIN-UCSF-bo002", "ADMIN-UCSF-bo002"
), timepoint = c(1, 2, 3, 1, 2, 3, 1, 2, 3, 1), District.ID = c(175420L,
175420L, 175420L, 175420L, 175420L, 175420L, 175420L, 175420L,
175420L, 175420L), School = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("Bowers", "Bracher", "Cabrillo",
"Central Park", "Laurelwood", "Millikin", "Peterson"), class = "factor"),
Ethnicity = structure(c(6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L), .Label = c("American Indian or Alaskan Native", "Asian",
"Black or African American", "Blank on Purpose", "Filipino",
"Hispanic or Latino", "Pacific Islander", "Two or More Races",
"White"), class = "factor"), Age.2018 = c(10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L), Sex = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("F", "M"), class = "factor"),
Language.Fluency = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("English Learner", "English Only",
"IFEP-Initially Fluent", "RFEP-Redesignated"), class = "factor"),
Parent.Ed.Lvl = structure(c(5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L), .Label = c("College Graduate", "Declined to state/Unknown",
"Grad School/post grad trng", "High School Graduate", "Not HS Graduate",
"Some College"), class = "factor"), SpEd = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No", "Yes"
), class = "factor"), SpEd.Dis = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "Autism (AUT)",
"Emotional Disturbance (ED)", "Hard of Hearing (HH)", "Intellectual Disability (ID)",
"Other Health Impairment (OHI)", "Specific Learning Disability (SLD)",
"Speech or Language Impairment (SLI)", "Visual Impairment (VI)"
), class = "factor"), Low.Income = structure(c(2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("No", "Yes"), class = "factor"),
grade = c("3", "3", "4", "3", "3", "4", "3", "3", "4", "3"
), gender = c("F", "F", "2", "F", "F", "2", "F", "F", "2",
"F"), Teacher = c("Keith, Susan", "Keith, Susan", "Lourdes Martin",
"Keith, Susan", "Keith, Susan", "Lourdes Martin", "Keith, Susan",
"Keith, Susan", "Lourdes Martin", "Keith, Susan"), time = structure(c(17113,
17263, 17417, 17113, 17263, 17417, 17113, 17263, 17417, 17113
), class = "Date"), ela.score = c(2424, 2424, NA, 2424, 2424,
NA, 2424, 2424, NA, 2424), School.Year = c("2017", "2017",
"2018", "2017", "2017", "2018", "2017", "2017", "2018", "2017"
), math.score = c(2440, 2440, NA, 2440, 2440, NA, 2440, 2440,
NA, 2440), basc = c(NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
), cohort = c("3", "3", "3", "3", "3", "3", "3", "3", "3",
"3"), attendance = c(96.1, 96.1, 100, 96.1, 96.1, 100, 96.1,
96.1, 100, 96.1), tme4 = structure(c(NA, 17655, 17655, NA,
17655, 17655, NA, 17655, 17655, NA), class = "Date"), t4.minus = c(6.39692965521615,
5.97126183979046, 5.47227067367148, 6.39692965521615, 5.97126183979046,
5.47227067367148, 6.39692965521615, 5.97126183979046, 5.47227067367148,
6.39692965521615), median_grade = c(1536.4, 1536.4, 1372.4,
1192, 1192, 1054, 986.6, 986.6, 871.6, 958.4), mad_grade = c(377.17344,
377.17344, 278.13576, 167.5338, 167.5338, 161.89992, 139.66092,
139.66092, 116.23584, 143.21916), lowerq = c(1323.7, 1323.7,
1226.2, 1102.2, 1102.2, 960.6, 902.9, 902.9, 804, 873.5),
upperq = c(1964.8, 1964.8, 1655.6, 1329.3, 1329.3, 1181.6,
1091.9, 1091.9, 964.2, 1074.1), iqr = c(641.1, 641.1, 429.4,
227.1, 227.1, 221, 189, 189, 160.2, 200.6), grade.threshold.upper = c(3888.1,
3888.1, 2943.8, 2010.6, 2010.6, 1844.6, 1658.9, 1658.9, 1444.8,
1675.9), grade.threshold.lower = c(-599.6, -599.6, -61.9999999999995,
420.9, 420.9, 297.6, 335.9, 335.9, 323.4, 271.7), mad = c(377.17344,
377.17344, 278.13576, 167.5338, 167.5338, 161.89992, 139.66092,
139.66092, 116.23584, 143.21916), z_rw = c(0.350390238376874,
0.0417183791440274, 0.171148318277673, -0.108910138097997,
-0.497500239197831, -0.365723152941879, 0.512731829784946,
-0.588322005081869, -0.0970981769116109, -0.290844134905211
), condition = c("BOXED_Conjunction_12", "BOXED_Conjunction_12",
"BOXED_Conjunction_12", "BOXED_Conjunction_4", "BOXED_Conjunction_4",
"BOXED_Conjunction_4", "BOXED_Feature_12", "BOXED_Feature_12",
"BOXED_Feature_12", "BOXED_Feature_4"), pid.avg_rw = c(2140,
1845.6, 1884.4, 1242.8, 1088.4, 973.6, 1160.4, 887.6, 910.8,
929.2), avg_rw_grade = c(1805.81052631579, 1805.81052631579,
1686.41503416856, 1286.07368421053, 1286.07368421053, 1148.48656036446,
1033.36421052632, 1033.36421052632, 982.933485193622, 1001.18526315789
), sd_grade = c(953.763652869694, 953.763652869694, 1156.80345459324,
397.333847576144, 397.333847576144, 478.193844053012, 247.762635541793,
247.762635541793, 742.892271389251, 247.504606484003)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -10L))