遵循整洁的数据标准,我的数据采用长格式,带有键和值列。某些键的值是数字,其他键是字符,因此R将整个列设置为字符类。
当我使用filter()只把数值型数据传给ggplot(键为'a'的数据是数字),并在美学映射中对value使用as.numeric()时,转换结果不正确——看起来我得到的只是序号,而不是实际的值。我究竟做错了什么?
filter(data, measure == "a") %>%
ggplot(aes(x = as.numeric(value),
x = as.factor(subject_instance),
color = as.factor(subject_instance))) +
geom_boxplot()
在长格式中处理混合类型数据的最佳方法是什么(也就是说,应该用什么方式来代替我现在的做法)?
或者:
如何让ggplot正确地把这些值转换为数字?
可行的示例(较大集合中的40行样本):
mydata <- structure(list(ResponseID = c("R_40LUIW7O8Lnj7Cd", "R_aXo4IXJ2eRTyThr",
"R_9sHFiKGtn4ZhNiJ", "R_0BMN3JynUPiB0dn", "R_9mqmDcAKzae6ko5",
"R_4T7qN9appsgbnxj", "R_5BeXW1ygKISxISV", "R_3JJY4UGvbzzDYTX",
"R_0AN81Cdgz7ncPDD", "R_aXo4IXJ2eRTyThr", "R_40LUIW7O8Lnj7Cd",
"R_8BOtUltxr8O6AeN", "R_40LUIW7O8Lnj7Cd", "R_1KUj25KpGbKOaGh",
"R_5BeXW1ygKISxISV", "R_0AN81Cdgz7ncPDD", "R_aXo4IXJ2eRTyThr",
"R_aXo4IXJ2eRTyThr", "R_0N8LUMfEP12P4Wh", "R_0wuddsG9KJhHkRn",
"R_1R3kGCm3vPWi4dL", "R_50W5K8wp8m1yOZ7", "R_0wuddsG9KJhHkRn",
"R_ctKujSc0Zr5fldz", "R_4SDzTFmolPaB8wt", "R_0Ng4gEnCnkCTuoB",
"R_0Ng4gEnCnkCTuoB", "R_eb5LkAh0nBVqc9n", "R_0vqNorszrDGN6MB",
"R_40LUIW7O8Lnj7Cd", "R_6s1Q2hFaqRLMKKF", "R_8BOtUltxr8O6AeN",
"R_4SDzTFmolPaB8wt", "R_3JJY4UGvbzzDYTX", "R_3JJY4UGvbzzDYTX",
"R_77mJUnh0OPtvCEl", "R_bxtLgQnlf4iaCWx", "R_6s1Q2hFaqRLMKKF",
"R_7X8L8LwKo6UdWgR", "R_9mqmDcAKzae6ko5"), ID = c("R_3I0G7xzqlA4lUmm",
"R_12m5J3hXrv8ObMa", "R_3PmEIrRgCUr0X3L", "R_YQuCAn43cgRMHy9",
"R_51GdFWDnxQ7zvpv", "R_x9g4FVQzeqAJG8h", "R_QmDHvIxNJUypJip",
"R_2cuyzZ8C4khOGs8", "R_3fUUNvffCN7GUrn", "R_12m5J3hXrv8ObMa",
"R_3I0G7xzqlA4lUmm", "R_xgbhYoALaqQ9TDX", "R_3I0G7xzqlA4lUmm",
"R_28I21bSyxgRcyGo", "R_QmDHvIxNJUypJip", "R_3fUUNvffCN7GUrn",
"R_12m5J3hXrv8ObMa", "R_12m5J3hXrv8ObMa", "R_9L8RxssmQOGrPAR",
"R_3iExjba1az5mpLw", "R_2wodtnGyQkaGTbX", "R_dnln2Bzdjahd3ax",
"R_3iExjba1az5mpLw", "R_29gE0fK7dB6HENJ", "R_2E0mBlZmT618zQp",
"R_3EVZt1ncuzTbVRr", "R_3EVZt1ncuzTbVRr", "R_2anUpVhXXReyZAX",
"R_1dz55WFaXZ3Lm3Y", "R_3I0G7xzqlA4lUmm", "R_vUJsBPPRxV9J6CJ",
"R_xgbhYoALaqQ9TDX", "R_2E0mBlZmT618zQp", "R_2cuyzZ8C4khOGs8",
"R_2cuyzZ8C4khOGs8", "R_3LYcR4i5YB2k0N0", "R_yL9qi0TMXHfuJK9",
"R_vUJsBPPRxV9J6CJ", "R_1DqckuFAYHkKjDg", "R_51GdFWDnxQ7zvpv"
), icon = c(".rprt", ".mddm", ".cnsl", "ord.cnsl", "sgn.alrt",
"ent.advr", "flg.lab2", "ord.lab2", ".mddm", ".mds2", "rmv.prb2",
"sch.imgn", "edt.not2", "edt.prb4", "ord.lab", "grp.lab", "src.note",
"sgn.alrt", "sgn.imgn", "sch.lab", "sch.lab", ".note", "viw.imgn",
"flg.lab2", ".mddm", "ent.prbl", "ent.vtls", "ord.med", ".hstr",
"rnw.alrt", "ent.vtls", "viw.vtls", "sch.lab2", "edt.note", "rnw.med",
"ord.prcd", "rmv.prbl", "crt.grph", "edt.prb3", "ent.prb2"),
measure = c("firstclick", "lastclick", "subject", "clickcount",
"pagesubmit", "firstclick", "subject", "lastclick", "subject",
"pagesubmit", "firstclick", "subject", "clickcount", "lastclick",
"clickcount", "firstclick", "pagesubmit", "pagesubmit", "pagesubmit",
"lastclick", "action", "clickcount", "firstclick", "clickcount",
"subject", "clickcount", "firstclick", "lastclick", "subject",
"pagesubmit", "action", "lastclick", "lastclick", "pagesubmit",
"clickcount", "firstclick", "firstclick", "action", "pagesubmit",
"subject"), value = c("2.602", "4.849", "Consult(s)", "6",
"180", "1.456", "Lab / Imaging / Diagnostic", "70.335", "Medication(s)",
"180", "1.133", "Lab / Imaging / Diagnostic", "4", "3.938",
"4", "3.003", "180", "180", "180", "20.519", "Schedule",
"4", "4.758", "4", "Medication(s)", "4", "1.706", "8.582",
"Patient history", "11.599", "Enter", "9.098", "11.897",
"180", "4", "1.728", "2.423", "Search", "180", "Problem(s)"
), file = structure(c(60L, 37L, 4L, 41L, 67L, 17L, 25L, 44L,
37L, 39L, 57L, 63L, 11L, 15L, 43L, 29L, 66L, 67L, 68L, 64L,
64L, 40L, 78L, 25L, 37L, 20L, 22L, 45L, 33L, 58L, 22L, 87L,
65L, 10L, 59L, 47L, 56L, 7L, 14L, 21L), .Label = c("alert.png",
"allergies.png", "check-order.png", "consult.png", "copy-graph.png",
"create-encounter.png", "create-graph.png", "create-note.png",
"create-report.png", "edit-note.png", "edit-note2.png", "edit-problem.png",
"edit-problem2.png", "edit-problem3.png", "edit-problem4.png",
"encounter.png", "enter-adverse.png", "enter-med.png", "enter-medadmin.png",
"enter-problem.png", "enter-problem2.png", "enter-vitals.png",
"flag-imaging.png", "flag-lab.png", "flag-lab2.png", "flag-order.png",
"followup.png", "forward-alert.png", "graph-lab.png", "graph-lab2.png",
"graph-vitals.png", "graph.png", "history.png", "imaging.png",
"lab1.png", "lab2.png", "medadmin.png", "meds1.png", "meds2.png",
"note.png", "order-consult.png", "order-imaging.png", "order-lab.png",
"order-lab2.png", "order-med.png", "order-med2.png", "order-procedure.png",
"order-procedure2.png", "order.png", "problem1.png", "problem2.png",
"procedure1.png", "procedure2.png", "refill-med.png", "refill-med2.png",
"remove-problem.png", "remove-problem2.png", "renew-alert.png",
"renew-med.png", "report.png", "schedule-consult.png", "schedule-followup.png",
"schedule-imaging.png", "schedule-lab.png", "schedule-lab2.png",
"search-note.png", "sign-alert.png", "sign-imaging.png",
"sign-lab.png", "sign-lab2.png", "sign-note.png", "sign-order.png",
"sign-report.png", "sort-alert.png", "sort-vitals.png", "view-adverse.png",
"view-history.png", "view-imaging.png", "view-lab.png", "view-lab2.png",
"view-med.png", "view-note.png", "view-order.png", "view-problem.png",
"view-problem2.png", "view-report.png", "view-vitals.png",
"vitals.png"), class = "factor"), icon_action = c("", "",
"", "order", "sign", "enter", "flag", "order", "", "", "remove",
"schedule", "edit", "edit", "order", "graph", "search", "sign",
"sign", "schedule", "schedule", "", "view", "flag", "", "enter",
"enter", "order", "", "renew", "enter", "view", "schedule",
"edit", "renew", "order", "remove", "create", "edit", "enter"
), icon_subject = c("report", "medadmin", "consult", "consult",
"alert", "adverse", "lab2", "lab2", "medadmin", "meds2",
"problem2", "imaging", "note2", "problem4", "lab", "lab",
"note", "alert", "imaging", "lab", "lab", "note", "imaging",
"lab2", "medadmin", "problem", "vitals", "med", "history",
"alert", "vitals", "vitals", "lab2", "note", "med", "procedure",
"problem", "graph", "problem3", "problem2"), instance = structure(c(2L,
8L, 7L, 26L, 80L, 49L, 78L, 24L, 11L, 7L, 83L, 24L, 77L,
43L, 26L, 67L, 73L, 38L, 31L, 74L, 27L, 12L, 26L, 87L, 15L,
31L, 53L, 42L, 2L, 53L, 88L, 57L, 47L, 62L, 54L, 37L, 40L,
78L, 32L, 33L), .Label = c("1", "2", "3", "4", "5", "6",
"7", "8", "9", "10", "11", "12", "13", "14", "15", "16",
"17", "18", "19", "20", "21", "22", "23", "24", "25", "26",
"27", "28", "29", "30", "31", "32", "33", "34", "35", "36",
"37", "38", "39", "40", "41", "42", "43", "44", "45", "46",
"47", "48", "49", "50", "51", "52", "53", "54", "55", "56",
"57", "58", "59", "60", "61", "62", "63", "64", "65", "66",
"67", "68", "69", "70", "71", "72", "73", "74", "75", "76",
"77", "78", "79", "80", "81", "82", "83", "84", "85", "86",
"87", "88"), class = "factor"), action_instance = c(NA, NA,
NA, 2L, 7L, 3L, 4L, 1L, NA, NA, 2L, 1L, 5L, 2L, 2L, 2L, 1L,
6L, 2L, 4L, 1L, NA, 2L, 4L, NA, 2L, 3L, 4L, NA, 1L, 6L, 9L,
1L, 5L, 2L, 2L, 1L, 3L, 2L, 1L), subject_instance = c(1L,
1L, 1L, 2L, 4L, 2L, 7L, 2L, 1L, 1L, 3L, 2L, 1L, 1L, 1L, 6L,
5L, 3L, 2L, 3L, 2L, 1L, 2L, 7L, 1L, 2L, 4L, 2L, 1L, 5L, 5L,
3L, 4L, 4L, 3L, 1L, 3L, 3L, 1L, 2L)), .Names = c("ResponseID",
"ID", "icon", "measure", "value", "file", "icon_action", "icon_subject",
"instance", "action_instance", "subject_instance"), class = c("tbl_df",
"data.frame"), row.names = c(NA, -40L))
filter(mydata, measure=="pagesubmit") %>% ggplot(aes(y=as.numeric(value), x=as.factor(subject_instance), color=as.factor(subject_instance))) + geom_boxplot()
此外,还有一个不太相关的问题:为什么下面这样写不起作用?
filter(icon, measure=="pagesubmit") %>% mean(value)
答案 0 :(得分:2)
如@Spacedman所述,您的示例不具有可重复性/一致性。
第1部分
这有效:
filter(mydata, measure == "pagesubmit") %>%
ggplot(aes(
y = as.numeric(value),
x = as.factor(subject_instance),
color = as.factor(subject_instance))) +
geom_boxplot()
第2部分
如果您想计算所有measure等于pagesubmit的行的value平均值,则可以执行以下操作:
filter(mydata, measure == "pagesubmit") %>%
summarise(mean = mean(as.numeric(value)))
您的初始尝试无效,因为您是在对整个数据框(而不是其中的value列)调用mean()。实际上,当您执行... %>% mean(value)时,会收到以下警告信息(结果返回NA):
# Warning message:
# In mean.default(., value) : argument is not numeric or logical: returning NA
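其中.是左侧参数(即mydata经过过滤、仅保留measure等于pagesubmit的行之后的数据框),%>%管道把它作为第一个参数传递给mean()。遵循相同的逻辑,您应该先取出value列并转换为数值,再传给mean():
filter(mydata, measure == "pagesubmit") %>%
  pull(value) %>%
  as.numeric() %>%
  mean()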