我正在尝试按比例计算数据并将其显示在轴的刻度上。
我最接近的解决方案如下:
# Attempted mapping. length(A_REF) is the length of the whole column, so every
# label receives the same total instead of a per-level count.
aes(x = paste(A_REF, "(n=", length(A_REF), ")"))
其中 n 应显示每个因子水平(类别)在数据中出现的次数。
编辑:如何让 V43 的第一个和第五个因子水平显示出来? -> 忘了加载 library("foreign")
# Load libraries & packages =================================
library("ggplot2")
library("scales")
library("dplyr")
library("foreign")
# Data setup =================================
# Absolute, machine-specific location of the SPSS file to import.
spss_file_path <- "D:\\Programming\\Testing\\2017-03-15_data_import&ggplot2\\Beispieldatensatz(fiktiv).sav"

# Import the SPSS file as a data frame (read.spss() comes from "foreign").
exampledata <- read.spss(spss_file_path, use.value.labels = TRUE,
                         to.data.frame = TRUE, reencode = TRUE)

# Column names have to be character strings; bare symbols here fail with
# "object 'V101' not found".
names(exampledata) <- c("V101", "A_REF", "V43")

# Turn the 1-5 codes of V43 into a labelled factor; only the two end points
# carry a verbal label.
exampledata$V43 <- factor(exampledata$V43,
                          levels = c(1, 2, 3, 4, 5),
                          labels = c("1 Sehr zufrieden", "2", "3", "4",
                                     "5 Sehr unzufrieden"))

# Reverse the level order of all three factors. levels() is already free of
# duplicates, so no unique() is needed.
exampledata$V43 <- factor(exampledata$V43,
                          levels = rev(levels(exampledata$V43)))
exampledata$A_REF <- factor(exampledata$A_REF,
                            levels = rev(levels(exampledata$A_REF)))
exampledata$V101 <- factor(exampledata$V101,
                           levels = rev(levels(exampledata$V101)))
# Axis labels per A_REF level: keep rows where both V101 and V43 are present,
# count them by A_REF and paste the count into the label text.
labels <- exampledata %>%
  filter(!(is.na(V101) | is.na(V43))) %>%
  count(A_REF) %>%
  transmute(A_REF, labels = paste(A_REF, "(n=", n, ")"))

# Same row filter again, with the label text attached to every remaining row
# so that it can be mapped to the x aesthetic.
plot_data <- exampledata %>%
  filter(!(is.na(V101) | is.na(V43))) %>%
  left_join(labels, by = "A_REF")
# Plot =================================
# Flipped, 100%-stacked bar chart: one bar per label, segments filled by V43.
ggplot(plot_data, aes(labels, fill = V43)) +
  geom_bar(position = position_fill()) +
  # Percentage text centred vertically inside each segment.
  # NOTE(review): the value is count / sum of all counts, which looks like a
  # share of the whole panel rather than of each bar -- confirm intent.
  geom_text(
    aes(label = scales::percent(..count.. / sum(..count..))),
    stat = "count",
    position = position_fill(vjust = 0.5)
  ) +
  scale_y_continuous(breaks = (0:5) / 5, labels = scales::percent) +
  coord_flip() +
  labs(x = NULL, y = NULL, fill = NULL) +
  # Title text is taken from element 77 of the "variable.labels" attribute.
  ggtitle(paste(attr(exampledata, "variable.labels")[77])) +
  theme_classic()
# Sample data written as a literal data frame (looks like dput() output).
# It has 100 rows (row.names = c(NA, 100L)) and three factor columns named
# exampledata.V101, exampledata.A_REF and exampledata.V43.
# The expression is not assigned to any name here.
structure(list(exampledata.V101 = structure(c(2L, NA, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, NA, 2L, 2L, 2L, 1L, 2L, NA,
NA, NA, 1L, 1L, 2L, NA, 2L, 2L, 2L, NA, 2L, 2L, NA, NA, 1L, NA,
2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, NA, NA, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, NA, 1L, NA, 1L, NA,
1L, 2L, NA, NA, 2L, NA, 1L, 2L, 2L, NA, 2L, NA, 2L, 2L, 1L, 2L,
1L, 2L, 1L, 1L, 2L, 1L, NA, 2L, 2L, 2L, 2L, NA, 2L, 1L, 2L, 2L
), .Label = c("Weiblich", "Männlich"), class = "factor"), exampledata.A_REF = structure(c(18L,
18L, 18L, 18L, 18L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 16L, 18L,
18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 16L, 18L, 18L, 16L, 18L,
16L, 18L, 18L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L,
16L, 18L, 18L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 17L, 16L, 18L,
18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 17L, 18L, 18L,
16L, 18L, 16L, 18L, 18L, 16L, 16L, 18L, 18L, 18L, 18L, 18L, 18L,
18L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 16L, 18L,
16L, 16L, 18L, 18L, 18L, 17L, 16L, 18L), .Label = c("Zertifikat eines Aufbau- oder Ergänzungsstudiums",
"LA Berufliche Schulen", "LA Sonderschule", "LA Gymnasium", "LA Haupt- und Realschule",
"LA Grundschule", "Künstlerischer/musischer Abschluss", "Kirchlicher Abschluss",
"Staatsexamen (ohne Lehramt)", "Diplom Fachhochschule, Diplom I an Gesamthochschulen",
"Diplom Universität, Diplom II an Gesamthochschulen", "Sonstiges",
"Promotion", "Staatsexamen", "Magister", "Diplom", "Master",
"Bachelor"), class = "factor"), exampledata.V43 = structure(c(3L,
5L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 4L, 3L, 3L, 2L, NA, 4L, 5L, 5L,
4L, 4L, 4L, 4L, NA, 2L, 4L, 3L, 5L, 4L, 4L, 4L, NA, 4L, 4L, NA,
NA, 3L, 5L, 2L, 4L, 5L, 4L, 4L, 5L, 5L, 4L, NA, NA, 4L, NA, 3L,
4L, 5L, 5L, 2L, 4L, 4L, 3L, 4L, 4L, 4L, 3L, 5L, 4L, 5L, NA, 4L,
NA, 4L, NA, 4L, 5L, 4L, NA, 5L, NA, 4L, 4L, 4L, NA, 4L, NA, 5L,
4L, 4L, 4L, 4L, 4L, 3L, 3L, 4L, 2L, 4L, 4L, 4L, 3L, 4L, NA, 4L,
5L, 5L, 4L), .Label = c("5 Sehr unzufrieden", "4", "3", "2",
"1 Sehr zufrieden"), class = "factor")), .Names = c("exampledata.V101",
"exampledata.A_REF", "exampledata.V43"), row.names = c(NA, 100L
), class = "data.frame")
答案 0 :(得分:1)
我认为最简单的方法是在 ggplot 之外先把标签计算好。请注意,对于您提供的数据,V43 的第 5 个水平不会显示。
library(ggplot2)
library(dplyr)
# Give the three columns the short names used by the rest of the code.
colnames(exampledata) <- c("V101", "A_REF", "V43")
我先按 A_REF 计数,然后套用你的公式生成标签。
# One row per A_REF level: count the rows where both V101 and V43 are present
# and paste that count into the label text.
labels <- exampledata %>%
  filter(!(is.na(V101) | is.na(V43))) %>%
  count(A_REF) %>%
  transmute(A_REF, labels = paste(A_REF, "(n=", n, ")"))
然后我把标签连接(join)到数据中。
# Rows with both V101 and V43 present, each carrying the label text of its
# A_REF level (matched on the A_REF column).
plot_data <- left_join(
  filter(exampledata, !(is.na(V101) | is.na(V43))),
  labels,
  by = "A_REF"
)
最后是绘图代码。请注意,标题也不会显示。
# Flipped, 100%-stacked bar chart: one bar per label, segments filled by V43.
ggplot(plot_data, aes(labels, fill = V43)) +
  geom_bar(position = position_fill()) +
  # Percentage text centred vertically inside each segment.
  # NOTE(review): the value is count / sum of all counts, which looks like a
  # share of the whole panel rather than of each bar -- confirm intent.
  geom_text(
    aes(label = scales::percent(..count.. / sum(..count..))),
    stat = "count",
    position = position_fill(vjust = 0.5)
  ) +
  scale_y_continuous(breaks = (0:5) / 5, labels = scales::percent) +
  coord_flip() +
  labs(x = NULL, y = NULL, fill = NULL) +
  # Title text is taken from element 77 of the "variable.labels" attribute.
  ggtitle(paste(attr(exampledata, "variable.labels")[77])) +
  theme_classic()