我有一个看起来像这样的数据框:
# Example responses: 14 observations (rows) on 5 items (columns), each scored
# 1-5, with NA marking a missing response.
Data <- data.frame(
  item1 = c(1:5, 1:5, NA, 5, NA, NA),
  item2 = c(1, 2, 2, 4, 1, 1, 2, 3, 5, 5, rep(NA, 4)),
  item3 = c(1, 2, 2, 4, 1, 1, 2, 3, 5, 5, rep(NA, 4)),
  item4 = c(1, 2, 2, 4, 1, 1, 4, 3, 1, 5, NA, 3, NA, NA),
  item5 = c(1, 5, 2, 4, 2, 1, 2, 3, 5, 5, NA, NA, 1, NA)
)
我已经定义了一个函数,用来提取每一列的频率并绘图(绘图时不包含 NA):
# Plot the relative frequency of every response value, item by item.
#
# Each column of `x` is one item. For every item the share (in percent) of
# each response value 1..5 is computed; missing responses are counted as one
# extra "NA" category that is drawn as the top row of the y axis. Every
# (item, value) cell is drawn as a gray square whose size grows with its
# frequency.
#
# Args:
#   x: data frame with one column per item; responses coded 1..5, NA allowed.
#   K: multiplier applied to the symbol size (cex).
#
# Returns:
#   Invisibly, a data frame with columns `vals` (1..5, and 6 for NA), `item`
#   (column index) and `freq` (percentage within the item).
frequencies <- function(x, K = 5) {
  p <- length(x) # items
  n <- nrow(x) # observations
  r <- 5 # numeric response values are 1..r
  categories <- c(seq_len(r), NA) # NA is treated as one more "value"
  n_cat <- length(categories)

  # Percentage of each category within one item. match() pairs NA with NA, so
  # missing responses land in the last bin; values outside 1..r map to NA in
  # the match result and are ignored by tabulate().
  myf <- function(y) {
    counts <- tabulate(match(y, categories), nbins = n_cat)
    total <- sum(counts)
    if (total == 0) {
      return(numeric(n_cat))
    }
    100 * counts / total
  }

  # One column of percentages per item (n_cat rows, p columns).
  freqs <- vapply(x, myf, numeric(n_cat))

  # All (value, item) combinations; `vals` varies fastest, which matches the
  # column-major order of `freqs`.
  df2 <- expand.grid(vals = seq_len(n_cat), item = seq_len(p))
  df2$freq <- as.numeric(freqs)

  # Empty canvas; both axes are drawn by hand so they can carry custom labels.
  plot(df2$item, df2$vals,
    type = "n", xlim = c(1, p), ylim = c(1, n_cat),
    xaxt = "n", yaxt = "n", xlab = "", ylab = "", ann = FALSE
  )
  axis(1, labels = FALSE)
  axis(2, at = seq_len(n_cat), labels = c(seq_len(r), "NA"), las = 1)

  # Rotated item names below the x axis; par("usr")[3] is the bottom edge of
  # the plotting region in user coordinates.
  text(seq_len(p), par("usr")[3],
    labels = names(x), srt = 60, pos = 1, offset = 1, xpd = TRUE
  )

  # Square size is the percentage divided by the number of observations,
  # scaled by K.
  points(df2$item, df2$vals,
    pch = 22, col = "black", bg = "gray", cex = (df2$freq / n) * K
  )

  invisible(df2)
}
我希望把 NA 也当作一个"值"(显示在 y 轴的取值序列中),这样我的图看起来就会类似于下面这张图(该图是用图像编辑器修改的,不是用 R 生成的):
提前谢谢你,
Angulo
答案 0 :(得分:2)
另一种做法是先用 melt 把数据转换为长格式,然后调用 table 并设置 exclude = NULL,这样 NA 也会被计数。如果希望频率与正方形的面积(而不是宽度)成比例,请参考 scale_size_area。
library(reshape2)
library(ggplot2)
# Reshape to long format: one row per (item, response) pair, stored in the
# columns `variable` and `value`.
Data2 <- melt(Data)
# Cross-tabulate item by response; exclude = NULL keeps NA as a category so
# that missing responses are counted as well.
Data3 <- as.data.frame(
  with(Data2, table(variable, value, exclude = NULL))
)
# Drop rows where the item itself is NA.
Data3 <- subset(Data3, !is.na(variable))
# One open square (shape 0) per item/response cell, sized by its count.
ggplot(Data3, aes(x = variable, y = value, size = Freq)) +
  geom_point(shape = 0)
答案 1 :(得分:1)
尝试这样的事情:
# Useful packages:
library(plyr)
library(ggplot2)
# Count how often each value occurs in every column of Data
# (plyr::count returns the columns `x` and `freq`).
counts <- lapply(Data, count)
# Stack the per-column counts into a single data frame.
all_counts <- do.call(rbind, counts)
# Add a factor version of the value and the name of the column each row
# came from (each name repeated once per row of that column's counts).
all_counts <- within(all_counts, {
  Value <- factor(x)
  Variable <- rep(names(counts), vapply(counts, nrow, integer(1)))
})
# Remove NAs (it isn't very clear from the question whether you want NAs or not)
all_counts <- subset(all_counts, !is.na(x))
# Draw the plot using the Variable/Value columns built above (the bare name
# `var` is not a column of all_counts and would resolve to stats::var).
# sqrt is to scale area by freq rather than width by freq.
(p <- ggplot(all_counts, aes(Variable, Value, size = sqrt(freq))) +
  geom_point(shape = 15) # shape 15 is a filled square. See ?points.
)