我正在尝试在直方图的顶部添加概率曲线,但是它似乎不起作用。例如
我想在右侧添加一条概率线,以便可以在左侧显示密度,并在右侧显示发生概率。
我完成的代码
# Delay values taken from the `dif` column of Delays_weather0.
x <- Delays_weather0$dif

# Draw the histogram of the delays and keep hist()'s return value in `h`.
h <- hist(
  x,
  breaks = 10,
  col = "red",
  xlab = "Delays",
  main = "Flight Delays"
)
以及我要添加的概率曲线
# Upper-tail normal probabilities P(X > q) for q = -18, ..., 265, with the
# mean and standard deviation estimated from the `dif` column.
my <- with(
  Delays_weather0,
  pnorm(-18:265, mean = mean(dif), sd = sd(dif), lower.tail = FALSE)
)

# Draw the probabilities as a line; plot() starts a new figure.
plot(my, type = "l")
我希望这是可以理解的
答案 0 :(得分:1)
我们无法访问 Delays_weather0 数据集。因此,我将使用 nycflights13 包中 flights 数据集的 dep_delay 变量的前 1000 个观察值。
由于默认情况下R中的直方图会绘制频率,所以我将概率乘以观察数(即1000)以使两个图具有可比性。
我首先使用的是 lines 函数。
library(nycflights13)

# First 1000 departure delays; na.rm = TRUE is used below because the
# summaries would otherwise be NA if any value is missing.
dataset <- flights$dep_delay[1:1000]

# Histogram of counts (the default for equally spaced breaks).
hist(x = dataset,
     breaks = 10,
     col = "red",
     xlab = "Delays",
     main = "Flight Delays")

# Grid of delay values spanning the observed range, one point per observation.
range_dataset <- range(dataset, na.rm = TRUE)
equidistant_points_in_range <- seq(from = range_dataset[1],
                                   to = range_dataset[2],
                                   length.out = length(x = dataset))

# Upper-tail probabilities P(X > q) of a normal distribution fitted to the
# data by its sample mean and standard deviation.
upper_cdf_probabilities <- pnorm(q = equidistant_points_in_range,
                                 mean = mean(x = dataset, na.rm = TRUE),
                                 sd = sd(x = dataset, na.rm = TRUE),
                                 lower.tail = FALSE)

# Overlay the curve on the histogram. The grid must be supplied as `x`:
# with a single vector, lines() plots it against its index (1, 2, ...),
# which places the curve at the wrong positions on the delay axis.
# Probabilities are multiplied by the number of observations so that they
# are on the same scale as the histogram counts.
lines(x = equidistant_points_in_range,
      y = length(x = dataset) * upper_cdf_probabilities,
      col = "blue")
由reprex package(v0.2.1)于2019-03-17创建
使用curve函数的另一种方法。
# First 1000 departure delays from the nycflights13 flights data.
dataset <- nycflights13::flights$dep_delay[1:1000]
range_dataset <- range(dataset, na.rm = TRUE)

# Estimate the normal parameters once instead of on every function call.
dataset_mean <- mean(x = dataset, na.rm = TRUE)
dataset_sd <- sd(x = dataset, na.rm = TRUE)

# Upper-tail probability P(X > x) under the fitted normal distribution.
# pnorm() is already vectorised over `q`, so no Vectorize() wrapper is
# needed for curve() to evaluate it on a whole grid of points.
upper_tail_probability <- function(x) {
  pnorm(q = x,
        mean = dataset_mean,
        sd = dataset_sd,
        lower.tail = FALSE)
}

# Histogram on the density scale (freq = FALSE).
hist(x = dataset,
     freq = FALSE,
     col = "red",
     xlab = "Delays",
     main = "Flight Delays")

# Overlay the probability curve across the observed range.
# NOTE(review): the curve lives on the [0, 1] probability scale while the
# y-axis is a density; parts of the curve may fall outside the plotted
# range unless a secondary axis or rescaling is used.
curve(expr = upper_tail_probability,
      from = range_dataset[1],
      to = range_dataset[2],
      n = 1000,
      add = TRUE,
      col = "blue")
由reprex package(v0.2.1)于2019-03-17创建
答案 1 :(得分:0)
在@yarnabrina的可复制示例之后,(1)使用核密度估计器而不是假设正态性,(2)在右侧放置概率轴:
library(nycflights13)
npts <- 1000
dataset <- flights$dep_delay[1:npts]
## number of non-missing delays: hist() and density() only use these, so
## this (not npts) is the right factor for converting density to counts
nobs <- sum(!is.na(dataset))
## save the previous graphical parameters so they can be restored afterwards
old_par <- par(las = 1, bty = "l",    ## cosmetic
               mar = c(5, 4, 2, 5))   ## expand R margin to make room for second axis
h0 <- hist(x = dataset,
           breaks = 100,
           col = "red",
           xlab = "Delay (min)",
           ylab = "",
           main = "",
           xlim = c(-50, 200))  ## cosmetic: leave out a few extreme values
## top of the plotting region in user (count) coordinates
y_top <- par("usr")[4]
## put axis label at *top* of axis
mtext(side = 2, at = y_top, text = "Frequency")
## compute kernel density estimate
dd <- density(na.omit(dataset), adjust = 3)
dx <- diff(h0$mids)[1]  ## histogram bin width
## scale density to match count values: count = density * n * bin width
lines(dd$x, dd$y * nobs * dx, lwd = 2, col = "blue")
## set up auxiliary axis: tick positions are density values mapped onto the
## count scale, labelled with the original density values
dbrks <- seq(0, 0.05, by = 0.01)
axis(side = 4, at = dbrks * nobs * dx, labels = dbrks)
mtext(side = 4, at = y_top, text = "Probability")  ## axis label
## restore the graphical parameters changed above
par(old_par)