将边际分布拟合到R中的直方图的示例

时间:2017-07-26 01:09:11

标签: r ggplot2 histogram distribution bayesian

有人可以告诉我如何将多项分布(multinomial)的边际分布拟合到我的数据吗?我已经拟合了二项分布和 beta-二项分布,但还想看看如何拟合多项分布。如果你知道怎么做的话,我也有兴趣尝试伽玛分布。

这是我到目前为止所做的。

# Download the nodes data set; the first row of the file supplies the column
# names (the `x` and `n` columns are the ones used by the code below).
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
nodes <- read.table(
  "https://web.stanford.edu/~hastie/CASI_files/DATA/nodes.txt",
  header = TRUE
)

# Histogram of the per-row proportion x / n in `nodes`.
# The x axis shows x / n, so it carries the "p=x/n" label (as in the later
# plots); `n` is not an aesthetic, so a label given under that name would
# never be displayed. "nodes" is kept as the plot title.
nodes %>%
  ggplot(aes(x = x / n)) +
  geom_histogram(bins = 30) +
  theme_bw() +
  labs(
    title = "nodes",
    x = "p=x/n"
  )

# Negative log-likelihood of a beta-binomial model for the `nodes` data.
#
# alpha, beta: the two shape parameters passed to VGAM::dbetabinom.ab().
# Returns the negated sum of the log-densities of nodes$x successes out of
# nodes$n trials, so smaller values mean a better fit.
ll <- function(alpha, beta) {
  log_dens <- VGAM::dbetabinom.ab(
    nodes$x,
    size = nodes$n,
    shape1 = alpha,
    shape2 = beta,
    log = TRUE
  )
  -sum(log_dens)
}

# Maximum likelihood estimation of the beta-binomial shape parameters.
# mle() and its coef() method come from the stats4 package, which is not
# attached anywhere in this script, so both are namespace-qualified in the
# same way as the VGAM:: and stats:: calls.
# L-BFGS-B is used so that both parameters can be bounded below (one lower
# bound per parameter, in the order alpha, beta).
m <- stats4::mle(
  ll,
  start = list(alpha = 1, beta = 10),
  method = "L-BFGS-B",
  lower = c(0.0001, 0.1)
)
ab <- stats4::coef(m)
alpha0 <- ab[1]
beta0 <- ab[2]

# Density-scaled histogram of x / n with the Beta(alpha0, beta0) density
# drawn on top in red.
ggplot(nodes) +
  geom_histogram(
    mapping = aes(x = x / n, y = ..density..),
    bins = 30
  ) +
  stat_function(
    fun = dbeta,
    args = list(shape1 = alpha0, shape2 = beta0),
    color = "red",
    size = 1
  ) +
  labs(x = "p=x/n")

enter image description here

这是另一种拟合

# Negative log-likelihood of a plain binomial model for the `nodes` data.
#
# a: success probability shared by every row.
# Returns the negated sum of the log-probabilities of nodes$x successes out
# of nodes$n trials.
ll <- function(a) {
  log_dens <- stats::dbinom(nodes$x, size = nodes$n, prob = a, log = TRUE)
  -sum(log_dens)
}

# Maximum likelihood estimate of the single binomial probability `a`.
# Only one parameter is optimised, so exactly one lower bound is supplied.
# The upper bound keeps L-BFGS-B inside (0, 1): outside that range dbinom()
# returns NaN and the optimiser stops with a non-finite objective.
# mle() and coef() are qualified because stats4 is not attached in this
# script.
m <- stats4::mle(
  ll,
  start = list(a = 0.5),
  method = "L-BFGS-B",
  lower = 0.0001,
  upper = 0.9999
)

a <- stats4::coef(m)

# Density-scaled histogram of x / n with a Beta(a, 1) density drawn on top
# in red.
# NOTE(review): `a` is the fitted binomial success probability; Beta(a, 1)
# is not the density implied by that fit -- confirm this overlay is what is
# intended.
ggplot(nodes) +
  geom_histogram(
    mapping = aes(x = x / n, y = ..density..),
    bins = 40
  ) +
  stat_function(
    fun = dbeta,
    args = list(shape1 = a, shape2 = 1),
    color = "red",
    size = 1
  ) +
  labs(x = "proportion x/n")

enter image description here

1 个答案:

答案 0 :(得分:1)

用于拟合伽玛分布:

data(iris)
library(MASS) # for the fitdistr() function
library(ggplot2) # for the plot below

# Fit a gamma distribution to the sepal lengths; the estimated "shape" and
# "rate" end up in fit.params$estimate.
fit.params <- fitdistr(iris$Sepal.Length, "gamma", lower = c(0, 0))

# Density-scaled histogram of the sepal lengths with the fitted gamma density
# in red. Columns are mapped by name from `iris` (no `iris$...` inside aes()),
# and no separate `data` argument is needed for the histogram layer.
ggplot(iris, aes(x = Sepal.Length)) +
  geom_histogram(aes(y = ..density..)) +
  geom_line(
    aes(y = dgamma(
      Sepal.Length,
      shape = fit.params$estimate[["shape"]],
      rate = fit.params$estimate[["rate"]]
    )),
    color = "red",
    size = 1
  ) +
  theme_classic()

您可能还想使用 car 包中的 qqp 函数,通过分位数图(Q-Q 图)来检查数据的分布。以下是一些例子:

library(car)

# Q-Q plots of the iris sepal lengths against several reference
# distributions. Where a distribution needs parameters, they are first
# estimated with fitdistr() and then passed to qqp() by name.

# Normal distribution
qqp(iris$Sepal.Length, "norm")

# Log-normal distribution
qqp(iris$Sepal.Length, "lnorm")

# Gamma distribution
gamma <- fitdistr(iris$Sepal.Length, "gamma")
qqp(
  iris$Sepal.Length,
  "gamma",
  shape = gamma$estimate[["shape"]],
  rate = gamma$estimate[["rate"]]
)

# Negative binomial distribution
# NOTE(review): the negative binomial is a count distribution and
# Sepal.Length is not integer-valued, so this fit may fail or be
# meaningless -- confirm before relying on it.
nbinom <- fitdistr(iris$Sepal.Length, "Negative Binomial")
qqp(
  iris$Sepal.Length,
  "nbinom",
  size = nbinom$estimate[["size"]],
  mu = nbinom$estimate[["mu"]]
)

您可以将 fitdistr 函数的结果用于 ggplot 图或 Q-Q 图(qqPlot)。它支持许多不同的分布,详情请查看 ?fitdistr