Question

这是我之前提出的Integrating ggplot2 with user-defined stat_function()问题的后续问题，我昨天已经回答了这个问题。我当前的问题是，在下面的可重现的示例中，应该绘制数据值的组件的行。 混合物分布，既不出现在预期的位置，也不出现在预期的形状中，如下图所示（见第二幅图中y = 0处的红线）。

enter image description here

完成可重现的示例：

library(ggplot2)
library(scales)
library(RColorBrewer)
library(mixtools)

NUM_COMPONENTS <- 2

set.seed(12345) # for reproducibility

data(diamonds, package='ggplot2')  # use built-in data
myData <- diamonds$price

# extract 'k' components from mixed distribution 'data'
mix.info <- normalmixEM(myData, k = NUM_COMPONENTS,
                        maxit = 100, epsilon = 0.01)
summary(mix.info)

numComponents <- length(mix.info$sigma)
message("Extracted number of component distributions: ",
        numComponents)

calc.components <- function(x, mix, comp.number) {

  mix$lambda[comp.number] *
    dnorm(x, mean = mix$mu[comp.number], sd = mix$sigma[comp.number])
}

g <- ggplot(data.frame(x = myData)) +
  scale_fill_continuous("Count", low="#56B1F7", high="#132B43") + 
  scale_x_log10("Diamond Price [log10]",
                breaks = trans_breaks("log10", function(x) 10^x),
                labels = prettyNum) +
  scale_y_continuous("Count") +
  geom_histogram(aes(x = myData, fill = 0.01 * ..density..),
                 binwidth = 0.01)
print(g)

# we could select needed number of colors randomly:
#DISTRIB_COLORS <- sample(colors(), numComponents)

# or, better, use a palette with more color differentiation:
DISTRIB_COLORS <- brewer.pal(numComponents, "Set1")

distComps <- lapply(seq(numComponents), function(i)
  stat_function(fun = calc.components,
                arg = list(mix = mix.info, comp.number = i),
                geom = "line", # use alpha=.5 for "polygon"
                size = 1,
                color = "red")) # DISTRIB_COLORS[i]
print(g + distComps)

更新：快速说明我的努力。我还尝试了其他几个选项，包括将绘图的x轴刻度转换为 normal 并请求原始数据值＆＃39;直方图部分中的日志转换，如下所示：geom_histogram(aes(x = log10(data), fill = ..count..), binwidth = 0.01)，但最终结果仍然保持不变。关于我的第一个评论，我意识到只要我使用对..count ..对象的引用，就不需要我提到的转换。

更新2 ：将stat_function()生成的行颜色更改为红色，以澄清问题。

Answer 1

最后，我已经找到了问题，删除了我以前的答案，并且我在下面提供了我的最新解决方案（我唯一没有解决的是组件的图例面板 - 它没有＆t出于某种原因出现，但是对于EDA来证明混合物分布的存在我觉得它已经足够了）。完整的可重现解决方案如下。感谢SO上的所有人直接或间接地帮助了这一点。

library(ggplot2)
library(scales)
library(RColorBrewer)
library(mixtools)

NUM_COMPONENTS <- 2

set.seed(12345) # for reproducibility

data(diamonds, package='ggplot2')  # use built-in data
myData <- diamonds$price


calc.components <- function(x, mix, comp.number) {

  mix$lambda[comp.number] *
    dnorm(x, mean = mix$mu[comp.number], sd = mix$sigma[comp.number])
}


overlayHistDensity <- function(data, calc.comp.fun) {

  # extract 'k' components from mixed distribution 'data'
  mix.info <- normalmixEM(data, k = NUM_COMPONENTS,
                          maxit = 100, epsilon = 0.01)
  summary(mix.info)

  numComponents <- length(mix.info$sigma)
  message("Extracted number of component distributions: ",
          numComponents)

  DISTRIB_COLORS <- 
    suppressWarnings(brewer.pal(NUM_COMPONENTS, "Set1"))

  # create (plot) histogram and ...
  g <- ggplot(as.data.frame(data), aes(x = data)) +
    geom_histogram(aes(y = ..density..),
                   binwidth = 0.01, alpha = 0.5) +
    theme(legend.position = 'top', legend.direction = 'horizontal')

  comp.labels <- lapply(seq(numComponents),
                        function (i) paste("Component", i))

  # ... fitted densities of components
  distComps <- lapply(seq(numComponents), function (i)
    stat_function(fun = calc.comp.fun,
                  args = list(mix = mix.info, comp.number = i),
                  size = 2, color = DISTRIB_COLORS[i]))

  legend <- list(scale_colour_manual(name = "Legend:",
                                     values = DISTRIB_COLORS,
                                     labels = unlist(comp.labels)))

  return (g + distComps + legend)
}

overlayPlot <- overlayHistDensity(log10(myData), 'calc.components')
print(overlayPlot)

<强>结果：

enter image description here

ggplot2对数据和比例的日志转换

1 个答案: