Question

我在绘制混合分布的四分位数并进一步计算 Cp 和 Cpk 时遇到了问题。

我的数据：

> dput(hist)
structure(list(index = c(1, 10, 11, 12, 128044, 128045, 128046, 
128047, 128048, 128049, 128050, 128051, 128052, 128053, 128054, 
128055, 128056, 128057, 128058, 128059, 128060, 128061, 128062, 
128063, 128064, 128065, 128066, 128067, 128068, 128069, 128070, 
128071, 128072, 128073, 128074, 128075, 128076, 128077, 128078, 
128079, 128080, 128081, 128082, 13, 14, 15, 150780, 150781, 150782, 
150783, 150784, 150785, 150786, 150787, 150788, 150789, 150790, 
150791, 150792, 150793, 150794, 150795, 150796, 150797, 150798, 
150799, 150800, 16, 163525, 163526, 163527, 163528, 163529, 163530, 
163531, 163532, 163533, 163534, 163535, 163536, 163537, 163538, 
163539, 163540, 163541, 163542, 163543, 163544, 163545, 163546, 
163547, 163548, 163549, 163550, 163551, 163552, 17), Rundheit = c(0.24, 
0.25, 0.23, 0.24, 0.23, 0.24, 0.22, 0.24, 0.21, 0.22, 0.23, 0.24, 
0.22, 0.24, 0.27, 0.23, 0.26, 0.27, 0.35, 0.27, 0.27, 0.27, 0.27, 
0.27, 0.28, 0.32, 0.31, 0.3, 0.29, 0.28, 0.28, 0.27, 0.28, 0.27, 
0.28, 0.28, 0.29, 0.29, 0.28, 0.28, 0.27, 0.26, 0.27, 0.23, 0.26, 
0.24, 0.17, 0.52, 0.18, 0.19, 0.17, 0.18, 0.18, 0.18, 0.18, 0.2, 
0.17, 0.17, 0.18, 0.18, 0.18, 0.18, 0.18, 0.2, 0.19, 0.18, 0.18, 
0.25, 0.23, 0.23, 0.22, 0.23, 0.23, 0.23, 0.22, 0.23, 0.2, 0.21, 
0.21, 0.22, 0.23, 0.23, 0.23, 0.23, 0.22, 0.22, 0.23, 0.22, 0.22, 
0.22, 0.23, 0.23, 0.23, 0.23, 0.23, 0.23, 0.24)), .Names = c("index", 
"Rundheit"), row.names = c(17L, 45L, 311125L, 622233L, 872553L, 
872581L, 872609L, 872637L, 872665L, 872693L, 872749L, 872777L, 
872805L, 872833L, 872861L, 872889L, 872917L, 872945L, 872973L, 
873001L, 873057L, 873085L, 873113L, 873141L, 873169L, 873197L, 
873225L, 873253L, 873281L, 873309L, 873365L, 873393L, 873421L, 
873449L, 873477L, 873505L, 873533L, 873561L, 873589L, 873617L, 
873673L, 873701L, 873729L, 933341L, 1244449L, 1555557L, 1579889L, 
1579917L, 1579945L, 1579973L, 1580001L, 1580029L, 1580057L, 1580085L, 
1580113L, 1580141L, 1580197L, 1580225L, 1580253L, 1580281L, 1580309L, 
1580337L, 1580365L, 1580393L, 1580421L, 1580449L, 1580533L, 1866665L, 
1976397L, 1976425L, 1976453L, 1976481L, 1976509L, 1976565L, 1976593L, 
1976621L, 1976649L, 1976677L, 1976705L, 1976733L, 1976761L, 1976789L, 
1976817L, 1976873L, 1976901L, 1976929L, 1976957L, 1976985L, 1977013L, 
1977041L, 1977069L, 1977097L, 1977125L, 1977181L, 1977209L, 1977237L, 
2177773L), na.action = structure(98:100, .Names = c("2412637", 
"2412665", "2412721"), class = "omit"), class = "data.frame")

我可以很容易地用 ggplot 画出图形，密度曲线看起来也相当不错，但是四分位数（+/- 2s 和 +/- 3s）的位置不正确。我的绘图代码：

# quantile() is called with its default probabilities, so `vec` holds the
# 0%, 25%, 50%, 75% and 100% sample quantiles of Rundheit (NAs removed).
# These are quartiles, not mean +/- 2s or mean +/- 3s limits.
vec <- quantile(hist$Rundheit, na.rm = TRUE)

# Histogram of counts (grey bars) with a kernel density estimate (red line),
# dashed blue vertical lines at each value in `vec`, and solid red vertical
# lines at 0 and 0.55.
 ggplot(data=hist, aes(Rundheit)) +
  geom_bar(aes( y=..count..), stat="bin",position="dodge", fill="gray40", colour="white") + 
  stat_density(color="red", geom="line", size=1, position="identity") +
  geom_vline(xintercept=vec, linetype=2, colour="blue", size=1) + # quantile lines taken from `vec`
  geom_vline(aes(xintercept=0.55), size = 1, color="red") + # upper tolerance limit
  geom_vline(aes(xintercept=0), size = 1, color="red") # lower tolerance limit

此外，我尝试使用SixSigma包计算Cp和Cpk：

library(SixSigma)

# Capability index Cp for Rundheit; the second and third arguments are the
# lower and upper specification limits (0 and 0.55).
# NOTE(review): presumably computed from the overall sample standard deviation
# under a normality assumption, which may explain the mismatch with other
# software for this mixed distribution - confirm against the SixSigma docs.
cp<- ss.ca.cp(hist$Rundheit, 0,0.55)

cp
[1] 1.922963

# Capability index Cpk, using the same specification limits as above.
cpk <- ss.ca.cpk(hist$Rundheit, 0,0.55) 

cpk
[1] 1.658759

然而，SixSigma 计算出的 Cp 和 Cpk 数值与我用其他程序得到的结果不一致，后者给出 cp = 2.35、cpk = 2.11。

仅供参考，我没有太多的统计背景

感谢任何提示！

Answer 1

像这样怎么样？这是你想要的效果吗？说实话，我其实并不清楚 Cp、Cpk、LSL 和 USL 是什么。

（我将 hist 重命名为 dat，因为 hist 是一个非常常用的函数。）

# Plot the histogram and kernel density of `dat$Rundheit` together with
# vertical reference lines at the mean, mean +/- 3s and mean +/- 5s.
# `dat` is the question's data frame under a new name, so that it no longer
# reuses the name of the hist() function.
library(ggplot2)

# na.rm = TRUE matches the quantile() call in the question; without it a
# single missing value would turn the mean, the sd and every line into NA.
m <- mean(dat$Rundheit, na.rm = TRUE)
s <- sd(dat$Rundheit, na.rm = TRUE)

# One row per reference line. `sigma` labels each line and is mapped to the
# line type below; its explicit levels fix the legend order.
vec <- data.frame(
  val = c(m, m - 3 * s, m + 3 * s, m - 5 * s, m + 5 * s),
  sigma = factor(
    c("mean", "3s", "3s", "5s", "5s"),
    levels = c("mean", "3s", "5s")
  )
)

# after_stat(count) and linewidth replace the deprecated `..count..` notation
# and the deprecated use of `size` for line widths.
ggplot(data = dat, aes(Rundheit)) +
  geom_bar(
    aes(y = after_stat(count)),
    stat = "bin", position = "dodge", fill = "gray40", colour = "white"
  ) +
  stat_density(
    color = "red", geom = "line", linewidth = 1, position = "identity"
  ) +
  geom_vline(
    data = vec, aes(xintercept = val, linetype = sigma),
    colour = "blue", linewidth = 1
  )

R：具有规格限制的非正态分布 -> 四分位数与 Cp / Cpk 值

1 个答案: