我在为混合分布绘制四分位数线、并进一步计算 Cp 和 Cpk 时遇到了问题。
我的数据:
> dput(hist)
structure(list(index = c(1, 10, 11, 12, 128044, 128045, 128046,
128047, 128048, 128049, 128050, 128051, 128052, 128053, 128054,
128055, 128056, 128057, 128058, 128059, 128060, 128061, 128062,
128063, 128064, 128065, 128066, 128067, 128068, 128069, 128070,
128071, 128072, 128073, 128074, 128075, 128076, 128077, 128078,
128079, 128080, 128081, 128082, 13, 14, 15, 150780, 150781, 150782,
150783, 150784, 150785, 150786, 150787, 150788, 150789, 150790,
150791, 150792, 150793, 150794, 150795, 150796, 150797, 150798,
150799, 150800, 16, 163525, 163526, 163527, 163528, 163529, 163530,
163531, 163532, 163533, 163534, 163535, 163536, 163537, 163538,
163539, 163540, 163541, 163542, 163543, 163544, 163545, 163546,
163547, 163548, 163549, 163550, 163551, 163552, 17), Rundheit = c(0.24,
0.25, 0.23, 0.24, 0.23, 0.24, 0.22, 0.24, 0.21, 0.22, 0.23, 0.24,
0.22, 0.24, 0.27, 0.23, 0.26, 0.27, 0.35, 0.27, 0.27, 0.27, 0.27,
0.27, 0.28, 0.32, 0.31, 0.3, 0.29, 0.28, 0.28, 0.27, 0.28, 0.27,
0.28, 0.28, 0.29, 0.29, 0.28, 0.28, 0.27, 0.26, 0.27, 0.23, 0.26,
0.24, 0.17, 0.52, 0.18, 0.19, 0.17, 0.18, 0.18, 0.18, 0.18, 0.2,
0.17, 0.17, 0.18, 0.18, 0.18, 0.18, 0.18, 0.2, 0.19, 0.18, 0.18,
0.25, 0.23, 0.23, 0.22, 0.23, 0.23, 0.23, 0.22, 0.23, 0.2, 0.21,
0.21, 0.22, 0.23, 0.23, 0.23, 0.23, 0.22, 0.22, 0.23, 0.22, 0.22,
0.22, 0.23, 0.23, 0.23, 0.23, 0.23, 0.23, 0.24)), .Names = c("index",
"Rundheit"), row.names = c(17L, 45L, 311125L, 622233L, 872553L,
872581L, 872609L, 872637L, 872665L, 872693L, 872749L, 872777L,
872805L, 872833L, 872861L, 872889L, 872917L, 872945L, 872973L,
873001L, 873057L, 873085L, 873113L, 873141L, 873169L, 873197L,
873225L, 873253L, 873281L, 873309L, 873365L, 873393L, 873421L,
873449L, 873477L, 873505L, 873533L, 873561L, 873589L, 873617L,
873673L, 873701L, 873729L, 933341L, 1244449L, 1555557L, 1579889L,
1579917L, 1579945L, 1579973L, 1580001L, 1580029L, 1580057L, 1580085L,
1580113L, 1580141L, 1580197L, 1580225L, 1580253L, 1580281L, 1580309L,
1580337L, 1580365L, 1580393L, 1580421L, 1580449L, 1580533L, 1866665L,
1976397L, 1976425L, 1976453L, 1976481L, 1976509L, 1976565L, 1976593L,
1976621L, 1976649L, 1976677L, 1976705L, 1976733L, 1976761L, 1976789L,
1976817L, 1976873L, 1976901L, 1976929L, 1976957L, 1976985L, 1977013L,
1977041L, 1977069L, 1977097L, 1977125L, 1977181L, 1977209L, 1977237L,
2177773L), na.action = structure(98:100, .Names = c("2412637",
"2412665", "2412721"), class = "omit"), class = "data.frame")
用 ggplot 绘图很容易,密度曲线看起来也相当不错,但是四分位数线(±2s 和 ±3s)的位置不正确。我的绘图代码:
# Quantiles of Rundheit at quantile()'s default probabilities
# (0 %, 25 %, 50 %, 75 %, 100 %), ignoring missing values.
vec <- quantile(hist[["Rundheit"]], na.rm = TRUE)

# Histogram of counts with a density line on top, plus vertical reference lines.
ggplot(hist, aes(x = Rundheit)) +
  geom_bar(
    aes(y = ..count..),
    stat = "bin", position = "dodge",
    colour = "white", fill = "gray40"
  ) +
  stat_density(geom = "line", position = "identity", colour = "red", size = 1) +
  # Dashed blue lines: one per value stored in vec.
  geom_vline(xintercept = vec, colour = "blue", linetype = 2, size = 1) +
  # Solid red lines at 0.55 and 0 (tolerance limits).
  geom_vline(aes(xintercept = 0.55), colour = "red", size = 1) +
  geom_vline(aes(xintercept = 0), colour = "red", size = 1)
此外,我尝试使用 SixSigma 包计算 Cp 和 Cpk:
# Console transcript: capability indices for Rundheit via the SixSigma package.
library(SixSigma)
# 0 and 0.55 are passed positionally as the 2nd and 3rd arguments
# (presumably the lower/upper specification limits -- confirm against the
# ss.ca.cp documentation).
cp<- ss.ca.cp(hist$Rundheit, 0,0.55)
cp
[1] 1.922963
# Same positional arguments, this time for ss.ca.cpk.
cpk <- ss.ca.cpk(hist$Rundheit, 0,0.55)
cpk
[1] 1.658759
然而,SixSigma 计算出的 Cp 和 Cpk 值与我用其他程序得到的结果不一致,后者给出 cp = 2.35、cpk = 2.11。
仅供参考,我没有太多的统计背景
感谢任何提示!
答案 0 :(得分:0)
类似这样如何?这是你想要的效果吗?说实话,我并不清楚 Cp、Cpk、LSL 和 USL 是什么。
(我把 `hist` 重命名为 `dat`,因为 `hist` 是一个非常常用的函数名。)
library(ggplot2)

# Sample mean and standard deviation of the measurements.
m <- mean(dat[["Rundheit"]])
s <- sd(dat[["Rundheit"]])

# Positions of the reference lines: the mean, then mean -/+ 3 and -/+ 5
# standard deviations. The factor fixes the order mean, 3s, 5s.
vec <- data.frame(
  val = c(m, m + c(-3, 3, -5, 5) * s),
  sigma = factor(
    c("mean", "3s", "3s", "5s", "5s"),
    levels = c("mean", "3s", "5s")
  )
)

# Histogram of counts, density line, and one blue vertical line per row of
# vec, with the line type mapped to the sigma label.
ggplot(dat, aes(x = Rundheit)) +
  geom_bar(
    aes(y = ..count..),
    stat = "bin", position = "dodge",
    colour = "white", fill = "gray40"
  ) +
  stat_density(geom = "line", position = "identity", colour = "red", size = 1) +
  geom_vline(
    data = vec,
    mapping = aes(xintercept = val, linetype = sigma),
    colour = "blue", size = 1
  )