z4log <- c(0.0279 , 0.0002, 0.0191, -0.0329,- 0.0002,- 0.0322, 0.0204, 0.0229, -0.0057, 0.0181, 0.0192, 0.0018, -0.0209, 0.0398, -0.0039, 0.0355, 0.0528, 0.0064, 0.0227, -0.0035, 0.0206, 0.0305, -0.0185, -0.0218, -0.0118, -0.0292, 0.0309, 0.0277, 0.0270, 0.0052, 0.0066, -0.0080,- 0.0066, 0.0002, 0.0006,- 0.0339, 0.0432,- 0.0115,- 0.0225,- 0.02,0.0118, 0.0151, 0.0011, 0.0162, 0.0020, 0.0306, -0.0222, -0.0255, 0.0464, 0.0155, 0.0020, -0.0235, 0.0094, 0.0154, -0.0251, 0.0229, 0.0464, 0.0158, -0.0085, 0.0146, -0.0283, 0.0492, 0.0084, 0.0262, -0.0223, 0.0389, 0.0092, -0.0032, -0.0178, -0.0308, -0.0208, -0.0131, -0.0155, 0.0031, 0.0273, -0.0154, 0.0501, -0.0253, 0.0392, 0.0821, -0.0092, -0.0097, 0.0195, 0.0522, -0.0466, -0.0309, -0.0038, 0.0365, 0.0448, 0.0193, 0.0403,- 0.0639, -0.0413, 0.0613, 0.0504, 0.0183, -0.0138, -0.0096, 0.0683, 0.0238, 0.0245, 0.0012, 0.0393, -0.0274, 0.0388, 0.0568, -0.0321, -0.0134, -0.0062, 0.0458, -0.0143, 0.0143, 0.0310, 0.0203, -0.0516 , 0.0572, 0.0470, -0.0403, 0.0649, 0.0285, -0.0215, 0.0766, -0.0650, -0.0302, 0.0705, -0.0459, 0.0265, 0.01193, -0.0050, -0.0827, -0.0016, -0.0312, 0.0088, 0.0264, 0.0478, 0.0337, 0.0638, -0.0058, 0.0530 , 0.01195,- 0.01114, 0.0207, 0.01018,- 0.0585,- 0.0115, 0.0069, 0.0071, 0.0546,- 0.0268, 0.01000,- 0.01081,- 0.0614, 0.01144,- 0.0466,- 0.0380, 0.0732, 0.0479, 0.0548, 0.0742,- 0.0776,- 0.0109, 0.01346, 0.0458, 0.0057, 0.0748,- 0.0523,- 0.0332,- 0.0295, 0.0072, 0.0002, 0.0448, 0.01245,- 0.0756,- 0.0402,- 0.0366, 0.0175,- 0.0313, 0.0153,- 0.0050,- 0.0128,- 0.0433, 0.0282, 0.0754, 0.0250, 0.0710, 0.01604,- 0.0151, 0.0355,- 0.0424, 0.01009,- 0.0858, 0.0531,- 0.0193,- 0.0364,- 0.0016,- 0.0613, 0.01312, 0.0920, 0.0150, 0.0327, 0.0802,- 0.0148, 0.0853, 0.0323, 0.0135, 0.01106, 0.0452,- 0.0565, 0.0118, 0.0919,-0.01297, 0.0996,0.0834)
# Generating Multiple Distributions
library(wmtsa)
wavelet<- wavelet<- c("s14","d14","c14")
# Selection of n.levels = what range of frequencies I am interested at j0 (RANGE FROM TO)
nlevel<- seq(1: as.integer (floor (logb ((length(z4log)),base=2))))
schrinkfun <- c("soft","hard","mid")
threshfun <- c("universal", "adaptive", "minimax")
threshscale<- c(0.55,0.65,0.75,0.85,0.95)
# Generating Benchmark Distribution
x <- as.vector((z4log))
w2<- expand.grid(wavelet=wavelet,nlevel=nlevel,schrinkfun= schrinkfun,threshfun= threshfun,threshscale= threshscale, stringsAsFactors=FALSE)
result <- mapply(function(m,k,p,u,l,x) (wavShrink(x, wavelet= m, n.level =k, shrink.fun = p, thresh.fun =u, threshold=NULL, thresh.scale = l, xform="modwt", noise.variance=-1, reflect=TRUE)), w2$wavelet, w2$nlevel , w2$schrinkfun, w2$threshfun, w2$threshscale ,MoreArgs=list(x= (x)))
# Plot Comparison Distributions
Dist <- density(z4log)
plot(Dist, type="l", xlab="x", ylab="y", col="red", lwd=3, ylim=c(0,2* max(density(z4log)$y)), xlim=c(min(density(z4log)$x),-min(density (z4log)$x))) # pdf
j <- seq(1:(length(result)/length(z4log)))
fun <- function (x) lines(density(result[,x]))
lapply(j,fun)
lines(Dist, type="l", xlab="x", ylab="y", col="red", lwd=3, ylim=c(0,2* max(density(z4log)$y)), xlim=c(min(density(z4log)$x),-min(density (z4log)$x))) # pdf
从图中可以看出,对红色分布绘制了40,000个分布。有些是在分布范围内,有些则超出了红色分布限制。
我的目标是找出40,000个分布在主要分布中的分布。为此,我使用了重叠函数,但是你会看到这个函数告诉我,如果缺乏拟合在我的目标分布范围内,那就不合适。
# simulate two samples
AA <- z4log
b <- function(x) result[,x]
f <- seq(1:dim(result)[[2]])
BB <- lapply(f,b)
# same dataframe before rescaling. You'll need to load the ‘scales‘ library. But first add a "Source" column to be able to distinguish between a and b after they are combined.
a = data.frame(value = AA, Source = "a" )
fun <- function(x) data.frame(value = BB[[x]], Source = "b")
b = lapply(f,fun)
d = function(x) rbind(a, b[[x]])
d = lapply(f,d)
library(scales)
fun <- function (x) rescale(d[[x]]$value, to = c(0,2*pi))
d$value <- lapply(f,fun)
# Now you can create the rescaled a and b vectors.
A <- function(x) d[[x]][d[[x]]$Source == "a", 1]
A <-lapply(f,A)
B <- function(x) d[[x]][d[[x]]$Source == "b", 1]
B <-lapply(f,B)
# You can then calculate the area of overlap as you did previously.
lower <- function (x) min(d[[x]]$value)-1
lower <- lapply(f,lower)
upper <- function (x) max(d[[x]]$value)+1
upper <- lapply(f,upper)
# generate kernel densities
da <- function(x) density(A[[x]], from=lower[[x]], to=upper[[x]], adjust = 1)
da <- lapply(f,da)
db <- function (x) density(B[[x]], from=lower[[x]], to=upper[[x]],adjust = 1)
db <- lapply(f,db)
# Compute overlap coefficient
fun <- function(x) overlapTrue (da[[x]][[2]],db[[x]][[2]])
overlap <-lapply(f,fun)
因此,如果我的目标是找出哪些重叠分布(你得到合适的地方)或缺乏契合,那将是什么呢?因为它属于分布基准。