对于同时包含正值和负值的重尾数据,我有时希望在一张图中看到全部数据,同时又不掩盖单位区间内的结构。
在Python中使用Matplotlib绘图时,我可以通过选择symlog scale来实现这一点:它在某个区间之外使用对数变换,在该区间之内则按线性方式绘图。
以前在R中,我是通过对数据临时做一次arcsinh变换来实现类似效果的。但是,刻度标签之类的细节很难处理好(见下文)。
现在,我手头有大量数据,用lattice或ggplot中的子集(分面)功能来处理会非常方便。正因为需要这种子集功能,我不想使用Matplotlib,但我确实很想念symlog!
我看到ggplot使用了一个名为scales的包,它解决了这个问题的很大一部分(如果它有效的话)。不过,自动选择刻度线和标签的位置看起来仍然很难做好。也许可以用log_breaks和cbreaks的某种组合?
以下代码的效果还算不错:
# Scaled hyperbolic sine: returns sinh(x) multiplied by `scale`.
# This is the algebraic inverse of asinh.scaled() for the same `scale`.
sinh.scaled <- function(x, scale = 1) {
  stretched <- sinh(x)
  stretched * scale
}
# Scaled inverse hyperbolic sine: divides x by `scale`, then applies asinh().
asinh.scaled <- function(x, scale = 1) {
  shrunk <- x / scale
  asinh(shrunk)
}
# Build a break-generating function for an asinh-style axis.
#
# Arguments:
#   n     - overall number of breaks aimed for; forwarded to log_breaks() and
#           used to space the breaks of the central band.
#   scale - half-width of the central band [-scale, scale] that receives
#           evenly spaced breaks.
#   base  - logarithm base forwarded to log_breaks() for the outer bands.
#
# Returns a function(x) that takes the values to cover and returns a vector
# of break positions: mirrored log breaks below -scale, evenly spaced breaks
# inside the central band, and log breaks above scale.
asinh_breaks <- function(n = 5, scale = 1, base = 10) {
  function(x) {
    tail_breaks <- log_breaks(n = n, base = base)

    limits <- range(x, na.rm = TRUE)
    lo <- floor(limits[1])
    hi <- ceiling(limits[2])

    # Degenerate range: a single break is returned.
    if (lo == hi) {
      return(sinh.scaled(lo, scale = scale))
    }

    has_neg_tail <- lo < -scale
    has_pos_tail <- hi > scale
    # Number of outer (logarithmic) bands present; they share the break budget.
    n_tails <- as.numeric(has_neg_tail) + as.numeric(has_pos_tail)

    neg_part <- NULL
    if (has_neg_tail) {
      # Work on the mirrored (positive) interval, then flip sign and order.
      mirrored <- tail_breaks(c(-min(hi, -scale), -lo))
      neg_part <- -rev(mirrored)
    }

    mid_part <- NULL
    # The data range overlaps the central band.
    if (lo <= scale && hi >= -scale) {
      band <- c(max(lo, -scale), min(hi, scale))
      band_lo <- floor(band[1])
      band_hi <- ceiling(band[2])
      step <- floor((band_hi - band_lo) / (n - n_tails)) + 1
      mid_part <- seq(band_lo, band_hi, by = step)
    }

    pos_part <- NULL
    if (has_pos_tail) {
      pos_part <- tail_breaks(c(max(lo, scale), hi))
    }

    c(neg_part, mid_part, pos_part)
  }
}
# Construct a scales transformation object for an asinh-scaled axis.
#
# Arguments:
#   scale - scaling applied before asinh() (see asinh.scaled()) and after
#           sinh() on the way back (see sinh.scaled()); also forwarded to
#           asinh_breaks().
#
# Returns the object produced by trans_new(), named "asinh-<scale>".
asinh_trans <- function(scale = 1) {
  trans <- function(x) asinh.scaled(x, scale)
  inv <- function(x) sinh.scaled(x, scale)
  # Every argument is named: the fourth positional parameter of trans_new()
  # is not `breaks` in newer scales releases, so a positional break function
  # would be bound to the wrong parameter there.
  trans_new(
    name = paste0("asinh-", format(scale)),
    transform = trans,
    inverse = inv,
    breaks = asinh_breaks(scale = scale),
    domain = c(-Inf, Inf)
  )
}
答案 0 :(得分:7)
这是一个基于scales包的解决方案,灵感来自@Dennis提到的Brian Diggs的帖子:
# Construct a scales transformation object for a symmetric-log ("symlog")
# axis: identity for |x| < thr, signed logarithm outside that band.
#
# Arguments:
#   base  - logarithm base used outside the linear band and for log breaks.
#   thr   - half-width of the linear band around zero.
#   scale - multiplier applied to the logarithmic part.
#
# Returns the object produced by trans_new(), named
# "symlog-<thr>-<base>-<scale>".
symlog_trans <- function(base = 10, thr = 1, scale = 1) {
  # Forward map: values with |x| >= thr become
  # sign(x) * (thr + scale * log_base(|x| / thr)).
  trans <- function(x) {
    ifelse(
      abs(x) < thr, x,
      sign(x) * (thr + scale * suppressWarnings(log(sign(x) * x / thr, base)))
    )
  }

  # Inverse map: undoes the logarithmic part outside the linear band.
  inv <- function(x) {
    ifelse(
      abs(x) < thr, x,
      sign(x) * base^((sign(x) * x - thr) / scale) * thr
    )
  }

  # log_breaks() takes `n` first and `base` second, so `base` must be passed
  # by name; passing it positionally would set the number of breaks instead.
  log_brk <- log_breaks(base = base)
  lin_brk <- pretty_breaks()

  # Break positions for the values in x.
  # NOTE(review): the two-sided branch reads x[1] and x[2], so x is presumably
  # the (min, max) limits pair - confirm against the caller.
  breaks <- function(x) {
    sgn <- sign(x[which.max(abs(x))])
    if (all(abs(x) < thr)) {
      # Entirely inside the linear band.
      lin_brk(x)
    } else if (prod(x) >= 0) {
      # One-sided range (product of the values is non-negative).
      if (min(abs(x)) < thr) {
        sgn * unique(c(
          lin_brk(c(min(abs(x)), thr)),
          log_brk(c(max(abs(x)), thr))
        ))
      } else {
        sgn * log_brk(sgn * x)
      }
    } else {
      # Range spans zero.
      if (min(abs(x)) < thr) {
        unique(c(
          sgn * log_brk(c(max(abs(x)), thr)),
          lin_brk(c(sgn * thr, x[which.min(abs(x))]))
        ))
      } else {
        unique(c(
          -log_brk(c(thr, -x[1])),
          lin_brk(c(-thr, thr)),
          log_brk(c(thr, x[2]))
        ))
      }
    }
  }

  # Arguments are named so that `breaks` cannot be bound positionally to a
  # different trans_new() parameter in newer scales releases.
  trans_new(
    name = paste("symlog", thr, base, scale, sep = "-"),
    transform = trans,
    inverse = inv,
    breaks = breaks
  )
}
我不确定参数scale的作用是否与Python中的相同,但下面给出几个对比示例(Python版本见原文链接):
# Example data: x runs from -50 to 50, y from 0 to 100, y2 is a sine of x.
data <- data.frame(x = seq(-50, 50, 0.01), y = seq(0, 100, 0.01))
data$y2 <- sin(data$x / 3)

# Each plot is one expression chained with `+`; a line that does not end in
# `+` terminates the plot, and the following scale_*() call would be ignored.

# symlogx
ggplot(data, aes(x, y)) + geom_line() + theme_bw() +
  scale_x_continuous(trans = symlog_trans())

# symlogy
ggplot(data, aes(y, x)) + geom_line() + theme_bw() +
  scale_y_continuous(trans = "symlog")

# symlog both, threshold = 0.015 for y
# not too pretty because of too many breaks in short interval
ggplot(data, aes(x, y2)) + geom_line() + theme_bw() +
  scale_y_continuous(trans = symlog_trans(thr = 0.015)) +
  scale_x_continuous(trans = "symlog")

# Again symlog both, threshold = 0.15 for y
ggplot(data, aes(x, y2)) + geom_line() + theme_bw() +
  scale_y_continuous(trans = symlog_trans(thr = 0.15)) +
  scale_x_continuous(trans = "symlog")