我在O'Reilly演示文稿(http://cdn.oreillystatic.com/en/assets/1/event/85/Case%20Study_%20What_s%20a%20Customer%20Worth_%20Presentation.pdf)
中找到了下图
是否可以在 R 中使用基础图形(base graphics)、lattice 或 ggplot2 重新创建此图?
以下是一些玩具数据:
# Build a small reproducible toy data set with one row per observation
# (columns: id, date, amount), then sort it by id and date.
set.seed(123)

# Size of the simulated data set.
number_of_observations <- 20
number_of_customers <- 5

# First and last calendar day an observation may fall on.
day_start <- "2013/01/01"
day_end <- "2013/12/31"

# Draw the columns one after another. The order of the sample() calls
# (id, then date, then amount) determines the random stream, so keep it.
ids <- letters[
  sample(number_of_customers, size = number_of_observations, replace = TRUE)
]
all_days <- seq(as.Date(day_start), as.Date(day_end), by = "day")
dates <- sample(all_days, size = number_of_observations, replace = TRUE)
amounts <- sample(1000, size = number_of_observations, replace = TRUE)

d <- data.frame(id = ids, date = dates, amount = amounts)

# Sort by id and, within each id, by date.
d_sorted <- d[with(d, order(id, date)), ]
d_sorted
# id date amount
# a 2013-01-09 561
# a 2013-03-20 754
# a 2013-09-16 139
# b 2013-04-27 896
# b 2013-10-04 128
# b 2013-11-21 143
# c 2013-02-23 858
# c 2013-04-16 266
# c 2013-07-18 234
# c 2013-08-22 414
# c 2013-10-18 122
# c 2013-11-26 443
# d 2013-09-10 415
# d 2013-09-10 799
# e 2013-03-26 375
# e 2013-06-24 207
# e 2013-08-05 466
# e 2013-08-28 153
# e 2013-12-18 46
# e 2013-12-29 369
感谢您的帮助。
答案 0 :(得分:2)
我认为下面的做法已经尽可能接近原图。
data.table
# With data.table this can be simpler.
# Draws one horizontal lane per id using base graphics: a circle at each id's
# earliest date, a cross at every later date, and an arrow from the earliest
# date to the right edge of the plot.
library(data.table)

D <- data.table(d)
# Make `id` an explicit factor. Since R 4.0 data.frame() keeps strings as
# character, and the plotting calls below need the integer codes of the ids.
D[, id := factor(id)]

# Right edge is padded by 10 days so the arrow heads end past the last point.
MaxDate <- D[, max(date)] + 10
MinDate <- D[, min(date)]

# Attach each id's earliest date to every one of its rows.
D2 <- D[, list(date, firstdate = min(date)), by = id]
id_levels <- levels(D2$id)

# Empty canvas. xlim is set explicitly so that MaxDate is always inside the
# plot region instead of relying on the default axis padding.
plot(
  D2$date, as.integer(D2$id),
  type = "n", bty = "n", axes = FALSE, xlab = "", ylab = "",
  xlim = as.numeric(c(MinDate, MaxDate))
)

first_rows <- D2[date == firstdate]
later_rows <- D2[date != firstdate]

points(later_rows$date, as.integer(later_rows$id), pch = 4)
points(first_rows$date, as.integer(first_rows$id))
arrows(
  x0 = first_rows$date, y0 = as.integer(first_rows$id),
  x1 = MaxDate, length = 0.1
)

# Vertical reference lines near the right edge and at the first date.
abline(v = MaxDate - 5)
abline(v = MinDate)

# y axis: one label per id, placed at that id's integer code.
axis(
  side = 2, tick = FALSE, at = seq_along(id_levels),
  labels = paste0("ID = ", id_levels), las = 2
)
# x axis: label only the two ends of the observation window.
axis(
  side = 1, tick = FALSE, at = c(MinDate, MaxDate),
  labels = strftime(c(MinDate, MaxDate), format = "Week %W, %Y")
)
答案 1 :(得分:0)
是的,(在一定程度上)可以做到。下面的代码可以作为起点。
# Load packages. library() stops with an error if a package is missing,
# whereas require() would only return FALSE and fail later.
library(data.table)
library(ggplot2)

# Prepare data for plotting.
dt <- data.table(d_sorted, key = "id")

# first = 1 on each id's earliest date, 0 on every other row.
dt[, first := as.numeric(min(date) == date), by = id]

# Append one synthetic row per id, 5 days after day_end, flagged first = 2.
# It gives every line a common right-hand end point with its own marker.
dt <- rbindlist(list(
  dt,
  data.table(
    id = unique(dt$id),
    date = as.Date(day_end) + 5,
    amount = NA_integer_,
    first = 2
  )
))

# Plot. Many things can be changed here, depending on what must be reproduced
# exactly as in the original and where some differences are acceptable.
ggplot(dt, aes(date, id)) +
  geom_point(aes(shape = factor(first)), size = 3) +
  # guide = "none" replaces the deprecated guide = FALSE.
  scale_shape_manual(
    values = c("1" = 5, "0" = 4, "2" = 17),
    guide = "none"
  ) +
  scale_x_date(limits = c(as.Date(day_start), as.Date(day_end) + 5)) +
  geom_line()