我有一些时间序列数据,我在ggplot2中拟合黄土曲线,如附图所示。数据采用“S”曲线的形状。我真正需要知道的是数据开始趋于平稳的日期,它看起来正好在'550'或'600'时间
是否有某种定量方式可以在图表中标出?
指向数据集的链接:http://dl.dropbox.com/u/75403/stover_data.txt
数据集的dput()
:
structure(list(date = c(211L, 213L, 215L, 217L, 218L, 221L, 222L,
223L, 224L, 225L, 226L, 228L, 229L, 230L, 231L, 232L, 233L, 234L,
235L, 236L, 237L, 238L, 239L, 240L, 241L, 242L, 244L, 246L, 247L,
248L, 249L, 250L, 251L, 253L, 254L, 255L, 256L, 258L, 259L, 260L,
261L, 262L, 263L, 264L, 265L, 266L, 267L, 268L, 269L, 270L, 271L,
272L, 273L, 274L, 275L, 276L, 277L, 278L, 279L, 281L, 282L, 283L,
285L, 286L, 287L, 288L, 290L, 291L, 292L, 293L, 294L, 295L, 296L,
297L, 298L, 299L, 300L, 301L, 302L, 304L, 305L, 306L, 307L, 308L,
309L, 310L, 311L, 312L, 313L, 314L, 315L, 316L, 317L, 318L, 319L,
320L, 321L, 322L, 323L, 324L, 325L, 326L, 327L, 328L, 329L, 330L,
331L, 332L, 333L, 334L, 335L, 336L, 337L, 338L, 339L, 340L, 341L,
342L, 343L, 344L, 345L, 346L, 347L, 348L, 349L, 350L, 351L, 352L,
353L, 354L, 355L, 356L, 357L, 358L, 359L, 360L, 361L, 362L, 363L,
364L, 365L, 366L, 367L, 368L, 369L, 370L, 371L, 372L, 373L, 374L,
375L, 376L, 377L, 378L, 379L, 380L, 381L, 382L, 383L, 384L, 385L,
386L, 387L, 388L, 389L, 390L, 391L, 392L, 393L, 394L, 395L, 396L,
397L, 398L, 399L, 400L, 402L, 404L, 405L, 406L, 407L, 408L, 410L,
411L, 413L, 414L, 415L, 416L, 418L, 419L, 420L, 421L, 422L, 423L,
424L, 425L, 426L, 427L, 428L, 429L, 430L, 431L, 432L, 433L, 434L,
435L, 436L, 437L, 438L, 439L, 440L, 441L, 442L, 443L, 444L, 445L,
446L, 447L, 448L, 449L, 450L, 451L, 452L, 453L, 455L, 456L, 457L,
458L, 459L, 460L, 461L, 462L, 463L, 464L, 465L, 466L, 467L, 468L,
469L, 470L, 471L, 472L, 473L, 474L, 475L, 476L, 477L, 478L, 479L,
480L, 481L, 482L, 483L, 484L, 485L, 486L, 487L, 488L, 489L, 490L,
491L, 492L, 493L, 494L, 495L, 496L, 497L, 498L, 499L, 500L, 501L,
502L, 503L, 504L, 505L, 506L, 507L, 508L, 509L, 510L, 511L, 512L,
513L, 514L, 515L, 516L, 517L, 518L, 519L, 520L, 521L, 522L, 523L,
524L, 527L, 528L, 529L, 530L, 531L, 532L, 533L, 534L, 535L, 536L,
537L, 538L, 539L, 540L, 541L, 544L, 545L, 546L, 547L, 548L, 549L,
550L, 551L, 552L, 553L, 554L, 555L, 556L, 557L, 558L, 559L, 560L,
561L, 562L, 563L, 564L, 565L, 566L, 567L, 568L, 569L, 570L, 571L,
572L, 573L, 574L, 575L, 576L, 577L, 578L, 579L, 580L, 581L, 582L,
583L, 587L, 588L, 589L, 590L, 591L, 592L, 593L, 594L, 595L, 596L,
597L, 598L, 599L, 600L, 601L, 602L, 603L, 604L, 605L, 606L, 607L,
608L, 609L, 610L, 611L, 612L, 613L, 614L, 615L, 616L, 617L, 618L,
619L, 620L, 621L, 622L, 623L, 624L, 625L, 626L, 627L, 628L, 629L,
630L, 631L, 632L, 634L, 635L, 636L, 637L, 638L, 639L, 640L, 641L,
642L, 643L, 644L, 645L, 646L, 647L, 648L, 649L, 650L, 651L, 652L,
653L, 654L, 655L, 656L, 657L, 658L, 659L, 660L, 661L, 662L, 663L,
664L, 665L, 666L, 667L, 668L, 669L, 670L, 671L, 672L, 673L, 674L,
675L, 676L, 677L, 678L, 679L, 680L, 681L, 684L, 685L, 686L, 687L,
688L, 689L, 690L, 691L, 692L, 693L, 694L, 695L, 696L, 697L, 698L,
699L, 700L, 701L, 702L, 703L, 704L, 705L, 706L, 707L, 708L, 709L,
710L, 711L, 712L, 713L, 714L, 715L, 716L, 717L, 718L, 719L, 720L,
721L, 722L, 723L, 724L, 725L, 726L, 727L, 728L, 729L, 730L, 731L,
732L, 733L, 734L, 735L, 736L, 737L, 738L, 739L, 740L, 741L, 742L,
743L, 744L, 745L, 746L, 747L, 748L, 749L, 750L, 751L, 752L, 753L,
754L, 755L, 756L, 757L, 758L, 759L, 760L, 761L, 762L, 763L, 764L,
765L, 766L, 767L, 768L, 769L, 770L, 771L, 772L, 773L, 774L, 775L,
776L, 777L, 778L, 781L, 782L, 783L, 784L, 785L, 786L, 787L, 788L,
789L, 790L, 791L, 792L, 793L, 794L, 795L, 796L, 797L, 798L, 799L,
800L, 801L, 802L, 803L, 804L, 805L, 806L, 807L, 808L, 809L, 810L,
811L, 812L, 813L, 814L, 815L, 816L, 817L, 818L, 819L, 820L, 821L,
822L, 823L, 824L, 825L, 826L, 827L, 828L, 829L, 830L, 831L, 832L,
833L, 834L, 835L, 836L, 837L, 838L, 839L, 840L, 841L), org_count = c(2L,
1L, 3L, 1L, 1L, 1L, 2L, 1L, 1L, 3L, 2L, 5L, 3L, 2L, 1L, 4L, 1L,
1L, 10L, 10L, 4L, 5L, 4L, 1L, 2L, 2L, 1L, 1L, 3L, 1L, 1L, 2L,
1L, 3L, 6L, 4L, 2L, 1L, 3L, 1L, 2L, 4L, 4L, 6L, 3L, 2L, 6L, 12L,
13L, 14L, 8L, 7L, 5L, 11L, 11L, 1L, 40L, 13L, 1L, 2L, 4L, 2L,
5L, 2L, 1L, 2L, 3L, 5L, 1L, 3L, 4L, 1L, 4L, 7L, 12L, 3L, 3L,
2L, 2L, 2L, 2L, 2L, 3L, 4L, 2L, 5L, 6L, 4L, 5L, 6L, 3L, 6L, 4L,
16L, 79L, 61L, 31L, 43L, 40L, 38L, 25L, 22L, 29L, 22L, 5L, 6L,
11L, 6L, 6L, 8L, 7L, 4L, 7L, 11L, 4L, 18L, 10L, 13L, 10L, 8L,
12L, 14L, 11L, 22L, 13L, 16L, 16L, 6L, 5L, 11L, 17L, 11L, 11L,
16L, 15L, 13L, 16L, 15L, 12L, 16L, 14L, 9L, 15L, 18L, 20L, 13L,
15L, 21L, 16L, 6L, 22L, 20L, 13L, 19L, 15L, 23L, 19L, 18L, 21L,
21L, 12L, 15L, 41L, 26L, 14L, 12L, 11L, 11L, 9L, 9L, 8L, 7L,
5L, 2L, 7L, 6L, 2L, 3L, 4L, 2L, 2L, 1L, 7L, 3L, 3L, 4L, 2L, 3L,
1L, 2L, 1L, 2L, 2L, 2L, 6L, 5L, 7L, 8L, 6L, 5L, 8L, 6L, 5L, 5L,
4L, 4L, 8L, 5L, 3L, 6L, 6L, 6L, 6L, 5L, 6L, 4L, 1L, 4L, 2L, 5L,
1L, 2L, 1L, 1L, 1L, 2L, 3L, 5L, 1L, 1L, 3L, 3L, 4L, 3L, 4L, 6L,
6L, 1L, 2L, 3L, 6L, 4L, 7L, 17L, 6L, 5L, 2L, 4L, 6L, 8L, 1L,
3L, 2L, 4L, 4L, 2L, 3L, 4L, 3L, 3L, 7L, 9L, 6L, 14L, 12L, 12L,
6L, 15L, 33L, 19L, 13L, 17L, 12L, 16L, 10L, 7L, 7L, 6L, 20L,
20L, 8L, 14L, 9L, 22L, 21L, 6L, 6L, 8L, 54L, 44L, 22L, 21L, 14L,
13L, 64L, 34L, 26L, 21L, 61L, 43L, 47L, 42L, 37L, 57L, 46L, 38L,
33L, 32L, 51L, 76L, 36L, 31L, 45L, 35L, 27L, 17L, 17L, 12L, 7L,
77L, 69L, 18L, 28L, 37L, 35L, 40L, 47L, 36L, 37L, 33L, 17L, 24L,
13L, 19L, 28L, 22L, 27L, 49L, 37L, 25L, 30L, 35L, 20L, 16L, 20L,
10L, 15L, 67L, 35L, 32L, 28L, 48L, 66L, 76L, 68L, 38L, 16L, 18L,
37L, 29L, 37L, 53L, 31L, 30L, 20L, 48L, 36L, 35L, 31L, 33L, 16L,
13L, 32L, 56L, 47L, 32L, 39L, 20L, 27L, 53L, 62L, 60L, 49L, 41L,
17L, 25L, 26L, 42L, 33L, 48L, 34L, 25L, 24L, 51L, 31L, 44L, 37L,
27L, 17L, 35L, 32L, 34L, 28L, 28L, 28L, 28L, 53L, 48L, 58L, 49L,
25L, 25L, 34L, 33L, 63L, 75L, 112L, 74L, 29L, 36L, 36L, 42L,
42L, 44L, 49L, 16L, 24L, 27L, 47L, 40L, 37L, 33L, 13L, 25L, 31L,
45L, 40L, 53L, 51L, 30L, 41L, 43L, 60L, 46L, 39L, 24L, 39L, 48L,
59L, 43L, 71L, 31L, 21L, 37L, 45L, 41L, 45L, 34L, 19L, 19L, 25L,
45L, 40L, 28L, 33L, 19L, 25L, 25L, 31L, 25L, 29L, 31L, 30L, 27L,
40L, 31L, 25L, 42L, 29L, 18L, 11L, 27L, 34L, 35L, 59L, 32L, 23L,
22L, 29L, 38L, 39L, 35L, 47L, 21L, 16L, 33L, 22L, 15L, 18L, 16L,
20L, 16L, 36L, 44L, 58L, 35L, 21L, 20L, 14L, 55L, 34L, 30L, 40L,
27L, 34L, 31L, 47L, 53L, 42L, 59L, 55L, 41L, 43L, 29L, 26L, 32L,
40L, 33L, 28L, 27L, 47L, 40L, 52L, 48L, 58L, 38L, 35L, 29L, 37L,
19L, 19L, 22L, 15L, 16L, 21L, 31L, 25L, 31L, 23L, 32L, 30L, 80L,
45L, 49L, 32L, 18L, 29L, 35L, 23L, 27L, 21L, 21L, 29L, 43L, 106L,
58L, 117L, 49L, 28L, 24L, 43L, 49L, 34L, 23L, 28L, 16L, 21L,
45L, 37L, 29L, 32L, 26L, 16L, 18L, 26L, 24L, 21L, 18L, 16L, 23L,
10L, 19L, 24L, 29L, 11L, 26L, 15L, 14L, 19L)), .Names = c("date",
"org_count"), class = "data.frame", row.names = c(NA, -599L))
代码:
> p<-qplot(date,org_count, data=christi)
> p+stat_smooth(method="loess",size=1.5)
答案 0 :(得分:6)
如果要求确定曲线最大点(即平坦)的方法,则与找到线的斜率最大值(来自基本微积分)的点相同。 / p>
首先,阅读您的数据:
christi <- read.table("http://dl.dropbox.com/u/75403/stover_data.txt", sep="\t", header=TRUE)
接下来,使用loess
来适应平滑模型:
fit <- loess(org_count~date, data=christi)
然后,预测x值范围内的值(使用predict.loess
),确定斜率(diff
足够接近'),然后找到
x <- 200:800
px <- predict(fit, newdata=x)
px1 <- diff(px)
which.max(px1)
[1] 367
由于x的起始值为200,这意味着曲线在位置200+367=567
处是平的。
如果你想绘制这个:
par(mfrow=c(1, 2))
plot(x, px, main="loess model")
plot(x[-1], px1, main="diff(loess model)")
abline(v=567, col="red")
答案 1 :(得分:4)
这完全取决于“数据开始平稳的地方”的含义。你需要把它变成数学。根据您使用的带宽,LOESS曲线可能非常颠簸。您可能希望修改标记为“A行”的注释下方的行,以指定您的意思。例如,您可能希望查看的不仅仅是前一个值。例如,您可以查看前5个the_diff
值的总和。
library(ggplot2)
christi <- read.table("stover_data.txt",header=TRUE)
the_fit = loess(org_count ~ date, data=christi)
pred = predict(the_fit, christi, se=FALSE) #could change data with larger grid
with_loess <- cbind(christi,pred)
p<-qplot(date,org_count, data=christi)
the_plot <- p+stat_smooth(method="loess",size=1.5)
the_diff <- diff(with_loess$pred)
tolerance <- .1
#line A: the following line is what you want to modify.
vline <- min(with_loess$date[the_diff < -tolerance])
new_plot <- the_plot + geom_vline(xintercept=vline)
例如,您可以执行以下操作(从the_diff
行开始替换上面代码的最后一部分)
the_diff <- diff(with_loess$pred, lag=20)
tolerance <- 1
vline <- min(with_loess$date[the_diff < -tolerance])
new_plot <- the_plot + geom_vline(xintercept=vline)
另请注意,您可能希望根据“开始平稳”的含义来移动the_diff
向量(即将来它会平稳,或者它已经开始趋于平稳,等等)。还要记住the_diff
的{{1}}参数的长度比数据集的长度短。