R - 根据数据框中的列值对数据行取子集

时间:2014-10-16 22:25:51

标签: r plot extract subset

我想绘制如下这样的图(其中 C 表示列):对于所有相同的 C1,绘制 C4 对 C2 的图;对于所有相同的 C2,绘制 C4 对 C1 的图。相关的数据框如下:

    C1          C2  C3  C4
1   2012-12-28  0   NA  10773
2   2012-12-28  5   NA  34112
3   2012-12-28  10  NA  30901
4   2012-12-28  0   NA  12421
5   2012-12-30  0   NA  3925
6   2012-12-30  5   NA  17436
7   2012-12-30  10  NA  13717
8   2012-12-30  15  NA  36708
9   2012-12-30  20  NA  28408
10  2012-12-30  NA  NA  2880
11  2013-01-02  0   -13.89  9972
12  2013-01-02  5   -13.89  10576
13  2013-01-02  10  -13.89  33280
14  2013-01-02  15  -13.89  28667
15  2013-01-02  20  -13.89  21104
16  2013-01-02  25  -13.89  24771
17  2013-01-02  NA  NA  22
18  2013-01-05  0   -3.80   20727
19  2013-01-05  5   -3.80   2033
20  2013-01-05  10  -3.80   16045
21  2013-01-05  15  -3.80   12074
22  2013-01-05  20  -3.80   10095
23  2013-01-05  NA  NA  32693
24  2013-01-08  0   -1.70   19579
25  2013-01-08  5   -1.70   20200
26  2013-01-08  10  -1.70   12263
27  2013-01-08  15  -1.70   28797
28  2013-01-08  20  -1.70   23963
29  2013-01-11  0   -2.30   26525
30  2013-01-11  5   -2.30   21472
31  2013-01-11  10  -2.30   9633
32  2013-01-11  15  -2.30   27849
33  2013-01-11  20  -2.30   23950
34  2013-01-17  0   1.40    16271
35  2013-01-17  5   1.40    18581
36  2013-01-19  0   0.10    5910
37  2013-01-19  5   0.10    16890
38  2013-01-19  10  0.10    13078
39  2013-01-19  NA  NA  55
40  2013-01-23  0   -9.20   15048
41  2013-01-23  6   -9.20   20792
42  2013-01-26  0   NA  21649
43  2013-01-26  6   NA  24655
44  2013-01-29  0   0.10    9100
45  2013-01-29  5   0.10    27514
46  2013-01-29  10  0.10    19392
47  2013-01-29  15  0.10    21720
48  2013-01-29  NA  0.10    112
49  2013-02-11  0   0.40    13619
50  2013-02-11  5   0.40    2748
51  2013-02-11  10  0.40    1290
52  2013-02-11  15  0.40    762
53  2013-02-11  20  0.40    1125
54  2013-02-11  25  0.40    1709
55  2013-02-11  30  0.40    29459
56  2013-02-11  35  0.40    106474
57  2013-02-13  0   1.30    3355
58  2013-02-13  5   1.30    970
59  2013-02-13  10  1.30    2240
60  2013-02-13  15  1.30    35871
61  2013-02-18  0   -0.60   8564
62  2013-02-20  0   -1.20   12399
63  2013-02-26  0   0.30    2985
64  2013-02-26  5   0.30    9891
65  2013-03-01  0   0.90    5221
66  2013-03-01  5   0.90    9736
67  2013-03-05  0   0.60    3192
68  2013-03-05  5   0.60    4243
69  2013-03-09  0   0.10    45138
70  2013-03-09  5   0.10    55534
71  2013-03-12  0   1.40    7278
72  2013-03-12  NA  NA  45
73  2013-03-15  0   0.30    2447
74  2013-03-15  5   0.30    2690
75  2013-03-18  0   -2.30   3008
76  2013-03-22  0   -0.90   11411
77  2013-03-22  5   -0.90   NA
78  2013-03-22  10  -0.90   17675
79  2013-03-22  NA  NA  47
80  2013-03-25  0   1.20    9802
81  2013-03-25  5   1.20    15790

这里还有其他帖子讨论时间序列取子集,以及通过 merge/match/paste 取子集,但我在尝试按照那些说明操作时,似乎没有抓住要点。

最终目标是针对每个 C1 绘制 C4 对 C2 的关系图;同样,针对每个 C2(C2 = 0、C2 = 5,依此类推)绘制 C4 对 C1 的关系图。我知道有一些重复的 C1 和 C2 组合,但这些组合对应的 C4 值可以取平均。这些图我自己可以画出来,我只需要知道如何以这种方式对数据取子集。也许用这些子集创建一个新的 data.frame() 是最简单的办法?提前谢谢。

1 个答案:

答案 0 :(得分:1)

使用 ggplot2 绘制子集相对容易。首先,您需要把数据从"宽"格式重塑为"长"格式,从而创建一个新的分类变量,其可能取值为 C4 和 C5。

library(reshape2)
library(ggplot2)

# Starting with the data you posted in a data frame called "dat".
# NOTE(review): this snippet expects C2 to hold dates, C3 to be the grouping
# variable and C4/C5 to be the measured columns; the table posted in the
# question has dates in C1 and no C5 -- confirm the column names before running.

# Convert C2 to Date class
dat$C2 <- as.Date(dat$C2)

# Reshape data to long format: every column that is not listed in id.vars is
# stacked into a "variable" (column name) / "value" (measurement) pair.
# The argument is spelled out in full (id.vars) instead of relying on
# partial argument matching.
dat.m <- melt(dat, id.vars = c("C1", "C2", "C3"))

# Plot values of C4 and C5 vs. C2 with separate lines for each level of C3;
# one facet row per measured variable, each with its own y scale
ggplot(dat.m, aes(x = C2, y = value, group = C3, colour = as.factor(C3))) +
  geom_line() +
  geom_point() +
  facet_grid(variable ~ ., scales = "free_y")

对于 C3 的每个级别,C4 的折线都相同,因此它们彼此重叠。

enter image description here

您还可以为C3的每个级别设置单独的面板:

# One panel per (variable, C3 level) combination: facet rows are the measured
# variables, facet columns are the levels of C3
ggplot(dat.m, aes(x = C2, y = value, group = variable, colour = variable)) +
  geom_line() +
  geom_point() +
  facet_grid(variable ~ C3, scales = "free_y") +
  # Rotate the x-axis labels by 90 degrees
  theme(axis.text.x = element_text(angle = -90)) +
  # Hide the colour legend; "none" replaces the deprecated `colour = FALSE`
  guides(colour = "none")

enter image description here

下面是使用 base 图形系统获取单独图表的方法。以下代码使用了您更新后的列名:

# Remove rows with a missing C1, C2 or C4 once, up front, so that there will
# be a line through every point. If gaps in the lines are acceptable, restrict
# this filter to column C2 only. The result is stored under a new name so the
# original `dat` is left untouched.
# NOTE(review): assumes dat$C1 is already of class Date -- confirm.
dat_complete <- dat[complete.cases(dat[, c("C1", "C2", "C4")]), ]

# Use lapply to create a separate plot for each level of C2. The levels are
# taken from the filtered data, so every level has at least one point to draw.
# invisible() keeps the list of NULLs returned by plot() from being printed.
invisible(lapply(unique(dat_complete$C2), function(x) {
  # Rows belonging to the current value of C2
  cur <- dat_complete[dat_complete$C2 == x, ]

  # Create a plot of C4 vs. C1 for the current value of C2
  plot(cur$C1, cur$C4,
       type = "o", pch = 16,
       xlab = paste0("C2=", x), ylab = "C4")
}))