如何根据条件组合从R中的数据框中获取行名?

时间:2014-07-17 21:21:53

标签: r

我在R中有一个数据框,每一行都有唯一的"X"和"Y"位置(类似于图像中的像素)。在处理过程中,该数据框被多次合并,现在行名(rownames)已被打乱。我需要第一个数据框中的原始行名才能继续处理数据。例如:

>head(dr1)
  X Y RA1_Mix1_25Apr14.cel_MEAN RA2_Mix1_25Apr14.cel_MEAN
1 0 0                      5707                      8141
2 1 0                       259                       318
3 2 0                      5005                      7749
4 3 0                       193                       253
5 4 0                       112                        94
6 5 0                       305                       237
  RA3_Mix1_25Apr14.cel_MEAN CTR1_Mix1_21Feb14.cel_MEAN
1                      8375                       7123
2                       281                        215
3                      7824                       6309
4                       183                        168
5                        61                         96
6                       177                        120
  CTR2_Mix1_21Feb14.cel_MEAN CTR3_Mix1_21Feb14.cel_MEAN
1                       8216                       6726
2                        382                        197
3                       7234                       7333
4                        248                        284
5                        100                         69
6                        259                        193
> # Add UnitNames to X and Y from the filtered scm file.
> dr1.1 <- merge(dr1, scm2, by=c("X", "Y"), all.x=TRUE)
>
> # Add annotation from the ann file pulled form scheme.
> dr1.2 <- merge(dr1.1, ann, all.x=TRUE)
> head(dr1.2)
  UNIT_ID   X    Y RA1_Mix1_25Apr14.cel_MEAN RA2_Mix1_25Apr14.cel_MEAN
1     -23 503 1054                        35                        30
2     -23 330  922                        41                        32
3     -23 332 1385                        41                        30
4     -23 348  178                        44                        36
5     -23 372 1135                        51                        35
6     -23 879 1585                        55                        35
  RA3_Mix1_25Apr14.cel_MEAN CTR1_Mix1_21Feb14.cel_MEAN
1                        41                         37
2                        31                         37
3                        30                         33
4                        37                         39
5                        40                         45
6                        28                         30
  CTR2_Mix1_21Feb14.cel_MEAN CTR3_Mix1_21Feb14.cel_MEAN ProbeLength Mask
1                         34                         41          25   -2
2                         37                         30          25   -2
3                         27                         31          25   -2
4                         39                         50          25   -2
5                         39                         39          25   -2
6                         35                         33          25   -2
  EXON_ID PROBESET_ID TranscriptClusterID GeneName GeneSymbol GeneAccession
1     -23         -23                  NA     <NA>       <NA>          <NA>
2     -23         -23                  NA     <NA>       <NA>          <NA>
3     -23         -23                  NA     <NA>       <NA>          <NA>
4     -23         -23                  NA     <NA>       <NA>          <NA>
5     -23         -23                  NA     <NA>       <NA>          <NA>
6     -23         -23                  NA     <NA>       <NA>          <NA>
  EntrezID Chromosome Cytoband Start Stop Strand CrossHybridization
1       NA       <NA>     <NA>    NA   NA   <NA>                 NA
2       NA       <NA>     <NA>    NA   NA   <NA>                 NA
3       NA       <NA>     <NA>    NA   NA   <NA>                 NA
4       NA       <NA>     <NA>    NA   NA   <NA>                 NA
5       NA       <NA>     <NA>    NA   NA   <NA>                 NA
6       NA       <NA>     <NA>    NA   NA   <NA>                 NA
  ProbesetType
1         <NA>
2         <NA>
3         <NA>
4         <NA>
5         <NA>
6         <NA>
> erccs1_ln_rn <- c("AFFX-ERCC-00171_st", "AFFX-ERCC-00136_st", "AFFX-ERCC-00003_st", "AFFX-ERCC-00009_st", "AFFX-ERCC-00108_st", "AFFX-ERCC-00145_st", "AFFX-ERCC-00042_st", "AFFX-ERCC-00043_st", "AFFX-ERCC-00111_st", "AFFX-ERCC-00116_st", "AFFX-ERCC-00022_st", "AFFX-ERCC-00060_st", "AFFX-ERCC-00076_st", "AFFX-ERCC-00092_st", "AFFX-ERCC-00035_st", "AFFX-ERCC-00044_st", "AFFX-ERCC-00095_st", "AFFX-ERCC-00112_st", "AFFX-ERCC-00131_st", "AFFX-ERCC-00025_st", "AFFX-ERCC-00051_st", "AFFX-ERCC-00062_st", "AFFX-ERCC-00071_st", "AFFX-ERCC-00079_st", "AFFX-ERCC-00162_st", "AFFX-ERCC-00165_st", "AFFX-ERCC-00019_st", "AFFX-ERCC-00053_st", "AFFX-ERCC-00078_st", "AFFX-ERCC-00084_st", "AFFX-ERCC-00144_st", "AFFX-ERCC-00054_st", "AFFX-ERCC-00059_st", "AFFX-ERCC-00099_st", "AFFX-ERCC-00126_st", "AFFX-ERCC-00148_st", "AFFX-ERCC-00163_st", "AFFX-ERCC-00170_st", "AFFX-ERCC-00034_st", "AFFX-ERCC-00085_st", "AFFX-ERCC-00154_st", "AFFX-ERCC-00157_st", "AFFX-ERCC-00160_st")

> erccs1_linear <- dr1.2[dr1.2$GeneName %in% erccs1_ln_rn, ]
> head(erccs1_linear)
      UNIT_ID    X    Y RA1_Mix1_25Apr14.cel_MEAN RA2_Mix1_25Apr14.cel_MEAN
18090      76  991  123                       957                       595
18091      76 1362 1345                      6336                      4770
18092      76  846  745                      1443                       719
18093      76 1153 1419                       298                       163
18094      76  429 1242                     10413                      9262
18095      76  489  759                      5537                      4239
      RA3_Mix1_25Apr14.cel_MEAN CTR1_Mix1_21Feb14.cel_MEAN
18090                       611                       2959
18091                      4732                       8408
18092                       885                       2365
18093                       155                        785
18094                      9438                      13124
18095                      4133                       8646
      CTR2_Mix1_21Feb14.cel_MEAN CTR3_Mix1_21Feb14.cel_MEAN ProbeLength Mask
18090                       2205                       1202          25   32
18091                       7400                       5700          25   32
18092                       1703                        705          25   32
18093                        714                        310          25   32
18094                      12012                      10879          25   32
18095                       7842                       5544          25   32
      EXON_ID PROBESET_ID TranscriptClusterID           GeneName GeneSymbol
18090      76          76            17883608 AFFX-ERCC-00003_st       <NA>
18091      76          76            17883608 AFFX-ERCC-00003_st       <NA>
18092      76          76            17883608 AFFX-ERCC-00003_st       <NA>
18093      76          76            17883608 AFFX-ERCC-00003_st       <NA>
18094      76          76            17883608 AFFX-ERCC-00003_st       <NA>
18095      76          76            17883608 AFFX-ERCC-00003_st       <NA>
      GeneAccession EntrezID Chromosome Cytoband Start Stop Strand
18090      17883608       -1       <NA>     <NA>     0    0      ?
18091      17883608       -1       <NA>     <NA>     0    0      ?
18092      17883608       -1       <NA>     <NA>     0    0      ?
18093      17883608       -1       <NA>     <NA>     0    0      ?
18094      17883608       -1       <NA>     <NA>     0    0      ?
18095      17883608       -1       <NA>     <NA>     0    0      ?
      CrossHybridization  ProbesetType
18090                  0 control->affx
18091                  0 control->affx
18092                  0 control->affx
18093                  0 control->affx
18094                  0 control->affx
18095                  0 control->affx

所以现在我只需要从数据框 'erccs1_linear' 中取出 'X' 和 'Y' 的值,并据此从 dr1 中获取对应行名(rownames)的向量,以便进行进一步的数据分析。非常感谢任何建议或反馈!

0 个答案:

没有答案