基于键的行到列

时间:2017-10-26 09:36:49

标签: r

我有一个数据框:

<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<div class="wrapper">
<br />                         
  <div id="viewer2" class="viewer" style="width:800px; height:300px; position: absolute; z-index : 0;"></div>
  <canvas id="paper" width="800" height="300" style="border:1px solid #ccc;position: absolute; z-index : 1;"></canvas>           
  <br />
</div>
> head(bp_data)
event bp_no  sample chrom      bp       gene    feature type length          id fpkm
1   bp1 A373R11    2L 2425901 intergenic intergenic  INV    0.1        <NA>    0
1   bp2 A373R11    2L 2426025 intergenic intergenic  INV    0.1        <NA>    0
3   bp1 A373R11    2L 6694426        Tsp     intron  INV    0.1 FBgn0031850    0
3   bp2 A373R11    2L 6694566        Tsp     intron  INV    0.1 FBgn0031850    0
6   bp1 A373R11    2R 8387755       pdm3     intron  INV    0.2 FBgn0261588    0
6   bp2 A373R11    2R 8387927       pdm3     exon_2  INV    0.2 FBgn0261588    0

数据的 dput 输出如下:

structure(list(event = c(1L, 1L, 3L, 3L, 6L, 6L), bp_no = structure(c(1L, 2L, 1L, 2L, 1L, 2L), .Label = c("bp1", "bp2"), class = "factor"), sample = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("A373R11", "A373R13", "A373R3", "A373R5", "A373R9", "A512R19", "A512R21", "A512R23", "A573R25", "A573R27", "A573R29", "A573R31", "A573R33", "B241R35", "B241R37", "B241R39", "B241R41a", "B241R43", "B241R45", "B241R51", "B241R53", "B241R55", "B241R57", "B241R59", "B241R61", "B241R63", "HUM-1", "HUM-4", "HUM-7"), class = "factor"), chrom = structure(c(1L, 1L, 1L, 1L, 2L, 2L), .Label = c("2L", "2R", "3L", "3R", "X", "Y"), class = "factor"), bp = c(2425901L, 2426025L, 6694426L, 6694566L, 8387755L, 8387927L), gene = structure(c(67L, 67L, 114L, 114L, 92L, 92L), .Label = c("5-HT7", "Ankle2", "Arpc3B", "Atac3", "B4", "be", "bru3", "CalpB", "CanA1", "CG12081", "CG12535", "CG13024", "CG13991", "CG1632", "CG17211", "CG32121", "CG32191", "CG32447", "CG32549", "CG34356", "CG3520", "CG3655", "CG4116", "CG42238", "CG42321", "CG42404", "CG43707", "CG44838", "CG45002", "CG45263", "CG5004", "CG5535", "CG5910", "CG6707", "CG6907", "CG6959", "CG7720", "CG7878", "CG8213", "CG8216", "CG8861", "CG9416", "CG9821", "CG9837", "Cpr", "CR32773", "CR44173", "CR44181", "CR44602", "CR44886", "CR45161", "CR45814", "dco", "DIP-alpha", "DNApol-epsilon255", "dnc", "dpr13", "dpr8", "ed", "elg1", "Fim", "Frl", "heph", "Hers", "Hs6st", "Hsromega", "intergenic", "inv", "jp", "kirre", "kkv", "klar", "kuz", "Lim1", "lola", "magu", "mamo", "Map205", "mars", "mask", "mbl", "mnd", "Mnt", "mor", "N", "nAChRalpha3", "nAChRbeta1", "nahoda", "Nhe2", "nvy", "Octbeta3R", "pdm3", "Phax", "pico", "Pif1A", "Poxm", "pros", "Pzl", "rdx", "rhea", "RhoGEF3", "Rim2", "Scp1", "Shab", "Slc45-1", "sm", "Snoo", "Sox100B", "SPR", "Su(var)2-10", "Syn", "tefu", "Treh", "Tsp", "TwdlJ", "TwdlK", "Upf3", "Vps52", "w", "wcy", "wdb", "WDY", "Yeti"), class = "factor"), feature = structure(c(16L, 16L, 17L, 17L, 17L, 9L), .Label = 
c("3UTR", "5UTR", "CDS", "exon", "exon_1", "exon_12", "exon_15", "exon_17", "exon_2", "exon_3", "exon_4", "exon_5", "exon_6", "exon_7", "exon_9", "intergenic", "intron", "ncRNA"), class = "factor"), type = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("INV", "DEL", "TRA", "DUP", "BND", "TANDUP"), class = "factor"), length = c(0.1, 0.1, 0.1, 0.1, 0.2, 0.2), id = structure(c(NA, NA, 46L, 46L, 98L, 98L), .Label = c("FBgn0000038", "FBgn0000479", "FBgn0000547", "FBgn0001234", "FBgn0001269", "FBgn0001311", "FBgn0001316", "FBgn0002413", "FBgn0002645", "FBgn0002778", "FBgn0002783", "FBgn0003129", "FBgn0003435", "FBgn0003612", "FBgn0003748", "FBgn0003996", "FBgn0004573", "FBgn0004575", "FBgn0004595", "FBgn0004647", "FBgn0005636", "FBgn0010015", "FBgn0011224", "FBgn0015519", "FBgn0015623", "FBgn0020908", "FBgn0023215", "FBgn0023407", "FBgn0024238", "FBgn0024288", "FBgn0025866", "FBgn0026411", "FBgn0027492", "FBgn0028343", "FBgn0028369", "FBgn0029649", "FBgn0029657", "FBgn0029768", "FBgn0030027", "FBgn0030053", "FBgn0030812", "FBgn0031359", "FBgn0031710", "FBgn0031711", "FBgn0031785", "FBgn0031850", "FBgn0032129", "FBgn0032414", "FBgn0033358", "FBgn0033359", "FBgn0033380", "FBgn0033845", "FBgn0034286", "FBgn0034438", "FBgn0034797", "FBgn0034859", "FBgn0034923", "FBgn0035968", "FBgn0036058", "FBgn0036574", "FBgn0036665", "FBgn0036764", "FBgn0036993", "FBgn0037549", "FBgn0037635", "FBgn0037636", "FBgn0037676", "FBgn0037956", "FBgn0038652", "FBgn0038755", "FBgn0039439", "FBgn0039440", "FBgn0040297", "FBgn0040397", "FBgn0043884", "FBgn0045035", "FBgn0052121", "FBgn0052191", "FBgn0052343", "FBgn0052447", "FBgn0052529", "FBgn0052549", "FBgn0052594", "FBgn0052600", "FBgn0052773", "FBgn0052791", "FBgn0065032", "FBgn0085385", "FBgn0085450", "FBgn0250867", "FBgn0250910", "FBgn0259221", "FBgn0259823", "FBgn0259984", "FBgn0260442", "FBgn0260748", "FBgn0261015", "FBgn0261588", "FBgn0261811", "FBgn0262169", "FBgn0262593", "FBgn0263846", "FBgn0264001", "FBgn0264326", "FBgn0264493", 
"FBgn0264707", "FBgn0265062", "FBgn0265070", "FBgn0265487", "FBgn0265813", "FBgn0266101", "FBgn0266180", "FBgn0266354", "FBgn0266654", "FBgn0266801", "FBgn0267033", "FBgn0267398", "FBgn0267430", "FBgn0267449", "FBgn0267464", "FBgn0267795", "FBgn0283521"), class = "factor"), fpkm = c(0, 0, 0, 0, 0, 0)), .Names = c("event", "bp_no", "sample", "chrom", "bp", "gene", "feature", "type", "length", "id", "fpkm"), row.names = c(NA, 6L), class = "data.frame")

每个样本的每个事件都有两个 bp 值(bp1 和 bp2),这些值目前位于不同的行上,例如:

event bp_no  sample chrom      bp       gene    feature type length   id fpkm
1   bp1 A373R11    2L 2425901 intergenic intergenic  INV    0.1 <NA>    0
1   bp2 A373R11    2L 2426025 intergenic intergenic  INV    0.1 <NA>    0

我想将每个观察的 bp1 和 bp2 值组合成一个新的数据框,例如:

bp2

有人可以建议一种方法吗?

1 个答案:

答案 0 :(得分:2)

使用dplyr和tidyr

# Reshape bp_data from one row per breakpoint (bp1 / bp2) to one row per
# event, with each measured column split into a bp1_* and a bp2_* column.
#
# pivot_wider() replaces the superseded gather() + spread() pair. The old
# pipeline stacked integer, double and factor columns into a single `value`
# column, which coerced everything to character (with an "attributes are not
# identical across measure variables" warning) and left every widened column
# as character. Widening directly keeps each column's original type.
library(dplyr) # provides the %>% pipe
library(tidyr) # pivot_wider(); names_vary needs tidyr >= 1.2.0

bp_data %>%
  pivot_wider(
    # Columns that identify one output row.
    id_cols = c(event, sample, chrom),
    # bp1 / bp2 become the prefix of the new column names.
    names_from = bp_no,
    # Listed alphabetically so the resulting column order matches what
    # spread() used to produce (bp1_bp, bp1_feature, ..., bp2_type).
    values_from = c(bp, feature, fpkm, gene, id, length, type),
    names_glue = "{bp_no}_{.value}",
    # Keep all bp1_* columns together, followed by all bp2_* columns.
    names_vary = "slowest"
  )

# event  sample chrom  bp1_bp bp1_feature bp1_fpkm   bp1_gene      bp1_id bp1_length bp1_type  bp2_bp bp2_feature bp2_fpkm   bp2_gene      bp2_id bp2_length bp2_type
# 1     1 A373R11    2L 2425901  intergenic        0 intergenic        <NA>        0.1      INV 2426025  intergenic        0 intergenic        <NA>        0.1      INV
# 2     3 A373R11    2L 6694426      intron        0        Tsp FBgn0031850        0.1      INV 6694566      intron        0        Tsp FBgn0031850        0.1      INV
# 3     6 A373R11    2R 8387755      intron        0       pdm3 FBgn0261588        0.2      INV 8387927      exon_2        0       pdm3 FBgn0261588        0.2      INV