如何缩短 dput 输出的长度

时间:2013-11-04 11:57:43

标签: r

在我上一个问题中,有人指出,作为可重复示例的一部分,数据量少一些会更易于阅读和理解。于是在重新提问时,我试图用 dput(head(data)) 来缩短数据,但得到的结果和 dput(data) 一样;dput(data[1:6, ]) 甚至 dput(data)[1:6, ] 也是如此(在最后一种情况下,在完整的 dput 输出之后还会额外打印出数据的前 6 行)。

有一种简单的方法吗?在dput选项中,我没有找到任何内容,必须有一个解决方案,以避免手动删除我不想显示的内容。

以下是整个数据的 dput 输出:

>dput(data)
structure(list(GOterm = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 
21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 
34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L, 46L, 
47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 
60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 71L, 72L, 76L, 77L, 78L, 
83L, 87L, 88L, 89L, 93L, 96L, 97L, 101L, 103L, 104L, 105L, 106L, 
109L, 111L, 113L, 114L, 116L), .Label = c("GO:0000746", "GO:0000910", 
"GO:0006091", "GO:0006259", "GO:0006351", "GO:0006399", "GO:0006412", 
"GO:0006457", "GO:0006464", "GO:0006468", "GO:0006486", "GO:0006520", 
"GO:0006725", "GO:0006766", "GO:0006810", "GO:0006811", "GO:0006839", 
"GO:0006897", "GO:0006950", "GO:0006970", "GO:0006974", "GO:0006979", 
"GO:0006986", "GO:0006997", "GO:0007005", "GO:0007010", "GO:0007029", 
"GO:0007031", "GO:0007033", "GO:0007034", "GO:0007049", "GO:0007059", 
"GO:0007114", "GO:0007124", "GO:0007126", "GO:0007165", "GO:0009408", 
"GO:0009409", "GO:0015031", "GO:0016044", "GO:0016050", "GO:0016070", 
"GO:0016071", "GO:0016072", "GO:0016192", "GO:0016567", "GO:0016568", 
"GO:0016570", "GO:0019725", "GO:0030435", "GO:0031505", "GO:0032196", 
"GO:0032989", "GO:0042221", "GO:0042254", "GO:0042594", "GO:0043543", 
"GO:0044255", "GO:0044257", "GO:0044262", "GO:0045333", "GO:0046483", 
"GO:0048193", "GO:0051169", "GO:0051186", "GO:0051276", "GO:0070271", 
"GO:0000278", "GO:0000902", "GO:0002181", "GO:0005975", "GO:0006325", 
"GO:0006353", "GO:0006360", "GO:0006366", "GO:0006383", "GO:0006397", 
"GO:0006401", "GO:0006414", "GO:0006418", "GO:0006470", "GO:0006605", 
"GO:0006629", "GO:0006865", "GO:0006869", "GO:0006873", "GO:0006887", 
"GO:0006914", "GO:0008033", "GO:0008213", "GO:0008643", "GO:0009311", 
"GO:0009451", "GO:0015931", "GO:0016197", "GO:0023052", "GO:0031399", 
"GO:0032543", "GO:0042255", "GO:0042273", "GO:0042274", "GO:0043144", 
"GO:0043934", "GO:0045454", "GO:0051052", "GO:0051321", "GO:0051603", 
"GO:0051604", "GO:0051726", "GO:0055086", "GO:0070647", "GO:0000054", 
"GO:0001403", "GO:0006352", "GO:0006354", "GO:0006364", "GO:0006413", 
"GO:0006417", "GO:0006497", "GO:0008380", "GO:0009072", "GO:0051049", 
"GO:0061025", "GO:0071554"), class = "factor"), GOdesc = structure(c(16L, 
17L, 23L, 19L, 58L, 62L, 59L, 37L, 39L, 40L, 38L, 3L, 4L, 67L, 
60L, 27L, 30L, 20L, 51L, 48L, 46L, 49L, 52L, 33L, 29L, 18L, 21L, 
34L, 64L, 63L, 2L, 14L, 1L, 43L, 28L, 56L, 47L, 45L, 41L, 9L, 
65L, 54L, 31L, 55L, 66L, 42L, 12L, 26L, 7L, 57L, 22L, 61L, 6L, 
44L, 53L, 50L, 35L, 8L, 10L, 5L, 11L, 25L, 24L, 32L, 15L, 13L, 
36L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA), .Label = c("cell budding", "cell cycle", 
"cellular amino acid and metabolic process", "cellular aromatic compound metabolic process", 
"cellular carbohydrate metabolic process", "cellular component morphogenesis", 
"cellular homeostasis", "cellular lipid metabolic process", "cellular membrane organization", 
"cellular protein catabolic process", "cellular respiration", 
"chromatin modification", "chromosome organization and biogenesis", 
"chromosome segregation", "cofactor metabolic process", "conjugation", 
"cytokinesis", "cytoskeleton organization and biogenesis", "DNA metabolic process", 
"endocytosis", "ER organization and biogenesis", "fungal-type cell wall organization", 
"generation of precursor metabolites and energy", "golgi vesicle transport", 
"heterocycle metabolic process", "histone modification", "ion transport", 
"meiosis", "mitchondrion organization", "mitochondrial transport", 
"mRNA metabolic process", "nuclear transport", "nucleus organization", 
"peroxisome organization", "protein acylation", "protein complex biogenesis", 
"protein folding", "protein glycosylation", "protein modification process", 
"protein phosphorylation", "protein transport", "protein ubiquitination", 
"pseudohyphal growth", "response to chemical stimulus", "response to cold", 
"response to DNA damage stimulus", "response to heat", "response to osmotic stress", 
"response to oxidative stress", "response to starvation", "response to stress", 
"response to unfolded protein", "ribosome biogenesis", "RNA metabolic process", 
"rRNA metabolic process", "signal transduction", "sporulation resulting in formation of a cellular spore", 
"transcription", "translation", "transport", "transposition", 
"tRNA metabolic process", "vacuolar transport", "vacuole organizations", 
"vesicle organization", "vesicle-mediated transport", "vitamin metabolic process"
), class = "factor"), GSA_p33_SC = c(NA, -1, NA, NA, NA, NA, 
NA, 1, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, -1, NA, NA, 
-1, -1, NA, NA, NA, NA, NA, -1, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA), GSA_p33_X33 = c(NA, NA, -1, NA, NA, NA, NA, NA, 
NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, 1, NA, NA, NA, NA, NA, NA, 1, 1, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, 
NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, 
NA), GSA_p38_SC = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
1, NA, NA, NA, -1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, 
NA, NA, NA, -1, NA, NA, NA, -1, NA, NA, NA, NA, NA, NA), GSA_p38_X33 = c(NA, 
1, NA, NA, NA, NA, NA, 1, NA, NA, 1, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, 1, 
1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, -1, NA, NA, 1, NA, NA), GSA_p52_SC = c(NA, NA, NA, NA, 
NA, NA, NA, 1, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA, NA, 
-1, -1, NA, NA, NA), GSA_p52_X33 = c(NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, 
NA, -1, NA, 1, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA, -1, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, -1, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, -1, NA, 
NA, NA, NA), GSA_p64_SC = c(NA, NA, NA, NA, NA, NA, NA, 1, NA, 
NA, 1, NA, NA, -1, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, 
1, NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, NA, NA, -1, NA, -1, -1, 
NA, NA, NA, -1, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA, -1, 1, 
-1, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, NA, NA, NA, NA
), GSA_p64_X33 = c(1, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, 1, NA, NA, 
NA, NA, NA, NA, -1, 1, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, NA, NA, 
NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, -1, -1), GSA_SC_X33 = c(NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -1, NA, 
NA, NA, NA, NA, NA, NA, -1, NA, 1, NA, NA, NA, NA, NA, NA, 1, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1, NA, 
NA, NA, NA, NA, NA, NA, NA, 1, NA, NA, NA, NA, NA, NA, NA, NA, 
1, NA, NA, 1, -1, NA, -1, NA, NA, NA, -1, 1, NA, NA, NA, NA, 
NA, -1, NA, NA, NA, NA, NA, NA)), .Names = c("GOterm", "GOdesc", 
"GSA_p33_SC", "GSA_p33_X33", "GSA_p38_SC", "GSA_p38_X33", "GSA_p52_SC", 
"GSA_p52_X33", "GSA_p64_SC", "GSA_p64_X33", "GSA_SC_X33"), row.names = c(NA, 
-89L), class = "data.frame")

缩短的版本可能如下:

structure(list(GOterm = structure(c(1L, 2L, 3L, 4L, 5L, 6L),
.Label = c("GO:0000746", "GO:0000910", "GO:0006091", "GO:0006259",
 "GO:0006351", "GO:0006399"), class = "factor"),
 GOdesc = structure(c(16L,17L, 23L, 19L, 58L, 62L),
.Label = c("cell budding", "cell cycle", 
    "cellular amino acid and metabolic process", "cellular aromatic compound
 metabolic process", "cellular carbohydrate metabolic process", "cellular
component morphogenesis"), class = "factor"),
GSA_p33_SC = c(NA, -1, NA, NA, NA, NA),
GSA_p33_X33 = c(NA, NA, -1, NA, NA, NA), 
GSA_p38_SC = c(NA, NA, NA, NA, NA, NA), 
GSA_p38_X33 = c(NA, 1, NA, NA, NA, NA), 
GSA_p52_SC = c(NA, NA, NA, NA, NA, NA), 
GSA_p52_X33 = c(NA, NA, NA, NA, NA, NA),
GSA_p64_SC = c(NA, NA, NA, NA, NA, NA),
GSA_p64_X33 = c(1, NA, NA, NA, NA, NA),
GSA_SC_X33 = c(NA, NA, NA, NA, NA, NA)),
.Names = c("GOterm", "GOdesc", 
    "GSA_p33_SC", "GSA_p33_X33", "GSA_p38_SC", "GSA_p38_X33", "GSA_p52_SC", 
    "GSA_p52_X33", "GSA_p64_SC", "GSA_p64_X33", "GSA_SC_X33"), row.names = c(NA, 
    -6L), class = "data.frame"))

2 个答案:

答案 0 :(得分:3)

所有这些多余的内容都来自您的 factor 水平(levels)。如果您确定在删除这些未使用的水平后问题仍然可以重现,那么您可以考虑使用(没错,就是它)droplevels:

> dput(droplevels(head(data)))
structure(list(GOterm = structure(1:6, .Label = c("GO:0000746", 
"GO:0000910", "GO:0006091", "GO:0006259", "GO:0006351", "GO:0006399"
), class = "factor"), GOdesc = structure(c(1L, 2L, 4L, 3L, 5L, 
6L), .Label = c("conjugation", "cytokinesis", "DNA metabolic process", 
"generation of precursor metabolites and energy", "transcription", 
"tRNA metabolic process"), class = "factor"), GSA_p33_SC = c(NA, 
-1, NA, NA, NA, NA), GSA_p33_X33 = c(NA, NA, -1, NA, NA, NA), 
    GSA_p38_SC = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_), GSA_p38_X33 = c(NA, 1, NA, NA, NA, NA), GSA_p52_SC = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), GSA_p52_X33 = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), GSA_p64_SC = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), GSA_p64_X33 = c(1, 
    NA, NA, NA, NA, NA), GSA_SC_X33 = c(NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_)), .Names = c("GOterm", "GOdesc", 
"GSA_p33_SC", "GSA_p33_X33", "GSA_p38_SC", "GSA_p38_X33", "GSA_p52_SC", 
"GSA_p52_X33", "GSA_p64_SC", "GSA_p64_X33", "GSA_SC_X33"), row.names = c(NA, 
6L), class = "data.frame") 

以下示例更容易说明这一点:

x <- factor("A", levels = LETTERS)
x
# [1] A
# Levels: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
dput(x)
# structure(1L, .Label = c("A", "B", "C", "D", "E", "F", "G", "H", 
# "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", 
# "V", "W", "X", "Y", "Z"), class = "factor")
dput(droplevels(x))
# structure(1L, .Label = "A", class = "factor")

答案 1 :(得分:0)

缩短它的另一种方法是在 dput 之前将各列转换为 character。然后可以使用 as.data.frame 读回数据并保留因子级别。

首先取一个子集

> data2 <- data[sample(nrow(data), 4), ]

然后将各列转换为字符后再 dput

> d <- dput(lapply(data2, as.character))
structure(list(GOterm = c("GO:0000746", "GO:0070647", "GO:0006914", 
"GO:0007010"), GOdesc = c("conjugation", NA, NA, "cytoskeleton organization and biogenesis"
), GSA_p33_SC = c(NA_character_, NA_character_, NA_character_, 
NA_character_), GSA_p33_X33 = c(NA, NA, "1", "1"), GSA_p38_SC = c(NA_character_, 
NA_character_, NA_character_, NA_character_), GSA_p38_X33 = c(NA_character_, 
NA_character_, NA_character_, NA_character_), GSA_p52_SC = c(NA, 
"-1", NA, NA), GSA_p52_X33 = c(NA, NA, NA, "1"), GSA_p64_SC = c(NA, 
NA, NA, "1"), GSA_p64_X33 = c("1", NA, NA, NA), GSA_SC_X33 = c(NA, 
NA, NA, "1")), .Names = c("GOterm", "GOdesc", "GSA_p33_SC", "GSA_p33_X33", 
"GSA_p38_SC", "GSA_p38_X33", "GSA_p52_SC", "GSA_p52_X33", "GSA_p64_SC", 
"GSA_p64_X33", "GSA_SC_X33"))

回读

> as.data.frame(d)