# One-letter codes of the 20 amino acids used to build the combinations.
S <- c(
  "G", "A", "L", "M", "F", "W", "K", "Q", "E", "S",
  "P", "V", "I", "C", "Y", "H", "R", "N", "D", "T"
)
#' Build every non-empty combination of the elements of `x`.
#'
#' Each combination is collapsed into a single string, so for
#' `c("A", "B", "C")` the result is
#' `c("A", "B", "C", "AB", "AC", "BC", "ABC")`.
#'
#' @param x A character vector of labels to combine.
#' @return A character vector: the elements of `x` first, followed by the
#'   combinations of size 2, 3, ..., `length(x)`, in `combn()` order.
allCombs <- function(x) {
  # Sizes 2..length(x); size 1 is `x` itself and is prepended below.
  sizes <- seq_along(x)[-1L]
  combos <- lapply(sizes, function(y) {
    # `collapse` is only honoured when it is forwarded to a FUN; without
    # FUN = paste0 combn() returns a matrix of the individual elements.
    combn(x, y, FUN = paste0, collapse = "")
  })
  c(x, combos, recursive = TRUE)
}
# Enumerate the combinations of the codes in S with allCombs(); the result
# is a character vector.
Scombi <- allCombs(S)
| Species | Domain | Actual OGT | A | C | D | E | F | G | H | I | K | L | M | N | P | Q | R | S | T | V | W | Y |
| Acaryochloris_marina | Bacteria | 25 | 0.089806129655016 | 0.011179368033588 | 0.052093758404379 | 0.056116688487831 | 0.033311792369428 | 0.074719969063287 | 0.021456955206517 | 0.062874293719234 | 0.046629846831622 | 0.105160548187069 | 0.023372745414207 | 0.034667218445279 | 0.050847279968411 | 0.052372091362254 | 0.054393907299958 | 0.058415776607691 | 0.059282788930956 | 0.075786041807662 | 0.012266709932789 | 0.025246090272826 |
| Acetobacter_pasteurianus | Bacteria | 26 | 0.113635842586218 | 0.009802006063102 | 0.053600553080754 | 0.058133056353357 | 0.036903783608575 | 0.085210142094237 | 0.021833316616858 | 0.053123968429941 | 0.045353753818743 | 0.096549489115246 | 0.025913145427995 | 0.027225003296464 | 0.052562918173042 | 0.033342785074972 | 0.072705595398914 | 0.049908591821467 | 0.056094207383391 | 0.079084190962059 | 0.010144168305489 | 0.018873482389179 |
| Acetobacterium_woodii | Bacteria | 30 | 0.074955804625209 | 0.011863137047001 | 0.058166310295556 | 0.071786218284636 | 0.03424697521635 | 0.075626240308253 | 0.018397399287915 | 0.087245372635541 | 0.078978610001876 | 0.087790924875632 | 0.03068806687375 | 0.046498124583435 | 0.036120348133785 | 0.031790536900726 | 0.045179171055634 | 0.050727609439901 | 0.055617806111571 | 0.069643619533744 | 0.005984048340735 | 0.028693676448754 |
| Acetohalobium_arabaticum | Bacteria | 37 | 0.07294006171749 | 0.008402092275195 | 0.063388830763099 | 0.094174357919767 | 0.032968396601359 | 0.074335444399095 | 0.014775170057021 | 0.081175614650614 | 0.068173658934912 | 0.096191143631822 | 0.023591084039018 | 0.042176390239929 | 0.036535950562554 | 0.032690297143697 | 0.045929769851454 | 0.05201834344653 | 0.049098780255464 | 0.079225589949997 | 0.004923023531168 | 0.027286000029819 |
| Acholeplasma_laidlawii | Bacteria | 37 | 0.067353087090147 | 0.002160134400001 | 0.056809775441953 | 0.065310218890485 | 0.038735792072418 | 0.069508395797039 | 0.018942086187746 | 0.081435757342441 | 0.084786245636216 | 0.096181862610799 | 0.026545056054257 | 0.045549913713558 | 0.038323250930165 | 0.033008924859672 | 0.047150659509282 | 0.054698408656138 | 0.059971572823796 | 0.072199395290938 | 0.005926270925023 | 0.03540319176793 |
| Achromobacter_xylosoxidans | Bacteria | 30 | 0.120974236639852 | 0.008469732379263 | 0.054028585828065 | 0.055476991380945 | 0.035048667997051 | 0.086814010110846 | 0.02243157894653 | 0.050520668283285 | 0.039296015271673 | 0.099074202941835 | 0.028559018986725 | 0.025845147774914 | 0.049701994138614 | 0.034808403369533 | 0.073998251525545 | 0.050072992977641 | 0.051695040348985 | 0.080314177991249 | 0.011792085285623 | 0.021078197821829 |
| Species | Domain | Actual OGT | A | AC | AD | AE |
| Acaryochloris_marina | Bacteria | 25 | 0.089806129655016 | 0.191179368033588 | 0.1782093758404379 | 0.186116688487831 |
抱歉表述得不够清楚。有没有人知道该如何创建这样的数据集?(或者,我应该怎样更好地提问/描述我想查找的内容?)
# Example data: amino-acid proportions for six bacterial species, one row
# per species. Columns A..Y hold the proportion of each single amino acid.
# Every column must have six entries to match row.names = c(NA, -6L).
Species <- structure(
  list(
    Species = c(
      "Acaryochloris_marina", "Acetobacter_pasteurianus",
      "Acetobacterium_woodii", "Acetohalobium_arabaticum",
      "Acholeplasma_laidlawii", "Achromobacter_xylosoxidans"
    ),
    Domain = c(
      "Bacteria", "Bacteria", "Bacteria", "Bacteria", "Bacteria", "Bacteria"
    ),
    Actual.OGT = c(25, 26, 30, 37, 37, 30),
    A = c(0.089806129655016, 0.113635842586218, 0.074955804625209,
          0.07294006171749, 0.067353087090147, 0.120974236639852),
    C = c(0.011179368033588, 0.009802006063102, 0.011863137047001,
          0.008402092275195, 0.002160134400001, 0.008469732379263),
    D = c(0.052093758404379, 0.053600553080754, 0.058166310295556,
          0.063388830763099, 0.056809775441953, 0.054028585828065),
    E = c(0.056116688487831, 0.058133056353357, 0.071786218284636,
          0.094174357919767, 0.065310218890485, 0.055476991380945),
    F = c(0.033311792369428, 0.036903783608575, 0.03424697521635,
          0.032968396601359, 0.038735792072418, 0.035048667997051),
    G = c(0.074719969063287, 0.085210142094237, 0.075626240308253,
          0.074335444399095, 0.069508395797039, 0.086814010110846),
    H = c(0.021456955206517, 0.021833316616858, 0.018397399287915,
          0.014775170057021, 0.018942086187746, 0.02243157894653),
    I = c(0.062874293719234, 0.053123968429941, 0.087245372635541,
          0.081175614650614, 0.081435757342441, 0.050520668283285),
    K = c(0.046629846831622, 0.045353753818743, 0.078978610001876,
          0.068173658934912, 0.084786245636216, 0.039296015271673),
    L = c(0.105160548187069, 0.096549489115246, 0.087790924875632,
          0.096191143631822, 0.096181862610799, 0.099074202941835),
    M = c(0.023372745414207, 0.025913145427995, 0.03068806687375,
          0.023591084039018, 0.026545056054257, 0.028559018986725),
    N = c(0.034667218445279, 0.027225003296464, 0.046498124583435,
          0.042176390239929, 0.045549913713558, 0.025845147774914),
    P = c(0.050847279968411, 0.052562918173042, 0.036120348133785,
          0.036535950562554, 0.038323250930165, 0.049701994138614),
    Q = c(0.052372091362254, 0.033342785074972, 0.031790536900726,
          0.032690297143697, 0.033008924859672, 0.034808403369533),
    R = c(0.054393907299958, 0.072705595398914, 0.045179171055634,
          0.045929769851454, 0.047150659509282, 0.073998251525545),
    S = c(0.058415776607691, 0.049908591821467, 0.050727609439901,
          0.05201834344653, 0.054698408656138, 0.050072992977641),
    T = c(0.059282788930956, 0.056094207383391, 0.055617806111571,
          0.049098780255464, 0.059971572823796, 0.051695040348985),
    V = c(0.075786041807662, 0.079084190962059, 0.069643619533744,
          0.079225589949997, 0.072199395290938, 0.080314177991249),
    W = c(0.012266709932789, 0.010144168305489, 0.005984048340735,
          0.004923023531168, 0.005926270925023, 0.011792085285623),
    Y = c(0.025246090272826, 0.018873482389179, 0.028693676448754,
          0.027286000029819, 0.03540319176793, 0.021078197821829)
  ),
  row.names = c(NA, -6L),
  class = "data.frame"
)
答案 0 :(得分:2)
# Reshape data to long form: one row per species/protein pair, with the
# protein code in `protein` and its value in `proportion`. This makes it
# easier to split and access proportions within each species.
# The pipeline must end after gather(); a trailing `%>%` would pipe the
# result into the next statement and break the assignment that follows.
# NOTE(review): a further pipeline step may have been lost here -- confirm.
SpeciesLong <-
  Species %>%
  gather(protein, proportion, A:Y)
# Collect the distinct protein codes present in the long-format data.
S <- unique(SpeciesLong[["protein"]])
# Build the combination list: for every size k from 1 to length(S), paste
# the members of each size-k combination into one string.
# Note, this is different than your code, I added FUN = paste0
Scombi <- unlist(
  lapply(
    seq_along(S),
    function(k) {
      combn(S, k, FUN = paste0, collapse = "")
    }
  )
)
# Function to get the joint proportion
# I took the sum, for convenience. You'll need to replace this
# with whatever function you use to get the joint proportion.
# The key part is getting the correct proteins, which happens within
# the `sum` call.
# NOTE(review): no function body or closing brace is visible after the
# opening `{` below, so the `sum` call described above cannot be seen here;
# the body appears to have been lost -- restore it before running.
joint_protein <- function(protein_combo, data){
# Make a list of data frames, one for each species
# NOTE(review): the right-hand side of this assignment is not visible here;
# presumably it splits the long-format data by species -- TODO confirm.
SplitSpecies <-
# Make a cluster of worker processes to run on, leaving one core free.
# detectCores() can return 1 (or NA when the count is unknown), which would
# request zero/NA workers and make makeCluster() fail, so clamp to >= 1.
# Call stopCluster(cl) once the parallel work is finished.
cl <- makeCluster(max(1L, detectCores() - 1L, na.rm = TRUE))
# export Scombi and joint_protein to all processes in the cluster
clusterExport(cl, c("Scombi", "joint_protein"))
# Get the aggregate values for each species in a one-row data frame.
# NOTE(review): the call these `X =` / `fun =` arguments belong to is not
# visible here, and neither are the remaining arguments and closing
# parentheses of the inner lapply(); presumably a parallel apply over the
# cluster -- TODO confirm and restore the missing lines.
SpeciesAggregate <-
X = SplitSpecies,
fun = function(data){
# Apply something to every protein combination in Scombi for this species.
X <- lapply(Scombi,
# Label each result with the combination it was computed for.
names(X) <- Scombi
# Join the results to the Species data
# You may want to save your data before this step. I'm not entirely
# sure I did this right to match the rows correctly.
# cbind() pairs rows purely by position (it does not match on species
# name), and this overwrites the original Species object.
# NOTE(review): verify that SpeciesAggregate has one row per species, in the
# same order as Species, before relying on the combined result.
Species <- cbind(Species, SpeciesAggregate)