我正在使用 rpy2 构建一个 ExpressionSet 类,并以相关教程(tutorial)为指导。我对 ExpressionSet 对象最常做的操作之一是取子集,这在原生 R 中非常简单:
eset2 <- eset1[1:10, 1:5] # first ten features, first five samples
它会按给定的索引返回一个新的 ExpressionSet 对象,其中的表达数据和表型数据都已相应取子集。rpy2 的 RS4 对象似乎不支持直接取子集,也不像 R 向量(RS3 对象)那样具有 rx / rx2 属性。我添加了一个 '_subset' 函数(见下文),它分别对这两个数据集取子集,再把结果赋值回 ExpressionSet 对象,但只成功了大约一半。有没有我没注意到的更简单的方法?
from rpy2 import (robjects, rinterface)
from rpy2.robjects import (r, pandas2ri, Formula)
from rpy2.robjects.packages import (importr,)
from rpy2.robjects.methods import (RS4,)
class ExpressionSet(RS4):
    """Python-side wrapper around an R ``ExpressionSet`` S4 object.

    The R slots are exposed as Python properties (``assay``, ``pData``,
    ``fData``, ``annot``) and the two data tables most often needed are
    reachable directly through ``exprs`` and ``pheno``.
    """

    # funcs to get the attributes
    def _assay_get(self):
        """Return the 'assayData' slot (an environment; index it with 'exprs')."""
        return self.slots["assayData"]

    def _pdata_get(self):
        """Return the 'phenoData' slot (an RS4 object; see its .slots["data"])."""
        return self.slots["phenoData"]

    def _feats_get(self):
        """Return the 'featureData' slot (an RS4 object; see its .slots["data"])."""
        return self.slots["featureData"]

    def _annot_get(self):
        """Return the first (only) element of the 'annotation' slot."""
        return self.slots["annotation"][0]

    def _class_get(self):
        """Return the first (only) element of the 'class' slot."""
        return self.slots["class"][0]

    # funcs to set the attributes
    def _assay_set(self, value):
        """Replace the 'assayData' slot with *value*."""
        self.slots["assayData"] = value

    def _pdata_set(self, value):
        """Replace the 'phenoData' slot with *value*."""
        self.slots["phenoData"] = value

    def _feats_set(self, value):
        """Replace the 'featureData' slot with *value*."""
        self.slots["featureData"] = value

    def _annot_set(self, value):
        """Replace the 'annotation' slot with *value*."""
        self.slots["annotation"] = value

    def _class_set(self, value):
        """Replace the 'class' slot with *value*."""
        self.slots["class"] = value

    # funcs to work with the above to get/set the data
    def _exprs_get(self):
        """Return the 'exprs' entry stored in the assayData environment."""
        return self.assay["exprs"]

    def _pheno_get(self):
        """Return the 'data' slot of the phenoData object."""
        pdata = self.pData
        return pdata.slots["data"]

    def _exprs_set(self, value):
        """Store *value* as the 'exprs' entry of the assayData environment."""
        assay = self.assay
        assay["exprs"] = value

    def _pheno_set(self, value):
        """Store *value* as the 'data' slot of the phenoData object."""
        pdata = self.pData
        pdata.slots["data"] = value

    assay = property(_assay_get, _assay_set, None, "R attribute 'assayData'")
    pData = property(_pdata_get, _pdata_set, None, "R attribute 'phenoData'")
    fData = property(_feats_get, _feats_set, None, "R attribute 'featureData'")
    annot = property(_annot_get, _annot_set, None, "R attribute 'annotation'")
    exprs = property(_exprs_get, _exprs_set, None, "R attribute 'exprs'")
    pheno = property(_pheno_get, _pheno_set, None, "R attribute 'pheno'")

    @staticmethod
    def _selection_mask(names, wanted):
        """Build an R logical vector flagging which of *names* are in *wanted*.

        ``None`` selects every name. A single string is treated as one name
        rather than as a sequence of characters.
        """
        if wanted is None:
            return robjects.BoolVector([True for _ in names])
        if isinstance(wanted, str):
            wanted = [wanted]
        # A set makes each membership test O(1) instead of scanning a list.
        wanted = set(wanted)
        return robjects.BoolVector([name in wanted for name in names])

    def _subset(self, features=None, samples=None):
        """Return a new object holding only the requested features/samples.

        The work is delegated to R's own S4 ``[`` method for
        ``ExpressionSet`` (the Python equivalent of ``eset[i, j]`` in R), so
        the subsetting is done by R in a single call. This object is left
        unmodified: the earlier slot-by-slot approach changed ``pheno``
        in place and then could not assign the expression matrix back.

        Args:
            features: Feature (row) names to keep, as an iterable of names or
                a single name. ``None`` keeps every feature; an empty
                iterable keeps none.
            samples: Sample (column) names to keep, with the same conventions
                as *features*.

        Returns:
            A new instance of this class wrapping the subsetted R object.
            Rows and columns keep the order they have in this object.
        """
        # Fetch the matrix once; every property access goes through R.
        exprs = self.exprs
        fx = self._selection_mask(exprs.rownames, features)
        sx = self._selection_mask(exprs.colnames, samples)
        # RS4 objects expose no rx/rx2, so look the S4 "[" method up explicitly.
        subset_method = importr("methods").selectMethod("[", "ExpressionSet")
        return type(self)(subset_method(self, fx, sx))
答案 0 :(得分:1)
当你在 R 中执行
eset2<-eset1[1:10,1:5]
时,R 会查找签名为 ("ExpressionSet") 的 S4 方法 "[",并用你提供的参数值运行它。
文档建议使用 getmethod(参见 http://rpy2.readthedocs.org/en/version_2.7.x/generated_rst/s4class.html#methods )来简化获取相关 S4 方法的工作,但自该文档编写以来,它的行为似乎已经改变(不再解析通过继承进行的方法分派)。
下面的代码应该可以做到:
# importr loads an R package and exposes it as a Python object.
from rpy2.robjects.packages import importr
# R's 'methods' package; used here for its selectMethod function.
methods = importr('methods')
# Look up the S4 "[" method for signature "ExpressionSet". Calling the result
# with (eset, i, j) is the counterpart of writing eset[i, j] in R.
r_subset_expressionset = methods.selectMethod("[", "ExpressionSet")
答案 1 :(得分:0)
感谢 @lgautier 的回答。下面是对我上面代码片段的修改版,可以对 RS4 对象取子集:
from multipledispatch import dispatch
def _name_mask(names, wanted):
    """Build an R logical vector flagging which of *names* are in *wanted*.

    ``None`` selects every name. A single string is treated as one name
    rather than as a sequence of characters.
    """
    if wanted is None:
        return robjects.BoolVector([True for _ in names])
    if isinstance(wanted, str):
        wanted = [wanted]
    # A set makes each membership test O(1) instead of scanning a list.
    wanted = set(wanted)
    return robjects.BoolVector([name in wanted for name in names])


@dispatch(RS4)
def eset_subset(eset, features=None, samples=None):
    """Return a subset of an RS4 ExpressionSet object.

    The subsetting is done by R's S4 ``[`` method for ``ExpressionSet``, the
    counterpart of ``eset[i, j]`` in R; *eset* itself is not modified.

    NOTE(review): the dispatch signature registers a single positional type,
    so ``features`` and ``samples`` presumably have to be passed by keyword.
    Confirm against the multipledispatch documentation.

    Args:
        eset: The RS4 object to subset. It must expose an ``exprs`` matrix
            with ``rownames`` and ``colnames``.
        features: Feature (row) names to keep, as an iterable of names or a
            single name. ``None`` keeps every feature; an empty iterable
            keeps none.
        samples: Sample (column) names to keep, with the same conventions as
            *features*.

    Returns:
        The object returned by R's ``[`` method. Rows and columns keep the
        order they have in *eset*.
    """
    # Fetch the matrix once; every property access goes through R.
    exprs = eset.exprs
    fx = _name_mask(exprs.rownames, features)
    sx = _name_mask(exprs.colnames, samples)
    subset_method = methods.selectMethod("[", signature="ExpressionSet")
    return subset_method(eset, fx, sx)