我正在尝试把数据存储在一个嵌套列表(列表的列表)中:每个样本及其关联的各项信息放在一个内层列表里,再把大量这样的内层列表存入一个外层列表。目前我写的代码如下:
#!/usr/bin/env python
from operator import itemgetter # For sorting
class XaDatum(object):
    """A single sample record addressed by field name.

    The values live in the list ``self.datum``; ``fields`` maps each field
    name to its position in that list.  ``ki`` is stored as ``int``, ``amt``
    as ``float`` and every other field as ``str``.  A text field that is
    ``None`` stays ``None`` (meaning "not set") instead of becoming the
    string ``'None'``.
    """

    # Field name -> index into ``self.datum``.
    fields = {'name': 0, 'ki': 1, 'amt': 2, 'rep': 3, 'stage': 4,
              'variety': 5, 'date': 6, 'comments': 7}

    # Fields that are stored as text.
    _text_fields = ('name', 'rep', 'stage', 'variety', 'date', 'comments')

    @staticmethod
    def _as_text(value):
        """Return ``str(value)``, passing ``None`` through unchanged."""
        return None if value is None else str(value)

    def __init__(self, name, ki, amt, rep=None, stage=None, variety=None,
                 date=None, comments=None):
        """Build a record, coercing every field to its storage type.

        Raises:
            ValueError, TypeError: if ``ki`` cannot be converted with
                ``int()`` or ``amt`` with ``float()``.
        """
        # The values must be coerced while the list is built: rebinding a
        # loop variable (``for item in ...: item = str(item)``) would leave
        # the originals untouched.
        as_text = self._as_text
        self.datum = [as_text(name), int(ki), float(amt), as_text(rep),
                      as_text(stage), as_text(variety), as_text(date),
                      as_text(comments)]

    def __getitem__(self, key):
        """Return the value of field ``key``; unknown names raise KeyError."""
        return self.datum[self.fields[key]]

    def copy(self):
        """Return a new, independent XaDatum holding the same field values."""
        return XaDatum(self['name'], self['ki'],
                       self['amt'], self['rep'],
                       self['stage'], self['variety'],
                       self['date'], self['comments'])

    def __setitem__(self, key, item):
        """Store ``item`` under field ``key`` using the same coercion as
        ``__init__``; unknown names raise KeyError."""
        if key in self._text_fields:
            item = self._as_text(item)
        elif key == 'ki':
            item = int(item)
        elif key == 'amt':
            item = float(item)
        self.datum[self.fields[key]] = item

    def __str__(self):
        """Return the ``repr`` of the underlying value list."""
        return repr(self.datum)

    def show(self):
        """Print all eight fields on one line, separated by spaces."""
        print("{0} {1} {2} {3} {4} {5} {6} {7}".format(
            self['name'], self['ki'], self['amt'], self['rep'],
            self['stage'], self['variety'], self['date'],
            self["comments"]))
class XaData(object):
    """An ordered collection of XaDatum records.

    Attributes:
        data: the list of records, in insertion (or last sorted) order.
        count: the number of records held; kept in step with ``data`` by
            ``append``, ``copy`` and ``filter``.
    """

    def __init__(self):
        """Create an empty collection."""
        self.data = []
        self.count = 0

    def __getitem__(self, index):
        """Return ``self.data[index]``.

        An integer index yields one record; a slice yields a plain ``list``
        of records (not an XaData).
        """
        return self.data[index]

    def __str__(self):
        """Return the ``repr`` of the underlying record list."""
        return repr(self.data)

    def append(self, name, ki, amt, rep=None, stage=None, variety=None,
               date=None, comments=None):
        """Build an XaDatum from the arguments and add it to the end."""
        self.data.append(
            XaDatum(name, ki, amt, rep, stage, variety, date, comments))
        self.count += 1

    def show(self):
        """Print every record, one per line, via its own ``show()``."""
        for record in self.data:
            record.show()

    def copy(self):
        """Return a new XaData whose records are copies of this one's.

        Changing a record in the result does not affect the original.
        """
        duplicate = XaData()
        duplicate.data = [record.copy() for record in self.data]
        # Derive the count from the list so it cannot drift out of step.
        duplicate.count = len(duplicate.data)
        return duplicate

    @staticmethod
    def _field_matches(value, allowed):
        """Tell whether ``value`` passes the membership filter ``allowed``.

        ``allowed`` of ``None`` means "no restriction".  Otherwise the test
        is ``value in allowed``, so ``allowed`` may be a list/tuple/set of
        accepted values or a single string (for which ``in`` is a substring
        test).
        """
        if allowed is None:
            return True
        if value is None and isinstance(allowed, str):
            # ``None in 'text'`` raises TypeError; an unset field simply
            # does not match a string filter.
            return False
        return value in allowed

    def filter(self, inverse=False, min=-float('Inf'), max=float('Inf'),
               ki_min=-float('Inf'), ki_max=float('Inf'), rep=None, stage=None,
               variety=None, date=None, comment=None):
        """Return a new XaData holding the records that meet every criterion.

        NOTE: the result shares its record objects with this collection (they
        are not copied), so changing a record in the result also changes it
        here.  Call ``copy()`` on the result if independence is needed.

        Args:
            inverse: if true, return the records that do NOT match instead.
            min, max: inclusive bounds on ``amt``.
            ki_min, ki_max: inclusive bounds on ``ki``.
            rep, stage, variety, date: accepted values for the field of the
                same name (see ``_field_matches``); ``None`` disables the
                criterion.
            comment: accepted values for the ``'comments'`` field.
        """
        selected = XaData()
        want_match = not inverse
        for record in self.data:
            matched = (min <= record['amt'] <= max
                       and ki_min <= record['ki'] <= ki_max
                       and self._field_matches(record['rep'], rep)
                       and self._field_matches(record['stage'], stage)
                       and self._field_matches(record['variety'], variety)
                       and self._field_matches(record['date'], date)
                       # The field is named 'comments' even though the
                       # parameter is ``comment``.
                       and self._field_matches(record['comments'], comment))
            if matched == want_match:
                selected.data.append(record)
        selected.count = len(selected.data)
        return selected

    def sort(self, *args):
        """Sort the records by the given field names, in order of priority.

        With no arguments the records are sorted by ``'name'`` then ``'ki'``.
        """
        if not args:
            args = ('name', 'ki')
        self.data = sorted(self.data, key=itemgetter(*args))

    def unique(self, key):
        """Return the distinct values of field ``key`` as a sorted list."""
        return sorted(set(record[key] for record in self.data))

    def unique_kis(self):
        """Return the distinct ``ki`` values as a sorted list."""
        return self.unique('ki')

    def unique_names(self):
        """Return the distinct ``name`` values as a sorted list."""
        return self.unique('name')
if __name__ == "__main__":
    # Demo data set: (name, ki, amt, stage, variety) for each sample.
    demo_samples = (
        ('x00', 35, 501, 'B', 'V1'),
        ('x01', 40, 309, 'D', 'V2'),
        ('x02', 37, 450, 'D', 'V1'),
        ('x03', 35, 470, 'A', 'V2'),
        ('x04', 40, 378, 'B', 'V1'),
        ('x05', 45, 770, 'A', 'V2'),
    )
    da = XaData()
    for sample_name, sample_ki, sample_amt, sample_stage, sample_variety in demo_samples:
        da.append(sample_name, sample_ki, sample_amt,
                  stage=sample_stage, variety=sample_variety)
有了上面的代码,我可以进行如下操作:
In [1]: da.show()
x00 35 501.0 None B V1 None None
x01 40 309.0 None D V2 None None
x02 37 450.0 None D V1 None None
x03 35 470.0 None A V2 None None
x04 40 378.0 None B V1 None None
x05 45 770.0 None A V2 None None
In [2]: daf = da.filter(variety='V1')
In [3]: daf.show()
x00 35 501.0 None B V1 None None
x02 37 450.0 None D V1 None None
x04 40 378.0 None B V1 None None
In [4]: daf[0]['amt'] *= 0.2
In [5]: daf.show()
x00 35 100.2 None B V1 None None
x02 37 450.0 None D V1 None None
x04 40 378.0 None B V1 None None
但是,下面这种对整个切片的操作却行不通:
In [6]: daf[:]['amt'] *= 0.2
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
/home/nathan/work/simo_znose/<ipython console> in <module>()
TypeError: list indices must be integers, not str
所以我的问题是:怎样才能让这种切片操作正常工作?或者,是否已经有现成的数据类型或库实现了我想要的功能?
提前致谢!
森
答案 0 :(得分:1)
您可以使用 NumPy 的记录数组(record arrays)非常方便地操作这类表格数据。NumPy 是 Python 中数组操作的事实标准。
您还可以考虑使用数据库表,例如通过SQLite。
答案 1 :(得分:1)
在我看来,切片的行为完全符合预期。`daf[:]` 返回的是 `daf.data` 的一个副本,而它是一个普通列表——因此当您用 `'amt'` 去索引这个结果时就会报错,因为列表不能用字符串作索引。
正如用户 poke 在评论中所说,目前还不清楚您希望 `daf[:]['amt']` 做什么。看起来您是想修改 `daf` 的副本中所有 `XaDatum` 对象的 `'amt'` 值。但这真的是您想要的吗?如果是,您就必须修改 `XaData.__setitem__`(为完整起见,还有 `XaData.__getitem__`),使其接受字符串索引。
您还需要修改 `XaData.__getitem__`,使切片返回一个新的 `XaData` 对象,而不是一个新的 `list` 对象。无论如何您都应该这样做——这才是切片应有的行为。
如果您打算编写自定义的 `__getitem__` 和 `__setitem__` 方法,了解一下切片对象(slice objects)会很有帮助。