我正在解析一个XML文件,其中一个xml标签的结构如下:
def letters(word_1, word_2):
    """Return the number of swaps needed to rearrange ``word_1`` into ``word_2``.

    The letters of ``word_2`` are processed left to right. Each one is matched
    with the earliest still-unmatched occurrence of the same letter in
    ``word_1``; the number of still-unmatched ``word_1`` letters located
    before that occurrence is added to the result.

    Args:
        word_1: The starting sequence of letters.
        word_2: The target sequence; expected to use the same letters as
            ``word_1``.

    Returns:
        The accumulated swap count as an ``int`` (``0`` for empty input).

    Raises:
        KeyError: If ``word_2`` contains a letter that ``word_1`` does not
            have, or has it more times than ``word_1`` does.
    """
    size = len(word_1)

    # Binary tree over the indices of word_1, stored as a list in in-order
    # sequence. num_left[i] starts as the size of i's left subtree plus one
    # (the node itself), i.e. the number of unmatched letters in that range.
    treesize = (1 << size.bit_length()) - 1
    num_left = [((i >> 1) ^ ((i + 1) >> 1)) + 1 for i in range(treesize)]
    # parent[i] is the in-order index of i's parent; the root's parent is
    # `treesize`, which is what ends the upward walk below.
    parent = [
        (i & ~((i ^ (i + 1)) + 1)) | (((i ^ (i + 1)) + 1) >> 1)
        for i in range(treesize)
    ]

    # One singly linked list of indices per distinct letter: head[c] is the
    # first unused index of c, next_index[i] the following one (-1 = none).
    next_index = [-1] * size
    head = {}
    for i in range(size - 1, -1, -1):  # backwards, so each list ends up ascending
        c = word_1[i]
        next_index[i] = head.get(c, -1)
        head[c] = i

    result = 0
    for c in word_2:
        # Take the next unused occurrence of this letter.
        j = head.get(c, -1)
        if j < 0:
            # Without this guard the -1 terminator would be used as a list
            # index and silently corrupt the counters.
            raise KeyError(c)
        head[c] = next_index[j]

        # Walk from j up to the root.
        p = j
        while p < treesize:
            if p >= j:
                # j lies in p's "left or self" range: one letter fewer there.
                num_left[p] -= 1
            if p <= j:
                # p's range lies entirely at or before j: count what is left.
                result += num_left[p]
            p = parent[p]
    return result
我编写了一个生成器函数来按顺序访问产品标签:
<product>
<item seq="division-sec">Division</item>
<item seq="dept-sec">Dept Info</item>
<item seq="label01">Label 01</item>
<item seq="label02">Label 02</item>
...
<item seq="labelN">Label N</item>
<item seq="date-mfg">27-11-2017</item>
<item seq="date-exp">28-11-2019</item>
</product>
现在我想找一个函数，或者一个迭代器类，让我能够获取其中的第一个、最后一个、上一个和下一个元素，并提供搜索方法：
def product_labels(xmlpage):
    """Yield the items of ``xmlpage`` one at a time, in iteration order.

    Args:
        xmlpage: Any iterable of items. The inline example suggests
            ``(seq, text)`` pairs such as ``("division-sec", "Division")``,
            but nothing here depends on that shape.

    Yields:
        Each item of ``xmlpage``, unchanged.
    """
    # NOTE: the XML parsing step is elided in this snippet; `xmlpage` is
    # iterated as-is.
    for item in xmlpage:
        # Placeholder: per-item validation (case sensitivity, etc.) goes here.
        yield item  # e.g. ("division-sec", "Division")
因此假设我需要使用该对象,它应该像这样工作:
class ProdcutReader(object):
    """Cursor over product labels with first/last/previous/next/search access.

    The labels are copied into a list on construction, so the reader can step
    backwards and search without exhausting a one-shot generator. The class
    name keeps its original spelling so existing references keep working.

    Args:
        product_labels: Iterable of label items. ``search`` compares the first
            field of each item (``item[0]``) with the requested ``seq``.
    """

    def __init__(self, product_labels):
        # A generator can only be walked forwards once; a list can be indexed.
        self.product_labels = list(product_labels)
        # Index of the current label; -1 means "positioned before the first".
        self._pos = -1

    def __iter__(self):
        return self

    def __prev__(self):
        """Step back one label and return it.

        Raises:
            StopIteration: If the cursor is already on (or before) the first
                label; the cursor is left where it was.
        """
        if self._pos <= 0:
            raise StopIteration
        self._pos -= 1
        return self.product_labels[self._pos]

    def __next__(self):
        """Step forward one label and return it.

        Raises:
            StopIteration: If the cursor is already on the last label.
        """
        if self._pos + 1 >= len(self.product_labels):
            raise StopIteration
        self._pos += 1
        return self.product_labels[self._pos]

    def current(self, obj=None):
        """Return the current label, or make ``obj`` the current label.

        Without ``obj``: return the label under the cursor, falling back to
        ``first()`` when iteration has not started yet. With ``obj``: move the
        cursor onto ``obj`` when it is one of the labels, and return ``obj``
        either way.
        """
        if obj is None:
            if self._pos < 0:
                return self.first()
            return self.product_labels[self._pos]
        if obj in self.product_labels:
            self._pos = self.product_labels.index(obj)
        return obj

    def first(self):
        """Move to the first label and return it (IndexError when empty)."""
        label = self.product_labels[0]
        self._pos = 0
        return label

    def last(self):
        """Move to the last label and return it (IndexError when empty)."""
        label = self.product_labels[-1]
        self._pos = len(self.product_labels) - 1
        return label

    def search(self, seq):
        """Find the first label whose ``item[0]`` equals ``seq``.

        On a hit the cursor moves onto that label and the label is returned.
        On a miss the cursor is left untouched and ``None`` is returned.
        """
        for index, label in enumerate(self.product_labels):
            if label[0] == seq:
                self._pos = index
                return label
        return None
如果我知道如何编写“上一个”、“下一个”和“搜索”方法，我就能够写出其余的方法，例如“第一个”、“最后一个”、“当前”等。
答案 0 :(得分:1)
您显然需要能够随时随机访问任意元素。在我看来，我不明白您为什么不能直接使用列表。
如果您需要“当前/下一个/上一个”功能，又不想自己维护计数器变量，仍然可以基于列表来构建这个类：
class ProductReader(object):
    """Random-access cursor over the items produced by a generator function.

    Items are pulled from the generator lazily and cached in ``storage``, so
    any item that has been produced once can be revisited by index without
    restarting the generator.

    Args:
        product_labels: A zero-argument callable returning a fresh iterator
            of items (for example a generator function).

    Attributes set here: ``generator`` (the callable), ``active_generator``
    (the iterator being drained), ``storage`` (items produced so far),
    ``element`` (current item or ``None``), ``cur`` (current index, ``-1``
    before the first item).
    """

    def __init__(self, product_labels):
        self.generator = product_labels
        self.active_generator = self.generator()
        self.storage = []
        self.element = None
        self.cur = -1

    def __iter__(self):
        return self

    def _load_through(self, index):
        """Cache items until ``storage[index]`` exists; return whether it does."""
        while len(self.storage) <= index:
            try:
                self.storage.append(next(self.active_generator))
            except StopIteration:
                return False
        return True

    def at_n(self, index):
        """Move the cursor to ``index`` and return the item there.

        Raises:
            IndexError: If ``index`` is negative or past the last item; the
                cursor is left unchanged.
        """
        if index < 0 or not self._load_through(index):
            raise IndexError("product index out of range: %r" % (index,))
        self.cur = index
        self.element = self.storage[index]
        return self.element

    def current(self):
        """Return the item under the cursor, or ``None`` before the first move."""
        if self.cur > -1:
            return self.element
        return None

    def last(self):
        """Move to the final item and return it (``None`` when there are no items)."""
        # Drain whatever the generator has not produced yet.
        self.storage.extend(self.active_generator)
        if self.storage:
            self.cur = len(self.storage) - 1
            self.element = self.storage[-1]
        return self.element

    def first(self):
        """Move to the first item and return it (IndexError when empty)."""
        return self.at_n(0)

    def next(self):
        """Advance one item and return it; raise StopIteration at the end."""
        try:
            return self.at_n(self.cur + 1)
        except IndexError:
            raise StopIteration

    __next__ = next

    def prev(self):
        """Step back one item and return it.

        Raises:
            StopIteration: If the cursor is on or before the first item.
        """
        if self.cur <= 0:
            raise StopIteration
        return self.at_n(self.cur - 1)

    def search(self, query):
        """Move to the first item for which ``query in item`` holds.

        Returns the matching item, or ``None`` when nothing matches; on a
        miss the cursor stays where it was.
        """
        index = 0
        while self._load_through(index):
            candidate = self.storage[index]
            if query in candidate:
                self.cur = index
                self.element = candidate
                return candidate
            index += 1
        return None