import re

# Regex intended to detect a capital letter in a string.
# NOTE(review): as written it only matches a digit, followed later by an
# uppercase letter, followed later by a lowercase letter; a string that
# merely contains a capital (e.g. 'Dog') does not match.
pattern1 = re.compile(r'\d.*?[A-Z].*?[a-z]')
# Imagine it's a much longer collection.  A frozenset gives O(1) average
# membership tests, whereas `in` on a list scans every element.
vocab = frozenset(['dog', 'lazy', 'the', 'fly'])

def check_no_caps(s):
    """Return *s* unchanged if it has no ASCII capital letter, else None.

    ``re.search`` is used so a capital anywhere in the string is found;
    ``re.match`` would only test from the start of the string.
    """
    return None if re.search(r'[A-Z]', s) else s

def check_nomorethan_five(s):
    """Return *s* when it has at most five characters, otherwise None."""
    if len(s) > 5:
        return None
    return s

def check_in_vocab_plus_x(s, x):
    """Return ``s + x`` when *s* is in ``vocab``, otherwise None.

    Both *s* and *x* are expected to be ``str``.
    """
    if s in vocab:
        return s + x
    return None

slist = ['the', 'dog', 'jumps', 'over', 'the', 'fly']
# Stage 1: map every string through check_no_caps (rejected strings become None).
slist = [check_no_caps(s) for s in slist]
# Stage 2: skip the None entries from stage 1, then apply the length check.
slist = [check_nomorethan_five(s) for s in slist if s is not None]
# Stage 3: skip the None entries from stage 2, then apply the vocab check.
# The appended index is the position in the stage-2 list, and strings that
# this last check rejects stay in the final list as None.
slist = [check_in_vocab_plus_x(s, str(i)) for i,s in enumerate(slist) if s is not None]



slist = ['the', 'dog', 'jumps', 'over', 'the', 'fly']
# Stages 1 and 2 fused into a single lazy pass: strings rejected by
# check_no_caps (None) are dropped before the length check is applied.
# The outermost iterable of a generator expression is evaluated immediately,
# so rebinding `slist` here is safe.
slist = (check_nomorethan_five(s1)
         for s1 in map(check_no_caps, slist) if s1 is not None)
# Stage 3: vocab check on whatever survived the length check.
slist = [check_in_vocab_plus_x(s, str(i)) for i, s in enumerate(slist) if s is not None]


slist = ['the', 'dog', 'jumps', 'over', 'the', 'fly']
# All three checks as one lazy chain.  Each stage skips the None produced by
# the previous one; `i` is the position in the output of the length check.
# Nothing runs until the resulting generator is iterated.
slist = (
    check_in_vocab_plus_x(s2, str(i))
    for i, s2 in enumerate(
        check_nomorethan_five(s1)
        for s1 in map(check_no_caps, slist) if s1 is not None
    )
    if s2 is not None
)

必须有更好的方法。 有没有办法让for循环链变得更快?



# Imagine it's a much longer collection.  A frozenset gives O(1) average
# membership tests, whereas `in` on a list scans every element.
vocab = frozenset(['dog', 'lazy', 'the', 'fly'])

# note that first two functions can be combined in one
def no_caps_and_length(s):
    """Return *s* if ``s.islower()`` holds and it has at most five characters.

    Any other string yields None.
    """
    if len(s) > 5 or not s.islower():
        return None
    return s

# this one is more complicated and cannot be merged with first two
# (not really, but as you say, some functions are rather complicated)
def check_in_vocab_plus_x(s, x):
    """Append *x* to *s* when *s* is a vocab word; return None otherwise.

    Both *s* and *x* are expected to be ``str``.
    """
    return s + x if s in vocab else None

# now let's introduce a function that would pipe a string through all functions you need
def pipe_through_funcs(s, funcs=None):
    """Pass *s* through each function in turn, stopping at the first None.

    Args:
        s: The string to process (or None, which is returned as-is).
        funcs: Optional sequence of one-argument callables, each returning
            the (possibly modified) string or None to reject it.  Defaults
            to the length/case check followed by the vocab check.

    Returns:
        The result of the last function, or None as soon as any function
        rejects the value.
    """
    if funcs is None:
        # check_in_vocab_plus_x needs a suffix argument; no index is available
        # inside this per-string pipeline, so an empty suffix is supplied.
        funcs = [no_caps_and_length,
                 lambda word: check_in_vocab_plus_x(word, '')]
    for func in funcs:
        if s is None:
            return None
        s = func(s)
    return s

slist = ['the', 'dog', 'jumps', 'over', 'the', 'fly']
# Final step: run every string through the pipeline and drop the rejected
# ones.  None is a singleton, so it is compared by identity.
slist = filter(lambda a: a is not None, map(pipe_through_funcs, slist))


# Imagine it's a much longer collection.  A frozenset gives O(1) average
# membership tests, whereas `in` on a list scans every element.
vocab = frozenset(['dog', 'lazy', 'the', 'fly'])

# make a function that does all the checks for filtering
# you can make a big expression and return its result,
# or a sequence of ifs, or anything in-between,
# it won't affect performance,
# but make sure you put cheaper checks first
def my_filter(s):
    """Return True only if *s* passes the length, case and vocab checks."""
    # Cheapest test first; `and` short-circuits, so later tests only run
    # for strings that passed the earlier ones.
    return len(s) <= 5 and s.islower() and s in vocab

# now we need modifying function
# there is a concern: if you need indices as they were in original list
# you might need to think of some way to pass them here
# as you iterate through filtered out list
def modify(s,x):
    """Return *s* with *x* appended via ``+=``."""
    s += x
    # maybe more actions
    return s

slist = ['the', 'dog', 'jumps', 'over', 'the', 'fly']
# Final step: modify() takes two arguments, so it cannot be handed to map()
# with a single iterable.  Enumerating before filtering keeps each string's
# index from the original list.
slist = [modify(s, str(i)) for i, s in enumerate(slist) if my_filter(s)]


initial_list = ['the', 'dog', 'jumps', 'over', 'the', 'fly']
new_list = []
# Single pass: each string goes through the whole pipeline before the next
# one is looked at; rejected strings (None) are not collected.
for s in initial_list:
    processed = pipe_through_funcs(s)
    if processed is not None:
        new_list.append(processed)

import random
# NOTE(review): the original loop had no body; presumably it filled the list
# with random integers (`random` is imported just above) - confirm the range.
slist = [random.randint(0, 100) for _ in range(100)]

# Unified functions which have the same function description
# x is the value
# i is the counter from enumerate
def add(x, i):
    """Return *x* plus two; the counter *i* is accepted but not used."""
    increased = x + 2
    return increased

def replace(x, i):
    """Return *x* with every decimal digit '2' replaced by the digits of *i*."""
    digits = str(x)
    swapped = digits.replace('2', str(i))
    return int(swapped)

# Specifying your pipelines as a list of tuples 
# Where tuple is (filter function, transformer function)
_pipeline = [
    # Stage 1: keep everything, add 2 to each value.
    (lambda s: True, add),
    # Stage 2: keep even values only, then swap the digit '2' for the counter.
    (lambda s: s % 2 == 0, replace),
]

# Execute your pipeline
for _filter, _fn in _pipeline:
    # enumerate() yields (counter, value) but the transformers take
    # (value, counter), so the pair is swapped explicitly.
    # `_fn=_fn` binds the current transformer at definition time: map() is
    # lazy in Python 3, and a plain closure would make every stage call the
    # last `_fn` of the loop once the chain is finally consumed.
    slist = map(
        lambda pair, _fn=_fn: _fn(pair[1], pair[0]),
        enumerate(filter(_filter, slist)),
    )

这段代码在 Python 2 和 Python 3 下都能运行。区别在于:Python 3 中 map 和 filter 返回的是惰性迭代器,必须先被迭代(消费)才会真正执行。因此,整条流水线实际上只对列表迭代一次。

<map object at 0x7f92b8315fd0>



def check1(s):
    """Return *s* if ``s.islower()`` is true, otherwise None."""
    return s if s.islower() else None

def check2(s):
    """Return *s* if it is shorter than five characters, otherwise None."""
    return s if len(s) < 5 else None

# The checks, in the order they are applied.
checks = [check1, check2]


# Sample input strings.
l = ['dog', 'Cat', 'house', 'foo']


def checks_first(l, checks):
    """Apply each check to the whole collection before moving to the next.

    After every check, falsy results (such as None) are discarded.  The
    survivors of the final check are returned as a list.
    """
    survivors = l
    for check in checks:
        # map(check, ...) is the outermost iterable of the generator, so it
        # is created immediately with the current `check`.
        survivors = (kept for kept in map(check, survivors) if kept)
    return list(survivors)

def strings_first(l, checks):
    """Run each item through all checks before moving to the next item.

    Args:
        l: Iterable of items to test.
        checks: Sequence of callables; each returns the (possibly modified)
            item, or None to reject it.

    Returns:
        List of the items that passed every check, in input order.
    """
    res = []

    for item in l:
        for check in checks:
            item = check(item)
            if item is None:
                # Rejected: skip the remaining checks for this item.
                break
        else:
            # No check rejected the item (the loop finished without break).
            res.append(item)

    return res

您可以使用 timeit 模块为这两种方法计时。注意:您可能需要只使用字符串的一个子集,才能在合理的时间内得到结果。

import timeit

# Time 10 runs of each strategy.  `setup` imports the needed names from
# __main__ so the timed statement can see them.
print(timeit.timeit('checks_first(l, checks)', setup='from __main__ import checks_first, checks, l', number=10))
print(timeit.timeit('strings_first(l, checks)', setup='from __main__ import strings_first, checks, l', number=10))



import functools

def time_func(func, timer_dict):
    """Wrap *func* so that each call adds its elapsed time to *timer_dict*.

    Args:
        func: The callable to time.
        timer_dict: Mapping from function name to accumulated seconds.  It
            must already hold (or default-create) an entry for
            ``func.__name__``.

    Returns:
        A wrapper with the same call signature and return value as *func*.
        If *func* raises, the exception propagates and no time is recorded.
    """
    # Local import: the visible file-level imports do not include `time`.
    import time

    @functools.wraps(func)
    def inner(*args, **kwargs):
        # perf_counter() is the clock meant for measuring short durations.
        t0 = time.perf_counter()
        res = func(*args, **kwargs)
        timer_dict[func.__name__] += time.perf_counter() - t0
        return res

    return inner


from collections import defaultdict

# Accumulated time per check, keyed by function name; missing keys start at 0.
timer_dict = defaultdict(lambda: 0)
# Wrap every check so that each call adds its elapsed time to timer_dict.
checks = [time_func(check, timer_dict) for check in checks]


# Run both strategies with the timed checks so timer_dict gets populated.
checks_first(l, checks)
strings_first(l, checks)


# Presumably an example of timer_dict's contents afterwards:
# {'check1': 0.41464924812316895, 'check2': 0.2684309482574463}

然后通过检查代码或性能分析,找出开销较大的检查中的瓶颈。后者可以通过使用 time 模块为各行代码计时,或使用行级分析器(line profiler)来完成。


首先:我认为您的示例代码没有按照您的想法进行。结果是['the0', 'dog1', None, None, 'the4', 'fly5'],但我相信您不需要None值。




要对脚本进行概要分析,您可以按如下方式运行它: python -m cProfile <script-name>

我可以看到您可以做的三个优化。首先,如果“vocab”中所有单词的长度都小于或等于5,就不需要再检查“slist”中单词的长度是否小于或等于5,这意味着您可以删除整个for循环。第二个优化是,如果“vocab”中的所有单词都只含小写字母,并且您的单词比较是区分大小写的,那么就不需要检查“slist”中的单词是否含有大写字母,这意味着您可以再删除一个for循环。

这个原则的基本概括是,如果一个单词必须满足几个条件而一个条件意味着另一个条件(即如果你需要一个可被4和2整除的数字,你只需要检查它是否可以被4整除。) ,你可以删除隐含条件。



Efficient list intersection algorithm

Computing set intersection in linear time?

总之,你可以删除两个for循环,并减少循环的“词汇” - “slist”比较的时间复杂性。

  1. 内置函数之所以能提高性能,是因为它们把更多工作留在本机(C)代码中完成。当您用它们去调用lambda或其他Python可调用对象时,这种性能优势会损失大半。只有在仅靠本机内置函数就能完成任务时才使用它们。itertools、operator、functools模块在这方面能提供很多帮助。
  2. 生成器主要帮助进行内存优化,您不希望一次将所有值保存在内存中。如果你可以在一次迭代中完成所有操作而不使用它们,那么它会更好并节省生成器开销。
  3. 我在这个具体示例中改动的另一处是正则表达式的使用。对于检测大写字母这种简单情况,直接比较字符会更快。写得不好的正则表达式可能严重拖累性能,因此如果没有进行更复杂匹配的明确收益,我倾向于避免使用它们。

    # Imagine it's a much longer collection.  A frozenset gives O(1) average
    # membership tests, whereas `in` on a list scans every element.
    vocab = frozenset(['dog', 'lazy', 'the', 'fly'])
    def check_no_caps(s):
        """Return *s* unless it contains a character in 'A'..'Z'; then None."""
        has_capital = any('A' <= char <= 'Z' for char in s)
        return None if has_capital else s
    def check_nomorethan_five(s):
        """Return *s* when it has at most five characters, otherwise None."""
        if len(s) > 5:
            return None
        return s
    def check_in_vocab_plus_x(s, x):
        """Return *s* followed by ``str(x)`` if *s* is in ``vocab``, else None."""
        if s in vocab:
            return s + str(x)
        return None
    slist = ['the', 'dog', 'jumps', 'over', 'the', 'fly']
    # One pass over the list.  Each check runs only if the previous one
    # accepted the string: check_nomorethan_five calls len() and would raise
    # TypeError if handed the None that check_no_caps returns on rejection.
    # Rejected strings stay in `result` as None, at their original position.
    result = []
    for i, string in enumerate(slist):
        string = check_no_caps(string)
        if string is not None:
            string = check_nomorethan_five(string)
        if string is not None:
            string = check_in_vocab_plus_x(string, i)
        result.append(string)