Question

def get_houseid_list():
    """Returns a list of all house ids from db"""
    print 'Building list of all HouseIDs...'
    houseid_list = []
    houseids = session.query(Episode.HouseID).all()
    for i in houseids:
        houseid_list.append(i[0])
    return houseid_list


def walkDir(top, ignore=[]):
    """Returns a complete list of files from a directory, recursing through subfolders"""
    print 'Building list of files...'
    fflist = []
    for root, dirs, files in os.walk(top):
        dirs[:] = [ dn for dn in dirs if dn not in ignore ]
        file_list =  [name for name in files if name[0] != '.']
        if len(file_list):
            for f in file_list:
                try:
                    houseid_parse(f)
                    print 'adding...', f
                    [fflist.append(join(root, f)) for f in file_list]
                except HouseIdException:
                    print 'skipping...', f
            print 'Found', len(file_list), 'files in', root
    return fflist


def get_nonmatches(houseid_list, finallist):
    print 'Comparing files to HouseIDs...'
    nonmatches = []
    for id in houseid_list:
        print 'Searching for files to match', id
        for f in finallist:
            if re.search(id, f, re.IGNORECASE):
                nonmatches.append(f)
    return nonmatches


def writeCSV(nonmatch):
    print 'Writing nonmatches to CSV...'
    csv.write('%s' % nonmatch)


if __name__ == "__main__":

    houseid_list = get_houseid_list()
    print len(houseid_list), 'HouseIDs found'
    wdirs = ['/Volumes/Assets/Projects']
    finallist = []
    for d in wdirs:
        fflist = walkDir(d)
    for f in fflist:
        nonmatches = get_nonmatches(houseid_list,f)
    print 'nonmatches', nonmatches

Answer 1

关于此代码的一些评论，我们等待您提供足够的信息来解决您的问题..

这很可怕取决于像这样的副作用

[fflist.append(join(root, f)) for f in file_list]

什么时候可以说

fflist.extend(join(root, f) for f in file_list)

但这看起来像是一个bug，你的意思是再次在那里迭代file_list吗？也许你只需要

fflist.append(join(root, f))

这部分似乎从它的效果中移除了条件

if len(file_list):
    for f in file_list:
        try:
            houseid_parse(f)
            print 'adding...', f
            [fflist.append(join(root, f)) for f in file_list]
        except HouseIdException:
            print 'skipping...', f
    print 'Found', len(file_list), 'files in', root

为什么不这样写呢？

for f in file_list:
    try:
        houseid_parse(f)
        print 'adding...', f
        fflist.append(join(root, f))
    except HouseIdException:
        print 'skipping...', f
if file_list:
    print 'Found', len(file_list), 'files in', root

如果你只想迭代fflist，也许你可以将walkDir变成一个生成器

def walkDir(top, ignore=[]):
    """Returns a generator for a complete list of files from a directory,
       recursing through subfolders"""
    for root, dirs, files in os.walk(top):
        dirs[:] = [ dn for dn in dirs if dn not in ignore ]
        file_list =  [name for name in files if name[0] != '.']
        for f in file_list:
            try:
                houseid_parse(f)
                print 'yielding...', f
                yield join(root, f)
            except HouseIdException:
                print 'skipping...', f
        if file_list:
            print 'Found', len(file_list), 'files in', root

现在也许你告诉我们程序的输出是什么以及为什么你确定它是一个无限循环而不只是花费很长时间才能运行。总之，我们可以告诉这一行

houseids = session.query(Episode.HouseID).all()

可能需要很长时间才能执行

为什么这会在永无止境的循环中继续存在？

1 个答案: