
时间:2016-02-19 21:35:13

标签: python list for-loop iteration nested-loops


from difflib import SequenceMatcher

def similar(a, b):
    """Return the difflib similarity ratio between sequences *a* and *b*.

    A ``SequenceMatcher`` is built with no junk filter (``isjunk=None``) and
    its ``ratio()`` is returned; per the difflib documentation this is a
    float in ``[0, 1]``, where ``1.0`` means the sequences are identical.
    """
    matcher = SequenceMatcher(None, a, b)
    return matcher.ratio()

# Sample input: two near-identical sentences prefixed with "-" and "+"
# (they differ in "age 65" vs "age 68") plus one unrelated "+" line.
diffs = [
"""- It contains a Title II provision that changes the age at which workers
compensation/public disability offset ends for disability beneficiaries from age 65 to full retirement age (FRA).""",
"""+ It contains a Title II provision that changes the age at which workers 
compensation/public disability offset ends for disability beneficiaries from age 68 to full retirement age (FRA).""",
"""+ Here's a new paragraph I added for testing."""]

for idx, s in enumerate(diffs):
    # Compare against every *other* entry. Exclusion is by position rather
    # than by value so that exact duplicates are still compared.
    matches = [j for k, j in enumerate(diffs)
               if k != idx and similar(s, j) > 0.7]
    for j in matches:
        print('"{}" and "{}" refer to the same sentence'.format(s, j))
    # An entry is only "new" when nothing else in the list resembles it;
    # printing this inside the match branch would mislabel matched entries.
    if not matches:
        print('"{}" is a new sentence'.format(s))



"- It contains a Title II provision that changes the age at which workers
compensation/public disability offset ends for disability beneficiaries from age 65 to full retirement age (FRA)." and "+ It contains a Title II provision that changes the age at which workers 
compensation/public disability offset ends for disability beneficiaries from age 68 to full retirement age (FRA)." refer to the same sentence

"- It contains a Title II provision that changes the age at which workers
compensation/public disability offset ends for disability beneficiaries from age 65 to full retirement age (FRA)." is a new sentence
"+ Here's a new paragraph I added for testing." is a new sentence



3 个答案:

答案 0 :(得分:1)

from difflib import SequenceMatcher
from collections import defaultdict

def similar(a, b):
    """Score how alike *a* and *b* are using ``difflib.SequenceMatcher``.

    No junk predicate is supplied (first argument ``None``). The value
    returned is the matcher's ``ratio()``, documented by difflib as a float
    between 0 and 1 inclusive.
    """
    comparison = SequenceMatcher(None, a, b)
    score = comparison.ratio()
    return score

# Sample input: two near-identical sentences prefixed with "-" and "+"
# (they differ in "age 65" vs "age 68") plus one unrelated "+" line.
diffs = [
"""- It contains a Title II provision that changes the age at which workers
compensation/public disability offset ends for disability beneficiaries from age 65 to full retirement age (FRA).""",
"""+ It contains a Title II provision that changes the age at which workers 
compensation/public disability offset ends for disability beneficiaries from age 68 to full retirement age (FRA).""",
"""+ Here's a new paragraph I added for testing."""]

# Indices that have already been attached to an earlier entry.
sims = set()
# Index of a group's first entry -> indices of later entries similar to it.
simdict = defaultdict(list)

# NOTE(review): the bodies of the two `if` statements below were missing in
# the source (a syntax error). `continue` and the two bookkeeping updates are
# a reconstruction from how `sims` and `simdict` are used - confirm.
for i, s in enumerate(diffs):
    if i in sims:
        # Already grouped under an earlier entry; do not start a new group.
        continue

    # Only look forward so each unordered pair is compared once.
    for j in range(i + 1, len(diffs)):
        if similar(s, diffs[j]) > 0.7:
            sims.add(j)
            simdict[i].append(j)

# `items()` and the parenthesised single-argument print work on both
# Python 2 and Python 3.
for k, v in simdict.items():
    print(diffs[k] + " is similar to:")
    print('\n'.join(diffs[e] for e in v))

答案 1 :(得分:0)


# NOTE(review): isolated snippets from the answer; `s` and `j` are presumably
# the loop variables of the question's nested loop - confirm. The
# parenthesised single-argument form prints the same text on Python 2 and 3.
print('"{}" is a new sentence'.format(s))

# NOTE(review): this appears to be the suggested replacement message for the
# line above - confirm against the original answer.
print('"{}" and "{}" are different sentences'.format(s, j))


答案 2 :(得分:0)

由于修改前后的字符串总是成对相邻出现(一个以“-”开头,另一个以“+”开头),因此可以按下面的方式处理(我相信它适用于所有情况):


def extract_modified_and_new(diffs, threshold=0.7):
    """Print which adjacent diff entries are edits of each other and which are new.

    Entries are consumed two at a time: ``(diffs[0], diffs[1])``,
    ``(diffs[2], diffs[3])``, and so on. A pair whose ``similar`` score is
    above *threshold* is reported as similar; otherwise both entries are
    reported as new. If the list has an odd number of entries, the last one
    has no partner and is reported as new.

    Args:
        diffs: Sequence of diff lines (must support slicing and ``len``).
        threshold: Similarity score a pair must exceed to count as the same
            sentence. Defaults to 0.7.

    Returns:
        None. All results are written to standard output.
    """
    for z1, z2 in zip(diffs[::2], diffs[1::2]):
        if similar(z1, z2) > threshold:
            print('{} is similar to {}'.format(z1, z2))
        else:
            # Without this branch a similar pair would also be reported as
            # new, and a dissimilar pair would produce no output at all.
            # The doubled spaces match what the original comma-separated
            # print statement emitted.
            print('{}  and  {} are new'.format(z1, z2))
    if len(diffs) % 2 != 0:
        # Unpaired trailing entry.
        print('{}  is new'.format(diffs[-1]))