Python - 将复杂的文本行读入字典

时间:2015-12-07 12:48:21

标签: python loops dictionary

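我有一个很大的术语列表,我想从文本文件中提取这些术语,并把它们归入以下类别之一:动物(Animal)、艺术(Art)、建筑物(Buildings)、车辆(Vehicle)、个人(Person)、人群(People)、食物(Food)、玻璃(Glass)、瓶子(Bottle)、标牌(Signage)、口号(Slogan)、DJ、派对(Party)。我目前在 tester2 文件中有以下几行文本: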

I like sorbet
I am a man wearing a shirt  
Pizza is my favorite meal
formula 1 racing is awesome
steak

这是我的代码:

# Maps each category name to the list of keywords that identify it.
# Some keywords are listed under more than one category (e.g. 'attractive'
# and 'smiling' under both Person and People, 'drink' under both Glass and
# Bottle), and a few keywords contain spaces (e.g. 'f 1', 'food pyramid').
keyword_dictionary = {
    'Animal' : ['animal', 'dog', 'cat'],
    'Art' : ['art', 'sculpture', 'fearns'],
    'Buildings' : ['building', 'architecture', 'gothic', 'skyscraper'],
    'Vehicle' : ['car','formula','f-1','f1','f 1','f one','f-one','moped','mo ped','mo-ped','scooter'],
    'Person' : ['person','dress','shirt','woman','man','attractive','adult','smiling','sleeveless','halter','spectacles','button','bodycon'],
    'People' : ['people','women','men','attractive','adults','smiling','group','two','three','four','five','six','seven','eight','nine','ten','2','3','4','5','6','7','8','9','10'],
    'Food' : ['food','plate','chicken','steak','pizza','pasta','meal','asian','beef','cake','candy','food pyramid','spaghetti','curry','lamb','sushi','meatballs','biscuit','apples','meat','mushroom','jelly', 'sorbet','nacho','burrito','taco','cheese'],
    'Glass' : ['glass','drink','container','glasses','cup'],
    'Bottle' : ['bottle','drink'],
    'Signage' : ['sign','martini','ad','advert','card','bottles','logo','mat','chalkboard','blackboard'],
    'Slogan' : ['Luck is overrated'],
    'DJ' : ['dj','disc','jockey','mixer','instrument','turntable'],
    'Party' : ['party']
 }

def matcher(keywords, searcher):
            """Print every category whose keyword list contains `searcher`.

            `keywords` maps a category name to a list of keywords. `searcher` is
            tested with `in` against each list, so it only matches when it is
            equal to one whole list element. Matches are printed; the function
            has no return statement.
            """
            # iteritems() and the print statement below are Python 2 syntax.
            for key, words in keywords.iteritems():
                if searcher in words:
                   print key


    # Pass every line of the input file to matcher().
    with open("tester2.txt") as termsdesk:
        for line in termsdesk:
            # The whole stripped line is passed as a single string, so a line
            # can only match when it is exactly equal to one keyword.
            # matcher() prints its result and returns nothing, so `term` is
            # always None.
            term = matcher(keyword_dictionary, line.strip())

我希望我的结果看起来像这样:

Food
Person
Food
Vehicle
Food

但我只是得到了这个:

Food

我猜想,我的代码不应该做精确匹配,而是需要做"类似于"(包含关键词即可)的匹配。我不确定如何实现这一点。也许可以用 "if" 语句来做?

2 个答案:

答案 0 :(得分:1)

反转映射更有意义,效率更高:

# Inverted mapping: each keyword maps directly to its category name, so a word
# is classified with a single dict lookup.
# A key can hold only one value, so keywords that are listed under several
# categories in the category -> keywords form keep a single category here
# (e.g. 'attractive' and 'smiling' -> 'Person', 'drink' -> 'Bottle').
keyword_dictionary = {'mo-ped': 'Vehicle', 'group': 'People', 'spaghetti': 'Food', 'f-1': 'Vehicle', '6': 'People',
                      '5': 'People', 'five': 'People', 'gothic': 'Buildings', 'seven': 'People', 'adults': 'People',
                      'burrito': 'Food', 'martini': 'Signage', 'f one': 'Vehicle', 'ten': 'People', 'instrument': 'DJ',
                      'dress': 'Person', 'drink': 'Bottle', 'mushroom': 'Food', 'cat': 'Animal', 'glass': 'Glass',
                      'animal': 'Animal', 'pizza': 'Food', 'formula': 'Vehicle', 'meal': 'Food', 'curry': 'Food',
                      '3': 'People', 'sign': 'Signage', 'f1': 'Vehicle', 'biscuit': 'Food', 'bottles': 'Signage',
                      'pasta': 'Food', 'card': 'Signage', 'sculpture': 'Art', '8': 'People', 'apples': 'Food', '9':
                          'People', 'nacho': 'Food', 'mat': 'Signage', 'bottle': 'Bottle', 'shirt': 'Person', 'halter':
                          'Person', 'jockey': 'DJ', 'six': 'People', 'beef': 'Food', 'party': 'Party', 'container': 'Glass',
                      'women': 'People', 'four': 'People', '10': 'People', 'attractive': 'Person', 'mo ped': 'Vehicle',
                      'blackboard': 'Signage', 'two': 'People', 'f-one': 'Vehicle', '4': 'People', 'car': 'Vehicle',
                      'cheese': 'Food', 'plate': 'Food', 'food': 'Food', 'smiling': 'Person', 'bodycon': 'Person',
                      'jelly': 'Food', 'button': 'Person', 'men': 'People', 'people': 'People', 'eight': 'People',
                      'sushi': 'Food', 'chalkboard': 'Signage', 'cake': 'Food', 'sorbet': 'Food', 'turntable': 'DJ',
                      '2': 'People', 'skyscraper': 'Buildings', 'nine': 'People', 'meatballs': 'Food', '7': 'People',
                      'art': 'Art', 'building': 'Buildings', 'sleeveless': 'Person', 'lamb': 'Food', 'disc': 'DJ',
                      'scooter': 'Vehicle', 'asian': 'Food', 'chicken': 'Food', 'food pyramid': 'Food', 'person':
                          'Person', 'ad': 'Signage', 'spectacles': 'Person', 'glasses': 'Glass', 'dog': 'Animal',
                      'logo': 'Signage', 'mixer': 'DJ', 'dj': 'DJ', 'architecture': 'Buildings', 'three': 'People',
                      'fearns': 'Art', 'taco': 'Food', 'f 1': 'Vehicle', 'steak': 'Food', 'cup': 'Glass', 'man':
                          'Person', 'woman': 'Person', 'advert': 'Signage', 'candy': 'Food', 'meat': 'Food',
                      'adult': 'Person', 'moped': 'Vehicle', 'Luck is overrated': 'Slogan'}

# Print the category of every recognised word, in the order the words appear.
with open("test.txt") as termsdesk:
    for line in termsdesk:
        for word in line.split():
            # Words that are not keywords are simply skipped.
            if word not in keyword_dictionary:
                continue
            print(keyword_dictionary[word])

输出:

Food  # sorbet
Person # man
Person # shirt
Food # meal
Vehicle # formula
Food # steak

如果你想沿用自己的思路,应该把值从列表改成集合(set),并且需要先遍历每个单词,再遍历每个键值对(k, v):

# Maps each category name to the set of keywords that identify it.
# Some keywords belong to more than one category (e.g. 'drink' is in both
# Glass and Bottle; 'attractive' and 'smiling' are in both Person and People).
keyword_dictionary = {
    'Animal' : {'animal', 'dog', 'cat'},
    'Art' : {'art', 'sculpture', 'fearns'},
    'Buildings' : {'building', 'architecture', 'gothic', 'skyscraper'},
    'Vehicle' : {'car','formula','f-1','f1','f 1','f one','f-one','moped','mo ped','mo-ped','scooter'},
    'Person' : {'person','dress','shirt','woman','man','attractive','adult','smiling','sleeveless','halter','spectacles','button','bodycon'},
    'People' : {'people','women','men','attractive','adults','smiling','group','two','three','four','five','six','seven','eight','nine','ten','2','3','4','5','6','7','8','9','10'},
    'Food' : {'food','plate','chicken','steak','pizza','pasta','meal','asian','beef','cake','candy','food pyramid','spaghetti','curry','lamb','sushi','meatballs','biscuit','apples','meat','mushroom','jelly', 'sorbet','nacho','burrito','taco','cheese'},
    'Glass' : {'glass','drink','container','glasses','cup'},
    'Bottle' : {'bottle','drink'},
    'Signage' : {'sign','martini','ad','advert','card','bottles','logo','mat','chalkboard','blackboard'},
    'Slogan' : {'Luck is overrated'},
    'DJ' : {'dj','disc','jockey','mixer','instrument','turntable'},
    'Party' : {'party'}
 }

def matcher(keywords, searcher):
    """Print the first matching category for each word in ``searcher``.

    ``keywords`` maps a category name to a collection of keywords, and
    ``searcher`` is an iterable of words. For every word the categories are
    tried in the mapping's iteration order and only the first category that
    contains the word is printed; a word found in no category prints nothing.
    Nothing is returned.
    """
    # Private sentinel so that any category key can be told apart from "no match".
    unmatched = object()
    for token in searcher:
        category = next(
            (name for name, vocab in keywords.items() if token in vocab),
            unmatched,
        )
        if category is not unmatched:
            print(category)


# Split each line of the terms file into words and hand the list to matcher().
with open("test.txt") as termsdesk:
    for words in map(str.split, termsdesk):
        matcher(keyword_dictionary, words)

输出:

Food
Person
Person
Food
Vehicle
Food

您的函数没有 return 语句,不会返回任何内容,因此 term = matcher(...) 只会把 term 设为 None。

下面比较三种做法:您原来的逻辑、用 set 作为值、以及反转映射:

您的做法需要遍历每一行和每个单词,然后遍历 dict 中的每个键值对;又因为值是列表,在每个值列表中查找单词还要再做一次 O(n) 的线性扫描。

用 set 作为值时,其余部分都与您自己的逻辑相同,只是把最后那次 O(n) 的列表查找换成了 O(1) 的集合查找。

第一段代码(反转映射)只需遍历每一行和每个单词,用常数时间检查该单词是否在 dict 中,如果在就直接取出对应的值,因此效率更高。

如果您想把同一类别在一行中的任意多次匹配只算作一次,可以检查每个值的单词集合与该行的单词是否不相交(not disjoint):

# Maps each category name to the set of keywords that identify it; the sets
# are what the isdisjoint() test in matcher() is called on.
# Some keywords belong to more than one category (e.g. 'drink' is in both
# Glass and Bottle; 'attractive' and 'smiling' are in both Person and People).
keyword_dictionary = {
    'Animal' : {'animal', 'dog', 'cat'},
    'Art' : {'art', 'sculpture', 'fearns'},
    'Buildings' : {'building', 'architecture', 'gothic', 'skyscraper'},
    'Vehicle' : {'car','formula','f-1','f1','f 1','f one','f-one','moped','mo ped','mo-ped','scooter'},
    'Person' : {'person','dress','shirt','woman','man','attractive','adult','smiling','sleeveless','halter','spectacles','button','bodycon'},
    'People' : {'people','women','men','attractive','adults','smiling','group','two','three','four','five','six','seven','eight','nine','ten','2','3','4','5','6','7','8','9','10'},
    'Food' : {'food','plate','chicken','steak','pizza','pasta','meal','asian','beef','cake','candy','food pyramid','spaghetti','curry','lamb','sushi','meatballs','biscuit','apples','meat','mushroom','jelly', 'sorbet','nacho','burrito','taco','cheese'},
    'Glass' : {'glass','drink','container','glasses','cup'},
    'Bottle' : {'bottle','drink'},
    'Signage' : {'sign','martini','ad','advert','card','bottles','logo','mat','chalkboard','blackboard'},
    'Slogan' : {'Luck is overrated'},
    'DJ' : {'dj','disc','jockey','mixer','instrument','turntable'},
    'Party' : {'party'}
 }

def matcher(keywords, searcher):
    """Print each category that shares at least one word with ``searcher``.

    ``keywords`` maps a category name to a set of keywords, and ``searcher``
    is an iterable of words. Every category is checked once, in the mapping's
    iteration order, so a category is printed at most once per call however
    many of its keywords occur. Nothing is returned.
    """
    for category in keywords:
        # No word in common with this category: move on to the next one.
        if keywords[category].isdisjoint(searcher):
            continue
        print(category)



# Split each line of the terms file into words and hand the list to matcher().
with open("test.txt") as termsdesk:
    for words in map(str.split, termsdesk):
        matcher(keyword_dictionary, words)

输出:

Food
Person
Food
Vehicle
Food

如果每行只需要一个匹配项,要把同样的逻辑应用到反转映射的做法上,只需在找到匹配后加一个 break:

# Print at most one category per line: that of the first recognised word.
with open("test.txt") as termsdesk:
    for line in termsdesk:
        first_hit = next(
            (token for token in line.split() if token in keyword_dictionary),
            None,
        )
        # str.split() never yields None, so None safely means "no keyword found".
        if first_hit is not None:
            print(keyword_dictionary[first_hit])

输出:

Food
Person
Food
Vehicle
Food

答案 1 :(得分:-1)

您是否尝试过以下操作?

  # NOTE(review): this looks like a fragment of a Rails environment file; the
  # enclosing configure block is not visible here.
  config.assets.raise_runtime_errors = true
  # Action Mailer: host/port used for generated URLs, delivery via SMTP, and
  # deliveries enabled.
  config.action_mailer.default_url_options = { host: 'localhost', port: '3000' }
  config.action_mailer.delivery_method = :smtp
  config.action_mailer.perform_deliveries = true 
  # SMTP connection settings; the 'xxx' values are presumably placeholders.
  config.action_mailer.smtp_settings = {
      :enable_starttls_auto => true,
      :address => "webmail.xxx.com",
      :port => 25,
      # :domain => "xxx.com",
      :authentication => :login,
      :user_name => 'xxx',
      :password => 'xxx',
      # NOTE(review): 'none' presumably turns off TLS certificate verification;
      # confirm this is intended before using it outside development.
      :openssl_verify_mode  => 'none'
  }