我想找到与子字符串(Substring)匹配的确切字符串。
import string
# Patterns to look for.  NOTE: a trailing "*" is NOT treated as a wildcard
# here; it is compared literally, so "party*" can never match "party".
a = ['accept', 'freed*', 'partie*', 'accepta*', 'freeing', 'party*']
sent = "i am accepting your invitation for the party"
token = sent.split(" ")

# Collect (word, pattern) pairs where the word begins with the pattern.
# Patterns are the outer loop so the reporting order is pattern-major.
matches = [(word, pattern) for pattern in a for word in token
           if word.startswith(pattern)]

for word, pattern in matches:
    # A single pre-formatted argument prints identically on Python 2 and 3.
    print("found {} {} {}".format(word, pattern, word.find(pattern)))
输出:
> found accepting accept 0
期望的输出:
> found accepting accept 0
> found part party* 0
我已经尝试过很多方法,例如使用 re.search()、index() 等,但都没有得到所需的输出。如果有人知道如何解决,请帮帮我。
解决方案:
import operator,csv,re
from collections import defaultdict
def post_features(inpt_word_first_char,
                  input_file="/home/user/Thesis/BOOKS/Features/Posemo.csv"):
    """Return the lexicon entries that start with ``inpt_word_first_char``.

    The lexicon file is read as whitespace-separated tokens. Entries are
    returned unchanged, so a trailing ``*`` marker is kept.

    Args:
        inpt_word_first_char: Prefix every returned entry must start with.
        input_file: Path of the lexicon file. Defaults to the location that
            was previously hard-coded in the function body.

    Returns:
        A list of the matching entries, in file order.

    Raises:
        IOError: If ``input_file`` cannot be opened.
    """
    # The context manager closes the handle even if reading fails.
    with open(input_file, "r") as fin:
        entries = fin.read().split()
    return [word for word in entries if word.startswith(inpt_word_first_char)]
# Maps each input word to its candidate [lexicon_entry, score] pairs.
matches = defaultdict(list)
input_line = "I am accepting your invitation for the party"
input_line = input_line.lower()
# split() without an argument never yields empty tokens, so taking the
# first character below cannot fail on repeated spaces.
input_words = input_line.split()

for input_word in input_words:
    # Only lexicon entries sharing the word's first character are fetched.
    for candidate in post_features(input_word[0]):
        match = candidate.rstrip("*")  # drop the trailing "*" marker
        if match not in input_word:
            continue
        # Skip stems of one or two characters unless they equal the word.
        if len(match) > 2 or len(match) == len(input_word):
            # Score = number of positions at which both strings agree.
            match_perc = sum(map(operator.eq, input_word, match))
            matches[input_word].append([match, match_perc])

for word, scored_matches in matches.items():
    # Report the candidate with the highest score; on a tie the first
    # candidate encountered wins.
    best_match = max(scored_matches, key=operator.itemgetter(1))[0]
    print('Key: {} - Matched word : {}'.format(word, best_match))
答案 0 :(得分:1)
您可以使用简单的比较
a = "namit"
b = "amit"
# ``in`` between two strings is a substring test.
if b in a:
    print("found")
因此,您不必拆分字符串 sent,只需
# Run the loop over the patterns against the whole sentence; there is no
# need to split the sentence into words first.
for x in a:
    # Drop the trailing "*" marker, otherwise "party*" is searched for
    # literally and is never found.
    if x.rstrip("*") in sent:
        print("found {}".format(x))
答案 1 :(得分:1)
这是另一种只过滤匹配的键的方法:
import re
needles = ['accept', 'freed', 'partie', 'accepta', 'freeing', 'party']
haystack = "I am accepting your invitation for the party."
# \w+ extracts the words and drops punctuation such as the final ".".
words = re.findall(r'(\w+)', haystack)

# (word, key) pairs for every key that occurs inside a word.
results = [(word, key) for key in needles for word in words if key in word]

# Or, the long way
results = []
for key in needles:
    for word in words:
        if key in word:
            results.append((word, key))

for word, key in results:
    print('Found {} {}'.format(word, key))
如果您想知道每个关键字匹配的次数,则需要采用不同的方法:
import re
from collections import defaultdict
# Maps each key to the list of words that contain it.
matches = defaultdict(list)
needles = ['accept', 'freed', 'partie', 'accepta', 'freeing', 'party']
haystack = "I am accepting your invitation for the party. No, really, I accept!"
words = re.findall(r'(\w+)', haystack)

for key in needles:
    for word in words:
        if key in word:
            matches[key].append(word)

# items() works on both Python 2 and 3, unlike iteritems().
for key, found in matches.items():
    print('Key: {} - Total Matches: {}'.format(key, len(found)))
    for match in found:
        print('\t{}'.format(match))
以下是一个例子:
>>> needles
['accept', 'freed', 'partie', 'accepta', 'freeing', 'party', 'problem']
>>> haystack
'My party had two problems. One problem, and another problem. Too many people accepted the invitation to this party!'
>>> matches = defaultdict(list)
>>> words = re.findall(r'(\w+)', haystack)
>>> for key in needles:
...     for word in words:
...         if key in word:
...             matches[key].append(word)
...
>>> for key, found in matches.iteritems():
...     print('Key: {} - Total Matches: {}'.format(key, len(found)))
...     for match in found:
...         print('\t{}'.format(match))
...
Key: party - Total Matches: 2
	party
	party
Key: problem - Total Matches: 3
	problems
	problem
	problem
Key: accept - Total Matches: 1
	accepted