Hello开发人员,
我正在尝试使用马尔科夫链建立一个聊天机器人,我遇到了问题。我在下面的代码中,我制作了一个从电影脚本中学习的随机句子生成器。问题是,如何让这个句子生成器不是随机的并响应用户的输入?我该怎么做呢?是否与输入/输出训练有关:
In: how are you today
Out: I'm good thanks how are you
这是我的代码。大多数函数用于将数据放入csv文件中,所以不要介意。
from collections import defaultdict
import random, itertools, nltk, pandas, csv, string, re, os, time
class Chatbot:
def __init__(self, name, txt_transcript_filedir, character=None):
self.name = name
self.txt_transcript_filedir = txt_transcript_filedir
self.character = character
print("Hello my name is " + name + ".")
def parse_transcript(self):
parsed_lines = []
self.csv_transcript_filedir = self.txt_transcript_filedir.replace('.txt', '.csv')
with open(self.txt_transcript_filedir, encoding='utf-8') as txt_file:
lines = txt_file.readlines()
for line in lines:
line = line.replace(', ', ' ')
line = re.sub(r'\[.*?\]', '', line)
if ': ' in line:
line = line.replace(': ', ',')
parsed_lines.append(line)
with open(self.csv_transcript_filedir, 'w', encoding='utf-8') as csv_file:
writer = csv.writer(csv_file)
writer.writerow(['person', 'text'])
for line in parsed_lines:
csv_file.write(line)
def tokenize_transcript(self):
csv_file = pandas.read_csv(self.csv_transcript_filedir)
textss = []
final_sents = []
if self.character == None:
texts = csv_file['text']
for text in texts:
sent = nltk.sent_tokenize(text)
textss.append(sent)
else:
char_sets = csv_file[csv_file['person'] == self.character]
texts = char_sets['text']
for text in texts:
sent = nltk.sent_tokenize(text)
textss.append(sent)
for text in textss:
for sent in text:
if sent[0] == ' ':
sent = sent[1:]
final_sent = [w for w in sent if w not in string.punctuation]
final_sent = ''.join(final_sent)
final_sents.append(final_sent)
self.training_data = [sent for sent in final_sents]
def learn(self):
self.parse_transcript()
self.tokenize_transcript()
self.make_word_dict(self.training_data)
def make_word_dict(self, text):
word_dict = defaultdict(list)
for sent in text:
words = nltk.word_tokenize(sent)
for i in range(len(words) - 1):
if i+2 >= (len(words)):
word_dict[(words[i], words[i+1])].append('<end>')
else:
word_dict[(words[i], words[i+1])].append(words[i+2])
self.vocabulary = word_dict
def generate_text(self, num):
for i in range(0, num):
start_key = random.choice(list(self.vocabulary.keys()))
text = []
text.append(start_key[0])
text.append(start_key[1])
for i in itertools.count():
key = (text[i], text[i+1])
if key[1] == '<end>':
break
else:
text.append(random.choice(self.vocabulary[text[i], text[i+1]]))
text = ' '.join(text)
if text.endswith('<end>'):
text = text[:-6]
text = text + '.'
return text
def say(self, text):
os.system('say -v Oliver ' + text)
def main():
num = 100
bot = Chatbot("J.A.R.V.I.S", "avengers_age_of_ultron.txt", "JARVIS")
bot.learn()
for i in range(num):
text = bot.generate_text(1)
print(text)
if __name__ == '__main__':
main()