我正在制作一个不和谐的机器人,每隔几秒就会将随机生成的句子喷射到聊天中。我试图使用nltk模块使句子更加连贯,但我抓住了一个错误并且无法弄明白。
import asyncio
import random
import discord.ext.commands
import markovify
import nltk
import re
class POSifiedText(markovify.Text):
def word_split(self, sentence):
words = re.split(self.word_split_pattern, sentence)
words = ["::".join(tag) for tag in nltk.pos_tag(words) ]
return words
def word_join(self, words):
sentence = " ".join(word.split("::")[0] for word in words)
return sentence
with open("/root/sample.txt") as f:
text = f.read()
text_model = POSifiedText(text, state_size=1)
client = discord.Client()
async def background_loop():
await client.wait_until_ready()
while not client.is_closed:
channel = client.get_channel('channelid')
messages = [(text_model.make_sentence(tries=8, max_overlap_total=10,default_max_overlap_ratio=0.5))]
await client.send_message(channel, random.choice(messages))
await asyncio.sleep(10)
client.loop.create_task(background_loop())
client.run("token")
来自输出日志的错误:
Traceback (most recent call last):
File "/root/untitled/Loop.py", line 21, in <module>
text_model = POSifiedText(text, state_size=1)
File "/usr/local/lib/python3.5/dist-packages/markovify/text.py", line 24, in __init__
runs = list(self.generate_corpus(input_text))
File "/root/untitled/Loop.py", line 11, in word_split
words = [": :".join(tag) for tag in nltk.pos_tag(words) ]
File "/usr/local/lib/python3.5/dist-packages/nltk/tag/__init__.py", line 129, in pos_tag
return _pos_tag(tokens, tagset, tagger)
File "/usr/local/lib/python3.5/dist-packages/nltk/tag/__init__.py", line 97, in _pos_tag
tagged_tokens = tagger.tag(tokens)
File "/usr/local/lib/python3.5/dist-packages/nltk/tag/perceptron.py", line 152, in tag
context = self.START + [self.normalize(w) for w in tokens] + self.END
File "/usr/local/lib/python3.5/dist-packages/nltk/tag/perceptron.py", line 152, in <listcomp>
context = self.START + [self.normalize(w) for w in tokens] + self.END
File "/usr/local/lib/python3.5/dist-packages/nltk/tag/perceptron.py", line 227, in normalize
elif word[0].isdigit():
IndexError: string index out of range
答案 0 :(得分:1)
word[0].isdigit():
抛出错误的事实意味着word
是一个空字符串。最可能的原因是你的正则表达式分裂有时会产生空字符串。
解决方案是在
之后words = re.split(self.word_split_pattern, sentence)
放行
words = [w for w in words if len(w) > 0]