当我尝试使用 ngrams 实现文本生成器时,我从 scipy 得到一个索引 -1 超出轴 0 的范围,大小为 0 错误。
Traceback (most recent call last):
File "C:\Users\hp\PycharmProjects\N-gram poems\trigram_model.py", line 125, in <module>
generate()
File "C:\Users\hp\PycharmProjects\N-gram poems\trigram_model.py", line 118, in generate
singleverse(int(c))
File "C:\Users\hp\PycharmProjects\N-gram poems\trigram_model.py", line 80, in singleverse
result = stats.multinomial.rvs(1, word_probabilities)
File "C:\Users\hp\PycharmProjects\N-gram poems\venv\lib\site-packages\scipy\stats\_multivariate.py", line 3242, in rvs
n, p, npcond = self._process_parameters(n, p)
File "C:\Users\hp\PycharmProjects\N-gram poems\venv\lib\site-packages\scipy\stats\_multivariate.py", line 3036, in _process_parameters
p[..., -1] = 1. - p[..., :-1].sum(axis=-1)
IndexError: index -1 is out of bounds for axis 0 with size 0
它在 for 循环中,每次发生错误时都会发生变化。有时它根本不会发生。它主要发生在接近程序结束时。
这是发生错误的代码:
def singleverse(num):
TrTrigrams = [((filtered_tokens[i], filtered_tokens[i + 1]), filtered_tokens[i + 2]) for i in
range(len(filtered_tokens) - 2)]
TrTrigramCFD = nltk.ConditionalFreqDist(TrTrigrams)
TrTrigramPbs = nltk.ConditionalProbDist(TrTrigramCFD, nltk.MLEProbDist)
rand = random.choice(random_choice_list)
start_word = ('<s>', rand)
data = []
for i in range(10):
probable_words = list(TrTrigramPbs[start_word].samples())
word_probabilities = [TrTrigramPbs[start_word].prob(word) for word in probable_words]
result = stats.multinomial.rvs(1, word_probabilities)
index_of_probable_word = list(result).index(1)
start_word = (start_word[1], (probable_words[index_of_probable_word]))
data.append(start_word[1])
line = []
for i in data:
if i != "<s>" and i != "</s>":
line.append(i)
poem_line = ' '.join([str(i) for i in line]).capitalize()
print(poem_line)
def generate():
"""Generates the final poem with user input of structure."""
print("What structure do you want?(e.g., 3 x 4, 2 x 4, 2 x 5): ")
while True:
try:
x, y, z = input().split()
except:
print("Enter the structure as shown above.")
continue
break
while True:
try:
for stanza in range(1):
for first_verse in range(1):
b = random.randint(7, 12)
firstverse(int(b))
for verse in range(int(z) - 1):
a = random.randint(7, 12)
singleverse(int(a))
print('\n')
for stanza in range(int(x) - 1):
for verse in range(int(z)):
c = random.randint(7, 12)
singleverse(int(c))
print('\n')
except KeyError:
print("This was not a valid seed word please try again.")
continue
break
generate()