So I have this code for training and classifying intents, and it works fine. But after converting the code into a class, I now get the following error:

ValueError: Cannot feed value of shape (1, 568) for Tensor 'InputData/X:0', which has shape '(?, 1180)'

Here is my code:
from __future__ import unicode_literals
import psycopg2
import nltk
from nltk.stem.lancaster import LancasterStemmer
import os
import json
import datetime
import csv
import spacy
import re
import ast
from pathlib import Path
import argparse
import unidecode
import numpy as np
import time
import sys
import tflearn
import tensorflow as tf
import random
nlp = spacy.load('en')
class Intent_Processing(object):
    def __init__(self):
        self.stemmer = LancasterStemmer()
        THIS_FOLDER = os.path.dirname(os.path.abspath(__file__))
        my_file = os.path.join(THIS_FOLDER, 'data.csv')
        with open(my_file, 'r') as simple:
            sometext = csv.reader(simple)
            training_data = []
            for row in sometext:
                training_data.append({"class": row[1], "sentence": row[0]})
        words = []
        self.classes = []
        documents = []
        ignore_words = ['?']
        # loop through each sentence in our training data
        for pattern in training_data:
            # tokenize each word in the sentence
            w = nltk.word_tokenize(pattern['sentence'])
            # add to our words list
            words.extend(w)
            # add to documents in our corpus
            documents.append((w, pattern['class']))
            # add to our classes list
            if pattern['class'] not in self.classes:
                self.classes.append(pattern['class'])
        # stem and lower each word and remove duplicates
        words = [self.stemmer.stem(w.lower()) for w in words if w not in ignore_words]
        self.words = list(set(words))
        # remove duplicates
        self.classes = list(set(self.classes))
        print(len(documents), " documents")
        print(len(self.classes), " classes", self.classes)
        # create our training data
        self.training = []
        self.output = []
        # create an empty array for our output
        output_empty = [0] * len(self.classes)
        # training set, bag of words for each sentence
        for doc in documents:
            # initialize our bag of words
            bag = []
            # list of tokenized words for the pattern
            pattern_words = doc[0]
            # stem each word
            pattern_words = [self.stemmer.stem(word.lower()) for word in pattern_words]
            # create our bag of words array
            for w in words:
                bag.append(1) if w in pattern_words else bag.append(0)
            # output is a '0' for each tag and '1' for the current tag
            output_row = list(output_empty)
            output_row[self.classes.index(doc[1])] = 1
            self.training.append([bag, output_row])
        # shuffle our features and turn into np.array
        random.shuffle(self.training)
        self.training = np.array(self.training)
        # create train and test lists
        self.train_x = list(self.training[:, 0])
        self.train_y = list(self.training[:, 1])
        # reset underlying graph data
        tf.reset_default_graph()
        # Build neural network
        net = tflearn.input_data(shape=[None, len(self.train_x[0])])
        print(net)
        net = tflearn.fully_connected(net, 8)
        net = tflearn.fully_connected(net, 8)
        net = tflearn.fully_connected(net, len(self.train_y[0]), activation='softmax')
        net = tflearn.regression(net)
        # Define model and set up tensorboard
        self.model = tflearn.DNN(net, tensorboard_dir='tflearn_logs')
    def clean_up_sentence(self, sentence):
        # tokenize the pattern
        sentence_words = nltk.word_tokenize(sentence)
        # stem each word
        sentence_words = [self.stemmer.stem(word.lower()) for word in sentence_words]
        return sentence_words

    # return a bag of words array: 0 or 1 for each word in the bag that exists in the sentence
    def bow(self, sentence, words, show_details=False):
        # tokenize the pattern
        sentence_words = self.clean_up_sentence(sentence)
        # bag of words
        self.bag = [0] * len(self.words)
        for s in sentence_words:
            for i, w in enumerate(self.words):
                if w == s:
                    self.bag[i] = 1
                    if show_details:
                        print("found in bag: %s" % w)
        return np.array(self.bag)
    def train(self):
        # Start training (apply gradient descent algorithm)
        self.model.fit(self.train_x, self.train_y, n_epoch=10, batch_size=8, show_metric=True)
        self.model.save('trained_model/model.tflearn')

    def Start_Training(self):
        start_time = time.time()
        self.train()
        elapsed_time = time.time() - start_time
        print("processing time:", elapsed_time, "seconds")
    def classify(self, sentence, show_details=False):
        # probability threshold
        ERROR_THRESHOLD = 0.25
        # generate probabilities from the model
        results = self.model.predict([self.bow(sentence, self.words)])[0]
        # filter out predictions below the threshold
        results = [[i, r] for i, r in enumerate(results) if r > ERROR_THRESHOLD]
        # sort by strength of probability
        results.sort(key=lambda x: x[1], reverse=True)
        return_list = []
        for r in results:
            return_list.append((self.classes[r[0]], r[1]))
        # return tuples of intent and probability
        print(return_list)
        return return_list
data.csv is in the following format:

hi,greeting
bonjour,greeting
aloha,greeting
adios,greeting

Thanks
Answer 0 (score: 0):

I think the error is in this line:

net = tflearn.input_data(shape=[None, len(self.train_x[0])])

The second shape argument should be 568, but it is 1180. Perhaps you meant to write:

net = tflearn.input_data(shape=[None, len(self.train_x[1])])

Otherwise, if your model allows it, you can hard-code the length:

net = tflearn.input_data(shape=[None, 568])
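That said, hard-coding only hides the mismatch. Looking at the question's code, the gap between 1180 and 568 lines up with a duplicated versus deduplicated vocabulary: in __init__ the training bags are built with "for w in words:" over the stemmed list before deduplication, while bow() builds its vector over self.words = list(set(words)). Building both vectors from the same deduplicated vocabulary should make the shapes agree. A minimal sketch of that idea (my reading of the intended fix, not code from the post; bag_of_words is a hypothetical helper):

from nltk.stem.lancaster import LancasterStemmer

stemmer = LancasterStemmer()

def bag_of_words(sentence_tokens, vocab):
    # 1 for every vocabulary word that appears in the (stemmed) sentence, else 0
    stems = {stemmer.stem(t.lower()) for t in sentence_tokens}
    return [1 if w in stems else 0 for w in vocab]

# Build the vocabulary ONCE, deduplicated, and reuse it everywhere.
all_tokens = ["hi", "there", "hi", "bonjour", "aloha"]
vocab = sorted({stemmer.stem(t.lower()) for t in all_tokens})

train_vec = bag_of_words(["hi", "there"], vocab)   # sizes the input layer
query_vec = bag_of_words(["aloha"], vocab)         # fed at prediction time
assert len(train_vec) == len(query_vec) == len(vocab)

With both vectors derived from the same vocab, tflearn.input_data(shape=[None, len(vocab)]) and the vectors passed to model.predict can no longer disagree on length.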