ValueError:无法为Tensor'InputData / X:0'提供形状值(1,568),其形状为'(?,1180)'

时间:2018-03-09 12:04:24

标签: python numpy tensorflow nltk tflearn

所以我有这个代码训练和分类意图工作正常。但我已将此代码转换为类,现在我得到以下错误:

ValueError:无法为Tensor'InputData / X:0'提供形状值(1,568),其形状为'(?,1180)'

这是我的代码:

from __future__ import unicode_literals
from __future__ import unicode_literals
import psycopg2
import nltk
from nltk.stem.lancaster import LancasterStemmer
import os
import json
import datetime
import csv
import spacy
import json
import re
import os
import ast
from pathlib import Path

import json
import argparse
import unidecode
import numpy as np
import time
import sys
import tflearn
import tensorflow as tf
import random


nlp = spacy.load('en')


class Intent_Processing(object):
    def __init__(self):

        self.stemmer = LancasterStemmer()


        simpletext=[]
        THIS_FOLDER = os.path.dirname(os.path.abspath(__file__))
        my_file = 'data.csv'
        with open(my_file,'r') as simple:

            sometext = csv.reader(simple)

            training_data = []
            for row in sometext:
                training_data.append({"class":row[1], "sentence":row[0]})


        words=[]
        self.classes=[]
        documents=[]
        ignore_words=['?']

        #loop through each sentence in our training data
        for pattern in training_data:
            #tokenize in each word in the sentence
            w=nltk.word_tokenize(pattern['sentence'])
            #add to our words list
            words.extend(w)
            #add to documents in our corpus
            documents.append((w,pattern['class']))
            #add to our classes list
            if pattern['class'] not in self.classes:
                self.classes.append(pattern['class'])
            # print(self.classes)   
        #stem and lower each word and remove duplicate
        words=[self.stemmer.stem(w.lower()) for w in words if w not in ignore_words]
        self.words=list(set(words))
        #remove duplicates
        self.classes=list(set(self.classes))
        # print(self.classes)
        print(len(documents)," documents")
        print(len(self.classes), " classes", self.classes)
        # print(len(self.words)," unique stemmed words", words)


        # create our training data
        self.training = []
        self.output = []
        # create an empty array for our output
        output_empty = [0] * len(self.classes)

        # training set, bag of words for each sentence
        for doc in documents:
            # initialize our bag of words
            bag = []
            # list of tokenized words for the pattern
            pattern_words = doc[0]
            # stem each word
            pattern_words = [self.stemmer.stem(word.lower()) for word in pattern_words]
            # create our bag of words array
            for w in words:
                bag.append(1) if w in pattern_words else bag.append(0)
            # print(bag)
            # output is a '0' for each tag and '1' for current tag
            output_row = list(output_empty)
            output_row[self.classes.index(doc[1])] = 1
            # print(output_row)
            self.training.append([bag, output_row])



        # shuffle our features and turn into np.array
        random.shuffle(self.training)
        self.training = np.array(self.training)

        # create train and test lists
        self.train_x = list(self.training[:,0])
        self.train_y = list(self.training[:,1])

        # reset underlying graph data
        tf.reset_default_graph()
        # Build neural network
        net = tflearn.input_data(shape=[None, len(self.train_x[0])])
        print(net)

        net = tflearn.fully_connected(net, 8)
        net = tflearn.fully_connected(net, 8)
        net = tflearn.fully_connected(net, len(self.train_y[0]), activation='softmax')
        net = tflearn.regression(net)

        # # Define model and setup tensorboard
        self.model = tflearn.DNN(net, tensorboard_dir='tflearn_logs')

    def clean_up_sentence(self,sentence):
        #tokenize the pattern
        sentence_words = nltk.word_tokenize(sentence)
        #stem each word
        sentence_words=[self.stemmer.stem(word.lower()) for word in sentence_words]
        return sentence_words

    #return bag of words array: 0 or 1 for each word in the bag that exists in the sentence
    def bow(self,sentence, words, show_details=False):
        #tokenize the pattern
        sentence_words=self.clean_up_sentence(sentence)
        #bag of words
        self.bag=[0]*len(self.words)
        for s in sentence_words:
            for i,w in enumerate(self.words):
                if w == s: 
                    self.bag[i] = 1
                    if show_details:
                        print ("found in bag: %s" % w)
        return(np.array(self.bag))



    def train(self):

        # Start training (apply gradient descent algorithm)
        self.model.fit(self.train_x, self.train_y, n_epoch=10, batch_size=8, show_metric=True)
        self.model.save('trained_model/model.tflearn')


    def Start_Training(self):

        start_time = time.time()

        self.train()
        elapsed_time = time.time() - start_time
        print ("processing time:", elapsed_time, "seconds") 


    def classify(self,sentence, show_details=False):
        # probability threshold
        ERROR_THRESHOLD = 0.25

        # generate probabilities from the model
        results = self.model.predict([self.bow(sentence, self.words)])[0]
        # filter out predictions below a threshold
        results = [[i,r] for i,r in enumerate(results) if r>ERROR_THRESHOLD]
        # sort by strength of probability
        results.sort(key=lambda x: x[1], reverse=True)
        return_list = []
        for r in results:
            return_list.append((classes[r[0]], r[1]))
        # return tuple of intent and probability
        print(return_list)
        return return_list

data.csv采用以下格式:

侨,问候

的Bonjour,问候

ALOHA,问候

ADIOS,问候

由于

1 个答案:

答案 0 :(得分:0)

我认为错误是这一行

net = tflearn.input_data(shape=[None, len(self.train_x[0])])

第二个形状参数应该是568但是1180.也许你打算写:

net = tflearn.input_data(shape=[None, len(self.train_x[1])])

否则,如果您的模型允许,您可以将长度硬编码为

net = tflearn.input_data(shape=[None, 568)