我正在开发一个应用程序:它读取文本文件,并使用 TensorFlow 对其内容进行总结、生成摘要;但程序在处理该文件时抛出"TypeError: 字符串索引必须为整数"(string indices must be integers)错误。我该如何解决这个问题?TensorFlow 1.10 与 Python 3.6 之间是否存在兼容性问题?
wordtovecsummeriser.py 文件读取 input_parsex.txt 文件,并对其中的内容进行总结。
from __future__ import print_function, division
import sys
import os
import time
import numpy as np
import tensorflow as tf
from scipy import spatial
from Summariser import Summariser
import useful_functions
from useful_functions import printlist, wait, PAPER_SOURCE, weight_variable, bias_variable
from AbstractNetPreprocessor import AbstractNetPreprocessor
#from Models.Word2VecClassifier.word2vec_mlp import graph, SAVE_PATH, WORD_DIMENSIONS
from word2vec_linearclassifier import graph
from operator import itemgetter
from sklearn import linear_model
from Evaluation.rouge import Rouge
# Name of the trained model; also used as the directory name inside SAVE_PATH.
MODEL_NAME = "Word2VecLinear"

# Location handed to useful_functions.write_summary for the generated summaries.
SUMMARY_WRITE_LOC = "Data/Generated_Data/Generated_Summaries/Word2VecSummariser/"

# Number of ".txt" entries directly inside PAPER_SOURCE (evaluated at import time).
NUMBER_OF_PAPERS = sum(1 for name in os.listdir(PAPER_SOURCE) if name.endswith(".txt"))

# True division (see the __future__ import), so this is a float.
LOADING_SECTION_SIZE = NUMBER_OF_PAPERS / 30

# Checkpoint restored by Word2VecSummariser.summarise.
SAVE_PATH = "Trained_Models/{}/word2vec_linearclass.ckpt".format(MODEL_NAME)

# The dimensions of the word vectors
WORD_DIMENSIONS = 100
NUM_CLASSES = 2

# Module-level list that Word2VecSummariser.summarise fills with (sentence, position)
# tuples and that sum_print returns.
summary2 = []
# ===============================================
class Word2VecSummariser(Summariser):
    """
    Summariser that scores every sentence of a paper with the classifier graph built by
    ``word2vec_linearclassifier.graph`` and keeps the highest-scoring sentences as the summary.
    """

    # Extension removed from the input filename when naming the written summary.
    _PAPER_EXTENSION = ".txt"

    def __init__(self):
        """
        Creates the preprocessor and the classification graph used by ``summarise``.

        The default TensorFlow graph is reset before the classifier graph is built, so any ops that
        were previously added to the default graph are discarded.
        """
        self.summary_length = 10
        self.r = Rouge()
        self.preprocessor = AbstractNetPreprocessor()

        tf.reset_default_graph()
        # NOTE(review): graph() is assumed to return a mapping keyed by tensor name. If it returns
        # something else (e.g. a string), the two lookups below will fail -- confirm against
        # word2vec_linearclassifier.
        self.computation_graph = graph()
        self.sentences_input = self.computation_graph["sentence_input"]
        self.prediction_probs = self.computation_graph["prediction_probs"]

    def summarise(self, filename):
        """
        Generates a summary of the paper, writes it with ``useful_functions.write_summary`` and
        prints it.

        The module-level ``summary2`` list is replaced in place with the summary of this call, so it
        always holds the most recently generated summary rather than accumulating across calls.

        :param filename: the name of the file to summarise.
        :return: the summary as a list of ``(sentence, position)`` tuples in document order, holding
                 at most ``self.summary_length`` sentences.
        """
        paper = self.prepare_paper(filename)

        # ========> Code from here on is summariser specific <========

        # One row per sentence; item[1] is used as the sentence vector.
        # NOTE(review): each item of `paper` is assumed to be indexable as (tokens, vector) with a
        # vector of length WORD_DIMENSIONS -- confirm against AbstractNetPreprocessor.
        num_sents = len(paper)
        sentence_matrix = np.zeros((num_sents, WORD_DIMENSIONS), dtype=np.float32)
        for i, item in enumerate(paper):
            sentence_matrix[i, :] = item[1]

        with tf.Session() as sess:
            # Initialise first so that any variable missing from the checkpoint still has a value;
            # the restore then overwrites everything the checkpoint contains.
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            saver.restore(sess, SAVE_PATH)
            probs = sess.run(self.prediction_probs, feed_dict={self.sentences_input: sentence_matrix})

        # Column 1 of the prediction is used as the score (presumably the probability of the
        # "summary sentence" class -- TODO confirm against the training labels).
        scored = [(item[0], probs[i][1], i) for i, item in enumerate(paper)]

        # Highest score first. Sorting ascending and then reversing (rather than reverse=True) keeps
        # the tie ordering of the original implementation.
        ranked = sorted(scored, key=itemgetter(1))
        ranked.reverse()

        # Keep the best sentences, then put them back into document order.
        chosen = sorted(ranked[:self.summary_length], key=itemgetter(-1))
        paper_summary = [(" ".join(sentence), pos) for sentence, _, pos in chosen]

        # Slice assignment mutates the shared module-level list without rebinding it, so any existing
        # reference to it (e.g. through sum_print) sees the new content.
        summary2[:] = paper_summary

        useful_functions.write_summary(SUMMARY_WRITE_LOC, summary2, self._summary_name(filename))
        print(summary2)
        return paper_summary

    @classmethod
    def _summary_name(cls, filename):
        """
        Returns ``filename`` without a trailing ".txt" extension.

        ``str.strip(".txt")`` must not be used for this: it removes any of the characters ".", "t"
        and "x" from both ends of the string (e.g. "test.txt" would become "es").
        """
        if filename.endswith(cls._PAPER_EXTENSION):
            return filename[:-len(cls._PAPER_EXTENSION)]
        return filename

    def load_model(self):
        """
        Loads the classification model. Intentionally a no-op here: the checkpoint is restored
        inside ``summarise``.
        :return: None
        """
        pass

    def prepare_paper(self, filename):
        """
        Prepares the paper for summarisation by delegating to the preprocessor.
        :param filename: the name of the file to prepare.
        :return: The paper in a form suitable for summarisation
        """
        return self.preprocessor.prepare_for_summarisation(filename)
def sum_print():
    """
    Returns the module-level ``summary2`` list (the same list object, not a copy).
    """
    collected = summary2
    return collected
if __name__ == "__main__":
    # When run as a script, summarise a fixed input file.
    summariser = Word2VecSummariser()
    summariser.summarise("Input_PapersX.txt")