skip-thought Theano模型 - 未声明的列表

时间:2016-02-05 15:46:47

标签: python list definition theano

我已经定义了一个列表X来获取文件的内容并提供给Theano中skip-thought模型的预处理器。错误显示X is not defined

https://github.com/ryankiros/skip-thoughts/blob/master/skipthoughts.py

def encode(model, X, use_norm=True, verbose=True, batch_size=128, use_eos=False):
    """
    Encode sentences in the list X. Each entry will return a vector
    """
    # Clean and tokenize the raw sentences before feature extraction.
    X = preprocess(X)

    # Flag every word present in the uni-skip vocabulary with 1;
    # missing words fall back to the defaultdict value 0.
    d = defaultdict(lambda: 0)
    for word in model['utable'].keys():
        d[word] = 1

    # Pre-allocate the uni-skip and bi-skip feature matrices
    # (bi-skip is bidirectional, hence twice the dimension).
    n_sentences = len(X)
    ufeatures = numpy.zeros((n_sentences, model['uoptions']['dim']), dtype='float32')
    bfeatures = numpy.zeros((n_sentences, 2 * model['boptions']['dim']), dtype='float32')

    # Bucket sentence indices by token count so same-length sentences
    # can later be processed in a single batch.
    ds = defaultdict(list)
    captions = [sentence.split() for sentence in X]
    for idx, tokens in enumerate(captions):
        ds[len(tokens)].append(idx)
    # NOTE(review): the upstream skipthoughts.py function continues past
    # this point (batched RNN encoding and a return value); this paste
    # appears truncated — confirm against the linked repository.


def preprocess(text):
    """
    Preprocess text for encoder.

    Seeds the result list with two cleaned lines from a hard-coded chat
    transcript, then appends a sentence-split, word-tokenized version of
    every entry in *text*, and returns the combined list of strings.
    """
    # --- Seed from /var/www/html/_chat.txt ---------------------------
    # NOTE(review): this hard-coded debug block mixes file contents into
    # every caller's result; consider removing it once debugging ends.
    # `with` guarantees the file handle is closed even on error
    # (the original left it open — a resource leak).
    with open("/var/www/html/_chat.txt", "r") as f:
        file_list = f.readlines()
    # [1:3] selects the 2nd and 3rd lines — two lines, not three as the
    # original name `first_three` suggested.
    selected_lines = file_list[1:3]
    # Single pass replacing tabs, NUL bytes, and trailing ' \r\n'.
    X = [line.replace('\t', ' ').replace('\x00', '').replace(' \r\n', '')
         for line in selected_lines]

    # --- Tokenize the caller-supplied sentences ----------------------
    sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')
    for t in text:
        sents = sent_detector.tokenize(t)
        result = ''
        for s in sents:
            # Rejoin word tokens with single spaces; each sentence is
            # prefixed by a space, matching the original output exactly.
            result += ' ' + ' '.join(word_tokenize(s))
        X.append(result)
    return X

0 个答案:

没有答案