我有下面这个类,它通过对文本中所有单词的词向量求平均来构建分类特征:
class MeanEmbeddingVectorizer(object):
    """Represent each tokenized document as the mean of its word vectors.

    ``word2vec`` is expected to be a dict-like mapping from a token to a
    fixed-length numeric vector. Tokens missing from the mapping are
    skipped; a document with no known tokens maps to the zero vector.
    """

    def __init__(self, word2vec):
        """Store the embedding table and infer the embedding dimension.

        Args:
            word2vec: Non-empty mapping ``token -> vector``.

        Raises:
            StopIteration: If ``word2vec`` is empty (no vector to measure).
        """
        self.word2vec = word2vec
        # ``itervalues().next()`` exists only on Python 2 dicts;
        # ``next(iter(d.values()))`` works on both Python 2 and 3.
        self.dim = len(next(iter(word2vec.values())))

    def fit(self, X, y=None):
        """Do nothing and return ``self``; present for pipeline compatibility.

        ``y`` is optional so the method can be called with ``X`` alone.
        """
        return self

    def transform(self, X):
        """Return an array with one averaged vector per document in ``X``.

        Args:
            X: Iterable of documents, each an iterable of tokens.

        Returns:
            ``numpy.ndarray`` holding one row per document.
        """
        return np.array([
            np.mean(
                # An empty list is falsy, so a document without any known
                # token falls back to a single zero vector.
                [self.word2vec[w] for w in words if w in self.word2vec]
                or [np.zeros(self.dim)],
                axis=0,
            )
            for words in X
        ])
所以我的问题是:如何将此类的输出打印到屏幕,或者将其保存到文件中?谢谢。
答案 0 :(得分:1)
您可以在末尾添加 print 语句,
但仍需要先创建属性 word2vec:
import numpy as np
class MeanEmbeddingVectorizer(object):
    """Build document features by averaging the vectors of known tokens.

    ``word2vec`` is expected to be a dict-like mapping from a token to a
    fixed-length numeric vector. Unknown tokens are ignored, and a
    document containing no known token is represented by a zero vector.
    """

    def __init__(self, word2vec):
        """Keep the embedding mapping and record the vector length.

        Args:
            word2vec: Non-empty mapping ``token -> vector``.

        Raises:
            StopIteration: If ``word2vec`` contains no entries.
        """
        self.word2vec = word2vec
        # The original ``word2vec.itervalues().next()`` is Python 2 only;
        # this form is portable across Python 2 and 3.
        self.dim = len(next(iter(word2vec.values())))

    def fit(self, X, y=None):
        """Return ``self`` unchanged; there is nothing to learn.

        ``y`` defaults to ``None`` so callers may pass ``X`` alone.
        """
        return self

    def transform(self, X):
        """Average the word vectors of every document in ``X``.

        Args:
            X: Iterable of documents, each an iterable of tokens.

        Returns:
            ``numpy.ndarray`` with one averaged vector per document.
        """
        return np.array([
            np.mean(
                # ``or`` substitutes one zero vector when the list of
                # known-token vectors is empty.
                [self.word2vec[w] for w in words if w in self.word2vec]
                or [np.zeros(self.dim)],
                axis=0,
            )
            for words in X
        ])
# fit is called through the class rather than an instance, so the string "X"
# is passed as ``self`` and handed straight back by fit's ``return self``.
print(MeanEmbeddingVectorizer.fit("X", "Y", "Z"))
会得到如下输出:
X
None
但如果你运行:
# transform is also called through the class, so the string "X" is passed as
# ``self``; looking up ``self.word2vec`` on a str raises the AttributeError.
print(MeanEmbeddingVectorizer.transform("X", "Y"))
你会得到:
AttributeError: 'str' object has no attribute 'word2vec'