T-SNE内存错误

时间:2018-05-09 07:33:35

标签: python machine-learning scikit-learn sklearn-pandas

我在拥有314k记录的数据集上运行tsne。我从数据集中取出了一列,即文本列并将其转换为单词包。当我运行时它给了我内存错误。任何人都可以帮忙解决这个问题吗?

from sklearn.manifold import TSNE
count_vect = CountVectorizer() #in scikit-learn
final_counts = count_vect.fit_transform(final['Text'].values)
labels = count_vect.get_feature_names()
model = TSNE(n_components=2, random_state = 0)
tsne_data = model.fit_transform(final_counts.todense())
tsne_data = np.vstack((tsne_data.T,labels)).T
tsne_df = pd.Dataframe(data=tsne_data,columns = ("D_1","D_2","label"))
# Ploting the result of tsne
sn.FacetGrid(tsne_df, hue="label", size=6).map(plt.scatter, "D_1", "D_2").add_legend()
plt.show()

错误:

---------------------------------------------------------------------------
MemoryError                               Traceback (most recent call last)
<ipython-input-44-1f788281948b> in <module>()
     13 # print(final_counts)
     14 model = TSNE(n_components=2, random_state = 0)
---> 15 tsne_data = model.fit_transform(final_counts.todense())
     16 tsne_data = np.vstack((tsne_data.T,labels)).T
     17 tsne_df = pd.Dataframe(data=tsne_data,columns = ("D_1","D_2","label"))

/root/anaconda3/lib/python3.6/site-packages/scipy/sparse/base.py in todense(self, order, out)
    719             `numpy.matrix` object that shares the same memory.
    720         """
--> 721         return np.asmatrix(self.toarray(order=order, out=out))
    722 
    723     def toarray(self, order=None, out=None):

/root/anaconda3/lib/python3.6/site-packages/scipy/sparse/compressed.py in toarray(self, order, out)
    962     def toarray(self, order=None, out=None):
    963         """See the docstring for `spmatrix.toarray`."""
--> 964         return self.tocoo(copy=False).toarray(order=order, out=out)
    965 
    966     ##############################################################

/root/anaconda3/lib/python3.6/site-packages/scipy/sparse/coo.py in toarray(self, order, out)
    250     def toarray(self, order=None, out=None):
    251         """See the docstring for `spmatrix.toarray`."""
--> 252         B = self._process_toarray_args(order, out)
    253         fortran = int(B.flags.f_contiguous)
    254         if not fortran and not B.flags.c_contiguous:

/root/anaconda3/lib/python3.6/site-packages/scipy/sparse/base.py in _process_toarray_args(self, order, out)
   1037             return out
   1038         else:
-> 1039             return np.zeros(self.shape, dtype=self.dtype, order=order)
   1040 
   1041     def __numpy_ufunc__(self, func, method, pos, inputs, **kwargs):

MemoryError: 

0 个答案:

没有答案