我在拥有314k记录的数据集上运行tsne。我从数据集中取出了一列,即文本列并将其转换为单词包。当我运行时它给了我内存错误。任何人都可以帮忙解决这个问题吗?
from sklearn.manifold import TSNE
count_vect = CountVectorizer() #in scikit-learn
final_counts = count_vect.fit_transform(final['Text'].values)
labels = count_vect.get_feature_names()
model = TSNE(n_components=2, random_state = 0)
tsne_data = model.fit_transform(final_counts.todense())
tsne_data = np.vstack((tsne_data.T,labels)).T
tsne_df = pd.Dataframe(data=tsne_data,columns = ("D_1","D_2","label"))
# Ploting the result of tsne
sn.FacetGrid(tsne_df, hue="label", size=6).map(plt.scatter, "D_1", "D_2").add_legend()
plt.show()
错误:
---------------------------------------------------------------------------
MemoryError Traceback (most recent call last)
<ipython-input-44-1f788281948b> in <module>()
13 # print(final_counts)
14 model = TSNE(n_components=2, random_state = 0)
---> 15 tsne_data = model.fit_transform(final_counts.todense())
16 tsne_data = np.vstack((tsne_data.T,labels)).T
17 tsne_df = pd.Dataframe(data=tsne_data,columns = ("D_1","D_2","label"))
/root/anaconda3/lib/python3.6/site-packages/scipy/sparse/base.py in todense(self, order, out)
719 `numpy.matrix` object that shares the same memory.
720 """
--> 721 return np.asmatrix(self.toarray(order=order, out=out))
722
723 def toarray(self, order=None, out=None):
/root/anaconda3/lib/python3.6/site-packages/scipy/sparse/compressed.py in toarray(self, order, out)
962 def toarray(self, order=None, out=None):
963 """See the docstring for `spmatrix.toarray`."""
--> 964 return self.tocoo(copy=False).toarray(order=order, out=out)
965
966 ##############################################################
/root/anaconda3/lib/python3.6/site-packages/scipy/sparse/coo.py in toarray(self, order, out)
250 def toarray(self, order=None, out=None):
251 """See the docstring for `spmatrix.toarray`."""
--> 252 B = self._process_toarray_args(order, out)
253 fortran = int(B.flags.f_contiguous)
254 if not fortran and not B.flags.c_contiguous:
/root/anaconda3/lib/python3.6/site-packages/scipy/sparse/base.py in _process_toarray_args(self, order, out)
1037 return out
1038 else:
-> 1039 return np.zeros(self.shape, dtype=self.dtype, order=order)
1040
1041 def __numpy_ufunc__(self, func, method, pos, inputs, **kwargs):
MemoryError: