为了使用这个 GloVe 模块,我需要按照它要求的格式构造共现矩阵(co-occurrence matrix)。目前我的代码如下:
def wwcoomtrx(inlist):
    """Build a term-by-term co-occurrence matrix from a list of documents.

    Each document in ``inlist`` is an iterable of hashable terms. A sparse
    document-by-term count matrix ``A`` is assembled first (one row per
    document, one column per distinct term, columns numbered in first-seen
    order), and the product ``A.T * A`` is returned. Entry ``(p, q)`` of the
    result is therefore the sum over all documents of
    ``count(term p) * count(term q)`` in that document.

    Args:
        inlist: Iterable of documents, each an iterable of hashable terms.

    Returns:
        A square scipy sparse matrix with one row/column per distinct term.

    Note:
        The matrix shape is inferred by scipy from the collected indices, so
        input that contains no terms at all is expected to be rejected by
        ``coo_matrix`` -- confirm against the installed scipy version.
    """
    logging.info('libmatrix.py > wwcoomtrx')
    vocabulary = {}  # map terms to column indices
    data = []  # values (uniform weights)
    row = []  # row (document) indices
    col = []  # column (term) indices
    for i, doc in enumerate(inlist):
        for term in doc:
            # Get the column index, adding the term to the vocabulary if needed.
            j = vocabulary.setdefault(term, len(vocabulary))
            data.append(1)  # uniform weights
            # The coordinate lists must grow in lockstep with ``data``;
            # without them coo_matrix has no positions for the values.
            row.append(i)
            col.append(j)
    # Repeated (row, col) pairs are summed when the COO matrix is converted
    # for multiplication, so a term repeated in a document counts each time.
    A = scipy.sparse.coo_matrix((data, (row, col)))
    return A.T * A
在我的二维单词列表上运行后,得到如下结果:
(0, 172) 3
(0, 171) 3
(0, 170) 3
(0, 169) 3
(0, 168) 3
(0, 167) 3
: :
(2389, 54) 4
(2389, 52) 1
(2389, 40) 3
(2389, 24) 1
(2389, 20) 7
(2389, 15) 1
cooccur = {
0: {
0: 1.0,
2: 3.5
1: {
2: 0.5
2: {
0: 3.5,
1: 0.5,
2: 1.2