有没有办法用我们的数据重新训练预训练的GoogleNews-vectors-negative300.bin模型?

时间:2018-02-15 13:56:11

标签: python word2vec gensim

/// Returns the page that precedes `viewController`, or `nil` when there is none.
///
/// The current page is located by looking up the controller's
/// `restorationIdentifier` in `pageTitles`. As a side effect the
/// `currentPageCount` and `nextPageCount` properties are updated whenever the
/// identifier is found.
///
/// - Parameters:
///   - pageViewController: The page view controller asking for the page.
///   - viewController: The controller that is currently displayed.
/// - Returns: The result of `viewControllerAtIndex` for the previous index, or
///   `nil` if the identifier is missing, is not listed in `pageTitles`, or
///   already refers to the first page.
func pageViewController(pageViewController: UIPageViewController, viewControllerBeforeViewController viewController: UIViewController) -> UIViewController? {

    // `restorationIdentifier` is optional and `indexOf` returns an Optional
    // (never NSNotFound), so bind both instead of force-unwrapping: an unknown
    // controller now yields "no page" rather than a crash.
    guard let identifier = viewController.restorationIdentifier else {
        return nil
    }
    guard let index = self.pageTitles.indexOf(identifier) else {
        return nil
    }
    print("index back = \(index)")

    // Already on the first page: record the counters and stop.
    if index == 0 {
        currentPageCount = -1
        nextPageCount = index
        return nil
    }

    let previousIndex = index - 1
    currentPageCount = previousIndex - 1
    nextPageCount = previousIndex
    return self.viewControllerAtIndex(previousIndex)
}

/// Returns the page that follows `viewController`, or `nil` when there is none.
///
/// The current page is located by looking up the controller's
/// `restorationIdentifier` in `pageTitles`.
///
/// - Parameters:
///   - pageViewController: The page view controller asking for the page.
///   - viewController: The controller that is currently displayed.
/// - Returns: The result of `viewControllerAtIndex` for the next index, or
///   `nil` if the identifier is missing, is not listed in `pageTitles`, or
///   already refers to the last page.
func pageViewController(pageViewController: UIPageViewController, viewControllerAfterViewController viewController: UIViewController) -> UIViewController? {

    // `restorationIdentifier` is optional and `indexOf` returns an Optional
    // (never NSNotFound), so bind both instead of force-unwrapping: an unknown
    // controller now yields "no page" rather than a crash.
    guard let identifier = viewController.restorationIdentifier else {
        return nil
    }
    guard let index = self.pageTitles.indexOf(identifier) else {
        return nil
    }

    let nextIndex = index + 1
    // Stepping past the last title means there is no following page.
    if nextIndex == self.pageTitles.count {
        return nil
    }
    return self.viewControllerAtIndex(nextIndex)
}

import os import gensim.models as g import logging import gensim os.chdir("/home/ai/path"); #doc2vec parameters vector_size = 300 window_size = 5 min_count = 1 sampling_threshold = 1e-5 negative_size = 5 train_epoch = 100 dm= 0 worker_count = 2 #number of parallel processes #pretrained word embeddings pretrained_emb = "GoogleNews-vectors-negative300.bin" #input corpus train_corpus = "mydata.txt" #output model saved_path = "Googlemodel.bin" #enable logging logging.basicConfig(format='%(asctime)s : %(levelname)s : % (message)s', level=logging.INFO) #train doc2vec model docs = g.doc2vec.TaggedLineDocument(train_corpus) model = g.Doc2Vec(docs, size=vector_size, window=window_size, min_count=min_count, sample=sampling_threshold, workers=worker_count, hs=0, dm=dm, negative=negative_size, dbow_words=1, dm_concat=1, pretrained_emb=pretrained_emb, iter=train_epoch) 的大小为3.6 GB,我的数据大小为 455 MB

运行此代码、训练流程完成后,我的输出模型大小仅为 850 MB

0 个答案:

没有答案