I'm writing an application in Swift for macOS. I'm using the UserDefaults API to save a number of parameters on a per-user basis. I'm looking for a similar place to save license data, so that once a license has been granted, all users on the machine can share it.
(This app was originally written in Java, and I'm reimplementing it in Swift. The old app used Preferences.userRoot() for user data and Preferences.systemRoot() for license data. What is the analogous structure on macOS?)
Answer 0 (score: 0):
By convention, you should save license data in Application Support,
which typically contains all application-specific data. Because the license should be shared by every user on the machine, that means the system-wide /Library/Application Support rather than the per-user ~/Library/Application Support.
From the Library Directory Details document:
Contains all app-specific data and support files. These are files that your app creates and manages on behalf of the user and can include files that contain user data.
By convention, all of these items should be put in a subdirectory whose name matches the bundle identifier of the app. For example, if your app is named MyApp and has the bundle identifier com.example.MyApp, you would put your app's user-specific data files and resources in the ~/Library/Application Support/com.example.MyApp/ directory. Your app is responsible for creating this directory as needed.
Resources required by your app to run must be placed inside the app bundle itself.
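As an illustration of that convention, here is a minimal Swift sketch, assuming a plist-based license file; the LicenseStore type, the license.plist file name, and the com.example.MyApp fallback identifier are illustrative, not part of any Apple API. It resolves the bundle-identifier subdirectory of the system-wide /Library/Application Support via FileManager and reads and writes the shared license file there.

import Foundation

// Illustrative helper for the convention described above: per-user
// settings go through UserDefaults, while the shared license lives in
// /Library/Application Support/<bundle identifier>/.
enum LicenseStore {
    // System-wide Application Support directory (shared by all users),
    // e.g. /Library/Application Support/com.example.MyApp
    static var sharedLicenseDirectory: URL {
        let base = FileManager.default.urls(for: .applicationSupportDirectory,
                                            in: .localDomainMask)[0]
        // Fall back to a placeholder identifier when running outside a bundle.
        let bundleID = Bundle.main.bundleIdentifier ?? "com.example.MyApp"
        return base.appendingPathComponent(bundleID, isDirectory: true)
    }

    static var licenseFileURL: URL {
        sharedLicenseDirectory.appendingPathComponent("license.plist")
    }

    // Serializes the license dictionary (plist-compatible values only)
    // and writes it to the shared location.
    static func saveLicense(_ license: [String: Any]) throws {
        try FileManager.default.createDirectory(at: sharedLicenseDirectory,
                                                withIntermediateDirectories: true)
        let data = try PropertyListSerialization.data(fromPropertyList: license,
                                                      format: .xml,
                                                      options: 0)
        try data.write(to: licenseFileURL)
    }

    // Reads the shared license back; returns nil if none has been saved.
    static func loadLicense() throws -> [String: Any]? {
        guard let data = FileManager.default.contents(atPath: licenseFileURL.path) else {
            return nil
        }
        return try PropertyListSerialization.propertyList(from: data,
                                                          options: [],
                                                          format: nil) as? [String: Any]
    }
}

Note that standard users normally cannot write under /Library, so the save step is usually performed once by an installer or a privileged helper tool; every user can then read the license back, which matches the Preferences.systemRoot() behavior of the Java version.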