继续训练FastText模型

时间:2018-08-29 14:47:33

标签: python gensim fasttext

我已经下载了.bin FastText模型,并将其与gensim一起使用,如下所示:

model = FastText.load_fasttext_format("cc.fr.300.bin")

我想继续训练模型以使其适应我的领域。在查看了FastText's Github和Gensim documentation之后,使用此人提议的modification(尚未合并)似乎是当前可行的方案。

我是不是遗漏了什么?

2 个答案:

答案 0 :(得分:1)

您可以使用Gensim的fastText的某些版本(例如,v.3.7.*)继续训练。这是“Loading, inferring, continuing training”的示例:

from gensim.test.utils import datapath
model = load_facebook_model(datapath("crime-and-punishment.bin"))
sent = [['lord', 'of', 'the', 'rings'], ['lord', 'of', 'the', 'semi-groups']]
model.build_vocab(sent, update=True)
model.train(sentences=sent, total_examples = len(sent), epochs=5)

由于某些原因,gensim.models.fasttext.load_facebook_model()在Windows上缺失,但在Mac的安装中存在。或者,可以使用gensim.models.FastText.load_fasttext_format()加载预先训练的模型并继续训练。

这里有各种pre-trained Wiki word models and vectors(或here)。

Another example。“注意:与Word2Vec一样,在使用Gensim原生的fastText实现时,您可以继续训练模型。”

答案 1 :(得分:0)

有一个与此问题相关的尚未关闭的工单(open ticket),您可以在here找到。不过,官方的FastText实现目前不支持这一点。