# Train a Word2Vec model on the `common_texts` corpus: vector size 100,
# context window 5, min_count 1, 4 worker threads.
model = Word2Vec(
    sentences=common_texts,
    size=100,
    window=5,
    min_count=1,
    workers=4,
)
方法一:model.save(path)
- 保存: model.save(path)
- 加載:Word2Vec.load(path)
- 模型可繼續訓練
訓練是流式的,這意味著句子可以是一個生成器,動態地從磁盤讀取輸入數據,而無需將整個語料庫加載到RAM中。這也意味著我們可以在以後繼續訓練模型。
使用以下命令初始化模型
from gensim.test.utils import common_texts, get_tmpfile
from gensim.models import Word2Vec
# Path for the saved model inside the temporary folder, so the example does
# not leave files in the working directory.
path = get_tmpfile("word2vec.model")

# Train on the imported `common_texts` corpus and persist the full model.
# NOTE(review): gensim >= 4.0 renamed `size` to `vector_size` - adjust this
# keyword if the installed gensim is 4.x.
model = Word2Vec(common_texts, size=100, window=5, min_count=1, workers=4)
model.save(path)

# Load the full model back; a model saved this way can keep training.
model = Word2Vec.load(path)

# Continue training. The corpus must be an iterable of tokenised sentences
# (a list of token lists); a flat list of strings would be consumed as one
# sentence per string, each made of single characters.
model.train([["hello", "world"]], total_examples=1, epochs=1)

# Query the model through its KeyedVectors (`model.wv`); indexing the model
# object directly is deprecated in gensim 3.x and removed in 4.x.
vector = model.wv['computer']  # get numpy vector of a word
sims = model.wv.most_similar("computer", topn=10)  # get other similar words

# Once training is finished (no more updates, only queries), keep just the
# KeyedVectors instance and drop the full model.
word_vectors = model.wv
del model
方法二: model.wv.save(path)
- 保存:model.wv.save(path)
- 加載:wv = KeyedVectors.load("model.wv", mmap='r')
- 模型不可繼續訓練
from gensim.models import KeyedVectors
# `word_vectors` is the KeyedVectors instance taken from `model.wv` earlier
# (just the words + their trained embeddings). `model` itself has already
# been deleted by this point, so it is not read again here.
word_vectors.save("word2vec.wordvectors")

# Load back with memory-mapping = read-only, shared across processes.
wv = KeyedVectors.load("word2vec.wordvectors", mmap='r')

vector = wv['computer']  # Get numpy vector of a word
參考鏈接:https://radimrehurek.com/gensim/models/word2vec.html