"""Train a Word2Vec model on ``data.txt``, persist it, and query it back.

The script runs three steps in order:

1. Train a model on the corpus in ``data.txt`` and write both the plain-text
   word vectors (``data.vector``) and the full model (``test.model``).
2. Reload the full model and print its vocabulary, one word vector and the
   ten nearest neighbours of a sample word.
3. Reload only the plain-text word vectors and print the same word vector.
"""
import gensim
import jieba
import numpy as np
import pandas as pd
from gensim.models import Word2Vec
from gensim.models.word2vec import LineSentence
from sklearn.metrics.pairwise import cosine_similarity

# The corpus handle is wrapped in ``with`` so it is closed deterministically.
# The Word2Vec constructor call sits inside the block because it is the only
# consumer of the handle, which must stay open for the whole call.
with open('data.txt', 'r', encoding='utf8') as corpus_file:
    model = Word2Vec(
        LineSentence(corpus_file),
        sg=0,
        vector_size=20,
        window=5,
        min_count=1,
        workers=4,
    )

# Save the word vectors alone, in the text (non-binary) word2vec format.
model.wv.save_word2vec_format('data.vector', binary=False)

# Save the complete model.
model.save('test.model')

# 1. Load the word vectors through the saved model (recommended).
model = gensim.models.Word2Vec.load('test.model')

# Vocabulary known to the model, and its size.
dic = model.wv.index_to_key
print(dic)
print(len(dic))

# Vector of a sample word and its ten most similar words.
# NOTE(review): both lookups raise KeyError when the word is not in the
# vocabulary, i.e. when it never occurs in data.txt.
print("1", model.wv['髋关节'])
print("2", model.wv.most_similar('髋关节', topn=10))

# 2. Load the word vectors directly from the word2vec-format file.
vector = gensim.models.KeyedVectors.load_word2vec_format('data.vector')
print("3", vector['髋关节'])