"""Tag the fused doctor texts with THULAC and export the result to Excel.

Reads the ``分词后的融合`` column of ``Sheet4`` in the input workbook, runs
THULAC on every cell, and writes one output row per cell holding the original
text, THULAC's text-form output, the tag of every token, and each token's
``[start, end]`` character span in the original text.
"""
import pandas as pd

MODEL_PATH = 'E:/pythonTest/nlpTest/venv/Lib/site-packages/thulac/models'
INPUT_PATH = r'../Result/医生分词结果.xlsx'
INPUT_SHEET = 'Sheet4'
INPUT_COLUMN = '分词后的融合'
OUTPUT_PATH = "../Result/医生分词结果词性表.xlsx"


def locate_tokens(text, words):
    """Return the ``[start, end]`` span of each word in *text*, in order.

    Each search starts where the previous match ended, so a repeated word is
    mapped to successive occurrences rather than always to the first one.

    Args:
        text: The string the words were cut from.
        words: The words to locate, in the order they occur in *text*.

    Returns:
        A list with one ``[start, end]`` pair (end exclusive) per word. A word
        that cannot be found at or after the current position is reported as
        ``[-1, -1]``.
    """
    spans = []
    cursor = 0
    for word in words:
        start = text.find(word, cursor)
        if start == -1:
            # Leave the cursor where it is: letting -1 leak into it would
            # rewind the search and corrupt the spans of all later words.
            spans.append([-1, -1])
            continue
        cursor = start + len(word)
        spans.append([start, cursor])
    return spans


def analyze_text(tagger, text):
    """Run the tagger on one cell and collect the values for its output row.

    Args:
        tagger: Object with a THULAC-style ``cut(text, text=False)`` method.
        text: The cell value to analyse.

    Returns:
        A dict with the keys ``word_list`` (the analysed text), ``seg_list``
        (the tagger's ``text=True`` output), ``pos_list`` (second element of
        every pair from ``cut``) and ``pos_index`` (span of every token, see
        :func:`locate_tokens`).
    """
    if not isinstance(text, str):
        if pd.isna(text):
            # Empty Excel cells are read by pandas as NaN; emit an empty row
            # instead of handing a float to the tagger.
            return {"word_list": text, "seg_list": "", "pos_list": [], "pos_index": []}
        text = str(text)

    seg_list = tagger.cut(text, text=True)
    # Cut once in list mode and reuse the pairs for both tags and spans.
    pairs = tagger.cut(text)
    return {
        "word_list": text,
        "seg_list": seg_list,
        "pos_list": [pair[1] for pair in pairs],
        "pos_index": locate_tokens(text, [pair[0] for pair in pairs]),
    }


def main():
    """Load the model, analyse every cell of the input column, write the workbook."""
    # Imported here so the helpers above stay importable without THULAC.
    import thulac

    # Load the THULAC model.
    thu = thulac.thulac(user_dict=None, model_path=MODEL_PATH)

    # Read the Excel file.
    df = pd.read_excel(INPUT_PATH, sheet_name=INPUT_SHEET)
    word_lists = df[INPUT_COLUMN]

    # Part-of-speech analysis for every cell, keyed by its position.
    result = {i: analyze_text(thu, word_list) for i, word_list in enumerate(word_lists)}

    output_df = pd.DataFrame.from_dict(result, orient="index")
    output_df.to_excel(OUTPUT_PATH, index=False)


if __name__ == "__main__":
    main()