import pandas as pd
import numpy as np

# Default locations and schema used when the caller does not override them.
_DEFAULT_INPUT_PATH = '../../data-dev/消化内科对话_已解决问题0510.xlsx'
_DEFAULT_SHEET_NAME = '医生属性+'
_DEFAULT_OUTPUT_PATH = '../../data-dev/医生属性相似度2.xlsx'
_FEATURE_COLUMNS = ['医生职位', '所属医院等级', '科室', '健康顾问(元)', '图文咨询',
                    '指定咨询(元)', '采纳率', '回答总数', '好评数', '满意度']
_OUTPUT_COLUMNS = ['idx', '相似idx', '相似度']


def find_top_similar_doctors(
    target_indices,
    *,
    df=None,
    input_path=_DEFAULT_INPUT_PATH,
    sheet_name=_DEFAULT_SHEET_NAME,
    output_path=_DEFAULT_OUTPUT_PATH,
    top_n=5,
):
    """Rank the doctors most similar to each target doctor by cosine similarity.

    Each doctor is represented by the feature columns listed in
    ``_FEATURE_COLUMNS``. For every 1-based index in ``target_indices`` the
    row is compared against every other row and the ``top_n`` highest
    cosine similarities are kept (ties keep ascending row order).

    Args:
        target_indices: Iterable of 1-based row numbers to find matches for.
        df: Optional pre-loaded DataFrame containing the feature columns.
            When ``None`` the data is read from ``input_path``/``sheet_name``.
        input_path: Excel file to read when ``df`` is not supplied.
        sheet_name: Sheet of ``input_path`` to read.
        output_path: Excel file the flattened results are written to, with
            columns ``idx``, ``相似idx``, ``相似度``. Pass ``None`` to skip
            writing.
        top_n: Maximum number of similar rows to keep per target.

    Returns:
        A list with one dict per target, in input order:
        ``{'idx': <1-based target>, 'similarities': [(<1-based row>, <score>), ...]}``.

    Raises:
        IndexError: If a target index is outside ``1..number_of_rows``.
        ValueError: If ``top_n`` is negative, or a feature value cannot be
            converted to ``float``.
        KeyError: If a feature column is missing from the data.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    if df is None:
        df = pd.read_excel(input_path, sheet_name=sheet_name)

    # Extract the feature matrix once; every row is one doctor's vector.
    data = df[_FEATURE_COLUMNS].to_numpy(dtype=float)
    row_count = len(data)

    # Row norms do not depend on the target, so compute them a single time.
    norms = np.linalg.norm(data, axis=1)

    result = []
    records = []
    for i in target_indices:
        pos = i - 1
        # Reject out-of-range indices explicitly: a non-positive index would
        # otherwise wrap around to the end of the table via negative indexing.
        if not 0 <= pos < row_count:
            raise IndexError(
                f"target index {i} is out of range 1..{row_count}"
            )

        dots = data @ data[pos]
        denominators = norms * norms[pos]
        # A zero vector has no direction; define its similarity as 0.0
        # instead of dividing by zero and polluting the ranking with NaN.
        similarities = np.divide(
            dots,
            denominators,
            out=np.zeros_like(dots),
            where=denominators != 0,
        )

        candidates = [j for j in range(row_count) if j != pos]
        # sorted() is stable, so equal scores keep ascending row order.
        ranked = sorted(
            candidates, key=lambda j: similarities[j], reverse=True
        )[:top_n]
        top_matches = [(j + 1, float(similarities[j])) for j in ranked]

        result.append({'idx': i, 'similarities': top_matches})
        records.extend(
            {'idx': i, '相似idx': other, '相似度': score}
            for other, score in top_matches
        )

    # Persist the flattened rankings so the saved file actually has content.
    if output_path is not None:
        similarity_df = pd.DataFrame(records, columns=_OUTPUT_COLUMNS)
        similarity_df.to_excel(output_path, index=False)

    return result