"""Compute cosine similarity between selected doctor rows and all other rows.

Reads doctor attributes from an Excel sheet, compares each selected row with
every other row using cosine similarity, and writes the pairs to a new Excel
file.
"""

import numpy as np
import pandas as pd

INPUT_PATH = '../../data-dev/消化内科对话_已解决问题0510.xlsx'
INPUT_SHEET = '医生属性+'
OUTPUT_PATH = '../../data-dev/医生属性相似度2.xlsx'

# Columns used as the feature vector of each doctor.
FEATURE_COLUMNS = ['医生职位', '所属医院等级', '科室', '健康顾问(元)', '图文咨询',
                   '指定咨询(元)', '采纳率', '回答总数', '好评数', '满意度']

# 1-based row numbers whose similarity to all other rows is computed.
TARGET_ROWS = [35, 285, 158, 119, 217]

RESULT_COLUMNS = ['idx', '相似idx', '相似度']


def cosine_similarity(u, v):
    """Return the cosine similarity of two equal-length numeric vectors.

    Args:
        u: First vector (any sequence convertible to a float array).
        v: Second vector, same length as ``u``.

    Returns:
        ``dot(u, v) / (||u|| * ||v||)``. If either vector has zero norm the
        similarity is undefined and NaN is returned.

    Raises:
        ValueError: If an element cannot be converted to float or the
            vectors have incompatible shapes.

    Example:
        >>> user1_ratings = [4, 5, 2, 0, 3]
        >>> user2_ratings = [3, 4, 0, 4, 5]
        >>> round(float(cosine_similarity(user1_ratings, user2_ratings)), 4)
        0.7873
    """
    # Cast explicitly: rows sliced from a mixed-dtype DataFrame arrive as
    # object arrays, which np.linalg.norm does not handle reliably.
    u = np.asarray(u, dtype=float)
    v = np.asarray(v, dtype=float)
    norm_product = np.linalg.norm(u) * np.linalg.norm(v)
    if norm_product == 0:
        # 0/0 would yield NaN with a RuntimeWarning; return it explicitly.
        return float('nan')
    return np.dot(u, v) / norm_product


def compute_row_similarities(data, rows):
    """Compare each selected row of ``data`` with every other row.

    Args:
        data: 2-D array-like of numeric feature vectors, one per row.
        rows: Iterable of 1-based row numbers to use as comparison targets.

    Returns:
        A DataFrame with columns ``idx`` (1-based target row), ``相似idx``
        (1-based compared row) and ``相似度`` (cosine similarity). A target
        row is never compared with itself.
    """
    records = []
    for row_number in rows:
        target_index = row_number - 1  # row numbers are 1-based
        target = data[target_index]
        for other_index, other in enumerate(data):
            # Compare 0-based with 0-based so that the target itself is the
            # row being skipped (not its successor).
            if other_index == target_index:
                continue
            records.append({
                'idx': row_number,
                '相似idx': other_index + 1,
                '相似度': cosine_similarity(target, other),
            })
    # Build the frame once; appending row by row is quadratic and
    # DataFrame.append no longer exists in pandas >= 2.0.
    return pd.DataFrame(records, columns=RESULT_COLUMNS)


def main():
    """Read the doctor attributes, compute similarities and save them."""
    df = pd.read_excel(INPUT_PATH, sheet_name=INPUT_SHEET)
    data = df[FEATURE_COLUMNS].to_numpy()
    similarity_df = compute_row_similarities(data, TARGET_ROWS)
    similarity_df.to_excel(OUTPUT_PATH, index=False)


if __name__ == '__main__':
    main()