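# Cosine similarity between selected doctor-attribute rows of an Excel sheet.
#
# Reads the attribute columns from the source workbook, compares a fixed list
# of rows against the rows of the sheet, and writes the resulting
# (idx, similar idx, similarity) table to a new Excel file.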
import pandas as pd
import numpy as np
def cosine_similarity(u, v):
    """Return the cosine similarity of two equal-length numeric vectors.

    Args:
        u: First vector (any sequence or array convertible to floats).
        v: Second vector, same length as ``u``.

    Returns:
        ``dot(u, v) / (||u|| * ||v||)``. If either vector has zero norm the
        cosine is undefined; ``0.0`` is returned instead of dividing by zero.

    Raises:
        ValueError: If the inputs cannot be converted to floats or their
            lengths do not match.
    """
    # Coerce to float arrays so plain lists and object-dtype rows (as produced
    # by ``DataFrame.values`` on mixed columns) are handled uniformly.
    u = np.asarray(u, dtype=float)
    v = np.asarray(v, dtype=float)

    denominator = np.linalg.norm(u) * np.linalg.norm(v)
    if denominator == 0:
        # Avoid 0/0 -> NaN (and the accompanying RuntimeWarning).
        return 0.0
    return np.dot(u, v) / denominator
# Read the doctor-attribute sheet from the source workbook.
df = pd.read_excel('../../data-dev/消化内科对话_已解决问题0510.xlsx', sheet_name='医生属性+')

# Columns that make up each row's feature vector.
columns = ['医生职位', '所属医院等级', '科室', '健康顾问(元)', '图文咨询', '指定咨询(元)', '采纳率', '回答总数', '好评数', '满意度']
data = df[columns].values

# Rows to compute similarities for. These are 1-based: row i is data[i - 1].
rows = [35, 285, 158, 119, 217]

# Collect plain records and build the DataFrame once at the end;
# DataFrame.append was removed in pandas 2.0 and copies the frame on every call.
records = []
for i in rows:
    row_i = data[i - 1]
    for j, row_j in enumerate(data):
        # j is 0-based while i is 1-based, so the query row itself is j == i - 1.
        if j == i - 1:
            continue
        similarity = cosine_similarity(row_i, row_j)
        # Report the matched row with the same 1-based numbering as idx.
        records.append({'idx': i, '相似idx': j + 1, '相似度': similarity})

similarity_df = pd.DataFrame(records, columns=['idx', '相似idx', '相似度'])

# Save the result table to an Excel file.
similarity_df.to_excel('../../data-dev/医生属性相似度2.xlsx', index=False)
# import numpy as np
#
# # Compute the cosine similarity of two vectors
# def cosine_similarity(u, v):
#     dot_product = np.dot(u, v)
#     norm_u = np.linalg.norm(u)
#     norm_v = np.linalg.norm(v)
#     similarity = dot_product / (norm_u * norm_v)
#     return similarity
#
# # Sample rating data
# user1_ratings = [4, 5, 2, 0, 3]
# user2_ratings = [3, 4, 0, 4, 5]
#
# # Compute the cosine similarity between the two users' rating vectors
# similarity = cosine_similarity(user1_ratings, user2_ratings)
# print(similarity)