import pandas as pd

# Workbook and sheet names shared by the lookup helpers below.
_EXCEL_FILE_PATH = '../../data-dev/消化内科对话_已解决问题0510.xlsx'
_DIALOGUE_SHEET = '对话数据+'
_DOCTOR_SHEET = '医生属性+'


def find_minimum_idx(matching_data):
    """Return the smallest ``idx`` seen for each distinct sentence, sorted.

    Args:
        matching_data: Iterable of objects exposing ``sentence`` and ``idx``
            attributes.

    Returns:
        A list in ascending order holding, for every distinct ``sentence``,
        the minimum ``idx`` among the items that share it. Empty input
        yields an empty list.
    """
    smallest_idx_by_sentence = {}
    for data in matching_data:
        sentence = data.sentence
        idx = data.idx
        # Keep the first idx seen for a sentence, then only ever lower it.
        if (sentence not in smallest_idx_by_sentence
                or idx < smallest_idx_by_sentence[sentence]):
            smallest_idx_by_sentence[sentence] = idx
    return sorted(smallest_idx_by_sentence.values())


def find_doctor_id_by_idx(idx):
    """Look up the doctor identifier for a dialogue ``idx``.

    The dialogue sheet maps ``idx`` to a doctor name ('医生姓名'); the doctor
    attribute sheet maps that name to the doctor identifier ('医生标识').
    When several rows match, the first match is used at each step.

    Args:
        idx: Value to match against the 'idx' column of the dialogue sheet.

    Returns:
        The '医生标识' value of the first doctor-attribute row whose
        '医生姓名' equals the doctor name found for ``idx``.

    Raises:
        IndexError: If no dialogue row has the given ``idx``, or no
            doctor-attribute row matches the resulting doctor name.
    """
    # Open the workbook once so both sheets are read from the same handle
    # instead of re-opening and re-parsing the file for each sheet.
    with pd.ExcelFile(_EXCEL_FILE_PATH) as workbook:
        dialogues = pd.read_excel(workbook, sheet_name=_DIALOGUE_SHEET)
        matching_names = dialogues.loc[dialogues['idx'] == idx, '医生姓名']
        if matching_names.empty:
            raise IndexError(
                f"no row with idx == {idx!r} in sheet {_DIALOGUE_SHEET!r}"
            )
        doctor_name = matching_names.iloc[0]

        doctors = pd.read_excel(workbook, sheet_name=_DOCTOR_SHEET)

    matching_ids = doctors.loc[doctors['医生姓名'] == doctor_name, '医生标识']
    if matching_ids.empty:
        raise IndexError(
            f"no row for doctor {doctor_name!r} in sheet {_DOCTOR_SHEET!r}"
        )
    return matching_ids.values[0]


def find_doctor_name_by_idx(idx):
    """Return the doctor names whose identifier equals ``idx``.

    Note that, despite the parameter name, ``idx`` is compared against the
    '医生标识' (doctor identifier) column of the doctor attribute sheet, not
    against the 'idx' column of the dialogue sheet.

    Args:
        idx: Value to match against the '医生标识' column.

    Returns:
        A list of the '医生姓名' values of all matching rows; empty when
        nothing matches.
    """
    doctors = pd.read_excel(_EXCEL_FILE_PATH, sheet_name=_DOCTOR_SHEET)
    matching_rows = doctors.loc[doctors['医生标识'] == idx]
    return matching_rows['医生姓名'].tolist()