"""Print the highest-weighted TF-IDF terms for every text row of an Excel sheet."""

import heapq
from operator import itemgetter
from typing import Iterable, List, Tuple

import pandas as pd

EXCEL_PATH = '../datas/hebing.xlsx'
SHEET_NAME = 'Sheet1'
TEXT_COLUMN = '合并咨询文本'
TOP_N = 3


def load_texts(
    path: str = EXCEL_PATH,
    sheet_name: str = SHEET_NAME,
    column: str = TEXT_COLUMN,
) -> List[str]:
    """Read one column of an Excel sheet and return it as a list of strings.

    Args:
        path: Location of the Excel workbook.
        sheet_name: Name of the sheet to read.
        column: Header of the column holding the documents.

    Returns:
        One string per spreadsheet row, in row order.
    """
    df = pd.read_excel(path, sheet_name=sheet_name)
    # Empty cells are read as NaN, which TfidfVectorizer rejects as an invalid
    # document. Replacing them with '' (instead of dropping them) keeps result
    # i aligned with spreadsheet row i.
    return df[column].fillna('').astype(str).tolist()


def top_terms(
    weighted_terms: Iterable[Tuple[str, float]],
    top_n: int = TOP_N,
) -> List[str]:
    """Return up to ``top_n`` terms with the largest positive weights.

    Args:
        weighted_terms: ``(term, weight)`` pairs for a single document.
        top_n: Maximum number of terms to return.

    Returns:
        Terms ordered from highest to lowest weight. Terms whose weight is not
        positive are never returned, so the list may be shorter than ``top_n``
        (or empty). Equal weights keep their input order.
    """
    # A zero weight means the term does not contribute to this document, so it
    # must not be reported as one of the document's keywords.
    present = [(term, weight) for term, weight in weighted_terms if weight > 0]
    best = heapq.nlargest(top_n, present, key=itemgetter(1))
    return [term for term, _ in best]


def extract_keywords(texts: List[str], top_n: int = TOP_N) -> List[List[str]]:
    """Compute the top TF-IDF terms of each document in ``texts``.

    Args:
        texts: The documents to analyse, one string each.
        top_n: Maximum number of keywords per document.

    Returns:
        One keyword list per document, in the same order as ``texts``; each
        list is sorted from highest to lowest TF-IDF weight.

    Raises:
        ValueError: Raised by scikit-learn when no vocabulary can be built
            (for example when every document is empty).
    """
    # Imported here rather than at module level: scikit-learn is only needed
    # for vectorizing, while top_terms() works on plain (term, weight) pairs.
    from sklearn.feature_extraction.text import TfidfVectorizer

    # NOTE(review): the default token pattern splits on word boundaries, so
    # unsegmented Chinese text presumably ends up as very long "words".
    # Confirm whether the column is pre-segmented; if not, a word segmenter
    # should be supplied through the ``tokenizer`` argument.
    vectorizer = TfidfVectorizer()
    tfidf = vectorizer.fit_transform(texts).tocsr()
    feature_names = vectorizer.get_feature_names_out()

    keywords = []
    # Walk the CSR structure row by row instead of calling toarray(): only the
    # stored (non-zero) entries of a document are ever candidates, and the
    # dense documents x vocabulary matrix is never materialised.
    for start, end in zip(tfidf.indptr[:-1], tfidf.indptr[1:]):
        columns = tfidf.indices[start:end]
        weights = tfidf.data[start:end]
        pairs = ((feature_names[col], weight) for col, weight in zip(columns, weights))
        keywords.append(top_terms(pairs, top_n))
    return keywords


def main() -> None:
    """Load the consultation texts and print each document's top keywords."""
    text_data = load_texts()
    top_features_names = extract_keywords(text_data, TOP_N)
    print(top_features_names)


if __name__ == "__main__":
    main()