"""Collect the ``extracted_info`` object of every JSON file in a folder into one table.

This is the content of ``json2excel.py`` as introduced by the patch
"Upload New File" (commit 022266171bd1).

Each file in the folder contributes one row.  The columns are the union of
all keys seen in any file's ``extracted_info`` object, in first-seen order;
a cell for which a file supplied no value holds ``MISSING_VALUE``.
"""

import json
import os
import re

import pandas as pd

# Folder scanned when the script is run without an explicit path.
JSON_FOLDER_PATH = "/home/limeng/NLP/LLM/code/0220/result"

# Placeholder stored in cells for which a file supplied no value.
MISSING_VALUE = "无"

_FIRST_NUMBER = re.compile(r"(\d+)")


def _numeric_sort_key(filename):
    """Return a sort key that orders names by the first run of digits in them.

    Names without any digit sort after all numbered names instead of raising,
    which is what ``re.search(...).group()`` would do on a ``None`` match.
    """
    match = _FIRST_NUMBER.search(filename)
    if match is None:
        return (1, 0)
    return (0, int(match.group()))


def _load_extracted_info(file_path):
    """Read one JSON file and return a copy of its ``extracted_info`` mapping.

    A missing or ``null`` ``extracted_info`` yields an empty dict, so the file
    still produces a row (filled entirely with the placeholder).

    Raises:
        json.JSONDecodeError: if the file is not valid JSON.
        OSError: if the file cannot be opened.
    """
    with open(file_path, "r", encoding="utf-8") as handle:
        data = json.load(handle)
    return dict(data.get("extracted_info") or {})


def _build_frame(rows):
    """Build a DataFrame from row dicts, padding absent keys with the placeholder.

    The column set is computed over *all* rows first, so a key that first
    appears in a late file is also padded in the rows of earlier files.
    """
    columns = list(dict.fromkeys(key for row in rows for key in row))
    records = [
        {column: row.get(column, MISSING_VALUE) for column in columns}
        for row in rows
    ]
    # Build the table once; concatenating inside the loop copies the whole
    # frame on every iteration.
    return pd.DataFrame(records, columns=columns)


def main(json_folder_path=JSON_FOLDER_PATH):
    """Load every file in ``json_folder_path``, print the table and return it.

    Args:
        json_folder_path: Directory containing the JSON result files.

    Returns:
        pandas.DataFrame with one row per file, ordered by the first number
        found in each file name.
    """
    files_sorted = sorted(os.listdir(json_folder_path), key=_numeric_sort_key)

    rows = []
    for filename in files_sorted:
        file_path = os.path.join(json_folder_path, filename)
        # os.listdir also returns sub-directories; those cannot be opened.
        if not os.path.isfile(file_path):
            continue
        rows.append(_load_extracted_info(file_path))

    df = _build_frame(rows)

    # Print the resulting DataFrame.
    print(df)
    return df


if __name__ == "__main__":
    main()