From 816c83fc187ab9720e2305c194915d7056e32ac9 Mon Sep 17 00:00:00 2001 From: wuzekai <3025054974@qq.com> Date: Tue, 22 Jul 2025 09:13:22 +0000 Subject: [PATCH] Delete ocr.py --- ocr.py | 165 --------------------------------------------------------- 1 file changed, 165 deletions(-) delete mode 100644 ocr.py diff --git a/ocr.py b/ocr.py deleted file mode 100644 index a7113c4..0000000 --- a/ocr.py +++ /dev/null @@ -1,165 +0,0 @@ -from flask import Blueprint, request, jsonify, send_file -import os -import subprocess -import json -import uuid -import re - -ocr = Blueprint("ocr", __name__) - -# 目录配置 -upload_dir = "./inference_pic" -output_dir = "./inference_json" -det_model_dir = "./inference_model/det_v4" -rec_model_dir = "./inference_model/rec_v4" -cls_model_dir = "./inference_model/ch_ppstructure_mobile_v2.0_SLANet_infer" -result_dir = "./inference_results/v4" - -# 创建目录 -os.makedirs(upload_dir, exist_ok=True) -os.makedirs(output_dir, exist_ok=True) -os.makedirs(result_dir, exist_ok=True) - -@ocr.route("/process", methods=["POST"]) -def ocr_inference(): - if "file" not in request.files: - return jsonify({"error": "No file provided"}), 400 - - file = request.files["file"] - if file.filename == "": - return jsonify({"error": "No selected file"}), 400 - - # 生成唯一文件名 - uuid_str = str(uuid.uuid4()) - filename = f"{uuid_str}.jpg" - image_path = os.path.join(upload_dir, filename) - file.save(image_path) - - try: - # 调用 OCR 模型并捕获 stdout - command = [ - "python", - "tools/infer/predict_system.py", - "--image_dir", image_path, - "--det_model_dir", det_model_dir, - "--rec_model_dir", rec_model_dir, - "--cls_model_dir", cls_model_dir, - "--draw_img_save_dir", result_dir, - ] - - result = s|u|b|p|r|o|c|e|s|s.run(command, check=True, capture_output=True, text=True, encoding="utf-8") - - # 提取调试信息 - debug_lines = result.stdout.splitlines() - pattern = re.compile(r"\[.*\] ppocr DEBUG: (.*?), (\d+\.\d+)") - results = [] - - for line in debug_lines: - match = pattern.search(line) - if match: - text = match.group(1).strip() - confidence = float(match.group(2)) - results.append({"text": text, "confidence": confidence}) - - if not results: - return jsonify({"error": "No valid OCR debug output found"}), 500 - - # 保存为 JSON 文件 - json_filename = f"{uuid_str}.json" - json_path = os.path.join(output_dir, json_filename) - with open(json_path, "w", encoding="utf-8") as f: - json.dump(results, f, ensure_ascii=False, indent=2) - - # 读取 JSON 文件内容并返回 - with open(json_path, "r", encoding="utf-8") as f: - json_content = json.load(f) - - # 返回结果 - return jsonify({ - "message": "File processed successfully", - "uuid": uuid_str, - "content": json_content, - }), 200 - - except subprocess.CalledProcessError as e: - return jsonify({"error": f"OCR subprocess failed: {e}"}), 500 - except Exception as e: - return jsonify({"error": f"Unexpected error: {e}"}), 500 - -@ocr.route("/text/", methods=["GET"]) -def get_text(uuid_str): - if not is_valid_uuid(uuid_str): - return jsonify({"error": "Invalid UUID format"}), 400 - - json_file = os.path.join(output_dir, f"{uuid_str}.json") - if not os.path.exists(json_file): - return jsonify({"error": "JSON file not found"}), 404 - - try: - with open(json_file, "r", encoding="utf-8") as f: - data = json.load(f) - # 提取所有 text 字段 - text_list = [item.get("text", "") for item in data if "text" in item] - - return jsonify({ - "uuid": uuid_str, - "content": text_list - }), 200 - except Exception as e: - return jsonify({"error": f"Failed to read JSON file: {str(e)}"}), 500 - -@ocr.route("/download/", methods=["GET"]) -def download_image(uuid_str): - if not is_valid_uuid(uuid_str): - return jsonify({"error": "Invalid UUID format"}), 400 - # 构建图片文件路径 - image_file = os.path.join(result_dir, f"{uuid_str}.jpg") - if not os.path.exists(image_file): - return jsonify({"error": "Image file not found"}), 404 - try: - return send_file( - image_file, - as_attachment=True, - download_name=f"{uuid_str}.jpg" - ) - except Exception as e: - return jsonify({"error": f"Failed to send image: {str(e)}"}), 500 - -@ocr.route("/delete/", methods=["DELETE"]) -def delete_files(uuid_str): - if not is_valid_uuid(uuid_str): - return jsonify({"error": "Invalid UUID format"}), 400 - deleted_files = [] - - # 删除上传的图片文件 - image_file = os.path.join(upload_dir, f"{uuid_str}.jpg") - if os.path.exists(image_file): - os.remove(image_file) - deleted_files.append(f"{uuid_str}.jpg") - - # 删除输出的 JSON 文件 - json_file = os.path.join(output_dir, f"{uuid_str}.json") - if os.path.exists(json_file): - os.remove(json_file) - deleted_files.append(f"{uuid_str}.json") - - # 删除 OCR 结果文件 - result_file = os.path.join(result_dir, f"{uuid_str}.jpg") - if os.path.exists(result_file): - os.remove(result_file) - deleted_files.append(f"{uuid_str}.jpg") - - if not deleted_files: - return jsonify({"message": "No files found for this UUID"}), 404 - - return jsonify({ - "message": "Files deleted successfully", - "deleted_files": deleted_files - }), 200 - -def is_valid_uuid(uuid_str): - try: - uuid.UUID(uuid_str) - return True - except ValueError: - return False \ No newline at end of file -- 2.22.0