"""Flask blueprint exposing an OCR pipeline over HTTP.

Endpoints:

* ``POST   /process``              - upload an image, run OCR, store and return the result.
* ``GET    /text/<uuid_str>``      - return only the recognised text strings of a stored result.
* ``GET    /download/<uuid_str>``  - download the image found in the OCR result directory.
* ``DELETE /delete/<uuid_str>``    - remove every file stored for a given UUID.

OCR itself is delegated to the external script ``tools/infer/predict_system.py``,
which is run as a subprocess; its stdout is scraped for recognition results.
"""

import json
import os
import re
import subprocess
import uuid

from flask import Blueprint, request, jsonify, send_file

ocr = Blueprint("ocr", __name__)

# Directory configuration.
upload_dir = "./inference_pic"
output_dir = "./inference_json"
det_model_dir = "./inference_model/det_v4"
rec_model_dir = "./inference_model/rec_v4"
cls_model_dir = "./inference_model/ch_ppstructure_mobile_v2.0_SLANet_infer"
result_dir = "./inference_results/v4"

# Create the working directories up front so request handlers can assume they exist.
os.makedirs(upload_dir, exist_ok=True)
os.makedirs(output_dir, exist_ok=True)
os.makedirs(result_dir, exist_ok=True)

# Matches one recognition line in the OCR script's debug output and captures
# the recognised text (group 1) and its confidence score (group 2).
_DEBUG_LINE_RE = re.compile(r"\[.*\] ppocr DEBUG: (.*?), (\d+\.\d+)")


def _run_ocr(image_path):
    """Run the external OCR script on *image_path* and return its stdout.

    Raises:
        subprocess.CalledProcessError: if the script exits with a non-zero status.
    """
    command = [
        "python",
        "tools/infer/predict_system.py",
        "--image_dir", image_path,
        "--det_model_dir", det_model_dir,
        "--rec_model_dir", rec_model_dir,
        "--cls_model_dir", cls_model_dir,
        "--draw_img_save_dir", result_dir,
    ]
    completed = subprocess.run(
        command,
        check=True,
        capture_output=True,
        text=True,
        encoding="utf-8",
    )
    return completed.stdout


def _parse_ocr_output(stdout):
    """Extract ``{"text", "confidence"}`` records from the OCR script's stdout.

    Lines that do not match the expected debug format are ignored.
    """
    records = []
    for line in stdout.splitlines():
        match = _DEBUG_LINE_RE.search(line)
        if match:
            records.append({
                "text": match.group(1).strip(),
                "confidence": float(match.group(2)),
            })
    return records


@ocr.route("/process", methods=["POST"])
def ocr_inference():
    """Accept an uploaded image, run OCR on it and return the recognised text.

    The upload is expected in the multipart field ``file``. It is stored as
    ``<uuid>.jpg`` in ``upload_dir``; the parsed results are written to
    ``<uuid>.json`` in ``output_dir``.

    Returns:
        200 with ``message``, ``uuid`` and ``content`` on success;
        400 if no file was supplied;
        500 if the OCR subprocess fails, yields no parsable output, or any
        other error occurs.
    """
    if "file" not in request.files:
        return jsonify({"error": "No file provided"}), 400

    upload = request.files["file"]
    if upload.filename == "":
        return jsonify({"error": "No selected file"}), 400

    # A fresh UUID keys every artefact belonging to this request.
    uuid_str = str(uuid.uuid4())
    image_path = os.path.join(upload_dir, f"{uuid_str}.jpg")
    upload.save(image_path)

    try:
        results = _parse_ocr_output(_run_ocr(image_path))
        if not results:
            return jsonify({"error": "No valid OCR debug output found"}), 500

        # Persist the results so the /text endpoint can serve them later.
        json_path = os.path.join(output_dir, f"{uuid_str}.json")
        with open(json_path, "w", encoding="utf-8") as f:
            json.dump(results, f, ensure_ascii=False, indent=2)

        # The in-memory results are exactly what was just written, so there
        # is no need to read the file back before responding.
        return jsonify({
            "message": "File processed successfully",
            "uuid": uuid_str,
            "content": results,
        }), 200
    except subprocess.CalledProcessError as e:
        return jsonify({"error": f"OCR subprocess failed: {e}"}), 500
    except Exception as e:
        return jsonify({"error": f"Unexpected error: {e}"}), 500


@ocr.route("/text/<uuid_str>", methods=["GET"])
def get_text(uuid_str):
    """Return the list of recognised text strings stored for *uuid_str*.

    Returns:
        200 with ``uuid`` and ``content`` (list of strings);
        400 if *uuid_str* is not a valid UUID;
        404 if no JSON result exists for it;
        500 if the stored file cannot be read or parsed.
    """
    if not is_valid_uuid(uuid_str):
        return jsonify({"error": "Invalid UUID format"}), 400

    json_file = os.path.join(output_dir, f"{uuid_str}.json")
    if not os.path.exists(json_file):
        return jsonify({"error": "JSON file not found"}), 404

    try:
        with open(json_file, "r", encoding="utf-8") as f:
            data = json.load(f)

        # Keep only the text of each record; records without a "text" key are skipped.
        text_list = [item["text"] for item in data if "text" in item]

        return jsonify({
            "uuid": uuid_str,
            "content": text_list
        }), 200
    except Exception as e:
        return jsonify({"error": f"Failed to read JSON file: {str(e)}"}), 500


@ocr.route("/download/<uuid_str>", methods=["GET"])
def download_image(uuid_str):
    """Send ``<uuid_str>.jpg`` from ``result_dir`` as a file download.

    Returns:
        The image as an attachment on success;
        400 if *uuid_str* is not a valid UUID;
        404 if the image does not exist;
        500 if sending the file fails.
    """
    if not is_valid_uuid(uuid_str):
        return jsonify({"error": "Invalid UUID format"}), 400

    image_file = os.path.join(result_dir, f"{uuid_str}.jpg")
    if not os.path.exists(image_file):
        return jsonify({"error": "Image file not found"}), 404

    try:
        # Pass an absolute path: send_file may resolve a relative path against
        # the application root rather than the working directory, which would
        # not necessarily be the file whose existence was checked above.
        return send_file(
            os.path.abspath(image_file),
            as_attachment=True,
            download_name=f"{uuid_str}.jpg"
        )
    except Exception as e:
        return jsonify({"error": f"Failed to send image: {str(e)}"}), 500


@ocr.route("/delete/<uuid_str>", methods=["DELETE"])
def delete_files(uuid_str):
    """Delete the uploaded image, JSON result and result image for *uuid_str*.

    Returns:
        200 with ``deleted_files`` (names of the files removed, in the order
        upload image, JSON result, result image);
        400 if *uuid_str* is not a valid UUID;
        404 if none of the files existed.
    """
    if not is_valid_uuid(uuid_str):
        return jsonify({"error": "Invalid UUID format"}), 400

    candidates = (
        (upload_dir, f"{uuid_str}.jpg"),   # uploaded image
        (output_dir, f"{uuid_str}.json"),  # stored JSON result
        (result_dir, f"{uuid_str}.jpg"),   # image in the OCR result directory
    )

    deleted_files = []
    for directory, name in candidates:
        # Attempt the removal directly instead of checking existence first,
        # so a file vanishing between check and removal cannot cause an error.
        try:
            os.remove(os.path.join(directory, name))
        except FileNotFoundError:
            continue
        deleted_files.append(name)

    if not deleted_files:
        return jsonify({"message": "No files found for this UUID"}), 404

    return jsonify({
        "message": "Files deleted successfully",
        "deleted_files": deleted_files
    }), 200


def is_valid_uuid(uuid_str):
    """Return True if *uuid_str* can be parsed by ``uuid.UUID``, else False.

    Non-string input is treated as invalid rather than raising.
    """
    try:
        uuid.UUID(uuid_str)
    except (ValueError, TypeError, AttributeError):
        return False
    return True