Commit 816c83fc authored by wuzekai

Delete ocr.py

parent ee5b191c
from flask import Blueprint, request, jsonify, send_file
import os
import subprocess
import json
import uuid
import re
# Blueprint on which every OCR route in this module is registered.
ocr = Blueprint("ocr", __name__)

# Directory configuration. All paths are relative, so they resolve against
# the working directory of the process.
upload_dir = "./inference_pic"
output_dir = "./inference_json"
det_model_dir = "./inference_model/det_v4"
rec_model_dir = "./inference_model/rec_v4"
cls_model_dir = "./inference_model/ch_ppstructure_mobile_v2.0_SLANet_infer"
result_dir = "./inference_results/v4"

# Create the directories this module writes to; existing ones are left as-is.
for _writable_dir in (upload_dir, output_dir, result_dir):
    os.makedirs(_writable_dir, exist_ok=True)
@ocr.route("/process", methods=["POST"])
def ocr_inference():
    """Run OCR on an uploaded image and return the recognized text.

    Expects a multipart upload under the form field ``file``. The upload is
    saved as ``<uuid>.jpg`` in ``upload_dir`` and ``tools/infer/predict_system.py``
    is run on it as a subprocess. Every stdout line matching
    ``[...] ppocr DEBUG: <text>, <confidence>`` is collected, and the collected
    list is written to ``<uuid>.json`` in ``output_dir``.

    Returns:
        A ``(response, status)`` tuple:

        * 200 with ``message``, ``uuid`` and ``content`` (a list of
          ``{"text": ..., "confidence": ...}`` objects) on success;
        * 400 when the ``file`` field is missing or its filename is empty;
        * 500 when the subprocess exits non-zero, when no stdout line matches
          the expected pattern, or on any other error.
    """
    if "file" not in request.files:
        return jsonify({"error": "No file provided"}), 400

    file = request.files["file"]
    if file.filename == "":
        return jsonify({"error": "No selected file"}), 400

    # Store the upload under a freshly generated UUID; the same UUID names
    # the JSON output and is returned to the client.
    uuid_str = str(uuid.uuid4())
    filename = f"{uuid_str}.jpg"
    image_path = os.path.join(upload_dir, filename)
    file.save(image_path)

    try:
        # Invoke the OCR script and capture its stdout for parsing.
        command = [
            "python",
            "tools/infer/predict_system.py",
            "--image_dir", image_path,
            "--det_model_dir", det_model_dir,
            "--rec_model_dir", rec_model_dir,
            "--cls_model_dir", cls_model_dir,
            "--draw_img_save_dir", result_dir,
        ]
        # check=True turns a non-zero exit status into CalledProcessError,
        # which is reported separately below.
        result = subprocess.run(
            command,
            check=True,
            capture_output=True,
            text=True,
            encoding="utf-8",
        )

        # Extract "<text>, <confidence>" pairs from the DEBUG log lines.
        pattern = re.compile(r"\[.*\] ppocr DEBUG: (.*?), (\d+\.\d+)")
        results = []
        for line in result.stdout.splitlines():
            match = pattern.search(line)
            if match:
                results.append({
                    "text": match.group(1).strip(),
                    "confidence": float(match.group(2)),
                })

        if not results:
            return jsonify({"error": "No valid OCR debug output found"}), 500

        # Persist the results so they can be fetched later by UUID.
        json_path = os.path.join(output_dir, f"{uuid_str}.json")
        with open(json_path, "w", encoding="utf-8") as f:
            json.dump(results, f, ensure_ascii=False, indent=2)

        # The file was just written from `results`, so return the in-memory
        # list instead of reading the file back.
        return jsonify({
            "message": "File processed successfully",
            "uuid": uuid_str,
            "content": results,
        }), 200
    except subprocess.CalledProcessError as e:
        return jsonify({"error": f"OCR subprocess failed: {e}"}), 500
    except Exception as e:
        # Route-level boundary: report any other failure as a 500 response.
        return jsonify({"error": f"Unexpected error: {e}"}), 500
@ocr.route("/text/<uuid_str>", methods=["GET"])
def get_text(uuid_str):
    """Return the ``text`` values stored in the JSON result for ``uuid_str``.

    Args:
        uuid_str: Identifier taken from the URL; must pass ``is_valid_uuid``.

    Returns:
        A ``(response, status)`` tuple: 200 with ``uuid`` and ``content``
        (list of text values), 400 for a malformed UUID, 404 when
        ``<uuid>.json`` does not exist in ``output_dir``, or 500 when the
        file cannot be read or parsed.
    """
    if not is_valid_uuid(uuid_str):
        return jsonify({"error": "Invalid UUID format"}), 400

    json_file = os.path.join(output_dir, f"{uuid_str}.json")
    if not os.path.exists(json_file):
        return jsonify({"error": "JSON file not found"}), 404

    try:
        with open(json_file, "r", encoding="utf-8") as handle:
            entries = json.load(handle)

        # Keep only the text field of each entry; entries without one are skipped.
        texts = []
        for entry in entries:
            if "text" in entry:
                texts.append(entry.get("text", ""))

        payload = {
            "uuid": uuid_str,
            "content": texts,
        }
        return jsonify(payload), 200
    except Exception as exc:
        return jsonify({"error": f"Failed to read JSON file: {exc}"}), 500
@ocr.route("/download/<uuid_str>", methods=["GET"])
def download_image(uuid_str):
    """Send the image ``<uuid>.jpg`` from ``result_dir`` as an attachment.

    Args:
        uuid_str: Identifier taken from the URL; must pass ``is_valid_uuid``.

    Returns:
        The file response on success, otherwise a ``(response, status)``
        tuple: 400 for a malformed UUID, 404 when the image does not exist,
        or 500 when sending fails.
    """
    if not is_valid_uuid(uuid_str):
        return jsonify({"error": "Invalid UUID format"}), 400

    # Build the image path and make it absolute, so the file that is sent is
    # exactly the one checked below, however send_file resolves relative paths.
    image_file = os.path.abspath(os.path.join(result_dir, f"{uuid_str}.jpg"))
    if not os.path.exists(image_file):
        return jsonify({"error": "Image file not found"}), 404

    try:
        return send_file(
            image_file,
            as_attachment=True,
            download_name=f"{uuid_str}.jpg",
        )
    except Exception as e:
        return jsonify({"error": f"Failed to send image: {str(e)}"}), 500
@ocr.route("/delete/<uuid_str>", methods=["DELETE"])
def delete_files(uuid_str):
    """Delete every file stored for ``uuid_str``.

    Looks for the uploaded image in ``upload_dir``, the JSON output in
    ``output_dir`` and the result image in ``result_dir``, in that order,
    and removes whichever exist.

    Args:
        uuid_str: Identifier taken from the URL; must pass ``is_valid_uuid``.

    Returns:
        A ``(response, status)`` tuple: 200 with ``deleted_files`` listing
        the removed file names, 400 for a malformed UUID, or 404 when none
        of the files existed.
    """
    if not is_valid_uuid(uuid_str):
        return jsonify({"error": "Invalid UUID format"}), 400

    image_name = f"{uuid_str}.jpg"
    json_name = f"{uuid_str}.json"

    # (directory, file name) pairs in deletion order. The uploaded image and
    # the result image share a file name, so that name can appear twice in
    # the response.
    candidates = (
        (upload_dir, image_name),
        (output_dir, json_name),
        (result_dir, image_name),
    )

    removed = []
    for directory, name in candidates:
        target = os.path.join(directory, name)
        if os.path.exists(target):
            os.remove(target)
            removed.append(name)

    if removed:
        return jsonify({
            "message": "Files deleted successfully",
            "deleted_files": removed
        }), 200
    return jsonify({"message": "No files found for this UUID"}), 404
def is_valid_uuid(uuid_str) -> bool:
    """Return True if ``uuid_str`` can be parsed by ``uuid.UUID``.

    Args:
        uuid_str: Candidate value, normally a string taken from a URL.

    Returns:
        True when ``uuid.UUID(uuid_str)`` succeeds, False otherwise. This
        includes non-string input such as ``None``, which is rejected
        instead of raising.
    """
    try:
        uuid.UUID(uuid_str)
    except (ValueError, TypeError, AttributeError):
        # ValueError: malformed string. TypeError / AttributeError: the
        # value is not a string at all (e.g. None, int, bytes).
        return False
    return True
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment