import cv2
import pytesseract
from PIL import Image
from PIL import ImageDraw
from PIL import ImageFont
from PIL import ImageGrab
import numpy as np
import os
import pandas as pd


# Location of the bundled Tesseract executable, relative to the working
# directory the script is launched from.
tesseract_cmd = r".\tesseract-ocr\tesseract.exe"

# A bare module-level name is never read by pytesseract; the wrapper looks up
# ``pytesseract.pytesseract.tesseract_cmd``. Only override it when the bundled
# binary is actually present, so a Tesseract installed on PATH keeps working.
if os.path.isfile(tesseract_cmd):
    pytesseract.pytesseract.tesseract_cmd = tesseract_cmd

def find_name_label(ocr_data):
    """Return the index of the first "姓名" label in OCR word data.

    The label is recognised either as the single token "姓名" or as the token
    "姓" immediately followed by the token "名".

    Args:
        ocr_data: Mapping with a "text" key holding the sequence of recognised
            tokens (the shape produced by ``pytesseract.image_to_data`` with
            ``output_type="dict"``).

    Returns:
        The index of the token that starts the label, or ``None`` when no
        label is present.
    """
    words = ocr_data["text"]
    for index, word in enumerate(words):
        if word == "姓名":
            return index
        # Bounds check first: a trailing "姓" has no successor to inspect.
        if word == "姓" and index + 1 < len(words) and words[index + 1] == "名":
            return index
    return None


# Walk every file under labimage/, locate the "姓名" label, OCR the area to its
# right, black that area out, and append (name, source path) to name2file.csv.
for root, dirs, files in os.walk("labimage/"):
    for file in files:
        # Kept as plain concatenation so the path recorded in the CSV has the
        # same form as rows written by earlier runs.
        path = root + "/" + file
        with Image.open(path) as image:
            words = pytesseract.image_to_data(
                image, lang="chi_sim43", output_type="dict"
            )
            i = find_name_label(words)
            if i is None:
                # No name label found: nothing to redact or record.
                continue

            x, y, w, h = (
                words["left"][i],
                words["top"][i],
                words["width"][i],
                words["height"][i],
            )
            print(x, y, w, h)

            # Label box padded by 10px up/left, 30px down, and extended 400px
            # to the right; presumably sized to cover the name value that
            # follows the label -- TODO confirm against the scan layout.
            box = (x - 10, y - 10, x + w + 400, y + h + 30)
            img = image.crop(box)
            result = pytesseract.image_to_string(
                img, lang="chi_sim43", output_type="dict"
            )
            if not result["text"].strip():
                # Nothing recognised: round-trip the crop through a file and
                # retry with the array that OpenCV loads from it.
                os.makedirs("./Cache/", exist_ok=True)
                filename = "./Cache/" + file
                img.save(filename)
                img = cv2.imread(filename)
                result = pytesseract.image_to_string(
                    img, lang="chi_sim43", output_type="dict"
                )

            # Remove spaces, then drop the first three characters (presumably
            # the "姓名" label plus a separator -- verify on real data).
            cnt = result["text"].replace(" ", "")[3:]
            print(cnt)

            # Redact the same region in the full image and save the copy.
            image.paste((0, 0, 0), box)
            os.makedirs("./Output/", exist_ok=True)
            image.save("./Output/" + file)

            data = pd.DataFrame({"name": [cnt], "dir": [path]})
            data.to_csv("name2file.csv", mode="a", header=False)