Commit 7f96cd8a authored by EvilCalf's avatar EvilCalf

create Demo

parent 79943924
# Vue
.DS_Store
node_modules
/dist
# Eclipse
.classpath
.project
.settings/
# Intel Idea
.idea
*.iml
*.iws
# Maven
log
target
pom.xml.tag
pom.xml.releaseBackup
pom.xml.versionBackup
pom.xml.next
release.properties
dependency-reduced-pom.xml
buildNumber.properties
# java
*.class
*.war
*.ear
# bak
*.bak
/bin/
# sbt
/target/
/project/target/
/project/project/target/
/project/project/project/target/
/build-sbt/
local.sbt
# spring
*.springBeans
# Editor directories and files
.idea
.vscode
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw*
# local env files
.env.local
.env.*.local
# Log files
npm-debug.log*
yarn-debug.log*
yarn-error.log*
# python
__pycache__
# VSC Counter
/.VSCodeCounter/
# data
/data/
# cache
/Cache/*.jpg
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
import cv2
import pytesseract
from PIL import Image
from PIL import ImageDraw
from PIL import ImageFont
from PIL import ImageGrab
import numpy as np
import os
import pandas as pd
# Redact the patient-name field on every image under labimage/ and log the
# recognised name together with the source path to name2file.csv.

# Location of the Tesseract binary expected next to this script.
tesseract_cmd = r'.\tesseract-ocr\tesseract.exe'
# pytesseract looks up the executable through its own module attribute; a bare
# variable in this script is never read by it. Only override the default when
# the bundled binary is actually present, so a Tesseract found on PATH keeps
# working otherwise.
if os.path.isfile(tesseract_cmd):
    pytesseract.pytesseract.tesseract_cmd = tesseract_cmd

for root, dirs, files in os.walk("labimage/"):
    for file in files:
        # Same string is written to the CSV, so keep the "/"-joined form.
        source_path = root + "/" + file
        # Context manager releases the file handle once the image is handled.
        with Image.open(source_path) as image:
            content = pytesseract.image_to_data(
                image, lang="chi_sim43", output_type="dict"
            )
            texts = content["text"]
            for i, text in enumerate(texts):
                # The label is either one token "姓名" or split into "姓" + "名".
                # The bounds check avoids an IndexError when "姓" is the last token.
                is_name_label = text == "姓名" or (
                    text == "姓" and i + 1 < len(texts) and texts[i + 1] == "名"
                )
                if not is_name_label:
                    continue

                (x, y, w, h) = (
                    content["left"][i],
                    content["top"][i],
                    content["width"][i],
                    content["height"][i],
                )
                print(x, y, w, h)

                # Region around the label widened to the right; the same box is
                # used for both the OCR crop and the blackout below.
                box = (x - 10, y - 10, x + w + 400, y + h + 30)
                img = image.crop(box)
                result = pytesseract.image_to_string(
                    img, lang="chi_sim43", output_type="dict"
                )
                if result["text"] == "":
                    # Retry: write the crop to disk, reload it with OpenCV and
                    # run OCR again on that array.
                    os.makedirs("./Cache", exist_ok=True)
                    filename = "./Cache/" + file
                    img.save(filename)
                    img = cv2.imread(filename)
                    result = pytesseract.image_to_string(
                        img, lang="chi_sim43", output_type="dict"
                    )

                cnt = result["text"].replace(" ", "")
                # Drop the first three characters
                # (presumably the "姓名" label plus a separator -- TODO confirm).
                cnt = cnt[3:]
                print(cnt)

                # Black out the name region and store the redacted image.
                image.paste((0, 0, 0), box)
                os.makedirs("./Output", exist_ok=True)
                image.save("./Output/" + file)

                # Append one row per image: recognised name and source path.
                data = pd.DataFrame({'name': [cnt], 'dir': [source_path]})
                data.to_csv("name2file.csv", mode='a', header=False)
                # Only the first name label on each image is processed.
                break
,name,dir
0,黄梅梅,labimage/42/154045368169150956.jpg
0,何世云,labimage/43/154045384343030182.jpg
0,刘春佛,labimage/44/154045446763420735.jpg
0,林美兰,labimage/45/154045530108690559.jpg
0,许赞国,labimage/46/154045562332620522.jpg
0,黄守云,labimage/47/154045606255320567.jpg
0,黄秀明,labimage/48/154045651362010523.jpg
0,刘建霞,labimage/49/154045651362010599.jpg
Ray Smith (lead developer) <theraysmith@gmail.com>
Ahmad Abdulkader
Rika Antonova
Nicholas Beato
Jeff Breidenbach
Samuel Charron
Phil Cheatle
Simon Crouch
David Eger
Sheelagh Huddleston
Dan Johnson
Rajesh Katikam
Thomas Kielbus
Dar-Shyang Lee
Zongyi (Joe) Liu
Robert Moss
Chris Newton
Michael Reimer
Marius Renn
Raquel Romano
Christy Russon
Shobhit Saxena
Mark Seaman
Faisal Shafait
Hiroshi Takenaka
Ranjith Unnikrishnan
Joern Wanke
Ping Ping Xiu
Andrew Ziem
Oscar Zuniga
Community Contributors:
Zdenko Podobný (Maintainer)
Jim Regan (Maintainer)
James R Barlow
Amit Dovev
Martin Ettl
Shree Devi Kumar
Noah Metzger
Tom Morris
Tobias Müller
Egor Pugin
Sundar M. Vaidya
Stefan Weil
This package contains the Tesseract Open Source OCR Engine.
Originally developed at Hewlett Packard Laboratories Bristol and
at Hewlett Packard Co, Greeley Colorado, all the code
in this distribution is now licensed under the Apache License:
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
** http://www.apache.org/licenses/LICENSE-2.0
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
Other Dependencies and Licenses:
================================
Tesseract uses Leptonica library (http://leptonica.com/) which essentially
uses a BSD 2-clause license. (http://leptonica.com/about-the-license.html)
tessedit_ambigs_training 1
load_freq_dawg 0
load_punc_dawg 0
load_system_dawg 0
load_number_dawg 0
ambigs_debug_level 3
load_fixed_length_dawgs 0
load_bigram_dawg True
tessedit_enable_bigram_correction True
tessedit_bigram_debug 3
save_raw_choices True
save_alt_choices True
disable_character_fragments T
file_type .bl
textord_fast_pitch_test T
tessedit_single_match 0
tessedit_zero_rejection T
tessedit_minimal_rejection F
tessedit_write_rep_codes F
il1_adaption_test 1
edges_children_fix F
edges_childarea 0.65
edges_boxarea 0.9
tessedit_resegment_from_boxes T
tessedit_train_from_boxes T
textord_no_rejects T
file_type .bl
#tessedit_use_nn F
textord_fast_pitch_test T
tessedit_single_match 0
tessedit_zero_rejection T
tessedit_minimal_rejection F
tessedit_write_rep_codes F
il1_adaption_test 1
edges_children_fix F
edges_childarea 0.65
edges_boxarea 0.9
tessedit_resegment_from_boxes T
tessedit_train_from_boxes T
#textord_repeat_extraction F
textord_no_rejects T
tessedit_char_whitelist 0123456789-.
tessedit_create_hocr 1
hocr_font_info 0
interactive_display_mode T
tessedit_display_outwords T
textord_skewsmooth_offset 8
textord_skewsmooth_offset2 8
textord_merge_desc 0.5
textord_no_rejects 1
tessedit_resegment_from_line_boxes 1
tessedit_make_boxes_from_boxes 1
disable_character_fragments T
file_type .bl
textord_fast_pitch_test T
tessedit_single_match 0
tessedit_zero_rejection T
tessedit_minimal_rejection F
tessedit_write_rep_codes F
il1_adaption_test 1
edges_children_fix F
edges_childarea 0.65
edges_boxarea 0.9
tessedit_train_line_recognizer T
textord_no_rejects T
stopper_debug_level 1
classify_debug_level 1
segsearch_debug_level 1
language_model_debug_level 3
tessedit_resegment_from_boxes 1
tessedit_make_boxes_from_boxes 1
textord_show_blobs 0
textord_debug_tabfind 3
textord_tabfind_show_partitions 1
textord_tabfind_show_initial_partitions 1
textord_tabfind_show_columns 1
textord_tabfind_show_blocks 1
textord_tabfind_show_initialtabs 1
textord_tabfind_show_finaltabs 1
textord_tabfind_show_strokewidths 1
textord_tabfind_show_vlines 0
textord_tabfind_show_images 1
tessedit_dump_pageseg_images 0
# This config file should be used with other config files which create renderers.
# usage example: tesseract eurotext.tif eurotext txt hocr pdf
tessedit_create_txt 1
tessedit_write_unlv 1
unlv_tilde_crunching T
1-\d\d\d-GOOG-411
www.\n\\\*.com
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment