create Demo

7f96cd8a · EvilCalf · 79943924 · 7f96cd8a · 7f96cd8a · 7f96cd8a
Commit 7f96cd8a authored Dec 09, 2019 by EvilCalf
114 changed files
--- a/.gitignore
+++ b/.gitignore
+# Vue
+.DS_Store
+node_modules
+/dist
+# Eclipse
+.classpath
+.project
+.settings/
+# IntelliJ IDEA
+.idea
+*.iml
+*.iws
+# Maven
+log
+target
+pom.xml.tag
+pom.xml.releaseBackup
+pom.xml.versionBackup
+pom.xml.next
+release.properties
+dependency-reduced-pom.xml
+buildNumber.properties
+# java
+*.class
+*.war
+*.ear
+# bak
+*.bak
+/bin/
+# sbt
+/target/
+/project/target/
+/project/project/target/
+/project/project/project/target/
+/build-sbt/
+local.sbt
+# spring
+*.springBeans
+# Editor directories and files
+.idea
+.vscode
+*.suo
+*.ntvs*
+*.njsproj
+*.sln
+*.sw*
+# local env files
+.env.local
+.env.*.local
+# Log files
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+# python
+__pycache__
+# VSC Counter
+/.VSCodeCounter/
+# data
+/data/
+# cache
+/Cache/*.jpg
--- a/Output/154045368169150956.jpg
+++ b/Output/154045368169150956.jpg
--- a/Output/154045384343030182.jpg
+++ b/Output/154045384343030182.jpg
--- a/Output/154045446763420735.jpg
+++ b/Output/154045446763420735.jpg
--- a/Output/154045530108690559.jpg
+++ b/Output/154045530108690559.jpg
--- a/Output/154045562332620522.jpg
+++ b/Output/154045562332620522.jpg
--- a/Output/154045606255320567.jpg
+++ b/Output/154045606255320567.jpg
--- a/Output/154045651362010523.jpg
+++ b/Output/154045651362010523.jpg
--- a/Output/154045651362010599.jpg
+++ b/Output/154045651362010599.jpg
--- a/README.md
+++ b/README.md
--- a/labimage/40/153828071104640671.jpg
+++ b/labimage/40/153828071104640671.jpg
--- a/labimage/41/153828093038760836.jpg
+++ b/labimage/41/153828093038760836.jpg
--- a/labimage/42/154045368169150956.jpg
+++ b/labimage/42/154045368169150956.jpg
--- a/labimage/43/154045384343030182.jpg
+++ b/labimage/43/154045384343030182.jpg
--- a/labimage/44/154045446763420735.jpg
+++ b/labimage/44/154045446763420735.jpg
--- a/labimage/45/154045530108690559.jpg
+++ b/labimage/45/154045530108690559.jpg
--- a/labimage/46/154045562332620522.jpg
+++ b/labimage/46/154045562332620522.jpg
--- a/labimage/47/154045606255320567.jpg
+++ b/labimage/47/154045606255320567.jpg
--- a/labimage/48/154045651362010523.jpg
+++ b/labimage/48/154045651362010523.jpg
--- a/labimage/49/154045651362010599.jpg
+++ b/labimage/49/154045651362010599.jpg
--- a/main.py
+++ b/main.py
+import cv2
+import pytesseract
+from PIL import Image
+from PIL import ImageDraw
+from PIL import ImageFont
+from PIL import ImageGrab
+import numpy as np
+import os
+import pandas as pd
+tesseract_cmd = r'.\tesseract-ocr\tesseract.exe'
+for root, dirs, files in os.walk("labimage/"):
+    for file in files:
+        image = Image.open(root + "/" + file)
+        content = pytesseract.image_to_data(
+            image, lang="chi_sim43", output_type="dict"
+        ) 
+        for i in range(len(content["text"])):
+            if 0 < len(content["text"][i]):
+                if content["text"][i] == "姓名" or (
+                    content["text"][i] == "姓" and content["text"][i + 1] == "名"
+                ):
+                    (x, y, w, h) = (
+                        content["left"][i],
+                        content["top"][i],
+                        content["width"][i],
+                        content["height"][i],
+                    )
+                    print(x, y, w, h)
+                    img = image.crop((x - 10, y - 10, x + w + 400, y + h + 30))
+                    content = pytesseract.image_to_string(
+                        img, lang="chi_sim43", output_type="dict"
+                    )
+                    if content["text"] == "":
+                        filename = "./Cache/" + file
+                        img.save(filename)
+                        img = cv2.imread(filename)
+                        content = pytesseract.image_to_string(
+                            img, lang="chi_sim43", output_type="dict"
+                        ) 
+                    cnt = content["text"]
+                    cnt = cnt.replace(" ", "")
+                    cnt = cnt[3:]
+                    print(cnt)
+                    image.paste((0, 0, 0), (x - 10, y - 10, x + w + 400, y + h + 30))
+                    image.save("./Output/" + file)
+                    data = pd.DataFrame({'name': [cnt], 'dir': [root + "/" + file]})
+                    data.to_csv("name2file.csv",mode='a',header=False)
+                    break
--- a/name2file.csv
+++ b/name2file.csv
+,name,dir
+0,黄梅梅,labimage/42/154045368169150956.jpg
+0,何世云,labimage/43/154045384343030182.jpg
+0,刘春佛,labimage/44/154045446763420735.jpg
+0,林美兰,labimage/45/154045530108690559.jpg
+0,许赞国,labimage/46/154045562332620522.jpg
+0,黄守云,labimage/47/154045606255320567.jpg
+0,黄秀明,labimage/48/154045651362010523.jpg
+0,刘建霞,labimage/49/154045651362010599.jpg
--- a/tesseract-ocr/ambiguous_words.exe
+++ b/tesseract-ocr/ambiguous_words.exe
--- a/tesseract-ocr/classifier_tester.exe
+++ b/tesseract-ocr/classifier_tester.exe
--- a/tesseract-ocr/cntraining.exe
+++ b/tesseract-ocr/cntraining.exe
--- a/tesseract-ocr/combine_lang_model.exe
+++ b/tesseract-ocr/combine_lang_model.exe
--- a/tesseract-ocr/combine_tessdata.exe
+++ b/tesseract-ocr/combine_tessdata.exe
--- a/tesseract-ocr/dawg2wordlist.exe
+++ b/tesseract-ocr/dawg2wordlist.exe
--- a/tesseract-ocr/doc/AUTHORS
+++ b/tesseract-ocr/doc/AUTHORS
+Ray Smith (lead developer) <theraysmith@gmail.com>
+Ahmad Abdulkader
+Rika Antonova
+Nicholas Beato
+Jeff Breidenbach
+Samuel Charron
+Phil Cheatle
+Simon Crouch
+David Eger
+Sheelagh Huddleston
+Dan Johnson
+Rajesh Katikam
+Thomas Kielbus
+Dar-Shyang Lee
+Zongyi (Joe) Liu
+Robert Moss
+Chris Newton
+Michael Reimer
+Marius Renn
+Raquel Romano
+Christy Russon
+Shobhit Saxena
+Mark Seaman
+Faisal Shafait
+Hiroshi Takenaka
+Ranjith Unnikrishnan
+Joern Wanke
+Ping Ping Xiu
+Andrew Ziem
+Oscar Zuniga
+Community Contributors:
+Zdenko Podobný (Maintainer)
+Jim Regan (Maintainer)
+James R Barlow
+Amit Dovev
+Martin Ettl
+Shree Devi Kumar
+Noah Metzger
+Tom Morris
+Tobias Müller
+Egor Pugin
+Sundar M. Vaidya
+Stefan Weil
--- a/tesseract-ocr/doc/COPYING
+++ b/tesseract-ocr/doc/COPYING
+This package contains the Tesseract Open Source OCR Engine.
+Originally developed at Hewlett Packard Laboratories Bristol and
+at Hewlett Packard Co, Greeley Colorado, all the code
+in this distribution is now licensed under the Apache License:
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+** http://www.apache.org/licenses/LICENSE-2.0
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+Other Dependencies and Licenses:
+================================
+Tesseract uses Leptonica library (http://leptonica.com/) which essentially
+uses a BSD 2-clause license. (http://leptonica.com/about-the-license.html)
--- a/tesseract-ocr/iconv.dll
+++ b/tesseract-ocr/iconv.dll
--- a/tesseract-ocr/icudata57.dll
+++ b/tesseract-ocr/icudata57.dll
--- a/tesseract-ocr/icudt57.dll
+++ b/tesseract-ocr/icudt57.dll
--- a/tesseract-ocr/icui18n57.dll
+++ b/tesseract-ocr/icui18n57.dll
--- a/tesseract-ocr/icuuc57.dll
+++ b/tesseract-ocr/icuuc57.dll
--- a/tesseract-ocr/java/ScrollView.jar
+++ b/tesseract-ocr/java/ScrollView.jar
--- a/tesseract-ocr/java/jaxb-api-2.3.1.jar
+++ b/tesseract-ocr/java/jaxb-api-2.3.1.jar
--- a/tesseract-ocr/java/piccolo2d-core-3.0.jar
+++ b/tesseract-ocr/java/piccolo2d-core-3.0.jar
--- a/tesseract-ocr/java/piccolo2d-extras-3.0.jar
+++ b/tesseract-ocr/java/piccolo2d-extras-3.0.jar
--- a/tesseract-ocr/libbz2-1.dll
+++ b/tesseract-ocr/libbz2-1.dll
--- a/tesseract-ocr/libcairo-2.dll
+++ b/tesseract-ocr/libcairo-2.dll
--- a/tesseract-ocr/libexpat-1.dll
+++ b/tesseract-ocr/libexpat-1.dll
--- a/tesseract-ocr/libffi-6.dll
+++ b/tesseract-ocr/libffi-6.dll
--- a/tesseract-ocr/libfontconfig-1.dll
+++ b/tesseract-ocr/libfontconfig-1.dll
--- a/tesseract-ocr/libfreetype-6.dll
+++ b/tesseract-ocr/libfreetype-6.dll
--- a/tesseract-ocr/libgcc_s_seh-1.dll
+++ b/tesseract-ocr/libgcc_s_seh-1.dll
--- a/tesseract-ocr/libgcc_s_sjlj-1.dll
+++ b/tesseract-ocr/libgcc_s_sjlj-1.dll
--- a/tesseract-ocr/libgif-7.dll
+++ b/tesseract-ocr/libgif-7.dll
--- a/tesseract-ocr/libglib-2.0-0.dll
+++ b/tesseract-ocr/libglib-2.0-0.dll
--- a/tesseract-ocr/libgobject-2.0-0.dll
+++ b/tesseract-ocr/libgobject-2.0-0.dll
--- a/tesseract-ocr/libgomp-1.dll
+++ b/tesseract-ocr/libgomp-1.dll
--- a/tesseract-ocr/libharfbuzz-0.dll
+++ b/tesseract-ocr/libharfbuzz-0.dll
--- a/tesseract-ocr/libintl-8.dll
+++ b/tesseract-ocr/libintl-8.dll
--- a/tesseract-ocr/libjbig-2.dll
+++ b/tesseract-ocr/libjbig-2.dll
--- a/tesseract-ocr/libjpeg-8.dll
+++ b/tesseract-ocr/libjpeg-8.dll
--- a/tesseract-ocr/liblept-5.dll
+++ b/tesseract-ocr/liblept-5.dll
--- a/tesseract-ocr/liblzma-5.dll
+++ b/tesseract-ocr/liblzma-5.dll
--- a/tesseract-ocr/libopenjp2.dll
+++ b/tesseract-ocr/libopenjp2.dll
--- a/tesseract-ocr/libpango-1.0-0.dll
+++ b/tesseract-ocr/libpango-1.0-0.dll
--- a/tesseract-ocr/libpangocairo-1.0-0.dll
+++ b/tesseract-ocr/libpangocairo-1.0-0.dll
--- a/tesseract-ocr/libpangoft2-1.0-0.dll
+++ b/tesseract-ocr/libpangoft2-1.0-0.dll
--- a/tesseract-ocr/libpangowin32-1.0-0.dll
+++ b/tesseract-ocr/libpangowin32-1.0-0.dll
--- a/tesseract-ocr/libpcre-1.dll
+++ b/tesseract-ocr/libpcre-1.dll
--- a/tesseract-ocr/libpixman-1-0.dll
+++ b/tesseract-ocr/libpixman-1-0.dll
--- a/tesseract-ocr/libpng16-16.dll
+++ b/tesseract-ocr/libpng16-16.dll
--- a/tesseract-ocr/libstdc++-6.dll
+++ b/tesseract-ocr/libstdc++-6.dll
--- a/tesseract-ocr/libtesseract-4.dll
+++ b/tesseract-ocr/libtesseract-4.dll
--- a/tesseract-ocr/libtiff-5.dll
+++ b/tesseract-ocr/libtiff-5.dll
--- a/tesseract-ocr/libwebp-7.dll
+++ b/tesseract-ocr/libwebp-7.dll
--- a/tesseract-ocr/libwinpthread-1.dll
+++ b/tesseract-ocr/libwinpthread-1.dll
--- a/tesseract-ocr/lstmeval.exe
+++ b/tesseract-ocr/lstmeval.exe
--- a/tesseract-ocr/lstmtraining.exe
+++ b/tesseract-ocr/lstmtraining.exe
--- a/tesseract-ocr/merge_unicharsets.exe
+++ b/tesseract-ocr/merge_unicharsets.exe
--- a/tesseract-ocr/mftraining.exe
+++ b/tesseract-ocr/mftraining.exe
--- a/tesseract-ocr/set_unicharset_properties.exe
+++ b/tesseract-ocr/set_unicharset_properties.exe
--- a/tesseract-ocr/shapeclustering.exe
+++ b/tesseract-ocr/shapeclustering.exe
--- a/tesseract-ocr/tessdata/chi_sim43.traineddata
+++ b/tesseract-ocr/tessdata/chi_sim43.traineddata
--- a/tesseract-ocr/tessdata/configs/ambigs.train
+++ b/tesseract-ocr/tessdata/configs/ambigs.train
+tessedit_ambigs_training	1
+load_freq_dawg	0
+load_punc_dawg	0
+load_system_dawg	0
+load_number_dawg	0
+ambigs_debug_level	3
+load_fixed_length_dawgs	0
--- a/tesseract-ocr/tessdata/configs/api_config
+++ b/tesseract-ocr/tessdata/configs/api_config
+tessedit_zero_rejection T
--- a/tesseract-ocr/tessdata/configs/bigram
+++ b/tesseract-ocr/tessdata/configs/bigram
+load_bigram_dawg	True
+tessedit_enable_bigram_correction	True
+tessedit_bigram_debug	3
+save_raw_choices	True
+save_alt_choices	True
--- a/tesseract-ocr/tessdata/configs/box.train
+++ b/tesseract-ocr/tessdata/configs/box.train
+disable_character_fragments T
+file_type                   .bl
+textord_fast_pitch_test	T
+tessedit_single_match	0
+tessedit_zero_rejection T
+tessedit_minimal_rejection F
+tessedit_write_rep_codes F
+il1_adaption_test 1
+edges_children_fix F
+edges_childarea 0.65
+edges_boxarea 0.9
+tessedit_resegment_from_boxes T
+tessedit_train_from_boxes T
+textord_no_rejects T
--- a/tesseract-ocr/tessdata/configs/box.train.stderr
+++ b/tesseract-ocr/tessdata/configs/box.train.stderr
+file_type .bl
+#tessedit_use_nn F
+textord_fast_pitch_test T
+tessedit_single_match 0
+tessedit_zero_rejection T
+tessedit_minimal_rejection F
+tessedit_write_rep_codes F
+il1_adaption_test 1
+edges_children_fix F
+edges_childarea 0.65
+edges_boxarea 0.9
+tessedit_resegment_from_boxes T
+tessedit_train_from_boxes T
+#textord_repeat_extraction F
+textord_no_rejects T
--- a/tesseract-ocr/tessdata/configs/digits
+++ b/tesseract-ocr/tessdata/configs/digits
+tessedit_char_whitelist 0123456789-.
--- a/tesseract-ocr/tessdata/configs/hocr
+++ b/tesseract-ocr/tessdata/configs/hocr
+tessedit_create_hocr 1
+hocr_font_info 0
--- a/tesseract-ocr/tessdata/configs/inter
+++ b/tesseract-ocr/tessdata/configs/inter
+interactive_display_mode				T
+tessedit_display_outwords		T
--- a/tesseract-ocr/tessdata/configs/kannada
+++ b/tesseract-ocr/tessdata/configs/kannada
+textord_skewsmooth_offset 8
+textord_skewsmooth_offset2 8
+textord_merge_desc 0.5
+textord_no_rejects 1
--- a/tesseract-ocr/tessdata/configs/linebox
+++ b/tesseract-ocr/tessdata/configs/linebox
+tessedit_resegment_from_line_boxes 1
+tessedit_make_boxes_from_boxes 1
--- a/tesseract-ocr/tessdata/configs/logfile
+++ b/tesseract-ocr/tessdata/configs/logfile
+debug_file tesseract.log
--- a/tesseract-ocr/tessdata/configs/lstm.train
+++ b/tesseract-ocr/tessdata/configs/lstm.train
+disable_character_fragments T
+file_type                   .bl
+textord_fast_pitch_test	T
+tessedit_single_match	0
+tessedit_zero_rejection T
+tessedit_minimal_rejection F
+tessedit_write_rep_codes F
+il1_adaption_test 1
+edges_children_fix F
+edges_childarea 0.65
+edges_boxarea 0.9
+tessedit_train_line_recognizer T
+textord_no_rejects T
--- a/tesseract-ocr/tessdata/configs/lstmdebug
+++ b/tesseract-ocr/tessdata/configs/lstmdebug
+stopper_debug_level 1
+classify_debug_level 1
+segsearch_debug_level 1
+language_model_debug_level 3
--- a/tesseract-ocr/tessdata/configs/makebox
+++ b/tesseract-ocr/tessdata/configs/makebox
+tessedit_create_boxfile 1
--- a/tesseract-ocr/tessdata/configs/pdf
+++ b/tesseract-ocr/tessdata/configs/pdf
+tessedit_create_pdf 1
--- a/tesseract-ocr/tessdata/configs/quiet
+++ b/tesseract-ocr/tessdata/configs/quiet
+debug_file /dev/null
--- a/tesseract-ocr/tessdata/configs/rebox
+++ b/tesseract-ocr/tessdata/configs/rebox
+tessedit_resegment_from_boxes 1
+tessedit_make_boxes_from_boxes 1
--- a/tesseract-ocr/tessdata/configs/strokewidth
+++ b/tesseract-ocr/tessdata/configs/strokewidth
+textord_show_blobs 0
+textord_debug_tabfind 3
+textord_tabfind_show_partitions 1
+textord_tabfind_show_initial_partitions 1
+textord_tabfind_show_columns 1
+textord_tabfind_show_blocks 1
+textord_tabfind_show_initialtabs 1
+textord_tabfind_show_finaltabs 1
+textord_tabfind_show_strokewidths 1
+textord_tabfind_show_vlines 0
+textord_tabfind_show_images 1
+tessedit_dump_pageseg_images 0
--- a/tesseract-ocr/tessdata/configs/tsv
+++ b/tesseract-ocr/tessdata/configs/tsv
+tessedit_create_tsv 1
--- a/tesseract-ocr/tessdata/configs/txt
+++ b/tesseract-ocr/tessdata/configs/txt
+# This config file should be used with other config files which create renderers.
+# usage example: tesseract eurotext.tif eurotext txt hocr pdf
+tessedit_create_txt 1
--- a/tesseract-ocr/tessdata/configs/unlv
+++ b/tesseract-ocr/tessdata/configs/unlv
+tessedit_write_unlv 1
+unlv_tilde_crunching T
--- a/tesseract-ocr/tessdata/eng.traineddata
+++ b/tesseract-ocr/tessdata/eng.traineddata
--- a/tesseract-ocr/tessdata/eng.user-patterns
+++ b/tesseract-ocr/tessdata/eng.user-patterns
+1-\d\d\d-GOOG-411
+www.\n\\\*.com
--- a/tesseract-ocr/tessdata/eng.user-words
+++ b/tesseract-ocr/tessdata/eng.user-words
--- a/tesseract-ocr/tessdata/osd.traineddata
+++ b/tesseract-ocr/tessdata/osd.traineddata
--- a/tesseract-ocr/tessdata/pdf.ttf
+++ b/tesseract-ocr/tessdata/pdf.ttf
--- a/tesseract-ocr/tessdata/tessconfigs/batch
+++ b/tesseract-ocr/tessdata/tessconfigs/batch
--- a/tesseract-ocr/tessdata/tessconfigs/batch.nochop
+++ b/tesseract-ocr/tessdata/tessconfigs/batch.nochop
--- a/tesseract-ocr/tessdata/tessconfigs/matdemo
+++ b/tesseract-ocr/tessdata/tessconfigs/matdemo
--- a/tesseract-ocr/tessdata/tessconfigs/msdemo
+++ b/tesseract-ocr/tessdata/tessconfigs/msdemo
--- a/tesseract-ocr/tessdata/tessconfigs/nobatch
+++ b/tesseract-ocr/tessdata/tessconfigs/nobatch
--- a/tesseract-ocr/tessdata/tessconfigs/segdemo
+++ b/tesseract-ocr/tessdata/tessconfigs/segdemo
--- a/tesseract-ocr/tesseract.exe
+++ b/tesseract-ocr/tesseract.exe
--- a/tesseract-ocr/text2image.exe
+++ b/tesseract-ocr/text2image.exe
--- a/tesseract-ocr/unicharset_extractor.exe
+++ b/tesseract-ocr/unicharset_extractor.exe
--- a/tesseract-ocr/wordlist2dawg.exe
+++ b/tesseract-ocr/wordlist2dawg.exe
--- a/tesseract-ocr/zlib1.dll
+++ b/tesseract-ocr/zlib1.dll