init

5dd18bcf · 陶书衡 · 5dd18bcf · 5dd18bcf · 5dd18bcf · 5dd18bcf
Commit 5dd18bcf authored Dec 08, 2021 by 陶书衡
27 changed files
--- a/.idea/.gitignore
+++ b/.idea/.gitignore
+# 默认忽略的文件
+/shelf/
+/workspace.xml
+# 基于编辑器的 HTTP 客户端请求
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
--- a/.idea/OralAPI.iml
+++ b/.idea/OralAPI.iml
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="jdk" jdkName="Python 3.7 (tf-latest-base)" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>
\ No newline at end of file
--- a/.idea/inspectionProfiles/Project_Default.xml
+++ b/.idea/inspectionProfiles/Project_Default.xml
--- a/.idea/inspectionProfiles/profiles_settings.xml
+++ b/.idea/inspectionProfiles/profiles_settings.xml
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
\ No newline at end of file
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7 (tf-latest-base)" project-jdk-type="Python SDK" />
+</project>
\ No newline at end of file
--- a/.idea/modules.xml
+++ b/.idea/modules.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/OralAPI.iml" filepath="$PROJECT_DIR$/.idea/OralAPI.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
--- a/BiLSTMCRF.py
+++ b/BiLSTMCRF.py
+import numpy as np
+import tensorflow as tf
+from tensorflow.keras.callbacks import TensorBoard
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.optimizers import Adam
+from tensorflow.python import keras
+
+from CRF import CRF
+
+# from CRF import CRF
+
+
+class BiLSTMCRF:
+    def __init__(self, vocabSize, maxLen, tagIndexDict, tagSum, sequenceLengths=None, vecSize=100, learning_rate=0.01):
+        keras.backend.clear_session()
+        self.vocabSize = vocabSize
+        self.vecSize = vecSize
+        self.maxLen = maxLen
+        self.tagSum = tagSum
+        self.sequenceLengths = sequenceLengths
+        self.tagIndexDict = tagIndexDict
+        self.learning_rate = learning_rate
+
+        self.buildBiLSTMCRF()
+
+    def getTransParam(self, y, tagIndexDict):
+        self.trainY = np.argmax(y, axis=-1)
+        yList = self.trainY.tolist()
+        transParam = np.zeros(
+            [len(list(tagIndexDict.keys())), len(list(tagIndexDict.keys()))])
+        for rowI in range(len(yList)):
+            for colI in range(len(yList[rowI])-1):
+                transParam[yList[rowI][colI]][yList[rowI][colI+1]] += 1
+        for rowI in range(transParam.shape[0]):
+            transParam[rowI] = transParam[rowI]/np.sum(transParam[rowI])
+        return transParam
+
+    def buildBiLSTMCRF(self):
+
+        model = Sequential()
+        model.add(tf.keras.layers.Input(shape=(self.maxLen,)))
+        model.add(tf.keras.layers.Embedding(self.vocabSize, self.vecSize))
+        model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(
+            self.tagSum, return_sequences=True, activation="tanh"), merge_mode='sum'))
+        model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(
+            self.tagSum, return_sequences=True, activation="softmax"), merge_mode='sum'))
+        crf = CRF(self.tagSum, name='crf_layer')
+        model.add(crf)
+        model.compile(Adam(learning_rate=self.learning_rate), loss={
+            'crf_layer': crf.get_loss}, metrics=[crf.get_accuracy])
+        self.net = model
+
+    def fit(self, X, y, epochs=100, batchsize=32):
+        if len(y.shape) == 3:
+            y = np.argmax(y, axis=-1)
+        if self.sequenceLengths is None:
+            self.sequenceLengths = [row.shape[0] for row in y]
+        callbacks_list = [
+            tf.keras.callbacks.History(),
+            tf.keras.callbacks.ReduceLROnPlateau(monitor='loss', factor=0.5, patience=5,
+                                                 verbose=1, mode='auto', min_lr=1e-9),
+            tf.keras.callbacks.ModelCheckpoint("model/model.h5", monitor='get_accuracy',
+                                               verbose=0, save_best_only=True, save_weights_only=True, mode='auto', period=1),
+            tf.keras.callbacks.EarlyStopping(
+                monitor='loss', min_delta=1e-5, patience=10),
+            TensorBoard(log_dir="logs", histogram_freq=1)
+            # WeightsSaver(1)
+        ]
+        history = self.net.fit(
+            X, y, epochs=epochs, callbacks=callbacks_list, batch_size=batchsize)
+
+        return history
+
+    def predict(self, X):
+        preYArr = self.net.predict(X)
+        return preYArr
+
+    def load_weights(self, model_path):
+        self.net.load_weights(model_path)
--- a/CRF.py
+++ b/CRF.py
--- a/README.md
+++ b/README.md
+# 肺结节CT影像报告实体提取
--- a/__pycache__/BiLSTMCRF.cpython-37.pyc
+++ b/__pycache__/BiLSTMCRF.cpython-37.pyc
--- a/__pycache__/CRF.cpython-37.pyc
+++ b/__pycache__/CRF.cpython-37.pyc
--- a/__pycache__/api.cpython-37.pyc
+++ b/__pycache__/api.cpython-37.pyc
--- a/__pycache__/model.cpython-37.pyc
+++ b/__pycache__/model.cpython-37.pyc
--- a/__pycache__/predict.cpython-37.pyc
+++ b/__pycache__/predict.cpython-37.pyc
--- a/__pycache__/splittxt.cpython-37.pyc
+++ b/__pycache__/splittxt.cpython-37.pyc
--- a/__pycache__/tools.cpython-37.pyc
+++ b/__pycache__/tools.cpython-37.pyc
--- a/api.py
+++ b/api.py
+import json
+import copy
+import requests
+import uuid
+from flask import Flask, request, redirect, url_for, render_template, flash, jsonify, Blueprint
+
+from model import Oral
+
+# Blueprint holding the oral-report endpoints; main.py registers it on the app.
+oral_api = Blueprint('oral', __name__)
+
+
+@oral_api.route('/')
+def show():
+    return 'This is oral api.'
+
+
+@oral_api.route('/recg/', methods = ['POST'])
+def recognize():
+    if request.method == 'POST':
+        finding = request.form.get('finding')
+        conclusion = request.form.get('conclusion')
+        verbose = request.form.get('verbose', default = 0)
+        try:
+            verbose = int(verbose)
+        except Exception as e:
+            return jsonify({'success': False, 'description': {'error msg': 'verbose can be only 0 or 1'}}), 500
+        if verbose != 0:
+            print()
+            print(finding)
+            print(conclusion)
+        if finding is None or conclusion is None:
+            return jsonify({'success': False, 'description': {'error msg': 'invalid post body fields'}}), 500
+        elif finding == '' or conclusion == '':
+            return jsonify(
+                {'success': False, 'description': {'error msg': 'findings or conclusions cannot be empty'}}), 500
+        else:
+            try:
+                print('' if verbose == 0 else 'verbose out:')
+                oral = Oral(finding, conclusion, verbose = False if verbose == 0 else 1)
+                data = oral.get_json()
+                return jsonify({'success': True, 'description': {'data': data}}), 200
+            except Exception as e:
+                print("/n******ERROR SRART******/n")
+                print(e)
+                print("----------findind----------")
+                print(finding)
+                print("---------conclusion--------")
+                print(conclusion)
+                print("/n*******ERROR END*******/n")
+                return jsonify({'success': False, 'description': {'error msg': e}}), 500
+    else:
+        return jsonify({'success': False, 'description': {'error msg': 'Invalid methods'}}), 404
--- a/config.txt
+++ b/config.txt
+vocabSize:497
+maxLen:177
+classSum:21
--- a/main.py
+++ b/main.py
+from flask import Flask, request, redirect, url_for, render_template, flash, jsonify, Blueprint
+
+from api import oral_api
+
+# Application object; the oral blueprint is mounted under the /oral prefix.
+app = Flask(__name__)
+# NOTE(review): hard-coded secret key committed to the repository — consider
+# loading it from configuration or the environment instead.
+app.secret_key = '1234567'
+app.register_blueprint(oral_api, url_prefix = '/oral')
+
+if __name__ == '__main__':
+    # from werkzeug.contrib.fixers import ProxyFix
+    # app.wsgi_app = ProxyFix(app.wsgi_app)
+    # NOTE(review): debug=True together with host 0.0.0.0 exposes the
+    # interactive debugger on every network interface; suitable for local
+    # development only.
+    app.run(debug = True, port = 5004, host = '0.0.0.0')
--- a/model.py
+++ b/model.py
--- a/model/model.h5
+++ b/model/model.h5
--- a/model/vocab.txt
+++ b/model/vocab.txt
+、
+白
+M
+点
+提
+钛
+h
+,
+>
+野
+Z
+图
+春
+L
+轻
+芽
+口
+内
+损
+请
+测
+处
+颞
+；
+颗
+硬
+周
+组
+皮
+发
+右
+骨
+多
+H
+先
+”
+侧
+痣
+构
+腮
+级
+部
+样
+层
+胡
+密
+解
+富
+颏
+巢
+缘
+固
+＞
+院
+6
+黑
+单
+物
+缺
+角
+透
+反
+大
+行
+张
+p
+面
+包
+宇
+左
+华
+粒
+i
+折
+伴
+到
+小
+G
+治
+囊
+3
+支
+欠
+以
+顶
+敏
+V
+灰
+F
+（
+探
+韧
+壁
+实
+像
+带
+石
+泡
+果
+血
+额
+甲
+头
+后
+清
+锁
+能
+报
+均
+碎
+.
+坏
+且
+个
+腹
+梁
+刘
+眶
+(
+切
+空
+整
+号
+审
+背
+W
+咬
+史
+肉
+含
+髓
+<
+R
+限
+质
+殊
+信
+子
+别
+谷
+粗
+影
+在
+纤
+润
+叶
+量
+占
+对
+累
+症
+浅
+失
+明
+肪
+侵
+静
+O
+或
+堆
+残
+“
+符
+覆
+颊
+倍
+送
+被
+≤
+薄
+神
+其
+度
+据
+疫
+术
+软
+T
+唇
+病
+孙
+桃
+冻
+：
+垫
+UNK
+隐
+首
+溃
+瘤
+窦
+萎
+鳞
+材
+腔
+增
+细
+开
+的
+显
+复
+照
+瑜
+低
+颈
+附
+"
+肤
+即
+成
+必
+尚
+围
+阳
+团
+b
+示
+局
+剖
+菜
+放
+腺
+孔
+综
+t
+双
+巨
+核
+待
+鞘
+查
+主
+;
+A
+底
+喉
+片
+牙
+者
+突
+分
+临
+察
+下
+已
+为
+扩
+腭
+灶
+蜡
+红
+髁
+共
+节
+重
+完
+）
+随
+高
+厌
+:
+1
+合
+约
+浸
+玻
+P
+S
+菲
+似
+期
+比
+槽
+应
+颅
+现
+簇
+]
+线
+留
+生
+端
+舌
+色
+基
+他
+位
+早
+经
+K
+[
+花
+状
+扁
+化
+及
+理
+糙
+会
+关
+另
+区
+虑
+。
+脑
+见
+疡
+黏
+平
+界
+旁
+态
+衬
+变
+范
+王
+形
+体
+相
+疗
+诊
+/
+前
+医
+径
+极
+I
+液
+证
+等
+，
+U
+检
+—
+淋
+具
+彻
+景
+直
+良
+排
+深
+外
+恶
+筛
+稍
+缩
+间
+脱
+未
+北
+巴
+破
+活
+肯
+黄
+？
+混
+龈
+断
+翼
+死
+散
+型
+犯
+m
+继
+转
+异
+a
+特
+B
+C
+%
+织
+8
+常
+D
+南
+枚
+粘
+离
+-
+升
+少
+中
+磨
+l
+璃
+肌
+考
+4
+免
+议
+梭
+E
+可
+只
+N
+视
+钙
+c
+典
+Y
+取
+该
+向
+倾
+域
+癌
+告
+)
+因
+n
+难
+导
+≥
+隔
+进
+冰
+有
+胞
+性
+慢
+晶
+乳
+×
+呈
+炎
+*
+观
+0
+r
+胸
+字
+上
+源
+床
+注
+2
+染
+原
+丰
+＜
+窝
+安
+长
+绿
+涎
+建
+感
+块
+5
+?
+9
+里
+各
+阴
+边
+肿
+来
+7
+除
+丽
+热
+膜
+w
+余
+－
+根
+移
+裂
+制
+管
+表
+
+学
+脉
+板
+针
+刮
+咽
+步
+结
+颌
+不
+鼻
+与
+蝶
+困
+定
+全
+近
+袭
+脂
+维
\ No newline at end of file
--- a/predict.py
+++ b/predict.py
+import csv
+import json
+import os
+
+from tensorflow.keras.preprocessing.sequence import pad_sequences
+
+from BiLSTMCRF import BiLSTMCRF
+
+# Locations of the trained weights and of the vocabulary (one entry per line).
+model_path = 'model/model.h5'
+vocab_path = 'model/vocab.txt'
+# Tag name -> tag index. Tags follow the B-/I- prefix scheme with "O" for
+# characters outside any entity.
+class_dict = {
+    "O": 0,
+    "B-NUMBER": 1,
+    "I-NUMBER": 2,
+    "B-SIZE": 3,
+    "I-SIZE": 4,
+    "B-ENE": 5,
+    "I-ENE": 6,
+    "B-ANATOMY": 7,
+    "I-ANATOMY": 8,
+    "B-SQUAMOUS": 9,
+    "I-SQUAMOUS": 10,
+    "B-INVASION": 11,
+    "I-INVASION": 12,
+    "B-PN": 13,
+    "I-PN": 14,
+    "B-LEVEL": 15,
+    "I-LEVEL": 16,
+    "B-OTHER": 17,
+    "I-OTHER": 18,
+    "B-DOI": 19,
+    "I-DOI": 20
+}
+# Length every input sequence is padded to before prediction.
+# NOTE(review): config.txt lists maxLen:177 — presumably the training-time
+# value; confirm that 500 is intended here.
+maxLen = 500
+# Number of tags; matches the number of entries in class_dict.
+classSum = 21
+
+
+def build_input(text):
+    x = []
+    for char in text:
+        if char not in word_dict:
+            char = 'UNK'
+        x.append(word_dict.get(char))
+    x = pad_sequences([x], padding = 'post', maxlen = maxLen)
+    return x
+
+
+def load_worddict():
+    vocabs = [line.strip()
+              for line in open(vocab_path, encoding = 'utf-8')]
+    word_dict = {wd: index for index, wd in enumerate(vocabs)}
+    return word_dict
+
+
+def predict(text):
+    y_pre = []
+    str = build_input(text)
+    raw = model.predict(str)[0]
+    chars = [i for i in text]
+    tags = [label_dict[i] for i in raw][:len(text)]
+    res = list(zip(chars, tags))
+    for i, tag in enumerate(tags):
+        y_pre.append(tag)
+    return res, y_pre
+
+
+def output(txt, cnt):
+    output = []
+    flag = 0
+    start = []
+    end = []
+    tags = []
+    for i, tag in enumerate(cnt):
+        if tag == 'O':
+            if flag == 1:
+                end = i-1
+                output.append([tags, txt[start:end+1], start, end])
+            flag = 0
+            continue
+        if tag.split("-")[0] == 'B':
+            if flag == 1:
+                end = i
+                output.append([tags, txt[start:end], start, end-1])
+            flag = 1
+            start = i
+            tags = tag.split("-")[1]
+            continue
+    return output
+
+
+# Module-level state used by build_input() and predict(); created on import.
+word_dict = load_worddict()
+# Embedding input size: number of vocabulary entries plus one.
+vocabSize = len(word_dict) + 1
+# Inverse of class_dict: tag index -> tag name.
+label_dict = {j: i for i, j in class_dict.items()}
+
+# Build the network and load the trained weights from model_path.
+model = BiLSTMCRF(vocabSize = vocabSize, maxLen = maxLen,
+                  tagIndexDict = class_dict, tagSum = classSum)
+model.load_weights(model_path)
+
+if __name__ == '__main__':
+    s = """
+“右舌”鳞状细胞癌（复发），高-中分化，灶性多核巨细胞浸润，肿瘤侵犯神经。送检淋巴结：“左颌下”1只、“颏下”1只均阴性（-）
+"""
+    a = predict(s)
+    for i in a[0]:
+        print(i)
+    b = output(s, a[1])
+    print(b)
--- a/requirements.txt
+++ b/requirements.txt
+absl-py==0.14.0
+appnope==0.1.2
+argcomplete==1.12.3
+argon2-cffi==21.1.0
+astunparse==1.6.3
+attrs==21.2.0
+backcall==0.2.0
+bleach==4.1.0
+cachetools==4.2.2
+certifi==2021.10.8
+cffi==1.14.6
+charset-normalizer==2.0.6
+click==8.0.3
+cn2an==0.5.11
+debugpy==1.4.3
+decorator==5.1.0
+defusedxml==0.7.1
+entrypoints==0.3
+Flask==2.0.2
+Flask-Login==0.5.0
+flatbuffers==1.12
+gast==0.3.3
+google-auth==1.35.0
+google-auth-oauthlib==0.4.6
+google-pasta==0.2.0
+grpcio==1.32.0
+h5py==2.10.0
+idna==3.2
+importlib-metadata==4.8.1
+ipykernel==6.4.1
+ipython==7.28.0
+ipython-genutils==0.2.0
+ipywidgets==7.6.5
+itsdangerous==2.0.1
+jedi==0.18.0
+Jinja2==3.0.2
+jsonschema==3.2.0
+jupyter==1.0.0
+jupyter-client==7.0.3
+jupyter-console==6.4.0
+jupyter-core==4.8.1
+jupyterlab-pygments==0.1.2
+jupyterlab-widgets==1.0.2
+Keras-Preprocessing==1.1.2
+Markdown==3.3.4
+MarkupSafe==2.0.1
+matplotlib-inline==0.1.3
+mistune==0.8.4
+nbclient==0.5.4
+nbconvert==6.2.0
+nbformat==5.1.3
+nest-asyncio==1.5.1
+notebook==6.4.4
+numpy==1.19.5
+oauthlib==3.1.1
+opt-einsum==3.3.0
+packaging==21.0
+pandas==1.3.3
+pandocfilters==1.5.0
+parso==0.8.2
+pexpect==4.8.0
+pickleshare==0.7.5
+prometheus-client==0.11.0
+prompt-toolkit==3.0.20
+protobuf==3.18.0
+ptyprocess==0.7.0
+pyasn1==0.4.8
+pyasn1-modules==0.2.8
+pycparser==2.20
+Pygments==2.10.0
+pyparsing==2.4.7
+pyrsistent==0.18.0
+python-dateutil==2.8.2
+pytz==2021.1
+PyYAML==5.4.1
+pyzmq==22.3.0
+qtconsole==5.1.1
+QtPy==1.11.2
+requests==2.26.0
+requests-oauthlib==1.3.0
+rsa==4.7.2
+Send2Trash==1.8.0
+six==1.15.0
+tensorboard==2.6.0
+tensorboard-data-server==0.6.1
+tensorboard-plugin-wit==1.8.0
+tensorflow==2.4.0
+tensorflow-addons==0.14.0
+tensorflow-estimator==2.4.0
+termcolor==1.1.0
+terminado==0.12.1
+testpath==0.5.0
+tornado==6.1
+traitlets==5.1.0
+typeguard==2.12.1
+typing-extensions==3.7.4.3
+urllib3==1.26.7
+wcwidth==0.2.5
+webencodings==0.5.1
+Werkzeug==2.0.2
+widgetsnbextension==3.5.1
+wrapt==1.12.1
+zipp==3.5.0
\ No newline at end of file
--- a/splittxt.py
+++ b/splittxt.py
+# coding:utf-8
+def splittxt(ImagingConclusion):
+    ImagingConclusion = ImagingConclusion + "\n"
+    ImagingConclusion = ImagingConclusion.replace("\nAE1/AE3", " AE1/AE3")
+    ImagingConclusion = ImagingConclusion.replace("\nEGFR", " EGFR")
+    ImagingConclusion = ImagingConclusion.replace("\nCK", " CK")
+    ImagingConclusion = ImagingConclusion.replace("：\n", "： ")
+    txt = []
+    text = []
+    ImagingConclusionFrist = ""
+    CuttingEdge = ""
+    CuttingLymph = ""
+    MolecularResults = ""
+    Immunohistochemistry = ""
+    CuttingEdgeID = -1
+    CuttingLymphID = -1
+    MolecularResultsID = -1
+    ImmunohistochemistryID = -1
+    delete = []
+    for i, char in enumerate(ImagingConclusion):
+        if char != '\n':
+            txt.append(char)
+        else:
+            if txt != []:
+                str = ''.join(txt)
+                text.append(str)
+                txt = []
+    for i, block in enumerate(text):
+        if block.find("送检切缘") != -1:
+            CuttingEdgeID = i
+            CuttingEdge = CuttingEdge + block + "\n"
+            delete.append(CuttingEdgeID)
+        elif block.find("送检淋巴结") != -1:
+            CuttingLymphID = i
+            CuttingLymph = CuttingLymph + block + "\n"
+            delete.append(CuttingLymphID)
+        elif block.find("分子结果") != -1:
+            MolecularResultsID = i
+            MolecularResults = MolecularResults + block + "\n"
+            delete.append(MolecularResultsID)
+        elif block.find("免疫组化结果") != -1:
+            ImmunohistochemistryID = i
+            Immunohistochemistry = Immunohistochemistry + block + "\n"
+            delete.append(ImmunohistochemistryID)
+    j = 0
+    for i in range(len(text)):
+        if i in delete:
+            text.pop(j)
+        else:
+            ImagingConclusionFrist = ImagingConclusionFrist + text[j] + "\n"
+            j = j + 1
+    
+    MolecularResults.replace(":", "")
+    Immunohistochemistry.replace(":", "")
+    MolecularResults = MolecularResults[MolecularResults.find("分子结果") + 4:]
+    Immunohistochemistry = Immunohistochemistry[Immunohistochemistry.find("免疫组化结果") + 6:]
+    return ImagingConclusionFrist, CuttingEdge, CuttingLymph, MolecularResults, Immunohistochemistry
+
+
+# print(splittxt(
+#     "原发灶：一带黏膜组织6*5*3cm，切面见一肿块3*2*2cm，灰白，界不清（1）\n送检切缘：前、后、内、外、底均0.5cm\n左颈大块：6*4*2cm，为脂肪血管及少量腺体，灰黄。\n左I区: 7只直径0.8-1.2cm。\n左II区: 1只直径1cm。\n左III区: 1只直径1.5cm。\n\n“左舌”黏膜鳞状细胞癌，高-中分化，DOI＞10mm\n送检切缘：“前、后、内、外、底”均阴性（-）\n“左颌下腺”轻度慢性炎\n送检淋巴结：“左”“I”1/7只有肿瘤转移（+），余及“II”1只（为软组织），“III”1只（为软组织）均阴性（-）\n免疫组化结果NI21-668\nAE1/AE3+ CKH+ CK5/6+ EGFR部分+ Ki67部分+ CD31- S-100- P16-\n北院分子结果(NM2021-0302)：EGFR扩增探针 FISH（未见明显扩增（-））\n"))
+
+if __name__ == '__main__':
+    a = splittxt("""
+“右上颌”黏膜鳞状细胞癌，高-中分化，DOI＞10mm
+“右颌下腺”慢性炎
+送检淋巴结：“右I区”1/5只(其中1只为软组织)有肿瘤转移（+），余及“右II区”6只、“右III区”6只、“右IV区”1只（为软组织）、“右V区”10只均阴性（-）
+南院分子结果(M2021-1469)：EGFR扩增探针 FISH（-）
+南院免疫组化结果(I2021-3111)：CKH（+），CK5/6（+），P16（-），Ki67（热点区约30－40%+），CD31（-），S100（-），EGFR（+），P53（-）。
+""")
+    for i in a:
+        print(i.strip())
+        print('---------------------------')
--- a/tools.py
+++ b/tools.py
+import decimal
+
+
+def pN(num, d, ENE):
+    cnt = ""
+    if num == 0:
+        cnt = "pN0"
+    elif num == 1 and d <= 3 and ENE == '无':
+        cnt = "pN1"
+    else:
+        cnt = "pN2+"
+    return cnt
+
+
+def differentiation(txt):
+    cnt = [0, 0, 0]
+    ans = ""
+    if txt.find("高") != -1:
+        cnt[0] = 1
+        ans = ans + "Ⅰ级高分化\n"
+    if txt.find("中") != -1:
+        cnt[1] = 1
+        ans = ans + "Ⅱ级中分化\n"
+    if txt.find("低") != -1:
+        cnt[2] = 1
+        ans = ans + "Ⅲ级低分化\n"
+    if cnt == [0, 0, 0]:
+        ans = ans + "Ⅳ级未分化\n"
+    return cnt, ans
+
+
+def exactNumber(txt):
+    cnt = []
+    number = ""
+    for i, char in enumerate(txt):
+        if char in "0123456789./":
+            number = number + char
+        else:
+            cnt.append(number)
+            number = ""
+    cnt.append(number)
+    cnt = [i for i in cnt if i != '']
+    return cnt
+
+
+def pT(txt):
+    txt.replace(" ", "")
+    cnt = ''
+    if txt.find(">10mm") != -1:
+        cnt = "pt3"
+        return cnt
+    elif txt.find(">5mm") != -1:
+        cnt = "pt2"
+        return cnt
+    score = max([decimal.Decimal(i) for i in exactNumber(txt)])
+    if score <= 5:
+        cnt = "pT1"
+    elif score > 5 and score <= 10:
+        cnt = "pT2"
+    elif score > 10:
+        cnt = "pT3"
+    return cnt
+
+
+def findDegree(txt):
+    cnt = [0, 0, 0]
+    ans = ""
+    if txt.find("轻") != -1:
+        cnt[0] = 1
+        ans = ans + "轻度\n"
+    if txt.find("中") != -1:
+        cnt[1] = 1
+        ans = ans + "中度\n"
+    if txt.find("重") != -1:
+        cnt[2] = 1
+        ans = ans + "重度\n"
+    if cnt == [0, 0, 0]:
+        ans = ans + ""
+    return ans
+
+
+def findlymph(txt):
+    if txt.find("淋巴结") != -1:
+        return 1
+    else:
+        return 0
+
+
+def CuttingEdgePathology(txt):
+    cnt = ""
+    if txt.find("阳性") != -1 or txt.find("+") != -1:
+        cnt = "阳性（+）"
+    elif txt.find("异常增生") != -1:
+        cnt = "有黏膜上皮异常增生"
+    elif txt.find("阴性") != -1 or txt.find("-") != -1:
+        cnt = "阴性（-）"
+    else:
+        cnt = "其他情况"
+    return cnt
+
+
+def FindChar(txt):
+    cnt = []
+    ans = []
+    charlist = ["分子结果", "免疫组化结果", "(", "（", ":", "："]
+    for i, char in enumerate(charlist):
+        cnt.append(txt.find(char))
+    for i, flag in enumerate(cnt):
+        if flag != -1:
+            ans.append(flag)
+    ans.append(-1)
+    return ans
+
+if __name__ == '__main__':
+    print(exactNumber('mm'))
\ No newline at end of file