Commit 5dd18bcf authored by 陶书衡

init

# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.7 (tf-latest-base)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="DuplicatedCode" enabled="true" level="WEAK WARNING" enabled_by_default="true">
<Languages>
<language minSize="147" name="Python" />
</Languages>
</inspection_tool>
<inspection_tool class="JupyterPackageInspection" enabled="false" level="WARNING" enabled_by_default="false" />
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredPackages">
<value>
<list size="127">
<item index="0" class="java.lang.String" itemvalue="h5py" />
<item index="1" class="java.lang.String" itemvalue="six" />
<item index="2" class="java.lang.String" itemvalue="keras-bert" />
<item index="3" class="java.lang.String" itemvalue="keras-transformer" />
<item index="4" class="java.lang.String" itemvalue="absl-py" />
<item index="5" class="java.lang.String" itemvalue="google-pasta" />
<item index="6" class="java.lang.String" itemvalue="protobuf" />
<item index="7" class="java.lang.String" itemvalue="decorator" />
<item index="8" class="java.lang.String" itemvalue="tensorflow-estimator" />
<item index="9" class="java.lang.String" itemvalue="joblib" />
<item index="10" class="java.lang.String" itemvalue="threadpoolctl" />
<item index="11" class="java.lang.String" itemvalue="opt-einsum" />
<item index="12" class="java.lang.String" itemvalue="scikit-learn" />
<item index="13" class="java.lang.String" itemvalue="PyYAML" />
<item index="14" class="java.lang.String" itemvalue="cycler" />
<item index="15" class="java.lang.String" itemvalue="gast" />
<item index="16" class="java.lang.String" itemvalue="numpy" />
<item index="17" class="java.lang.String" itemvalue="importlib-metadata" />
<item index="18" class="java.lang.String" itemvalue="Keras-Preprocessing" />
<item index="19" class="java.lang.String" itemvalue="tensorflow" />
<item index="20" class="java.lang.String" itemvalue="Pygments" />
<item index="21" class="java.lang.String" itemvalue="pyzmq" />
<item index="22" class="java.lang.String" itemvalue="certifi" />
<item index="23" class="java.lang.String" itemvalue="prompt-toolkit" />
<item index="24" class="java.lang.String" itemvalue="cached-property" />
<item index="25" class="java.lang.String" itemvalue="Markdown" />
<item index="26" class="java.lang.String" itemvalue="scipy" />
<item index="27" class="java.lang.String" itemvalue="Werkzeug" />
<item index="28" class="java.lang.String" itemvalue="opencv-python" />
<item index="29" class="java.lang.String" itemvalue="parso" />
<item index="30" class="java.lang.String" itemvalue="wrapt" />
<item index="31" class="java.lang.String" itemvalue="astor" />
<item index="32" class="java.lang.String" itemvalue="ipython" />
<item index="33" class="java.lang.String" itemvalue="kiwisolver" />
<item index="34" class="java.lang.String" itemvalue="typing-extensions" />
<item index="35" class="java.lang.String" itemvalue="jupyter-client" />
<item index="36" class="java.lang.String" itemvalue="ipykernel" />
<item index="37" class="java.lang.String" itemvalue="Keras-Applications" />
<item index="38" class="java.lang.String" itemvalue="appnope" />
<item index="39" class="java.lang.String" itemvalue="pandas" />
<item index="40" class="java.lang.String" itemvalue="termcolor" />
<item index="41" class="java.lang.String" itemvalue="tensorboard" />
<item index="42" class="java.lang.String" itemvalue="matplotlib" />
<item index="43" class="java.lang.String" itemvalue="grpcio" />
<item index="44" class="java.lang.String" itemvalue="Keras" />
<item index="45" class="java.lang.String" itemvalue="pytz" />
<item index="46" class="java.lang.String" itemvalue="Pillow" />
<item index="47" class="java.lang.String" itemvalue="seqeval" />
<item index="48" class="java.lang.String" itemvalue="keras-embed-sim" />
<item index="49" class="java.lang.String" itemvalue="sklearn" />
<item index="50" class="java.lang.String" itemvalue="keras-position-wise-feed-forward" />
<item index="51" class="java.lang.String" itemvalue="keras-pos-embd" />
<item index="52" class="java.lang.String" itemvalue="keras-self-attention" />
<item index="53" class="java.lang.String" itemvalue="keras-layer-normalization" />
<item index="54" class="java.lang.String" itemvalue="keras-multi-head" />
<item index="55" class="java.lang.String" itemvalue="jedi" />
<item index="56" class="java.lang.String" itemvalue="pyDeprecate" />
<item index="57" class="java.lang.String" itemvalue="pytorch-lightning" />
<item index="58" class="java.lang.String" itemvalue="aiohttp" />
<item index="59" class="java.lang.String" itemvalue="packaging" />
<item index="60" class="java.lang.String" itemvalue="torch" />
<item index="61" class="java.lang.String" itemvalue="pyparsing" />
<item index="62" class="java.lang.String" itemvalue="torchvision" />
<item index="63" class="java.lang.String" itemvalue="traitlets" />
<item index="64" class="java.lang.String" itemvalue="testpath" />
<item index="65" class="java.lang.String" itemvalue="pickleshare" />
<item index="66" class="java.lang.String" itemvalue="python-dateutil" />
<item index="67" class="java.lang.String" itemvalue="defusedxml" />
<item index="68" class="java.lang.String" itemvalue="nbclient" />
<item index="69" class="java.lang.String" itemvalue="QtPy" />
<item index="70" class="java.lang.String" itemvalue="MarkupSafe" />
<item index="71" class="java.lang.String" itemvalue="pycparser" />
<item index="72" class="java.lang.String" itemvalue="pyasn1-modules" />
<item index="73" class="java.lang.String" itemvalue="ipython-genutils" />
<item index="74" class="java.lang.String" itemvalue="jupyterlab-widgets" />
<item index="75" class="java.lang.String" itemvalue="bleach" />
<item index="76" class="java.lang.String" itemvalue="oauthlib" />
<item index="77" class="java.lang.String" itemvalue="astunparse" />
<item index="78" class="java.lang.String" itemvalue="entrypoints" />
<item index="79" class="java.lang.String" itemvalue="jsonschema" />
<item index="80" class="java.lang.String" itemvalue="notebook" />
<item index="81" class="java.lang.String" itemvalue="qtconsole" />
<item index="82" class="java.lang.String" itemvalue="terminado" />
<item index="83" class="java.lang.String" itemvalue="argcomplete" />
<item index="84" class="java.lang.String" itemvalue="tensorboard-data-server" />
<item index="85" class="java.lang.String" itemvalue="pexpect" />
<item index="86" class="java.lang.String" itemvalue="jupyterlab-pygments" />
<item index="87" class="java.lang.String" itemvalue="nbconvert" />
<item index="88" class="java.lang.String" itemvalue="attrs" />
<item index="89" class="java.lang.String" itemvalue="cn2an" />
<item index="90" class="java.lang.String" itemvalue="flatbuffers" />
<item index="91" class="java.lang.String" itemvalue="backcall" />
<item index="92" class="java.lang.String" itemvalue="widgetsnbextension" />
<item index="93" class="java.lang.String" itemvalue="charset-normalizer" />
<item index="94" class="java.lang.String" itemvalue="idna" />
<item index="95" class="java.lang.String" itemvalue="rsa" />
<item index="96" class="java.lang.String" itemvalue="jupyter-core" />
<item index="97" class="java.lang.String" itemvalue="tensorflow-addons" />
<item index="98" class="java.lang.String" itemvalue="matplotlib-inline" />
<item index="99" class="java.lang.String" itemvalue="ptyprocess" />
<item index="100" class="java.lang.String" itemvalue="cffi" />
<item index="101" class="java.lang.String" itemvalue="pandocfilters" />
<item index="102" class="java.lang.String" itemvalue="wcwidth" />
<item index="103" class="java.lang.String" itemvalue="pyasn1" />
<item index="104" class="java.lang.String" itemvalue="requests" />
<item index="105" class="java.lang.String" itemvalue="Jinja2" />
<item index="106" class="java.lang.String" itemvalue="typeguard" />
<item index="107" class="java.lang.String" itemvalue="pyrsistent" />
<item index="108" class="java.lang.String" itemvalue="requests-oauthlib" />
<item index="109" class="java.lang.String" itemvalue="jupyter" />
<item index="110" class="java.lang.String" itemvalue="tensorboard-plugin-wit" />
<item index="111" class="java.lang.String" itemvalue="zipp" />
<item index="112" class="java.lang.String" itemvalue="nest-asyncio" />
<item index="113" class="java.lang.String" itemvalue="urllib3" />
<item index="114" class="java.lang.String" itemvalue="ipywidgets" />
<item index="115" class="java.lang.String" itemvalue="tornado" />
<item index="116" class="java.lang.String" itemvalue="google-auth-oauthlib" />
<item index="117" class="java.lang.String" itemvalue="nbformat" />
<item index="118" class="java.lang.String" itemvalue="Send2Trash" />
<item index="119" class="java.lang.String" itemvalue="prometheus-client" />
<item index="120" class="java.lang.String" itemvalue="mistune" />
<item index="121" class="java.lang.String" itemvalue="jupyter-console" />
<item index="122" class="java.lang.String" itemvalue="cachetools" />
<item index="123" class="java.lang.String" itemvalue="debugpy" />
<item index="124" class="java.lang.String" itemvalue="argon2-cffi" />
<item index="125" class="java.lang.String" itemvalue="webencodings" />
<item index="126" class="java.lang.String" itemvalue="google-auth" />
</list>
</value>
</option>
</inspection_tool>
<inspection_tool class="PyPep8Inspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
<option name="ignoredErrors">
<list>
<option value="E501" />
<option value="E122" />
<option value="W292" />
</list>
</option>
</inspection_tool>
<inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
<option name="ignoredErrors">
<list>
<option value="N803" />
<option value="N802" />
<option value="N806" />
</list>
</option>
</inspection_tool>
<inspection_tool class="SpellCheckingInspection" enabled="false" level="TYPO" enabled_by_default="false">
<option name="processCode" value="true" />
<option name="processLiterals" value="true" />
<option name="processComments" value="true" />
</inspection_tool>
</profile>
</component>
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7 (tf-latest-base)" project-jdk-type="Python SDK" />
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/OralAPI.iml" filepath="$PROJECT_DIR$/.idea/OralAPI.iml" />
</modules>
</component>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>
import numpy as np
import tensorflow as tf
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.python import keras
from CRF import CRF
# from CRF import CRF
class BiLSTMCRF:
def __init__(self, vocabSize, maxLen, tagIndexDict, tagSum, sequenceLengths=None, vecSize=100, learning_rate=0.01):
keras.backend.clear_session()
self.vocabSize = vocabSize
self.vecSize = vecSize
self.maxLen = maxLen
self.tagSum = tagSum
self.sequenceLengths = sequenceLengths
self.tagIndexDict = tagIndexDict
self.learning_rate = learning_rate
self.buildBiLSTMCRF()
def getTransParam(self, y, tagIndexDict):
self.trainY = np.argmax(y, axis=-1)
yList = self.trainY.tolist()
transParam = np.zeros(
[len(list(tagIndexDict.keys())), len(list(tagIndexDict.keys()))])
for rowI in range(len(yList)):
for colI in range(len(yList[rowI])-1):
transParam[yList[rowI][colI]][yList[rowI][colI+1]] += 1
for rowI in range(transParam.shape[0]):
transParam[rowI] = transParam[rowI]/np.sum(transParam[rowI])
return transParam
def buildBiLSTMCRF(self):
model = Sequential()
model.add(tf.keras.layers.Input(shape=(self.maxLen,)))
model.add(tf.keras.layers.Embedding(self.vocabSize, self.vecSize))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(
self.tagSum, return_sequences=True, activation="tanh"), merge_mode='sum'))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(
self.tagSum, return_sequences=True, activation="softmax"), merge_mode='sum'))
crf = CRF(self.tagSum, name='crf_layer')
model.add(crf)
model.compile(Adam(learning_rate=self.learning_rate), loss={
'crf_layer': crf.get_loss}, metrics=[crf.get_accuracy])
self.net = model
def fit(self, X, y, epochs=100, batchsize=32):
if len(y.shape) == 3:
y = np.argmax(y, axis=-1)
if self.sequenceLengths is None:
self.sequenceLengths = [row.shape[0] for row in y]
callbacks_list = [
tf.keras.callbacks.History(),
tf.keras.callbacks.ReduceLROnPlateau(monitor='loss', factor=0.5, patience=5,
verbose=1, mode='auto', min_lr=1e-9),
tf.keras.callbacks.ModelCheckpoint("model/model.h5", monitor='get_accuracy',
verbose=0, save_best_only=True, save_weights_only=True, mode='auto', period=1),
tf.keras.callbacks.EarlyStopping(
monitor='loss', min_delta=1e-5, patience=10),
TensorBoard(log_dir="logs", histogram_freq=1)
# WeightsSaver(1)
]
history = self.net.fit(
X, y, epochs=epochs, callbacks=callbacks_list, batch_size=batchsize)
return history
def predict(self, X):
preYArr = self.net.predict(X)
return preYArr
def load_weights(self, model_path):
self.net.load_weights(model_path)
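A minimal smoke test for this wrapper might look like the following sketch (run as a separate script): the three-tag tagIndexDict and the random X/y are hypothetical toy data, and fit() writes checkpoints to model/model.h5, so the model/ directory must exist.
import numpy as np
from BiLSTMCRF import BiLSTMCRF
tagIndexDict = {'O': 0, 'B-SIZE': 1, 'I-SIZE': 2}  # hypothetical toy tag set
model = BiLSTMCRF(vocabSize=50, maxLen=10, tagIndexDict=tagIndexDict, tagSum=3)
X = np.random.randint(1, 50, size=(4, 10))  # 4 sequences of 10 token ids
y = np.random.randint(0, 3, size=(4, 10))  # per-token tag indices
model.fit(X, y, epochs=1, batchsize=2)
print(model.predict(X).shape)  # expected: (4, 10), decoded tag ids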
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Original implementation from keras_contrib/layers/crf
# ==============================================================================
"""Implementing Conditional Random Field layer."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow_addons.text.crf import crf_decode, crf_log_likelihood
# from tensorflow_addons.utils import keras_utils
# @keras_utils.register_keras_custom_object
class CRF(tf.keras.layers.Layer):
"""Linear chain conditional random field (CRF).
Examples:
```python
from tensorflow_addons.layers import CRF
model = Sequential()
model.add(Embedding(3001, 300, mask_zero=True))
crf = CRF(10, name='crf_layer')
model.add(crf)
model.compile('adam', loss={'crf_layer': crf.loss})
model.fit(x, y)
```
Arguments:
units: Positive integer, dimensionality of the output space;
should equal the number of tags.
chain_initializer: Initializer for the `chain_kernel` weights matrix,
used for the CRF chain energy.
(see [initializers](../initializers.md)).
chain_regularizer: Regularizer function applied to
the `chain_kernel` weights matrix.
chain_constraint: Constraint function applied to
the `chain_kernel` weights matrix.
use_boundary: Boolean (default True), indicating if trainable
start-end chain energies should be added to the model.
boundary_initializer: Initializer for the `left_boundary`,
'right_boundary' weights vectors,
used for the start/left and end/right boundary energy.
boundary_regularizer: Regularizer function applied to
the 'left_boundary', 'right_boundary' weight vectors.
boundary_constraint: Constraint function applied to
the `left_boundary`, `right_boundary` weights vectors.
use_kernel: Boolean (default True), indicating whether to apply
a fully connected layer before the CRF op.
kernel_initializer: Initializer for the `kernel` weights matrix,
used for the linear transformation of the inputs.
kernel_regularizer: Regularizer function applied to
the `kernel` weights matrix.
kernel_constraint: Constraint function applied to
the `kernel` weights matrix.
use_bias: Boolean (default True), whether the layer uses a bias vector.
bias_initializer: Initializer for the bias vector.
bias_regularizer: Regularizer function applied to the bias vector.
bias_constraint: Constraint function applied to the bias vector.
activation: Activation function to use (default: 'linear').
Input shape:
3D tensor with shape: `(batch_size, sequence_length, feature_size)`.
Output shape:
2D tensor (dtype: int32) with shape: `(batch_size, sequence_length)`.
Masking:
This layer supports masking
(2D tensor, shape: `(batch_size, sequence_length)`)
for input data with a variable number of timesteps.
This layer outputs the same mask tensor.
NOTICE: this may cause issues when you
use Keras loss and metrics functions, which usually expect a 1D mask.
Loss function:
Because TF 2.0 enables eager execution by default,
the CRF loss cannot be implemented as an independent loss function.
Thus, users should use the loss method of this layer.
See Examples (above) for detailed usage.
References:
- [Conditional Random Field](https://en.wikipedia.org/wiki/Conditional_random_field)
"""
def __init__(self,
units,
chain_initializer="orthogonal",
chain_regularizer=None,
chain_constraint=None,
use_boundary=True,
boundary_initializer="zeros",
boundary_regularizer=None,
boundary_constraint=None,
use_kernel=True,
kernel_initializer="glorot_uniform",
kernel_regularizer=None,
kernel_constraint=None,
use_bias=True,
bias_initializer="zeros",
bias_regularizer=None,
bias_constraint=None,
activation="linear",
**kwargs):
super(CRF, self).__init__(**kwargs)
# set up the mask-support flag used by the base class (the Layer);
# the base class's init method sets it to False unconditionally,
# so this assignment must be executed after calling the base class's init method
self.supports_masking = True
self.units = units  # number of tags
self.use_boundary = use_boundary
self.use_bias = use_bias
self.use_kernel = use_kernel
self.activation = tf.keras.activations.get(activation)
self.kernel_initializer = tf.keras.initializers.get(kernel_initializer)
self.chain_initializer = tf.keras.initializers.get(chain_initializer)
self.boundary_initializer = tf.keras.initializers.get(
boundary_initializer)
self.bias_initializer = tf.keras.initializers.get(bias_initializer)
self.kernel_regularizer = tf.keras.regularizers.get(kernel_regularizer)
self.chain_regularizer = tf.keras.regularizers.get(chain_regularizer)
self.boundary_regularizer = tf.keras.regularizers.get(
boundary_regularizer)
self.bias_regularizer = tf.keras.regularizers.get(bias_regularizer)
self.kernel_constraint = tf.keras.constraints.get(kernel_constraint)
self.chain_constraint = tf.keras.constraints.get(chain_constraint)
self.boundary_constraint = tf.keras.constraints.get(
boundary_constraint)
self.bias_constraint = tf.keras.constraints.get(bias_constraint)
# values will be assigned in method
self.input_spec = None
# value remembered for loss/metrics function
self.potentials = None
self.sequence_length = None
self.mask = None
# global variable
self.kernel = None
self.chain_kernel = None
self.bias = None
self.left_boundary = None
self.right_boundary = None
def build(self, input_shape):
input_shape = tuple(tf.TensorShape(input_shape).as_list())
# see API docs of InputSpec for more detail
self.input_spec = [tf.keras.layers.InputSpec(shape=input_shape)]
feature_size = input_shape[-1]
if self.use_kernel:
# weights that map an arbitrary input tensor to the correct shape
self.kernel = self.add_weight(
shape=(feature_size, self.units),
name="kernel",
initializer=self.kernel_initializer,
regularizer=self.kernel_regularizer,
constraint=self.kernel_constraint,
)
# weights that act as the transition probabilities between tags
self.chain_kernel = self.add_weight(
shape=(self.units, self.units),
name="chain_kernel",
initializer=self.chain_initializer,
regularizer=self.chain_regularizer,
constraint=self.chain_constraint,
)
# bias that works with self.kernel
if self.use_kernel and self.use_bias:
self.bias = self.add_weight(
shape=(self.units, ),
name="bias",
initializer=self.bias_initializer,
regularizer=self.bias_regularizer,
constraint=self.bias_constraint,
)
else:
self.bias = 0
# weights for the <START>-to-tag and tag-to-<END> transition probabilities
if self.use_boundary:
self.left_boundary = self.add_weight(
shape=(self.units, ),
name="left_boundary",
initializer=self.boundary_initializer,
regularizer=self.boundary_regularizer,
constraint=self.boundary_constraint,
)
self.right_boundary = self.add_weight(
shape=(self.units, ),
name="right_boundary",
initializer=self.boundary_initializer,
regularizer=self.boundary_regularizer,
constraint=self.boundary_constraint,
)
# or directly call self.built = True
super(CRF, self).build(input_shape)
def call(self, inputs, mask=None, **kwargs):
# mask: Tensor(shape=(batch_size, sequence_length), dtype=bool) or None
if mask is not None:
assert (tf.keras.backend.ndim(mask) == 2
), "Input mask to CRF must have dim 2 if not None"
# left padding of the mask is not supported by the underlying CRF function;
# detect it and report it to the user
first_mask = None
if mask is not None:
left_boundary_mask = self._compute_mask_left_boundary(mask)
first_mask = left_boundary_mask[:, 0]
# remember this value for later use
self.mask = mask
if first_mask is not None:
with tf.control_dependencies([
tf.debugging.assert_equal(
tf.math.reduce_all(first_mask),
tf.constant(True),
message="Currently, CRF layer do not support left padding"
)
]):
self.potentials = self._dense_layer(inputs)
else:
self.potentials = self._dense_layer(inputs)
# appending boundary probability info
if self.use_boundary:
self.potentials = self.add_boundary_energy(
self.potentials, mask, self.left_boundary, self.right_boundary)
self.sequence_length = self._get_sequence_length(inputs, mask)
decoded_sequence, _ = self.get_viterbi_decoding(
self.potentials, self.sequence_length)
return decoded_sequence
def _get_sequence_length(self, input_, mask):
"""
Currently the underlying CRF function (provided by tensorflow_addons.text.crf)
does not support bi-directional masking (left padding / right padding);
it supports right padding when told the sequence length.
This function computes the sequence length from the input and the mask.
"""
if mask is not None:
int_mask = tf.keras.backend.cast(mask, tf.int8)
sequence_length = self.mask_to_sequence_length(int_mask)
else:
# make a mask tensor from the input, then use it to generate sequence_length
input_energy_shape = tf.shape(input_)
raw_input_shape = tf.slice(input_energy_shape, [0], [2])
alt_mask = tf.ones(raw_input_shape)
sequence_length = self.mask_to_sequence_length(alt_mask)
return sequence_length
def mask_to_sequence_length(self, mask):
"""
compute sequence length from mask
"""
sequence_length = tf.keras.backend.cast(
tf.keras.backend.sum(mask, 1), tf.int64)
return sequence_length
@staticmethod
def _compute_mask_right_boundary(mask):
"""
input mask: 0011100, output right_boundary: 0000100
"""
# shift mask to left by 1: 0011100 => 0111000
offset = 1
left_shifted_mask = tf.keras.backend.concatenate(
[mask[:, offset:],
tf.keras.backend.zeros_like(mask[:, :offset])],
axis=1)
# TODO(howl-anderson): for below code
# Original code in keras_contrib:
# end_mask = K.cast(
# K.greater(self.shift_left(mask), mask),
# K.floatx()
# )
# May have a bug, it's better confirmed
# by the original keras_contrib maintainer
# Luiz Felix (github: lzfelix),
# mailed him already and waiting for reply.
# 0011100 > 0111000 => 0000100
right_boundary = tf.keras.backend.greater(mask, left_shifted_mask)
return right_boundary
@staticmethod
def _compute_mask_left_boundary(mask):
"""
input mask: 0011100, output left_boundary: 0010000
"""
# shift mask to right by 1: 0011100 => 0001110
offset = 1
right_shifted_mask = tf.keras.backend.concatenate(
[tf.keras.backend.zeros_like(mask[:, :offset]), mask[:, :-offset]],
axis=1)
# 0011100 > 0001110 => 0010000
left_boundary = tf.keras.backend.greater(mask, right_shifted_mask)
return left_boundary
def add_boundary_energy(self, potentials, mask, start, end):
def expand_scalar_to_3d(x):
# expand a tensor from shape (x,) to (1, 1, x)
return tf.keras.backend.expand_dims(
tf.keras.backend.expand_dims(x, 0), 0)
start = expand_scalar_to_3d(start)
end = expand_scalar_to_3d(end)
if mask is None:
potentials = tf.keras.backend.concatenate(
[potentials[:, :1, :] + start, potentials[:, 1:, :]], axis=1)
potentials = tf.keras.backend.concatenate(
[potentials[:, :-1, :], potentials[:, -1:, :] + end], axis=1)
else:
mask = tf.keras.backend.expand_dims(
tf.keras.backend.cast(mask, start.dtype), axis=-1)
start_mask = tf.keras.backend.cast(
self._compute_mask_left_boundary(mask),
start.dtype,
)
end_mask = tf.keras.backend.cast(
self._compute_mask_right_boundary(mask),
end.dtype,
)
potentials = potentials + start_mask * start
potentials = potentials + end_mask * end
return potentials
def get_viterbi_decoding(self, potentials, sequence_length):
# decode_tags: A [batch_size, max_seq_len] matrix, with dtype `tf.int32`
decode_tags, best_score = crf_decode(potentials, self.chain_kernel,
sequence_length)
return decode_tags, best_score
def get_config(self):
# used for loading model from disk
config = {
"units":
self.units,
"use_boundary":
self.use_boundary,
"use_bias":
self.use_bias,
"use_kernel":
self.use_kernel,
"kernel_initializer":
tf.keras.initializers.serialize(self.kernel_initializer),
"chain_initializer":
tf.keras.initializers.serialize(self.chain_initializer),
"boundary_initializer":
tf.keras.initializers.serialize(self.boundary_initializer),
"bias_initializer":
tf.keras.initializers.serialize(self.bias_initializer),
"activation":
tf.keras.activations.serialize(self.activation),
"kernel_regularizer":
tf.keras.regularizers.serialize(self.kernel_regularizer),
"chain_regularizer":
tf.keras.regularizers.serialize(self.chain_regularizer),
"boundary_regularizer":
tf.keras.regularizers.serialize(self.boundary_regularizer),
"bias_regularizer":
tf.keras.regularizers.serialize(self.bias_regularizer),
"kernel_constraint":
tf.keras.constraints.serialize(self.kernel_constraint),
"chain_constraint":
tf.keras.constraints.serialize(self.chain_constraint),
"boundary_constraint":
tf.keras.constraints.serialize(self.boundary_constraint),
"bias_constraint":
tf.keras.constraints.serialize(self.bias_constraint)
}
base_config = super(CRF, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
def compute_output_shape(self, input_shape):
output_shape = input_shape[:2]
return output_shape
def compute_mask(self, input_, mask=None):
# """
# Set the output mask to be a 1D tensor, so the loss method of this class can work without error.
# But there is a big shortcoming:
# layers, losses and metrics after this layer
# cannot access a meaningful mask, which means they cannot work correctly.
# Users can only get correct loss and metric values from the methods of this layer.
# """
# if mask is not None:
# # transform mask from shape (?, ?) to (?, )
# new_mask = tf.keras.backend.any(mask, axis=1)
# return new_mask
return mask
def get_negative_log_likelihood(self, y_true):
# TODO: remove typing cast
self.potentials = tf.keras.backend.cast(self.potentials, tf.float32)
y_true = tf.keras.backend.cast(y_true, tf.int32)
self.sequence_length = tf.keras.backend.cast(self.sequence_length,
tf.int32)
# self.chain_kernel = tf.keras.backend.cast(self.chain_kernel,
# tf.float32)
log_likelihood, _ = crf_log_likelihood(
self.potentials, y_true, self.sequence_length, self.chain_kernel)
return -log_likelihood
def get_loss(self, y_true, y_pred):
# we don't use y_pred, but the caller passes it anyway, so ignore it
return self.get_negative_log_likelihood(y_true)
def get_accuracy(self, y_true, y_pred):
judge = tf.keras.backend.cast(
tf.keras.backend.equal(y_pred, y_true), tf.keras.backend.floatx())
if self.mask is None:
return tf.keras.backend.mean(judge)
else:
mask = tf.keras.backend.cast(self.mask, tf.keras.backend.floatx())
return (tf.keras.backend.sum(judge * mask) /
tf.keras.backend.sum(mask))
def _dense_layer(self, input_):
if self.use_kernel:
output = self.activation(
tf.keras.backend.dot(input_, self.kernel) + self.bias)
else:
output = input_
return tf.keras.backend.cast(output, self.chain_kernel.dtype)
def __call__(self, inputs, *args, **kwargs):
outputs = super(CRF, self).__call__(inputs, *args, **kwargs)
# A hack that adds _keras_history to EagerTensor, making it more like a normal Tensor
for tensor in tf.nest.flatten(outputs):
if not hasattr(tensor, '_keras_history'):
tensor._keras_history = (self, 0, 0)
return outputs
@property
def _compute_dtype(self):
# fixed output dtype from the underlying CRF functions
return tf.int32
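For reference, a minimal standalone wiring of this layer (hypothetical shapes and tag count; assumes the TF 2.4 / tensorflow-addons 0.14 environment pinned in requirements.txt). The loss and metric live on the layer object itself and are keyed by the layer's name, as BiLSTMCRF.buildBiLSTMCRF does above.
import numpy as np
import tensorflow as tf
from CRF import CRF
crf = CRF(4, name='crf_layer')  # 4 hypothetical tags
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Input(shape=(7,)))
model.add(tf.keras.layers.Embedding(20, 8, mask_zero=True))  # token id 0 marks right padding
model.add(crf)
model.compile('adam', loss={'crf_layer': crf.get_loss}, metrics=[crf.get_accuracy])
x = np.random.randint(1, 20, size=(2, 7))
y = np.random.randint(0, 4, size=(2, 7))
model.fit(x, y, epochs=1, verbose=0)
print(model.predict(x).shape)  # (2, 7); entries are decoded tag ids (int32)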
# Entity extraction from lung nodule CT imaging reports
import json
import copy
import requests
import uuid
from flask import Flask, request, redirect, url_for, render_template, flash, jsonify, Blueprint
from model import Oral
oral_api = Blueprint('oral', __name__)
@oral_api.route('/')
def show():
return 'This is oral api.'
@oral_api.route('/recg/', methods = ['POST'])
def recognize():
if request.method == 'POST':
finding = request.form.get('finding')
conclusion = request.form.get('conclusion')
verbose = request.form.get('verbose', default = 0)
try:
verbose = int(verbose)
except Exception as e:
return jsonify({'success': False, 'description': {'error msg': 'verbose can only be 0 or 1'}}), 500
if verbose != 0:
print()
print(finding)
print(conclusion)
if finding is None or conclusion is None:
return jsonify({'success': False, 'description': {'error msg': 'invalid post body fields'}}), 500
elif finding == '' or conclusion == '':
return jsonify(
{'success': False, 'description': {'error msg': 'findings or conclusions cannot be empty'}}), 500
else:
try:
print('' if verbose == 0 else 'verbose out:')
oral = Oral(finding, conclusion, verbose = False if verbose == 0 else 1)
data = oral.get_json()
return jsonify({'success': True, 'description': {'data': data}}), 200
except Exception as e:
print("/n******ERROR SRART******/n")
print(e)
print("----------findind----------")
print(finding)
print("---------conclusion--------")
print(conclusion)
print("/n*******ERROR END*******/n")
return jsonify({'success': False, 'description': {'error msg': str(e)}}), 500
else:
return jsonify({'success': False, 'description': {'error msg': 'Invalid methods'}}), 404
vocabSize:497
maxLen:177
classSum:21
from flask import Flask, request, redirect, url_for, render_template, flash, jsonify, Blueprint
from api import oral_api
app = Flask(__name__)
app.secret_key = '1234567'
app.register_blueprint(oral_api, url_prefix = '/oral')
if __name__ == '__main__':
# from werkzeug.contrib.fixers import ProxyFix
# app.wsgi_app = ProxyFix(app.wsgi_app)
app.run(debug = True, port = 5004, host = '0.0.0.0')
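A hypothetical client call against this service, assuming it is running locally on port 5004 as configured above; the finding/conclusion values are placeholders for real report text.
import requests
resp = requests.post(
'http://localhost:5004/oral/recg/',
data={
'finding': '...',  # gross findings text goes here
'conclusion': '...',  # pathological conclusion text goes here
'verbose': 0,
},
)
print(resp.status_code, resp.json())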
##
import os
import re
os.environ['TF_XLA_FLAGS'] = '--tf_xla_enable_xla_devices'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import splittxt
import tools
import predict
import tensorflow
##
class Oral:
def __init__(self, ImagingFindings, ImagingConclusion, verbose = True):
self.verbose = verbose
self._Conclusion = ''
self._Finding = ''
if '送检淋巴结' in ImagingConclusion:
self._Conclusion = ImagingConclusion
self._Finding = ImagingFindings
else:
self._Conclusion = ImagingFindings
self._Finding = ImagingConclusion
self._Conclusion = self._Conclusion.strip('"').strip() \
.replace("大于等于", "≥").replace("小于等于", "≤").replace("大于", ">").replace("小于", "<").replace("＞", ">").replace(
"＜", "<")
self._Finding = self._Finding.strip('"').strip() \
.replace("大于等于", "≥").replace("小于等于", "≤").replace("大于", ">").replace("小于", "<").replace("＞", ">").replace(
"＜", "<")
self.ImmunohistochemistryContent = ''
self.MolecularResultsContent = ''
self.Degree = ''
self.CuttingEdgePathologyOther = ''
self.CuttingEdgePathology = ''
_, CuttingEdge1, _, MolecularResults1, Immunohistochemistry1 = splittxt.splittxt(
self._Conclusion)
_, CuttingEdge2, _, MolecularResults2, Immunohistochemistry2 = splittxt.splittxt(
self._Finding)
self.ConclusionCuttingEdge = CuttingEdge1 + CuttingEdge2
self.ConclusionMolecularResults = MolecularResults1 + MolecularResults2
if Immunohistochemistry1 != Immunohistochemistry2:
self.ConclusionImmunohistochemistry = Immunohistochemistry1 + Immunohistochemistry2
else:
self.ConclusionImmunohistochemistry = Immunohistochemistry1
# self.ConclusionFrist, self.ConclusionCuttingEdge, self.ConclusionMolecularResults, self.ConclusionImmunohistochemistry = splittxt.splittxt(
# self._Conclusion + self._Finding)
# self.FindingFrist, self.FindingCuttingEdge, self.FindingMolecularResults, self.FindingImmunohistochemistry = splittxt.splittxt(
# self._Finding + self._Conclusion)
if self.ConclusionCuttingEdge != "":
self.CuttingEdgePathology = tools.CuttingEdgePathology(self.ConclusionCuttingEdge)  # post-operative pathological margin
self.CuttingEdgePathologyOther = ""
if self.CuttingEdgePathology == "其他情况":
CuttingEdgeID = tools.FindChar(self.ConclusionCuttingEdge)[0] + 1
self.CuttingEdgePathologyOther = self.ConclusionCuttingEdge[CuttingEdgeID:]  # other post-operative margin findings
self.Degree = tools.findDegree(self.ConclusionCuttingEdge)  # degree of mucosal epithelial dysplasia
# print(self.Degree)
self.Degree = self.getDegree(self.Degree)
if self.CuttingEdgePathologyOther == '':
self.CuttingEdgePathologyOther = '无'
# molecular results
if self.ConclusionMolecularResults != "":
MolecularResultsID = tools.FindChar(self.ConclusionMolecularResults)[0]
self.MolecularResultsContent = self.ConclusionMolecularResults[MolecularResultsID:]  # molecular results
if self.MolecularResultsContent == "":
self.MolecularResultsContent = "无"
# immunohistochemistry
# print(self.ConclusionImmunohistochemistry)
if self.ConclusionImmunohistochemistry != "":
self.Immunohistochemistryisornot = "有" # 免疫组化有无
# print(self.Immunohistochemistryisornot)
# ImmunohistochemistryID = tools.FindChar(self.ConclusionImmunohistochemistry)[0]
# print(tools.FindChar(self.ConclusionImmunohistochemistry))
# print(self.ConclusionImmunohistochemistry)
# print(self.ConclusionImmunohistochemistry[ImmunohistochemistryID:])
# self.ImmunohistochemistryContent = self.ConclusionImmunohistochemistry[ImmunohistochemistryID:]  # immunohistochemistry results
self.ImmunohistochemistryContent = self.ConclusionImmunohistochemistry  # immunohistochemistry results
else:
self.Immunohistochemistryisornot = "无" # 免疫组化有无
# self.print_original_data()
self.ConclusionFrist, _, self.ConclusionCuttingLymph, _, _ = splittxt.splittxt(
self._Conclusion)
self.FindingFrist, _, self.FindingCuttingLymph, _, _ = splittxt.splittxt(
self._Finding)
self.ConclusionFrist = self.ConclusionFrist.replace('肿物', '肿块').replace('\n', '。')
tensorflow.keras.backend.clear_session()
if self.verbose:
print(self.ConclusionFrist + self.ConclusionCuttingLymph)
ans, y_pre = predict.predict(self.ConclusionFrist + self.ConclusionCuttingLymph)
self._y_pre = predict.output(self.ConclusionFrist + self.ConclusionCuttingLymph, y_pre)
if self.verbose:
self.print_list_item(ans)
self.FindingFrist = self.FindingFrist.replace('肿物', '肿块').replace('\n', '。')
tensorflow.keras.backend.clear_session()
if self.verbose:
print(self.FindingFrist + self.FindingCuttingLymph)
ans_o, y_pre_o = predict.predict(self.FindingFrist + self.FindingCuttingLymph)
self._y_pre_o = predict.output(self.FindingFrist + self.FindingCuttingLymph, y_pre_o)
if self.verbose:
self.print_list_item(ans_o)
tensorflow.keras.backend.clear_session()
if self.verbose:
print(self.ConclusionCuttingLymph)
ans_lymph, y_pre_lymph = predict.predict(self.ConclusionCuttingLymph)
self._y_pre_lymph = predict.output(self.ConclusionCuttingLymph, y_pre_lymph)
# if self.verbose:
# self.print_list_item(ans_lymph)
def _get_entity_with_O(self, y_pre, with_o):
all = []
if with_o:
for i in range(len(y_pre)):
if i == 0:
# print('O', ImagingConclusionFrist[0:y_pre[i][2]].replace('\n', ' '), str(0), str(y_pre[i][2]))
# print(y_pre[i][0], y_pre[i][1], y_pre[i][2], y_pre[i][3])
all.append({'tag': 'O', 'words': self.FindingFrist[0:y_pre[i][2]].replace('\n', ' '), 'h': 0,
'r': y_pre[i][2]})
all.append({'tag': y_pre[i][0], 'words': y_pre[i][1], 'h': y_pre[i][2], 'r': y_pre[i][3]})
else:
O_h = y_pre[i - 1][3] + 1
O_r = y_pre[i][2]
# print('O', ImagingConclusionFrist[O_h:O_r].replace('\n', ' '), str(y_pre[i - 1][3] + 1), str(y_pre[i][2]))
# print(y_pre[i][0], y_pre[i][1], y_pre[i][2], y_pre[i][3])
all.append({'tag': 'O', 'words': self.FindingFrist[O_h:O_r].replace('\n', ' '),
'h': y_pre[i - 1][3] + 1, 'r': y_pre[i][2]})
all.append({'tag': y_pre[i][0], 'words': y_pre[i][1], 'h': y_pre[i][2], 'r': y_pre[i][3]})
else:
for i in range(len(y_pre)):
if i == 0:
# print('O', ImagingConclusionFrist[0:y_pre[i][2]].replace('\n', ' '), str(0), str(y_pre[i][2]))
# print(y_pre[i][0], y_pre[i][1], y_pre[i][2], y_pre[i][3])
all.append({'tag': 'O', 'words': self.ConclusionFrist[0:y_pre[i][2]].replace('\n', ' '), 'h': 0,
'r': y_pre[i][2]})
all.append({'tag': y_pre[i][0], 'words': y_pre[i][1], 'h': y_pre[i][2], 'r': y_pre[i][3]})
else:
O_h = y_pre[i - 1][3] + 1
O_r = y_pre[i][2]
# print('O', ImagingConclusionFrist[O_h:O_r].replace('\n', ' '), str(y_pre[i - 1][3] + 1), str(y_pre[i][2]))
# print(y_pre[i][0], y_pre[i][1], y_pre[i][2], y_pre[i][3])
all.append({'tag': 'O', 'words': self.ConclusionFrist[O_h:O_r].replace('\n', ' '),
'h': y_pre[i - 1][3] + 1, 'r': y_pre[i][2]})
all.append({'tag': y_pre[i][0], 'words': y_pre[i][1], 'h': y_pre[i][2], 'r': y_pre[i][3]})
return all
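# Each element returned above is a dict such as (illustrative offsets)
#   {'tag': 'SIZE', 'words': '3*2*2cm', 'h': 12, 'r': 18},
# where 'h'/'r' are start/end character offsets and 'O' items fill the gaps
# between recognized entities; with_o=True reads offsets against FindingFrist,
# with_o=False against ConclusionFrist.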
def max_size(self, type):
haveSIZE = False
for i in self._get_entity_with_O(self._y_pre, False):
if i['tag'] == 'SIZE':
haveSIZE = True
all = []
if not haveSIZE:
all = self._get_entity_with_O(self._y_pre_o, True)
else:
all = self._get_entity_with_O(self._y_pre, False)
# for i in all:
# print(i)
max = 0
max_i = 0
numbers = []
types = []
for i in range(0, len(all)):
if type in all[i]['words']:
types.append(all[i]['words'])
if all[i]['tag'] == 'SIZE' and len(types) != 0:
numbers.append(tools.exactNumber(all[i]['words']))
types = []
if len(numbers) == 0:
return ""
for arr_i in range(0, len(numbers)):
for num in numbers[arr_i]:
if (re.match("^\d+?\.\d+?$", str(num)) or num.isdigit()) and (
re.match("^\d+?\.\d+?$", str(max)) or num.isdigit()):
if float(num) > float(max):
max = num
max_i = arr_i
s = ''
for num in numbers[max_i]:
s += str(num) + '*'
return s.strip("*").strip("cm")
def get_DOI(self):
haveDOI = False
for i in self._get_entity_with_O(self._y_pre_o, True):
if i['tag'] == 'DOI':
haveDOI = True
all = []
if not haveDOI:
all = self._get_entity_with_O(self._y_pre, False)
else:
all = self._get_entity_with_O(self._y_pre_o, True)
DOI_txt = ''
# all = self._get_entity_with_O(self._y_pre, False)
# for i in all:
# print(i)
for i in all:
if i['tag'] == 'DOI':
DOI_txt += i['words'] + '\n'
return DOI_txt
def get_pT(self):
pT_txt = ''
haveDOI = False
for i in self._get_entity_with_O(self._y_pre_o, True):
if i['tag'] == 'DOI':
haveDOI = True
all = []
if not haveDOI:
all = self._get_entity_with_O(self._y_pre, False)
else:
all = self._get_entity_with_O(self._y_pre_o, True)
for i in all:
if i['tag'] == 'DOI':
pT_txt += tools.pT(i['words']) + '\n'
return pT_txt
def get_differentiation(self):
differentiation_txt = ''
differentiations = []
all = self._get_entity_with_O(self._y_pre, False)
# print('self._y_pre:')
for i in all:
# print(i)
if i['tag'] == 'LEVEL':
_, ans = tools.differentiation(i['words'])
differentiations.append(ans)
# differentiation_txt += ans + '\n'
all = self._get_entity_with_O(self._y_pre_o, True)
# print('self._y_pre_o:')
for i in all:
# print(i)
if i['tag'] == 'LEVEL':
_, ans = tools.differentiation(i['words'])
differentiations.append(ans.strip())
if ('中-低分化' in (self.ConclusionFrist + self.ConclusionCuttingLymph)) or (
'中-低分化' in (self.FindingFrist + self.FindingCuttingLymph)):
differentiations.append("Ⅱ级中分化")
differentiations.append("Ⅲ级低分化")
differentiations = set(differentiations)
for i in differentiations:
differentiation_txt += i + '\n'
return differentiation_txt
def get_invasion(self, type):
all = self._get_entity_with_O(self._y_pre, False)
for i in all:
if i['tag'] == 'INVASION':
if type in i['words']:
return '是'
return '否'
def getENE(self):
all = self._get_entity_with_O(self._y_pre, False)
for i in all:
if i['tag'] == 'ENE':
return '有'
return '无'
def getDegree(self, dgr):
if '-' in dgr:
dgr_list = dgr.split('-')
for i in range(len(dgr_list)):
if "度" not in dgr_list[i]:
dgr_list[i] += "度"
rt_dgr = ""
for i in dgr_list:
rt_dgr += i
rt_dgr += '\n'
rt_dgr = rt_dgr.strip('\n')
return rt_dgr
else:
return dgr
def getANATOMY(self):
all = self._get_entity_with_O(self._y_pre, False)
have_anatomy = False
for i in all:
if i['tag'] == 'ANATOMY':
have_anatomy = True
if not have_anatomy:
all = self._get_entity_with_O(self._y_pre_o, True)
count_i = 0
count_o = 0
anatomy_list_init = []
for i in all:
if i['tag'] == 'ANATOMY':
if ('I' in i['words'] or 'V' in i['words'] or i['words'] == '左' or i['words'] == '右') and (
'DOI' not in i['words'] and 'b' not in i['words'] and 'a' not in i['words'] and 'A' not in i[
'words'] and 'B' not in i['words']):
count_i += 1
anatomy_list_init.append(i['words'])
# if i['words'] == '右' or i['words'] == '左':
# rt_txt += i['words']
# else:
# rt_txt += i['words'] + '、'
else:
count_o += 1
# print(anatomy_list_init)
anatomy_list_rt = []
l_or_r = ''
for i in range(len(anatomy_list_init)):
if anatomy_list_init[i] == '左' or anatomy_list_init[i] == '右':
l_or_r = anatomy_list_init[i]
elif ('左' not in anatomy_list_init[i] and '右' not in anatomy_list_init[i]) and (
'I' in anatomy_list_init[i] or 'V' in anatomy_list_init[i]):
if l_or_r != '':
anatomy_list_rt.append(l_or_r + anatomy_list_init[i].strip('区').strip('淋巴结') + '区')
else:
count_o += 1
elif '左' in anatomy_list_init[i] or '右' in anatomy_list_init[i]:
anatomy_list_rt.append(anatomy_list_init[i].strip('区').strip('淋巴结') + '区')
anatomy_set_rt = set(anatomy_list_rt)
# print(anatomy_set_rt)
rt_txt = ''
for i in anatomy_set_rt:
rt_txt += (i + '、')
if count_o != 0:
if count_i == 0:
rt_txt = (rt_txt.strip('、') + '其他')
else:
rt_txt = (rt_txt.strip('、') + '、其他')
return rt_txt.strip('、')
def getANATOMY_o(self):
all = self._get_entity_with_O(self._y_pre, False)
have_anatomy = False
for i in all:
if i['tag'] == 'ANATOMY':
have_anatomy = True
if not have_anatomy:
all = self._get_entity_with_O(self._y_pre_o, True)
rt_txt = ''
anatomy_o_list = []
anatomy_list = []
for i in all:
if i['tag'] == 'ANATOMY':
if ('I' in i['words'] or 'V' in i['words'] or i['words'] == '左' or i['words'] == '右') and (
'DOI' not in i['words'] and 'b' not in i['words'] and 'a' not in i['words'] and 'A' not in i[
'words'] and 'B' not in i['words']):
anatomy_list.append(i['words'])
continue
else:
anatomy_o_list.append(i['words'])
l_or_r = ''
for i in range(len(anatomy_list)):
if anatomy_list[i] == '左' or anatomy_list[i] == '右':
l_or_r = anatomy_list[i]
elif ('左' not in anatomy_list[i] and '右' not in anatomy_list[i]) and (
'I' in anatomy_list[i] or 'V' in anatomy_list[i]):
if l_or_r == '':
anatomy_o_list.append(anatomy_list[i])
elif '左' in anatomy_list[i] or '右' in anatomy_list[i]:
continue
# print(anatomy_o_list)
if len(anatomy_o_list) == 0:
return '无'
anatomy_o_list = set(anatomy_o_list)
for i in anatomy_o_list:
if ('I' in i or 'V' in i) and ('区' not in i):
rt_txt += i + '区、'
else:
rt_txt += i + '、'
return rt_txt.strip('、')
def get_histological_type(self):
all = self._get_entity_with_O(self._y_pre, False)
# print(all)
count_s = 0
count_o = 0
rt_txt = ''
for i in all:
if i['tag'] == 'SQUAMOUS':
count_s += 1
if i['tag'] == 'OTHER':
if ('恶性' in i['words'] or '癌' in i['words'] or '肉瘤' in i['words'] or '异常增生' in i[
'words']) and ('鳞状细胞' not in i['words']):
count_o += 1
all = self._get_entity_with_O(self._y_pre_o, True)
# print(all)
for i in all:
if i['tag'] == 'SQUAMOUS':
count_s += 1
if i['tag'] == 'OTHER':
if ('恶性' in i['words'] or '癌' in i['words'] or '肉瘤' in i['words'] or '异常增生' in i[
'words']) and ('鳞状细胞' not in i['words']):
# print(i['words'])
count_o += 1
if '鳞状细胞癌' in self._Conclusion or '鳞状细胞癌' in self._Finding:
count_s += 1
if count_s > 0:
rt_txt += '鳞状细胞癌\n'
if count_o > 0:
rt_txt += '其他'
if count_o == 0 and count_s == 0:
rt_txt = '无'
return rt_txt.strip('、')
def get_other_type(self):
all = self._get_entity_with_O(self._y_pre, False)
# for i in all:
# print(i)
rt_txt = ''
count = 0
entity = []
for i in all:
if i['tag'] == 'OTHER':
count += 1
if ('恶性' in i['words'] or '癌' in i['words'] or '肉瘤' in i['words'] or '异常增生' in i[
'words']) and ('鳞状细胞' not in i['words']):
entity.append(i['words'])
all = self._get_entity_with_O(self._y_pre_o, True)
for i in all:
if i['tag'] == 'OTHER':
count += 1
if ('恶性' in i['words'] or '癌' in i['words'] or '肉瘤' in i['words'] or '异常增生' in i[
'words']) and ('鳞状细胞' not in i['words']):
entity.append(i['words'])
entity = set(entity)
for i in entity:
rt_txt += i + '\n'
return '无' if count == 0 else rt_txt
def get_number(self):
# all = get_entity_with_O(y_pre)
count = 0
# for i in self._y_pre:
# print(i)
for i in range(1, len(self._y_pre)):
if self._y_pre[i][0] == 'NUMBER' and self._y_pre[i - 1][0] == 'ANATOMY':
# print(y_pre[i][1])
if '各' in self._y_pre[i][1]:
# print(self._y_pre[i][1])
count_a = 0
for j in range(1, len(self._y_pre)):
if self._y_pre[j][0] == 'ANATOMY':
count_a += 1
# print(self._y_pre[i][1].replace(' ', '').strip('').strip('只').strip('块').strip('组织').strip(
# '枚').strip('各'))
count = float(self._y_pre[i][1].replace(' ', '').strip('').strip('只').strip('块').strip('组织').strip(
'枚').strip('各')) * count_a
return count
n = self._y_pre[i][1].replace(' ', '').strip('').strip('只').strip('块').strip('组织').strip('枚').strip('各')
if '/' in n:
count += float(n.split('/')[1]) if (len(tools.exactNumber(
str(n.split('/')[1]))) != 0) and n.split('/')[1] != '' and ((re.match(
"^\d+?\.\d+?$", str(n.split('/')[1]))) or str(n.split('/')[1]).isdigit()) \
else float(0)
else:
count += float(n) if (len(tools.exactNumber(str(n))) != 0) and (
re.match("^\d+?\.\d+?$", str(n)) or str(n).isdigit()) else float(0)
return count
def get_p_number(self):
count = 0
num_list = []
for item in self._y_pre_lymph:
if item[0] == "NUMBER":
num_list.append(item)
if item[0] == "PN":
if item[1] == '阳性(+)' or item[1] == '阳性（+）' or item[1] == '阳性' or item[1] == '(+)' or item[
1] == '（+）' or item[1] == '+':
# print(num_list)
if len(num_list) == 0:
pass
else:
# self.print_y_pred()
for p_item in num_list:
n_str = p_item[1].replace(' ', '').strip('').strip('只').strip('块').strip('组织').strip(
'枚').strip('各')
# print(n_str)
if '/' in n_str:
# print(n_str.split('/')[0])
# if len(tools.exactNumber(str(n_str.split('/')[0]))) != 0:
# print("*")
# if n_str.split('/')[0] != '':
# print("**")
# if (re.match("^\d+?\.\d+?$", str(n_str.split('/')[0]))) or str(n_str.split('/')[0]).isdigit():
# print("***")
count += float(n_str.split('/')[0]) if (len(tools.exactNumber(
str(n_str.split('/')[0]))) != 0) and n_str.split('/')[0] != '' and ((re.match(
"^\d+?\.\d+?$", str(n_str.split('/')[0]))) or str(n_str.split('/')[0]).isdigit()) \
else float(0)
else:
count += float(n_str) if (len(tools.exactNumber(str(n_str))) != 0) and (
(re.match("^\d+?\.\d+?$", str(n_str))) or str(n_str).isdigit()) else float(0)
elif item[1] == '阴性(-)' or item[1] == '阴性（-）' or item[1] == '阴性' or item[1] == '(-)' or item[
1] == '（-）' or item[1] == '-':
num_list = []
return count
def get_p_max(self):
p_list = []
p_list_tmp = []
size_list = []
size_list_tmp = []
# print('----------')
for item in self._y_pre_lymph:
if item[0] == "ANATOMY":
p_list_tmp.append(item)
if item[0] == "SIZE":
size_list_tmp.append(item)
if item[0] == "NUMBER" and '/' in item[1]:
if len(p_list_tmp) == 0:
pass
else:
for p_item in p_list_tmp:
p_list.append(p_item)
for size_item in size_list_tmp:
size_list.append(size_item[1])
p_list_tmp = []
size_list_tmp = []
if item[0] == "PN":
if item[1] == '阳性(+)' or item[1] == '阳性（+）' or item[1] == '阳性' or item[1] == '(+)' or item[
1] == '（+）' or item[1] == '+':
if len(p_list_tmp) == 0:
pass
else:
for p_item in p_list_tmp:
p_list.append(p_item)
for size_item in size_list_tmp:
size_list.append(size_item[1])
p_list_tmp = []
size_list_tmp = []
else:
p_list_tmp = []
size_list_tmp = []
# print(p_list)
# print(size_list)
if len(p_list) != 0:
contains_single_left = False
contains_single_right = False
for i in p_list:
if i[1] == '左':
contains_single_left = True
if i[1] == '右':
contains_single_right = True
if contains_single_left or contains_single_right:
p_list = self._handle_single(p_list)
# for i in p_list:
# print(i)
# print('-------------')
# self.print_y_pred_o()
p_anatomy = ''
for i in range(len(self._y_pre_o)):
if self._y_pre_o[i][0] == 'ANATOMY':
p_anatomy = self._y_pre_o[i][1].strip().strip('区')
if self._y_pre_o[i][0] == 'SIZE':
# print(y_pre_o[i][1])
if p_anatomy != '':
for j in range(0, len(p_list)):
# print(p_list[j][1].strip().strip('区'))
if p_list[j][1].strip().strip('区')[0] == '左' or p_list[j][1].strip().strip('区')[0] == '右':
if p_anatomy[0] != p_list[j][1].strip().strip('区')[0]:
p_anatomy = p_list[j][1].strip().strip('区')[0] + p_anatomy
# print(p_anatomy)
# print(p_list[j][1].strip().strip('区'))
# print(p_anatomy)
if p_list[j][1].strip().strip('区') == p_anatomy:
size_list.append(self._y_pre_o[i][1])
p_anatomy = ''
all_size = []
# print(size_list)
for i in size_list:
for j in tools.exactNumber(i):
all_size.append(j)
for i in range(len(all_size)):
if re.match("^\d+?\.\d+?$", all_size[i]) or all_size[i].isdigit():
all_size[i] = float(all_size[i])
# print(all_size)
if len(all_size) != 0:
return max(all_size)
return 0
# print(len(size_list))
def _handle_single(self, p_list):
rt_list = []
lr = ''
for i in p_list:
if i[1] == '左' or i[1] == '右':
lr = i[1]
else:
rt_list += [[i[0], lr + i[1], i[2], i[3]]]
return rt_list
def get_pN(self):
# if type(self.get_p_max())== int or type(self.get_p_max())== float:
# print(self.get_p_max())
return tools.pN(self.get_p_number(), float(self.get_p_max()), self.getENE())
def findSJ(self):
if '送检淋巴结' in self._Conclusion:
return '是'
else:
return '否'
def get_Info(self):
print("术后病理切缘:")
print(str(self.CuttingEdgePathology).strip())
print("其他术后病理切缘情况:")
print(str(self.CuttingEdgePathologyOther).strip())
print("黏膜上皮异常增生程度:")
print(str(self.Degree).strip())
print("分子结果:")
print(str(self.MolecularResultsContent).strip())
print("免疫组化:")
print(str(self.Immunohistochemistryisornot).strip())
print("免疫组化结果:")
print(str(self.ImmunohistochemistryContent).strip())
print("送检组织大小cm:")
print(str(self.max_size("组织")).strip())
print("肿块大小:")
print(str(self.max_size("肿块")).strip())
print("浸润深度(DOI)mm:")
print(str(self.get_DOI()).strip())
print("pT:")
print(str(self.get_pT()).strip())
print("分化程度")
print(str(self.get_differentiation()).strip())
print("神经侵犯:")
print(str(self.get_invasion('神经')).strip())
print("血管侵犯:")
print(str(self.get_invasion('血管')).strip())
print("淋巴结包膜外ENE(+):")
print(str(self.getENE()).strip())
print("送检淋巴结部位:")
print(str(self.getANATOMY().strip('、')).strip())
print("其他送检淋巴结部位:")
print(str(self.getANATOMY_o().strip('、')).strip())
print("组织学类型:")
print(str(self.get_histological_type()).strip())
print("其他组织学类型:")
print(str(self.get_other_type()).strip())
if self.findSJ() == '是':
print('送检淋巴结数目:')
print(str(self.get_number()).strip())
print("阳性淋巴结数量:")
print(str(self.get_p_number()).strip())
print("阳性淋巴结最大直径cm:")
print(str(self.get_p_max()).strip())
print("pN:")
print(str(self.get_pN()).strip())
def print_original_data(self):
print(self._Finding)
print(self._Conclusion)
def print_y_pred(self):
for i in self._y_pre:
print(i)
def print_y_pred_o(self):
for i in self._y_pre_o:
print(i)
def print_list_item(self, l):
for i in l:
print(i)
def get_json(self):
if self.verbose:
self.print_y_pred()
print("-----------------")
self.print_y_pred_o()
print("-----------------")
self.get_Info()
return {
"送检组织大小cm": str(self.max_size("组织")).strip(),
"肿块大小": str(self.max_size("肿块")).strip(),
"组织学类型": str(self.get_histological_type()).strip(),
"其他组织学类型": str(self.get_other_type()).strip(),
"分化程度": str(self.get_differentiation()).strip(),
"浸润深度(DOI)mm": str(self.get_DOI()).strip(),
"pT": str(self.get_pT()).strip(),
"神经侵犯": str(self.get_invasion('神经')).strip(),
"血管侵犯": str(self.get_invasion('血管')).strip(),
"术后病理切缘": str(self.CuttingEdgePathology).strip(),
"其他术后病理切缘情况": str(self.CuttingEdgePathologyOther).strip(),
"黏膜上皮异常增生程度": str(self.Degree).strip(),
"免疫组化": str(self.Immunohistochemistryisornot).strip(),
"免疫组化结果": str(self.ImmunohistochemistryContent).strip(),
"分子结果": str(self.MolecularResultsContent).strip(),
"是否送检淋巴结": str(self.findSJ()).strip(),
"送检淋巴结部位": str(self.getANATOMY()).strip() if str(self.findSJ()).strip() == '是' else '',
"其他送检淋巴结部位": str(self.getANATOMY_o().strip('、')).strip() if str(self.findSJ()).strip() == '是' else '',
"送检淋巴结数目": str(self.get_number()).strip() if str(self.findSJ()).strip() == '是' else '',
"阳性淋巴结数目": str(self.get_p_number()).strip() if str(self.findSJ()).strip() == '是' else '',
"阳性淋巴结最大直径cm": str(self.get_p_max()).strip() if str(self.findSJ()).strip() == '是' else '',
"淋巴结包膜外ENE(+)": str(self.getENE()).strip() if str(self.findSJ()).strip() == '是' else '',
"pN": str(self.get_pN()).strip() if str(self.findSJ()).strip() == '是' else '',
}
##
if __name__ == '__main__':
Finding = """
" 左颈大块:6*5*4cm,一侧见一腺体3*3*2cm,灰黄分叶,余为脂肪血管。
左I区: 3只直径0.2-1.2cm。
左II区: 3只直径0.5-1.2cm。
左III区: 3只直径0.5-1cm。
左IV区: 3只直径0.5-0.8cm。
右I区:3只直径1-2cm。
右II区: 3只直径1cm。
右III区: 3只直径0.5-0.8cm。
右IV区: 3只直径0.5-0.6cm。
右颈淋巴:7*7*6cm,內见一腺体3*3*2cm,灰黄分叶,余为脂肪血管。"
"""
Conclusion = """
"“左颌下腺”慢性涎腺炎
“右颌下腺”慢性涎腺炎
送检淋巴结:“左”“I区”1/3只、“II区”1/3只、“III区”1/3只(肿瘤位于软组织内)及“右”“I区”2/3只(其中1只肿瘤侵犯至包膜外)、“II区”2/3只(肿瘤侵犯至包膜外)有肿瘤转移(+),余及“左IV区”3只、“右”“III区”3只、“IV区”3只均阴性(-)"
"""
oral = Oral(Finding, Conclusion)
oral.print_y_pred()
print("-----------------")
oral.print_y_pred_o()
print("-----------------")
oral.get_Info()
print(oral.get_json())
M
h
,
>
Z
L
H
6
p
i
G
3
V
F
.
(
W
<
R
O
T
UNK
"
b
t
;
A
:
1
P
S
]
线
K
[
/
I
U
m
a
B
C
%
8
D
-
l
4
E
N
c
Y
)
n
×
*
0
r
2
绿
5
?
9
7
w
+
import csv
import json
import os
from tensorflow.keras.preprocessing.sequence import pad_sequences
from BiLSTMCRF import BiLSTMCRF
model_path = 'model/model.h5'
vocab_path = 'model/vocab.txt'
class_dict = {
"O": 0,
"B-NUMBER": 1,
"I-NUMBER": 2,
"B-SIZE": 3,
"I-SIZE": 4,
"B-ENE": 5,
"I-ENE": 6,
"B-ANATOMY": 7,
"I-ANATOMY": 8,
"B-SQUAMOUS": 9,
"I-SQUAMOUS": 10,
"B-INVASION": 11,
"I-INVASION": 12,
"B-PN": 13,
"I-PN": 14,
"B-LEVEL": 15,
"I-LEVEL": 16,
"B-OTHER": 17,
"I-OTHER": 18,
"B-DOI": 19,
"I-DOI": 20
}
maxLen = 500
classSum = 21
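# BIO scheme illustration: for a SIZE span such as "3*2*2cm", the model is
# expected to emit B-SIZE for the first character and I-SIZE for the rest,
# with O for every character outside any entity; label_dict below inverts
# class_dict to map predicted indices back to these tag names.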
def build_input(text):
x = []
for char in text:
if char not in word_dict:
char = 'UNK'
x.append(word_dict.get(char))
x = pad_sequences([x], padding = 'post', maxlen = maxLen)
return x
def load_worddict():
vocabs = [line.strip()
for line in open(vocab_path, encoding = 'utf-8')]
word_dict = {wd: index for index, wd in enumerate(vocabs)}
return word_dict
def predict(text):
y_pre = []
x = build_input(text)
raw = model.predict(x)[0]
chars = [i for i in text]
tags = [label_dict[i] for i in raw][:len(text)]
res = list(zip(chars, tags))
for i, tag in enumerate(tags):
y_pre.append(tag)
return res, y_pre
def output(txt, cnt):
output = []
flag = 0
start = []
end = []
tags = []
for i, tag in enumerate(cnt):
if tag == 'O':
if flag == 1:
end = i-1
output.append([tags, txt[start:end+1], start, end])
flag = 0
continue
if tag.split("-")[0] == 'B':
if flag == 1:
end = i
output.append([tags, txt[start:end], start, end-1])
flag = 1
start = i
tags = tag.split("-")[1]
continue
return output
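# Illustrative call: with txt = "左I区3只" and
# cnt = ['B-ANATOMY', 'I-ANATOMY', 'I-ANATOMY', 'B-NUMBER', 'I-NUMBER'],
# output(txt, cnt) returns [['ANATOMY', '左I区', 0, 2]]; note that an entity
# still open at the end of the tag sequence is not flushed.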
word_dict = load_worddict()
vocabSize = len(word_dict) + 1
label_dict = {j: i for i, j in class_dict.items()}
model = BiLSTMCRF(vocabSize = vocabSize, maxLen = maxLen,
tagIndexDict = class_dict, tagSum = classSum)
model.load_weights(model_path)
if __name__ == '__main__':
s = """
“右舌”鳞状细胞癌(复发),高-中分化,灶性多核巨细胞浸润,肿瘤侵犯神经。送检淋巴结:“左颌下”1只、“颏下”1只均阴性(-)
"""
a = predict(s)
for i in a[0]:
print(i)
b = output(s, a[1])
print(b)
absl-py==0.14.0
appnope==0.1.2
argcomplete==1.12.3
argon2-cffi==21.1.0
astunparse==1.6.3
attrs==21.2.0
backcall==0.2.0
bleach==4.1.0
cachetools==4.2.2
certifi==2021.10.8
cffi==1.14.6
charset-normalizer==2.0.6
click==8.0.3
cn2an==0.5.11
debugpy==1.4.3
decorator==5.1.0
defusedxml==0.7.1
entrypoints==0.3
Flask==2.0.2
Flask-Login==0.5.0
flatbuffers==1.12
gast==0.3.3
google-auth==1.35.0
google-auth-oauthlib==0.4.6
google-pasta==0.2.0
grpcio==1.32.0
h5py==2.10.0
idna==3.2
importlib-metadata==4.8.1
ipykernel==6.4.1
ipython==7.28.0
ipython-genutils==0.2.0
ipywidgets==7.6.5
itsdangerous==2.0.1
jedi==0.18.0
Jinja2==1.2
jsonschema==3.2.0
jupyter==1.0.0
jupyter-client==7.0.3
jupyter-console==6.4.0
jupyter-core==4.8.1
jupyterlab-pygments==0.1.2
jupyterlab-widgets==1.0.2
Keras-Preprocessing==1.1.2
Markdown==3.3.4
MarkupSafe==2.0.1
matplotlib-inline==0.1.3
mistune==0.8.4
nbclient==0.5.4
nbconvert==6.2.0
nbformat==5.1.3
nest-asyncio==1.5.1
notebook==6.4.4
numpy==1.19.5
oauthlib==3.1.1
opt-einsum==3.3.0
packaging==21.0
pandas==1.3.3
pandocfilters==1.5.0
parso==0.8.2
pexpect==4.8.0
pickleshare==0.7.5
prometheus-client==0.11.0
prompt-toolkit==3.0.20
protobuf==3.18.0
ptyprocess==0.7.0
pyasn1==0.4.8
pyasn1-modules==0.2.8
pycparser==2.20
Pygments==2.10.0
pyparsing==2.4.7
pyrsistent==0.18.0
python-dateutil==2.8.2
pytz==2021.1
PyYAML==5.4.1
pyzmq==22.3.0
qtconsole==5.1.1
QtPy==1.11.2
requests==2.26.0
requests-oauthlib==1.3.0
rsa==4.7.2
Send2Trash==1.8.0
six==1.15.0
tensorboard==2.6.0
tensorboard-data-server==0.6.1
tensorboard-plugin-wit==1.8.0
tensorflow==2.4.0
tensorflow-addons==0.14.0
tensorflow-estimator==2.4.0
termcolor==1.1.0
terminado==0.12.1
testpath==0.5.0
tornado==6.1
traitlets==5.1.0
typeguard==2.12.1
typing-extensions==3.7.4.3
urllib3==1.26.7
wcwidth==0.2.5
webencodings==0.5.1
Werkzeug==2.0.2
widgetsnbextension==3.5.1
wrapt==1.12.1
zipp==3.5.0
# coding:utf-8
def splittxt(ImagingConclusion):
ImagingConclusion = ImagingConclusion + "\n"
ImagingConclusion = ImagingConclusion.replace("\nAE1/AE3", " AE1/AE3")
ImagingConclusion = ImagingConclusion.replace("\nEGFR", " EGFR")
ImagingConclusion = ImagingConclusion.replace("\nCK", " CK")
ImagingConclusion = ImagingConclusion.replace(":\n", ": ")
txt = []
text = []
ImagingConclusionFrist = ""
CuttingEdge = ""
CuttingLymph = ""
MolecularResults = ""
Immunohistochemistry = ""
CuttingEdgeID = -1
CuttingLymphID = -1
MolecularResultsID = -1
ImmunohistochemistryID = -1
delete = []
for i, char in enumerate(ImagingConclusion):
if char != '\n':
txt.append(char)
else:
if txt != []:
s = ''.join(txt)
text.append(s)
txt = []
for i, block in enumerate(text):
if block.find("送检切缘") != -1:
CuttingEdgeID = i
CuttingEdge = CuttingEdge + block + "\n"
delete.append(CuttingEdgeID)
elif block.find("送检淋巴结") != -1:
CuttingLymphID = i
CuttingLymph = CuttingLymph + block + "\n"
delete.append(CuttingLymphID)
elif block.find("分子结果") != -1:
MolecularResultsID = i
MolecularResults = MolecularResults + block + "\n"
delete.append(MolecularResultsID)
elif block.find("免疫组化结果") != -1:
ImmunohistochemistryID = i
Immunohistochemistry = Immunohistochemistry + block + "\n"
delete.append(ImmunohistochemistryID)
j = 0
for i in range(len(text)):
if i in delete:
text.pop(j)
else:
ImagingConclusionFrist = ImagingConclusionFrist + text[j] + "\n"
j = j + 1
MolecularResults = MolecularResults.replace(":", "")
Immunohistochemistry = Immunohistochemistry.replace(":", "")
MolecularResults = MolecularResults[MolecularResults.find("分子结果") + 4:]
Immunohistochemistry = Immunohistochemistry[Immunohistochemistry.find("免疫组化结果") + 6:]
return ImagingConclusionFrist, CuttingEdge, CuttingLymph, MolecularResults, Immunohistochemistry
# print(splittxt(
# "原发灶:一带黏膜组织6*5*3cm,切面见一肿块3*2*2cm,灰白,界不清(1)\n送检切缘:前、后、内、外、底均0.5cm\n左颈大块:6*4*2cm,为脂肪血管及少量腺体,灰黄。\n左I区: 7只直径0.8-1.2cm。\n左II区: 1只直径1cm。\n左III区: 1只直径1.5cm。\n\n“左舌”黏膜鳞状细胞癌,高-中分化,DOI>10mm\n送检切缘:“前、后、内、外、底”均阴性(-)\n“左颌下腺”轻度慢性炎\n送检淋巴结:“左”“I”1/7只有肿瘤转移(+),余及“II”1只(为软组织),“III”1只(为软组织)均阴性(-)\n免疫组化结果NI21-668\nAE1/AE3+ CKH+ CK5/6+ EGFR部分+ Ki67部分+ CD31- S-100- P16-\n北院分子结果(NM2021-0302):EGFR扩增探针 FISH(未见明显扩增(-))\n"))
if __name__ == '__main__':
a = splittxt("""
“右上颌”黏膜鳞状细胞癌,高-中分化,DOI>10mm
“右颌下腺”慢性炎
送检淋巴结:“右I区”1/5只(其中1只为软组织)有肿瘤转移(+),余及“右II区”6只、“右III区”6只、“右IV区”1只(为软组织)、“右V区”10只均阴性(-)
南院分子结果(M2021-1469):EGFR扩增探针 FISH(-)
南院免疫组化结果(I2021-3111):CKH(+),CK5/6(+),P16(-),Ki67(热点区约30-40%+),CD31(-),S100(-),EGFR(+),P53(-)。
""")
for i in a:
print(i.strip())
print('---------------------------')
import decimal
def pN(num, d, ENE):
cnt = ""
if num == 0:
cnt = "pN0"
elif num == 1 and d <= 3 and ENE == '无':
cnt = "pN1"
else:
cnt = "pN2+"
return cnt
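# Mapping sketch: pN(0, 0, '无') -> 'pN0'; pN(1, 2.5, '无') -> 'pN1';
# more than one positive node, a diameter above 3 cm, or extranodal
# extension (ENE == '有') all fall through to 'pN2+'.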
def differentiation(txt):
cnt = [0, 0, 0]
ans = ""
if txt.find("高") != -1:
cnt[0] = 1
ans = ans + "Ⅰ级高分化\n"
if txt.find("中") != -1:
cnt[1] = 1
ans = ans + "Ⅱ级中分化\n"
if txt.find("低") != -1:
cnt[2] = 1
ans = ans + "Ⅲ级低分化\n"
if cnt == [0, 0, 0]:
ans = ans + "Ⅳ级未分化\n"
return cnt, ans
def exactNumber(txt):
cnt = []
number = ""
for i, char in enumerate(txt):
if char in "0123456789./":
number = number + char
else:
cnt.append(number)
number = ""
cnt.append(number)
cnt = [i for i in cnt if i != '']
return cnt
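# Illustrative calls: exactNumber('6*5*4cm') -> ['6', '5', '4'],
# exactNumber('1/3只') -> ['1/3']; digits, '.' and '/' are kept together,
# while any other character terminates the current number.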
def pT(txt):
txt = txt.replace(" ", "")
cnt = ''
if txt.find(">10mm") != -1:
cnt = "pt3"
return cnt
elif txt.find(">5mm") != -1:
cnt = "pt2"
return cnt
score = max([decimal.Decimal(i) for i in exactNumber(txt)])
if score <= 5:
cnt = "pT1"
elif score > 5 and score <= 10:
cnt = "pT2"
elif score > 10:
cnt = "pT3"
return cnt
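# Mapping sketch (depth of invasion in mm): text containing '>10mm' -> 'pT3',
# '>5mm' -> 'pT2'; otherwise the largest number found decides:
# <=5 -> 'pT1', >5 and <=10 -> 'pT2', >10 -> 'pT3'.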
def findDegree(txt):
cnt = [0, 0, 0]
ans = ""
if txt.find("轻") != -1:
cnt[0] = 1
ans = ans + "轻度\n"
if txt.find("中") != -1:
cnt[1] = 1
ans = ans + "中度\n"
if txt.find("重") != -1:
cnt[2] = 1
ans = ans + "重度\n"
if cnt == [0, 0, 0]:
ans = ans + ""
return ans
def findlymph(txt):
if txt.find("淋巴结") != -1:
return 1
else:
return 0
def CuttingEdgePathology(txt):
cnt = ""
if txt.find("阳性") != -1 or txt.find("+") != -1:
cnt = "阳性(+)"
elif txt.find("异常增生") != -1:
cnt = "有黏膜上皮异常增生"
elif txt.find("阴性") != -1 or txt.find("-") != -1:
cnt = "阴性(-)"
else:
cnt = "其他情况"
return cnt
def FindChar(txt):
cnt = []
ans = []
charlist = ["分子结果", "免疫组化结果", "(", "(", ":", ":"]
for i, char in enumerate(charlist):
cnt.append(txt.find(char))
for i, flag in enumerate(cnt):
if flag != -1:
ans.append(flag)
ans.append(-1)
return ans
if __name__ == '__main__':
print(exactNumber('mm'))