Commit 5dd18bcf authored by 陶书衡

init

# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.7 (tf-latest-base)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="DuplicatedCode" enabled="true" level="WEAK WARNING" enabled_by_default="true">
<Languages>
<language minSize="147" name="Python" />
</Languages>
</inspection_tool>
<inspection_tool class="JupyterPackageInspection" enabled="false" level="WARNING" enabled_by_default="false" />
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredPackages">
<value>
<list size="127">
<item index="0" class="java.lang.String" itemvalue="h5py" />
<item index="1" class="java.lang.String" itemvalue="six" />
<item index="2" class="java.lang.String" itemvalue="keras-bert" />
<item index="3" class="java.lang.String" itemvalue="keras-transformer" />
<item index="4" class="java.lang.String" itemvalue="absl-py" />
<item index="5" class="java.lang.String" itemvalue="google-pasta" />
<item index="6" class="java.lang.String" itemvalue="protobuf" />
<item index="7" class="java.lang.String" itemvalue="decorator" />
<item index="8" class="java.lang.String" itemvalue="tensorflow-estimator" />
<item index="9" class="java.lang.String" itemvalue="joblib" />
<item index="10" class="java.lang.String" itemvalue="threadpoolctl" />
<item index="11" class="java.lang.String" itemvalue="opt-einsum" />
<item index="12" class="java.lang.String" itemvalue="scikit-learn" />
<item index="13" class="java.lang.String" itemvalue="PyYAML" />
<item index="14" class="java.lang.String" itemvalue="cycler" />
<item index="15" class="java.lang.String" itemvalue="gast" />
<item index="16" class="java.lang.String" itemvalue="numpy" />
<item index="17" class="java.lang.String" itemvalue="importlib-metadata" />
<item index="18" class="java.lang.String" itemvalue="Keras-Preprocessing" />
<item index="19" class="java.lang.String" itemvalue="tensorflow" />
<item index="20" class="java.lang.String" itemvalue="Pygments" />
<item index="21" class="java.lang.String" itemvalue="pyzmq" />
<item index="22" class="java.lang.String" itemvalue="certifi" />
<item index="23" class="java.lang.String" itemvalue="prompt-toolkit" />
<item index="24" class="java.lang.String" itemvalue="cached-property" />
<item index="25" class="java.lang.String" itemvalue="Markdown" />
<item index="26" class="java.lang.String" itemvalue="scipy" />
<item index="27" class="java.lang.String" itemvalue="Werkzeug" />
<item index="28" class="java.lang.String" itemvalue="opencv-python" />
<item index="29" class="java.lang.String" itemvalue="parso" />
<item index="30" class="java.lang.String" itemvalue="wrapt" />
<item index="31" class="java.lang.String" itemvalue="astor" />
<item index="32" class="java.lang.String" itemvalue="ipython" />
<item index="33" class="java.lang.String" itemvalue="kiwisolver" />
<item index="34" class="java.lang.String" itemvalue="typing-extensions" />
<item index="35" class="java.lang.String" itemvalue="jupyter-client" />
<item index="36" class="java.lang.String" itemvalue="ipykernel" />
<item index="37" class="java.lang.String" itemvalue="Keras-Applications" />
<item index="38" class="java.lang.String" itemvalue="appnope" />
<item index="39" class="java.lang.String" itemvalue="pandas" />
<item index="40" class="java.lang.String" itemvalue="termcolor" />
<item index="41" class="java.lang.String" itemvalue="tensorboard" />
<item index="42" class="java.lang.String" itemvalue="matplotlib" />
<item index="43" class="java.lang.String" itemvalue="grpcio" />
<item index="44" class="java.lang.String" itemvalue="Keras" />
<item index="45" class="java.lang.String" itemvalue="pytz" />
<item index="46" class="java.lang.String" itemvalue="Pillow" />
<item index="47" class="java.lang.String" itemvalue="seqeval" />
<item index="48" class="java.lang.String" itemvalue="keras-embed-sim" />
<item index="49" class="java.lang.String" itemvalue="sklearn" />
<item index="50" class="java.lang.String" itemvalue="keras-position-wise-feed-forward" />
<item index="51" class="java.lang.String" itemvalue="keras-pos-embd" />
<item index="52" class="java.lang.String" itemvalue="keras-self-attention" />
<item index="53" class="java.lang.String" itemvalue="keras-layer-normalization" />
<item index="54" class="java.lang.String" itemvalue="keras-multi-head" />
<item index="55" class="java.lang.String" itemvalue="jedi" />
<item index="56" class="java.lang.String" itemvalue="pyDeprecate" />
<item index="57" class="java.lang.String" itemvalue="pytorch-lightning" />
<item index="58" class="java.lang.String" itemvalue="aiohttp" />
<item index="59" class="java.lang.String" itemvalue="packaging" />
<item index="60" class="java.lang.String" itemvalue="torch" />
<item index="61" class="java.lang.String" itemvalue="pyparsing" />
<item index="62" class="java.lang.String" itemvalue="torchvision" />
<item index="63" class="java.lang.String" itemvalue="traitlets" />
<item index="64" class="java.lang.String" itemvalue="testpath" />
<item index="65" class="java.lang.String" itemvalue="pickleshare" />
<item index="66" class="java.lang.String" itemvalue="python-dateutil" />
<item index="67" class="java.lang.String" itemvalue="defusedxml" />
<item index="68" class="java.lang.String" itemvalue="nbclient" />
<item index="69" class="java.lang.String" itemvalue="QtPy" />
<item index="70" class="java.lang.String" itemvalue="MarkupSafe" />
<item index="71" class="java.lang.String" itemvalue="pycparser" />
<item index="72" class="java.lang.String" itemvalue="pyasn1-modules" />
<item index="73" class="java.lang.String" itemvalue="ipython-genutils" />
<item index="74" class="java.lang.String" itemvalue="jupyterlab-widgets" />
<item index="75" class="java.lang.String" itemvalue="bleach" />
<item index="76" class="java.lang.String" itemvalue="oauthlib" />
<item index="77" class="java.lang.String" itemvalue="astunparse" />
<item index="78" class="java.lang.String" itemvalue="entrypoints" />
<item index="79" class="java.lang.String" itemvalue="jsonschema" />
<item index="80" class="java.lang.String" itemvalue="notebook" />
<item index="81" class="java.lang.String" itemvalue="qtconsole" />
<item index="82" class="java.lang.String" itemvalue="terminado" />
<item index="83" class="java.lang.String" itemvalue="argcomplete" />
<item index="84" class="java.lang.String" itemvalue="tensorboard-data-server" />
<item index="85" class="java.lang.String" itemvalue="pexpect" />
<item index="86" class="java.lang.String" itemvalue="jupyterlab-pygments" />
<item index="87" class="java.lang.String" itemvalue="nbconvert" />
<item index="88" class="java.lang.String" itemvalue="attrs" />
<item index="89" class="java.lang.String" itemvalue="cn2an" />
<item index="90" class="java.lang.String" itemvalue="flatbuffers" />
<item index="91" class="java.lang.String" itemvalue="backcall" />
<item index="92" class="java.lang.String" itemvalue="widgetsnbextension" />
<item index="93" class="java.lang.String" itemvalue="charset-normalizer" />
<item index="94" class="java.lang.String" itemvalue="idna" />
<item index="95" class="java.lang.String" itemvalue="rsa" />
<item index="96" class="java.lang.String" itemvalue="jupyter-core" />
<item index="97" class="java.lang.String" itemvalue="tensorflow-addons" />
<item index="98" class="java.lang.String" itemvalue="matplotlib-inline" />
<item index="99" class="java.lang.String" itemvalue="ptyprocess" />
<item index="100" class="java.lang.String" itemvalue="cffi" />
<item index="101" class="java.lang.String" itemvalue="pandocfilters" />
<item index="102" class="java.lang.String" itemvalue="wcwidth" />
<item index="103" class="java.lang.String" itemvalue="pyasn1" />
<item index="104" class="java.lang.String" itemvalue="requests" />
<item index="105" class="java.lang.String" itemvalue="Jinja2" />
<item index="106" class="java.lang.String" itemvalue="typeguard" />
<item index="107" class="java.lang.String" itemvalue="pyrsistent" />
<item index="108" class="java.lang.String" itemvalue="requests-oauthlib" />
<item index="109" class="java.lang.String" itemvalue="jupyter" />
<item index="110" class="java.lang.String" itemvalue="tensorboard-plugin-wit" />
<item index="111" class="java.lang.String" itemvalue="zipp" />
<item index="112" class="java.lang.String" itemvalue="nest-asyncio" />
<item index="113" class="java.lang.String" itemvalue="urllib3" />
<item index="114" class="java.lang.String" itemvalue="ipywidgets" />
<item index="115" class="java.lang.String" itemvalue="tornado" />
<item index="116" class="java.lang.String" itemvalue="google-auth-oauthlib" />
<item index="117" class="java.lang.String" itemvalue="nbformat" />
<item index="118" class="java.lang.String" itemvalue="Send2Trash" />
<item index="119" class="java.lang.String" itemvalue="prometheus-client" />
<item index="120" class="java.lang.String" itemvalue="mistune" />
<item index="121" class="java.lang.String" itemvalue="jupyter-console" />
<item index="122" class="java.lang.String" itemvalue="cachetools" />
<item index="123" class="java.lang.String" itemvalue="debugpy" />
<item index="124" class="java.lang.String" itemvalue="argon2-cffi" />
<item index="125" class="java.lang.String" itemvalue="webencodings" />
<item index="126" class="java.lang.String" itemvalue="google-auth" />
</list>
</value>
</option>
</inspection_tool>
<inspection_tool class="PyPep8Inspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
<option name="ignoredErrors">
<list>
<option value="E501" />
<option value="E122" />
<option value="W292" />
</list>
</option>
</inspection_tool>
<inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
<option name="ignoredErrors">
<list>
<option value="N803" />
<option value="N802" />
<option value="N806" />
</list>
</option>
</inspection_tool>
<inspection_tool class="SpellCheckingInspection" enabled="false" level="TYPO" enabled_by_default="false">
<option name="processCode" value="true" />
<option name="processLiterals" value="true" />
<option name="processComments" value="true" />
</inspection_tool>
</profile>
</component>
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7 (tf-latest-base)" project-jdk-type="Python SDK" />
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/OralAPI.iml" filepath="$PROJECT_DIR$/.idea/OralAPI.iml" />
</modules>
</component>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>
import numpy as np
import tensorflow as tf
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.python import keras
from CRF import CRF
# from CRF import CRF
class BiLSTMCRF:
def __init__(self, vocabSize, maxLen, tagIndexDict, tagSum, sequenceLengths=None, vecSize=100, learning_rate=0.01):
keras.backend.clear_session()
self.vocabSize = vocabSize
self.vecSize = vecSize
self.maxLen = maxLen
self.tagSum = tagSum
self.sequenceLengths = sequenceLengths
self.tagIndexDict = tagIndexDict
self.learning_rate = learning_rate
self.buildBiLSTMCRF()
def getTransParam(self, y, tagIndexDict):
self.trainY = np.argmax(y, axis=-1)
yList = self.trainY.tolist()
transParam = np.zeros(
[len(list(tagIndexDict.keys())), len(list(tagIndexDict.keys()))])
for rowI in range(len(yList)):
for colI in range(len(yList[rowI])-1):
transParam[yList[rowI][colI]][yList[rowI][colI+1]] += 1
for rowI in range(transParam.shape[0]):
transParam[rowI] = transParam[rowI]/np.sum(transParam[rowI])
return transParam
def buildBiLSTMCRF(self):
model = Sequential()
model.add(tf.keras.layers.Input(shape=(self.maxLen,)))
model.add(tf.keras.layers.Embedding(self.vocabSize, self.vecSize))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(
self.tagSum, return_sequences=True, activation="tanh"), merge_mode='sum'))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(
self.tagSum, return_sequences=True, activation="softmax"), merge_mode='sum'))
crf = CRF(self.tagSum, name='crf_layer')
model.add(crf)
model.compile(Adam(learning_rate=self.learning_rate), loss={
'crf_layer': crf.get_loss}, metrics=[crf.get_accuracy])
self.net = model
def fit(self, X, y, epochs=100, batchsize=32):
if len(y.shape) == 3:
y = np.argmax(y, axis=-1)
if self.sequenceLengths is None:
self.sequenceLengths = [row.shape[0] for row in y]
callbacks_list = [
tf.keras.callbacks.History(),
tf.keras.callbacks.ReduceLROnPlateau(monitor='loss', factor=0.5, patience=5,
verbose=1, mode='auto', min_lr=1e-9),
tf.keras.callbacks.ModelCheckpoint("model/model.h5", monitor='get_accuracy',
verbose=0, save_best_only=True, save_weights_only=True, mode='auto', period=1),
tf.keras.callbacks.EarlyStopping(
monitor='loss', min_delta=1e-5, patience=10),
TensorBoard(log_dir="logs", histogram_freq=1)
# WeightsSaver(1)
]
history = self.net.fit(
X, y, epochs=epochs, callbacks=callbacks_list, batch_size=batchsize)
return history
def predict(self, X):
preYArr = self.net.predict(X)
return preYArr
def load_weights(self, model_path):
self.net.load_weights(model_path)
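A minimal smoke test for this wrapper might look like the following sketch (run as a separate script): the three-tag tagIndexDict and the random X/y are hypothetical toy data, and fit() writes checkpoints to model/model.h5, so the model/ directory must exist.
import numpy as np
from BiLSTMCRF import BiLSTMCRF
tagIndexDict = {'O': 0, 'B-SIZE': 1, 'I-SIZE': 2}  # hypothetical toy tag set
model = BiLSTMCRF(vocabSize=50, maxLen=10, tagIndexDict=tagIndexDict, tagSum=3)
X = np.random.randint(1, 50, size=(4, 10))  # 4 sequences of 10 token ids
y = np.random.randint(0, 3, size=(4, 10))  # per-token tag indices
model.fit(X, y, epochs=1, batchsize=2)
print(model.predict(X).shape)  # expected: (4, 10), decoded tag ids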
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Original implementation from keras_contrib/layers/crf
# ==============================================================================
"""Implementing Conditional Random Field layer."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow_addons.text.crf import crf_decode, crf_log_likelihood
# from tensorflow_addons.utils import keras_utils
# @keras_utils.register_keras_custom_object
class CRF(tf.keras.layers.Layer):
"""Linear chain conditional random field (CRF).
Examples:
```python
from tensorflow_addons.layers import CRF
model = Sequential()
model.add(Embedding(3001, 300, mask_zero=True))
crf = CRF(10, name='crf_layer')
model.add(crf)
model.compile('adam', loss={'crf_layer': crf.loss})
model.fit(x, y)
```
Arguments:
units: Positive integer, dimensionality of the output space;
should equal the number of tags.
chain_initializer: Initializer for the `chain_kernel` weights matrix,
used for the CRF chain energy.
(see [initializers](../initializers.md)).
chain_regularizer: Regularizer function applied to
the `chain_kernel` weights matrix.
chain_constraint: Constraint function applied to
the `chain_kernel` weights matrix.
use_boundary: Boolean (default True), indicating if trainable
start-end chain energies should be added to the model.
boundary_initializer: Initializer for the `left_boundary`,
'right_boundary' weights vectors,
used for the start/left and end/right boundary energy.
boundary_regularizer: Regularizer function applied to
the 'left_boundary', 'right_boundary' weight vectors.
boundary_constraint: Constraint function applied to
the `left_boundary`, `right_boundary` weights vectors.
use_kernel: Boolean (default True), indicating whether to apply
a fully connected layer before the CRF op.
kernel_initializer: Initializer for the `kernel` weights matrix,
used for the linear transformation of the inputs.
kernel_regularizer: Regularizer function applied to
the `kernel` weights matrix.
kernel_constraint: Constraint function applied to
the `kernel` weights matrix.
use_bias: Boolean (default True), whether the layer uses a bias vector.
bias_initializer: Initializer for the bias vector.
bias_regularizer: Regularizer function applied to the bias vector.
bias_constraint: Constraint function applied to the bias vector.
activation: Activation function to use (default: 'linear').
Input shape:
3D tensor with shape: `(batch_size, sequence_length, feature_size)`.
Output shape:
2D tensor (dtype: int32) with shape: `(batch_size, sequence_length)`.
Masking:
This layer supports masking
(2D tensor, shape: `(batch_size, sequence_length)`)
for input data with a variable number of timesteps.
This layer outputs the same mask tensor.
NOTICE: this may cause issues when you
use Keras loss and metrics functions, which usually expect a 1D mask.
Loss function:
Because TF 2.0 enables eager execution by default,
the CRF loss cannot be implemented as an independent loss function.
Thus, users should use the loss method of this layer.
See Examples (above) for detailed usage.
References:
- [Conditional Random Field](https://en.wikipedia.org/wiki/Conditional_random_field)
"""
def __init__(self,
units,
chain_initializer="orthogonal",
chain_regularizer=None,
chain_constraint=None,
use_boundary=True,
boundary_initializer="zeros",
boundary_regularizer=None,
boundary_constraint=None,
use_kernel=True,
kernel_initializer="glorot_uniform",
kernel_regularizer=None,
kernel_constraint=None,
use_bias=True,
bias_initializer="zeros",
bias_regularizer=None,
bias_constraint=None,
activation="linear",
**kwargs):
super(CRF, self).__init__(**kwargs)
# set up the mask-support flag used by the base class (the Layer);
# the base class's init method sets it to False unconditionally,
# so this assignment must be executed after calling the base class's init method
self.supports_masking = True
self.units = units  # number of tags
self.use_boundary = use_boundary
self.use_bias = use_bias
self.use_kernel = use_kernel
self.activation = tf.keras.activations.get(activation)
self.kernel_initializer = tf.keras.initializers.get(kernel_initializer)
self.chain_initializer = tf.keras.initializers.get(chain_initializer)
self.boundary_initializer = tf.keras.initializers.get(
boundary_initializer)
self.bias_initializer = tf.keras.initializers.get(bias_initializer)
self.kernel_regularizer = tf.keras.regularizers.get(kernel_regularizer)
self.chain_regularizer = tf.keras.regularizers.get(chain_regularizer)
self.boundary_regularizer = tf.keras.regularizers.get(
boundary_regularizer)
self.bias_regularizer = tf.keras.regularizers.get(bias_regularizer)
self.kernel_constraint = tf.keras.constraints.get(kernel_constraint)
self.chain_constraint = tf.keras.constraints.get(chain_constraint)
self.boundary_constraint = tf.keras.constraints.get(
boundary_constraint)
self.bias_constraint = tf.keras.constraints.get(bias_constraint)
# values will be assigned in method
self.input_spec = None
# value remembered for loss/metrics function
self.potentials = None
self.sequence_length = None
self.mask = None
# global variable
self.kernel = None
self.chain_kernel = None
self.bias = None
self.left_boundary = None
self.right_boundary = None
def build(self, input_shape):
input_shape = tuple(tf.TensorShape(input_shape).as_list())
# see API docs of InputSpec for more detail
self.input_spec = [tf.keras.layers.InputSpec(shape=input_shape)]
feature_size = input_shape[-1]
if self.use_kernel:
# weights that map an arbitrary input tensor to the correct shape
self.kernel = self.add_weight(
shape=(feature_size, self.units),
name="kernel",
initializer=self.kernel_initializer,
regularizer=self.kernel_regularizer,
constraint=self.kernel_constraint,
)
# weights that act as the transition probabilities between tags
self.chain_kernel = self.add_weight(
shape=(self.units, self.units),
name="chain_kernel",
initializer=self.chain_initializer,
regularizer=self.chain_regularizer,
constraint=self.chain_constraint,
)
# bias that works with self.kernel
if self.use_kernel and self.use_bias:
self.bias = self.add_weight(
shape=(self.units, ),
name="bias",
initializer=self.bias_initializer,
regularizer=self.bias_regularizer,
constraint=self.bias_constraint,
)
else:
self.bias = 0
# weights for the <START>-to-tag and tag-to-<END> transition probabilities
if self.use_boundary:
self.left_boundary = self.add_weight(
shape=(self.units, ),
name="left_boundary",
initializer=self.boundary_initializer,
regularizer=self.boundary_regularizer,
constraint=self.boundary_constraint,
)
self.right_boundary = self.add_weight(
shape=(self.units, ),
name="right_boundary",
initializer=self.boundary_initializer,
regularizer=self.boundary_regularizer,
constraint=self.boundary_constraint,
)
# or directly call self.built = True
super(CRF, self).build(input_shape)
def call(self, inputs, mask=None, **kwargs):
# mask: Tensor(shape=(batch_size, sequence_length), dtype=bool) or None
if mask is not None:
assert (tf.keras.backend.ndim(mask) == 2
), "Input mask to CRF must have dim 2 if not None"
# left padding of the mask is not supported by the underlying CRF function;
# detect it and report it to the user
first_mask = None
if mask is not None:
left_boundary_mask = self._compute_mask_left_boundary(mask)
first_mask = left_boundary_mask[:, 0]
# remember this value for later use
self.mask = mask
if first_mask is not None:
with tf.control_dependencies([
tf.debugging.assert_equal(
tf.math.reduce_all(first_mask),
tf.constant(True),
message="Currently, CRF layer do not support left padding"
)
]):
self.potentials = self._dense_layer(inputs)
else:
self.potentials = self._dense_layer(inputs)
# appending boundary probability info
if self.use_boundary:
self.potentials = self.add_boundary_energy(
self.potentials, mask, self.left_boundary, self.right_boundary)
self.sequence_length = self._get_sequence_length(inputs, mask)
decoded_sequence, _ = self.get_viterbi_decoding(
self.potentials, self.sequence_length)
return decoded_sequence
def _get_sequence_length(self, input_, mask):
"""
Currently the underlying CRF function (provided by tensorflow_addons.text.crf)
does not support bi-directional masking (left padding / right padding);
it supports right padding when told the sequence length.
This function computes the sequence length from the input and the mask.
"""
if mask is not None:
int_mask = tf.keras.backend.cast(mask, tf.int8)
sequence_length = self.mask_to_sequence_length(int_mask)
else:
# make a mask tensor from the input, then use it to generate sequence_length
input_energy_shape = tf.shape(input_)
raw_input_shape = tf.slice(input_energy_shape, [0], [2])
alt_mask = tf.ones(raw_input_shape)
sequence_length = self.mask_to_sequence_length(alt_mask)
return sequence_length
def mask_to_sequence_length(self, mask):
"""
compute sequence length from mask
"""
sequence_length = tf.keras.backend.cast(
tf.keras.backend.sum(mask, 1), tf.int64)
return sequence_length
@staticmethod
def _compute_mask_right_boundary(mask):
"""
input mask: 0011100, output right_boundary: 0000100
"""
# shift mask to left by 1: 0011100 => 0111000
offset = 1
left_shifted_mask = tf.keras.backend.concatenate(
[mask[:, offset:],
tf.keras.backend.zeros_like(mask[:, :offset])],
axis=1)
# TODO(howl-anderson): for below code
# Original code in keras_contrib:
# end_mask = K.cast(
# K.greater(self.shift_left(mask), mask),
# K.floatx()
# )
# May have a bug, it's better confirmed
# by the original keras_contrib maintainer
# Luiz Felix (github: lzfelix),
# mailed him already and waiting for reply.
# 0011100 > 0111000 => 0000100
right_boundary = tf.keras.backend.greater(mask, left_shifted_mask)
return right_boundary
@staticmethod
def _compute_mask_left_boundary(mask):
"""
input mask: 0011100, output left_boundary: 0010000
"""
# shift mask to right by 1: 0011100 => 0001110
offset = 1
right_shifted_mask = tf.keras.backend.concatenate(
[tf.keras.backend.zeros_like(mask[:, :offset]), mask[:, :-offset]],
axis=1)
# 0011100 > 0001110 => 0010000
left_boundary = tf.keras.backend.greater(mask, right_shifted_mask)
return left_boundary
def add_boundary_energy(self, potentials, mask, start, end):
def expand_scalar_to_3d(x):
# expand a tensor from shape (x,) to (1, 1, x)
return tf.keras.backend.expand_dims(
tf.keras.backend.expand_dims(x, 0), 0)
start = expand_scalar_to_3d(start)
end = expand_scalar_to_3d(end)
if mask is None:
potentials = tf.keras.backend.concatenate(
[potentials[:, :1, :] + start, potentials[:, 1:, :]], axis=1)
potentials = tf.keras.backend.concatenate(
[potentials[:, :-1, :], potentials[:, -1:, :] + end], axis=1)
else:
mask = tf.keras.backend.expand_dims(
tf.keras.backend.cast(mask, start.dtype), axis=-1)
start_mask = tf.keras.backend.cast(
self._compute_mask_left_boundary(mask),
start.dtype,
)
end_mask = tf.keras.backend.cast(
self._compute_mask_right_boundary(mask),
end.dtype,
)
potentials = potentials + start_mask * start
potentials = potentials + end_mask * end
return potentials
def get_viterbi_decoding(self, potentials, sequence_length):
# decode_tags: A [batch_size, max_seq_len] matrix, with dtype `tf.int32`
decode_tags, best_score = crf_decode(potentials, self.chain_kernel,
sequence_length)
return decode_tags, best_score
def get_config(self):
# used for loading model from disk
config = {
"units":
self.units,
"use_boundary":
self.use_boundary,
"use_bias":
self.use_bias,
"use_kernel":
self.use_kernel,
"kernel_initializer":
tf.keras.initializers.serialize(self.kernel_initializer),
"chain_initializer":
tf.keras.initializers.serialize(self.chain_initializer),
"boundary_initializer":
tf.keras.initializers.serialize(self.boundary_initializer),
"bias_initializer":
tf.keras.initializers.serialize(self.bias_initializer),
"activation":
tf.keras.activations.serialize(self.activation),
"kernel_regularizer":
tf.keras.regularizers.serialize(self.kernel_regularizer),
"chain_regularizer":
tf.keras.regularizers.serialize(self.chain_regularizer),
"boundary_regularizer":
tf.keras.regularizers.serialize(self.boundary_regularizer),
"bias_regularizer":
tf.keras.regularizers.serialize(self.bias_regularizer),
"kernel_constraint":
tf.keras.constraints.serialize(self.kernel_constraint),
"chain_constraint":
tf.keras.constraints.serialize(self.chain_constraint),
"boundary_constraint":
tf.keras.constraints.serialize(self.boundary_constraint),
"bias_constraint":
tf.keras.constraints.serialize(self.bias_constraint)
}
base_config = super(CRF, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
def compute_output_shape(self, input_shape):
output_shape = input_shape[:2]
return output_shape
def compute_mask(self, input_, mask=None):
# """
# Set the output mask to be a 1D tensor, so the loss method of this class can work without error.
# But there is a big shortcoming:
# layers, losses and metrics after this layer
# cannot access a meaningful mask, which means they cannot work correctly.
# Users can only get correct loss and metric values from the methods of this layer.
# """
# if mask is not None:
# # transform mask from shape (?, ?) to (?, )
# new_mask = tf.keras.backend.any(mask, axis=1)
# return new_mask
return mask
def get_negative_log_likelihood(self, y_true):
# TODO: remove typing cast
self.potentials = tf.keras.backend.cast(self.potentials, tf.float32)
y_true = tf.keras.backend.cast(y_true, tf.int32)
self.sequence_length = tf.keras.backend.cast(self.sequence_length,
tf.int32)
# self.chain_kernel = tf.keras.backend.cast(self.chain_kernel,
# tf.float32)
log_likelihood, _ = crf_log_likelihood(
self.potentials, y_true, self.sequence_length, self.chain_kernel)
return -log_likelihood
def get_loss(self, y_true, y_pred):
# we don't use y_pred, but the caller passes it anyway, so ignore it
return self.get_negative_log_likelihood(y_true)
def get_accuracy(self, y_true, y_pred):
judge = tf.keras.backend.cast(
tf.keras.backend.equal(y_pred, y_true), tf.keras.backend.floatx())
if self.mask is None:
return tf.keras.backend.mean(judge)
else:
mask = tf.keras.backend.cast(self.mask, tf.keras.backend.floatx())
return (tf.keras.backend.sum(judge * mask) /
tf.keras.backend.sum(mask))
def _dense_layer(self, input_):
if self.use_kernel:
output = self.activation(
tf.keras.backend.dot(input_, self.kernel) + self.bias)
else:
output = input_
return tf.keras.backend.cast(output, self.chain_kernel.dtype)
def __call__(self, inputs, *args, **kwargs):
outputs = super(CRF, self).__call__(inputs, *args, **kwargs)
# A hack that adds _keras_history to EagerTensor, making it more like a normal Tensor
for tensor in tf.nest.flatten(outputs):
if not hasattr(tensor, '_keras_history'):
tensor._keras_history = (self, 0, 0)
return outputs
@property
def _compute_dtype(self):
# fixed output dtype from the underlying CRF functions
return tf.int32
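For reference, a minimal standalone wiring of this layer (hypothetical shapes and tag count; assumes the TF 2.4 / tensorflow-addons 0.14 environment pinned in requirements.txt). The loss and metric live on the layer object itself and are keyed by the layer's name, as BiLSTMCRF.buildBiLSTMCRF does above.
import numpy as np
import tensorflow as tf
from CRF import CRF
crf = CRF(4, name='crf_layer')  # 4 hypothetical tags
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Input(shape=(7,)))
model.add(tf.keras.layers.Embedding(20, 8, mask_zero=True))  # token id 0 marks right padding
model.add(crf)
model.compile('adam', loss={'crf_layer': crf.get_loss}, metrics=[crf.get_accuracy])
x = np.random.randint(1, 20, size=(2, 7))
y = np.random.randint(0, 4, size=(2, 7))
model.fit(x, y, epochs=1, verbose=0)
print(model.predict(x).shape)  # (2, 7); entries are decoded tag ids (int32)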
# Entity extraction from lung nodule CT imaging reports
import json
import copy
import requests
import uuid
from flask import Flask, request, redirect, url_for, render_template, flash, jsonify, Blueprint
from model import Oral
oral_api = Blueprint('oral', __name__)
@oral_api.route('/')
def show():
return 'This is oral api.'
@oral_api.route('/recg/', methods = ['POST'])
def recognize():
if request.method == 'POST':
finding = request.form.get('finding')
conclusion = request.form.get('conclusion')
verbose = request.form.get('verbose', default = 0)
try:
verbose = int(verbose)
except Exception as e:
return jsonify({'success': False, 'description': {'error msg': 'verbose can only be 0 or 1'}}), 500
if verbose != 0:
print()
print(finding)
print(conclusion)
if finding is None or conclusion is None:
return jsonify({'success': False, 'description': {'error msg': 'invalid post body fields'}}), 500
elif finding == '' or conclusion == '':
return jsonify(
{'success': False, 'description': {'error msg': 'findings or conclusions cannot be empty'}}), 500
else:
try:
print('' if verbose == 0 else 'verbose out:')
oral = Oral(finding, conclusion, verbose = False if verbose == 0 else 1)
data = oral.get_json()
return jsonify({'success': True, 'description': {'data': data}}), 200
except Exception as e:
print("/n******ERROR SRART******/n")
print(e)
print("----------findind----------")
print(finding)
print("---------conclusion--------")
print(conclusion)
print("/n*******ERROR END*******/n")
return jsonify({'success': False, 'description': {'error msg': str(e)}}), 500
else:
return jsonify({'success': False, 'description': {'error msg': 'Invalid methods'}}), 404
vocabSize:497
maxLen:177
classSum:21
from flask import Flask, request, redirect, url_for, render_template, flash, jsonify, Blueprint
from api import oral_api
app = Flask(__name__)
app.secret_key = '1234567'
app.register_blueprint(oral_api, url_prefix = '/oral')
if __name__ == '__main__':
# from werkzeug.contrib.fixers import ProxyFix
# app.wsgi_app = ProxyFix(app.wsgi_app)
app.run(debug = True, port = 5004, host = '0.0.0.0')
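A hypothetical client call against this service, assuming it is running locally on port 5004 as configured above; the finding/conclusion values are placeholders for real report text.
import requests
resp = requests.post(
'http://localhost:5004/oral/recg/',
data={
'finding': '...',  # gross findings text goes here
'conclusion': '...',  # pathological conclusion text goes here
'verbose': 0,
},
)
print(resp.status_code, resp.json())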
##
import os
import re
os.environ['TF_XLA_FLAGS'] = '--tf_xla_enable_xla_devices'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import splittxt
import tools
import predict
import tensorflow
##
class Oral:
def __init__(self, ImagingFindings, ImagingConclusion, verbose = True):
self.verbose = verbose
self._Conclusion = ''
self._Finding = ''
if '送检淋巴结' in ImagingConclusion:
self._Conclusion = ImagingConclusion
self._Finding = ImagingFindings
else:
self._Conclusion = ImagingFindings
self._Finding = ImagingConclusion
self._Conclusion = self._Conclusion.strip('"').strip() \
.replace("大于等于", "≥").replace("小于等于", "≤").replace("大于", ">").replace("小于", "<").replace("＞", ">").replace(
"＜", "<")
self._Finding = self._Finding.strip('"').strip() \
.replace("大于等于", "≥").replace("小于等于", "≤").replace("大于", ">").replace("小于", "<").replace("＞", ">").replace(
"＜", "<")
self.ImmunohistochemistryContent = ''
self.MolecularResultsContent = ''
self.Degree = ''
self.CuttingEdgePathologyOther = ''
self.CuttingEdgePathology = ''
_, CuttingEdge1, _, MolecularResults1, Immunohistochemistry1 = splittxt.splittxt(
self._Conclusion)
_, CuttingEdge2, _, MolecularResults2, Immunohistochemistry2 = splittxt.splittxt(
self._Finding)
self.ConclusionCuttingEdge = CuttingEdge1 + CuttingEdge2
self.ConclusionMolecularResults = MolecularResults1 + MolecularResults2
if Immunohistochemistry1 != Immunohistochemistry2:
self.ConclusionImmunohistochemistry = Immunohistochemistry1 + Immunohistochemistry2
else:
self.ConclusionImmunohistochemistry = Immunohistochemistry1
# self.ConclusionFrist, self.ConclusionCuttingEdge, self.ConclusionMolecularResults, self.ConclusionImmunohistochemistry = splittxt.splittxt(
# self._Conclusion + self._Finding)
# self.FindingFrist, self.FindingCuttingEdge, self.FindingMolecularResults, self.FindingImmunohistochemistry = splittxt.splittxt(
# self._Finding + self._Conclusion)
if self.ConclusionCuttingEdge != "":
self.CuttingEdgePathology = tools.CuttingEdgePathology(self.ConclusionCuttingEdge)  # post-operative pathological margin
self.CuttingEdgePathologyOther = ""
if self.CuttingEdgePathology == "其他情况":
CuttingEdgeID = tools.FindChar(self.ConclusionCuttingEdge)[0] + 1
self.CuttingEdgePathologyOther = self.ConclusionCuttingEdge[CuttingEdgeID:]  # other post-operative margin findings
self.Degree = tools.findDegree(self.ConclusionCuttingEdge)  # degree of mucosal epithelial dysplasia
# print(self.Degree)
self.Degree = self.getDegree(self.Degree)
if self.CuttingEdgePathologyOther == '':
self.CuttingEdgePathologyOther = '无'
# molecular results
if self.ConclusionMolecularResults != "":
MolecularResultsID = tools.FindChar(self.ConclusionMolecularResults)[0]
self.MolecularResultsContent = self.ConclusionMolecularResults[MolecularResultsID:]  # molecular results
if self.MolecularResultsContent == "":
self.MolecularResultsContent = "无"
# immunohistochemistry
# print(self.ConclusionImmunohistochemistry)
if self.ConclusionImmunohistochemistry != "":
self.Immunohistochemistryisornot = "有" # 免疫组化有无
# print(self.Immunohistochemistryisornot)
# ImmunohistochemistryID = tools.FindChar(self.ConclusionImmunohistochemistry)[0]
# print(tools.FindChar(self.ConclusionImmunohistochemistry))
# print(self.ConclusionImmunohistochemistry)
# print(self.ConclusionImmunohistochemistry[ImmunohistochemistryID:])
# self.ImmunohistochemistryContent = self.ConclusionImmunohistochemistry[ImmunohistochemistryID:]  # immunohistochemistry results
self.ImmunohistochemistryContent = self.ConclusionImmunohistochemistry  # immunohistochemistry results
else:
self.Immunohistochemistryisornot = "无" # 免疫组化有无
# self.print_original_data()
self.ConclusionFrist, _, self.ConclusionCuttingLymph, _, _ = splittxt.splittxt(
self._Conclusion)
self.FindingFrist, _, self.FindingCuttingLymph, _, _ = splittxt.splittxt(
self._Finding)
self.ConclusionFrist = self.ConclusionFrist.replace('肿物', '肿块').replace('\n', '。')
tensorflow.keras.backend.clear_session()
if self.verbose:
print(self.ConclusionFrist + self.ConclusionCuttingLymph)
ans, y_pre = predict.predict(self.ConclusionFrist + self.ConclusionCuttingLymph)
self._y_pre = predict.output(self.ConclusionFrist + self.ConclusionCuttingLymph, y_pre)
if self.verbose:
self.print_list_item(ans)
self.FindingFrist = self.FindingFrist.replace('肿物', '肿块').replace('\n', '。')
tensorflow.keras.backend.clear_session()
if self.verbose:
print(self.FindingFrist + self.FindingCuttingLymph)
ans_o, y_pre_o = predict.predict(self.FindingFrist + self.FindingCuttingLymph)
self._y_pre_o = predict.output(self.FindingFrist + self.FindingCuttingLymph, y_pre_o)
if self.verbose:
self.print_list_item(ans_o)
tensorflow.keras.backend.clear_session()
if self.verbose:
print(self.ConclusionCuttingLymph)
ans_lymph, y_pre_lymph = predict.predict(self.ConclusionCuttingLymph)
self._y_pre_lymph = predict.output(self.ConclusionCuttingLymph, y_pre_lymph)
# if self.verbose:
# self.print_list_item(ans_lymph)
def _get_entity_with_O(self, y_pre, with_o):
all = []
if with_o:
for i in range(len(y_pre)):
if i == 0:
# print('O', ImagingConclusionFrist[0:y_pre[i][2]].replace('\n', ' '), str(0), str(y_pre[i][2]))
# print(y_pre[i][0], y_pre[i][1], y_pre[i][2], y_pre[i][3])
all.append({'tag': 'O', 'words': self.FindingFrist[0:y_pre[i][2]].replace('\n', ' '), 'h': 0,
'r': y_pre[i][2]})
all.append({'tag': y_pre[i][0], 'words': y_pre[i][1], 'h': y_pre[i][2], 'r': y_pre[i][3]})
else:
O_h = y_pre[i - 1][3] + 1
O_r = y_pre[i][2]
# print('O', ImagingConclusionFrist[O_h:O_r].replace('\n', ' '), str(y_pre[i - 1][3] + 1), str(y_pre[i][2]))
# print(y_pre[i][0], y_pre[i][1], y_pre[i][2], y_pre[i][3])
all.append({'tag': 'O', 'words': self.FindingFrist[O_h:O_r].replace('\n', ' '),
'h': y_pre[i - 1][3] + 1, 'r': y_pre[i][2]})
all.append({'tag': y_pre[i][0], 'words': y_pre[i][1], 'h': y_pre[i][2], 'r': y_pre[i][3]})
else:
for i in range(len(y_pre)):
if i == 0:
# print('O', ImagingConclusionFrist[0:y_pre[i][2]].replace('\n', ' '), str(0), str(y_pre[i][2]))
# print(y_pre[i][0], y_pre[i][1], y_pre[i][2], y_pre[i][3])
all.append({'tag': 'O', 'words': self.ConclusionFrist[0:y_pre[i][2]].replace('\n', ' '), 'h': 0,
'r': y_pre[i][2]})
all.append({'tag': y_pre[i][0], 'words': y_pre[i][1], 'h': y_pre[i][2], 'r': y_pre[i][3]})
else:
O_h = y_pre[i - 1][3] + 1
O_r = y_pre[i][2]
# print('O', ImagingConclusionFrist[O_h:O_r].replace('\n', ' '), str(y_pre[i - 1][3] + 1), str(y_pre[i][2]))
# print(y_pre[i][0], y_pre[i][1], y_pre[i][2], y_pre[i][3])
all.append({'tag': 'O', 'words': self.ConclusionFrist[O_h:O_r].replace('\n', ' '),
'h': y_pre[i - 1][3] + 1, 'r': y_pre[i][2]})
all.append({'tag': y_pre[i][0], 'words': y_pre[i][1], 'h': y_pre[i][2], 'r': y_pre[i][3]})
return all
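# Each element returned above is a dict such as (illustrative offsets)
#   {'tag': 'SIZE', 'words': '3*2*2cm', 'h': 12, 'r': 18},
# where 'h'/'r' are start/end character offsets and 'O' items fill the gaps
# between recognized entities; with_o=True reads offsets against FindingFrist,
# with_o=False against ConclusionFrist.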
def max_size(self, type):
haveSIZE = False
for i in self._get_entity_with_O(self._y_pre, False):
if i['tag'] == 'SIZE':
haveSIZE = True
all = []
if not haveSIZE:
all = self._get_entity_with_O(self._y_pre_o, True)
else:
all = self._get_entity_with_O(self._y_pre, False)
# for i in all:
# print(i)
max = 0
max_i = 0
numbers = []
types = []
for i in range(0, len(all)):
if type in all[i]['words']:
types.append(all[i]['words'])
if all[i]['tag'] == 'SIZE' and len(types) != 0:
numbers.append(tools.exactNumber(all[i]['words']))
types = []
if len(numbers) == 0:
return ""
for arr_i in range(0, len(numbers)):
for num in numbers[arr_i]:
if (re.match("^\d+?\.\d+?$", str(num)) or num.isdigit()) and (
re.match("^\d+?\.\d+?$", str(max)) or num.isdigit()):
if float(num) > float(max):
max = num
max_i = arr_i
s = ''
for num in numbers[max_i]:
s += str(num) + '*'
return s.strip("*").strip("cm")
def get_DOI(self):
haveDOI = False
for i in self._get_entity_with_O(self._y_pre_o, True):
if i['tag'] == 'DOI':
haveDOI = True
all = []
if not haveDOI:
all = self._get_entity_with_O(self._y_pre, False)
else:
all = self._get_entity_with_O(self._y_pre_o, True)
DOI_txt = ''
# all = self._get_entity_with_O(self._y_pre, False)
# for i in all:
# print(i)
for i in all:
if i['tag'] == 'DOI':
DOI_txt += i['words'] + '\n'
return DOI_txt
def get_pT(self):
pT_txt = ''
haveDOI = False
for i in self._get_entity_with_O(self._y_pre_o, True):
if i['tag'] == 'DOI':
haveDOI = True
all = []
if not haveDOI:
all = self._get_entity_with_O(self._y_pre, False)
else:
all = self._get_entity_with_O(self._y_pre_o, True)
for i in all:
if i['tag'] == 'DOI':
pT_txt += tools.pT(i['words']) + '\n'
return pT_txt
def get_differentiation(self):
differentiation_txt = ''
differentiations = []
all = self._get_entity_with_O(self._y_pre, False)
# print('self._y_pre:')
for i in all:
# print(i)
if i['tag'] == 'LEVEL':
_, ans = tools.differentiation(i['words'])
differentiations.append(ans)
# differentiation_txt += ans + '\n'
all = self._get_entity_with_O(self._y_pre_o, True)
# print('self._y_pre_o:')
for i in all:
# print(i)
if i['tag'] == 'LEVEL':
_, ans = tools.differentiation(i['words'])
differentiations.append(ans.strip())
if ('中-低分化' in (self.ConclusionFrist + self.ConclusionCuttingLymph)) or (
'中-低分化' in (self.FindingFrist + self.FindingCuttingLymph)):
differentiations.append("Ⅱ级中分化")
differentiations.append("Ⅲ级低分化")
differentiations = set(differentiations)
for i in differentiations:
differentiation_txt += i + '\n'
return differentiation_txt
def get_invasion(self, type):
all = self._get_entity_with_O(self._y_pre, False)
for i in all:
if i['tag'] == 'INVASION':
if type in i['words']:
return '是'
return '否'
def getENE(self):
all = self._get_entity_with_O(self._y_pre, False)
for i in all:
if i['tag'] == 'ENE':
return '有'
return '无'
def getDegree(self, dgr):
if '-' in dgr:
dgr_list = dgr.split('-')
for i in range(len(dgr_list)):
if "度" not in dgr_list[i]:
dgr_list[i] += "度"
rt_dgr = ""
for i in dgr_list:
rt_dgr += i
rt_dgr += '\n'
rt_dgr = rt_dgr.strip('\n')
return rt_dgr
else:
return dgr
def getANATOMY(self):
all = self._get_entity_with_O(self._y_pre, False)
have_anatomy = False
for i in all:
if i['tag'] == 'ANATOMY':
have_anatomy = True
if not have_anatomy:
all = self._get_entity_with_O(self._y_pre_o, True)
count_i = 0
count_o = 0
anatomy_list_init = []
for i in all:
if i['tag'] == 'ANATOMY':
if ('I' in i['words'] or 'V' in i['words'] or i['words'] == '左' or i['words'] == '右') and (
'DOI' not in i['words'] and 'b' not in i['words'] and 'a' not in i['words'] and 'A' not in i[
'words'] and 'B' not in i['words']):
count_i += 1
anatomy_list_init.append(i['words'])
# if i['words'] == '右' or i['words'] == '左':
# rt_txt += i['words']
# else:
# rt_txt += i['words'] + '、'
else:
count_o += 1
# print(anatomy_list_init)
anatomy_list_rt = []
l_or_r = ''
for i in range(len(anatomy_list_init)):
if anatomy_list_init[i] == '左' or anatomy_list_init[i] == '右':
l_or_r = anatomy_list_init[i]
elif ('左' not in anatomy_list_init[i] and '右' not in anatomy_list_init[i]) and (
'I' in anatomy_list_init[i] or 'V' in anatomy_list_init[i]):
if l_or_r != '':
anatomy_list_rt.append(l_or_r + anatomy_list_init[i].strip('区').strip('淋巴结') + '区')
else:
count_o += 1
elif '左' in anatomy_list_init[i] or '右' in anatomy_list_init[i]:
anatomy_list_rt.append(anatomy_list_init[i].strip('区').strip('淋巴结') + '区')
anatomy_set_rt = set(anatomy_list_rt)
# print(anatomy_set_rt)
rt_txt = ''
for i in anatomy_set_rt:
rt_txt += (i + '、')
if count_o != 0:
if count_i == 0:
rt_txt = (rt_txt.strip('、') + '其他')
else:
rt_txt = (rt_txt.strip('、') + '、其他')
return rt_txt.strip('、')
def getANATOMY_o(self):
all = self._get_entity_with_O(self._y_pre, False)
have_anatomy = False
for i in all:
if i['tag'] == 'ANATOMY':
have_anatomy = True
if not have_anatomy:
all = self._get_entity_with_O(self._y_pre_o, True)
rt_txt = ''
anatomy_o_list = []
anatomy_list = []
for i in all:
if i['tag'] == 'ANATOMY':
if ('I' in i['words'] or 'V' in i['words'] or i['words'] == '左' or i['words'] == '右') and (
'DOI' not in i['words'] and 'b' not in i['words'] and 'a' not in i['words'] and 'A' not in i[
'words'] and 'B' not in i['words']):
anatomy_list.append(i['words'])
continue
else:
anatomy_o_list.append(i['words'])
l_or_r = ''
for i in range(len(anatomy_list)):
if anatomy_list[i] == '左' or anatomy_list[i] == '右':
l_or_r = anatomy_list[i]
elif ('左' not in anatomy_list[i] and '右' not in anatomy_list[i]) and (
'I' in anatomy_list[i] or 'V' in anatomy_list[i]):
if l_or_r == '':
anatomy_o_list.append(anatomy_list[i])
elif '左' in anatomy_list[i] or '右' in anatomy_list[i]:
continue
# print(anatomy_o_list)
if len(anatomy_o_list) == 0:
return '无'
anatomy_o_list = set(anatomy_o_list)
for i in anatomy_o_list:
if ('I' in i or 'V' in i) and ('区' not in i):
rt_txt += i + '区、'
else:
rt_txt += i + '、'
return rt_txt.strip('、')
def get_histological_type(self):
all = self._get_entity_with_O(self._y_pre, False)
# print(all)
count_s = 0
count_o = 0
rt_txt = ''
for i in all:
if i['tag'] == 'SQUAMOUS':
count_s += 1
if i['tag'] == 'OTHER':
if ('恶性' in i['words'] or '癌' in i['words'] or '肉瘤' in i['words'] or '异常增生' in i[
'words']) and ('鳞状细胞' not in i['words']):
count_o += 1
all = self._get_entity_with_O(self._y_pre_o, True)
# print(all)
for i in all:
if i['tag'] == 'SQUAMOUS':
count_s += 1
if i['tag'] == 'OTHER':
if ('恶性' in i['words'] or '癌' in i['words'] or '肉瘤' in i['words'] or '异常增生' in i[
'words']) and ('鳞状细胞' not in i['words']):
# print(i['words'])
count_o += 1
if '鳞状细胞癌' in self._Conclusion or '鳞状细胞癌' in self._Finding:
count_s += 1
if count_s > 0:
rt_txt += '鳞状细胞癌\n'
if count_o > 0:
rt_txt += '其他'
if count_o == 0 and count_s == 0:
rt_txt = '无'
return rt_txt.strip('、')
def get_other_type(self):
all = self._get_entity_with_O(self._y_pre, False)
# for i in all:
# print(i)
rt_txt = ''
count = 0
entity = []
for i in all:
if i['tag'] == 'OTHER':
count += 1
if ('恶性' in i['words'] or '癌' in i['words'] or '肉瘤' in i['words'] or '异常增生' in i[
'words']) and ('鳞状细胞' not in i['words']):
entity.append(i['words'])
all = self._get_entity_with_O(self._y_pre_o, True)
for i in all:
if i['tag'] == 'OTHER':
count += 1
if ('恶性' in i['words'] or '癌' in i['words'] or '肉瘤' in i['words'] or '异常增生' in i[
'words']) and ('鳞状细胞' not in i['words']):
entity.append(i['words'])
entity = set(entity)
for i in entity:
rt_txt += i + '\n'
return '无' if count == 0 else rt_txt
def get_number(self):
# all = get_entity_with_O(y_pre)
count = 0
# for i in self._y_pre:
# print(i)
for i in range(1, len(self._y_pre)):
if self._y_pre[i][0] == 'NUMBER' and self._y_pre[i - 1][0] == 'ANATOMY':
# print(y_pre[i][1])
if '各' in self._y_pre[i][1]:
# print(self._y_pre[i][1])
count_a = 0
for j in range(1, len(self._y_pre)):
if self._y_pre[j][0] == 'ANATOMY':
count_a += 1
# print(self._y_pre[i][1].replace(' ', '').strip('').strip('只').strip('块').strip('组织').strip(
# '枚').strip('各'))
count = float(self._y_pre[i][1].replace(' ', '').strip('').strip('只').strip('块').strip('组织').strip(
'枚').strip('各')) * count_a
return count
n = self._y_pre[i][1].replace(' ', '').strip('').strip('只').strip('块').strip('组织').strip('枚').strip('各')
if '/' in n:
count += float(n.split('/')[1]) if (len(tools.exactNumber(
str(n.split('/')[1]))) != 0) and n.split('/')[1] != '' and ((re.match(
"^\d+?\.\d+?$", str(n.split('/')[1]))) or str(n.split('/')[1]).isdigit()) \
else float(0)
else:
count += float(n) if (len(tools.exactNumber(str(n))) != 0) and (
re.match("^\d+?\.\d+?$", str(n)) or str(n).isdigit()) else float(0)
return count
def get_p_number(self):
count = 0
num_list = []
for item in self._y_pre_lymph:
if item[0] == "NUMBER":
num_list.append(item)
if item[0] == "PN":
if item[1] == '阳性(+)' or item[1] == '阳性（+）' or item[1] == '阳性' or item[1] == '(+)' or item[
1] == '（+）' or item[1] == '+':
# print(num_list)
if len(num_list) == 0:
pass
else:
# self.print_y_pred()
for p_item in num_list:
n_str = p_item[1].replace(' ', '').strip('').strip('只').strip('块').strip('组织').strip(
'枚').strip('各')
# print(n_str)
if '/' in n_str:
# print(n_str.split('/')[0])
# if len(tools.exactNumber(str(n_str.split('/')[0]))) != 0:
# print("*")
# if n_str.split('/')[0] != '':
# print("**")
# if (re.match("^\d+?\.\d+?$", str(n_str.split('/')[0]))) or str(n_str.split('/')[0]).isdigit():
# print("***")
count += float(n_str.split('/')[0]) if (len(tools.exactNumber(
str(n_str.split('/')[0]))) != 0) and n_str.split('/')[0] != '' and ((re.match(
"^\d+?\.\d+?$", str(n_str.split('/')[0]))) or str(n_str.split('/')[0]).isdigit()) \
else float(0)
else:
count += float(n_str) if (len(tools.exactNumber(str(n_str))) != 0) and (
(re.match("^\d+?\.\d+?$", str(n_str))) or str(n_str).isdigit()) else float(0)
elif item[1] == '阴性(-)' or item[1] == '阴性（-）' or item[1] == '阴性' or item[1] == '(-)' or item[
1] == '（-）' or item[1] == '-':
num_list = []
return count
def get_p_max(self):
p_list = []
p_list_tmp = []
size_list = []
size_list_tmp = []
# print('----------')
for item in self._y_pre_lymph:
if item[0] == "ANATOMY":
p_list_tmp.append(item)
if item[0] == "SIZE":
size_list_tmp.append(item)
if item[0] == "NUMBER" and '/' in item[1]:
if len(p_list_tmp) == 0:
pass
else:
for p_item in p_list_tmp:
p_list.append(p_item)
for size_item in size_list_tmp:
size_list.append(size_item[1])
p_list_tmp = []
size_list_tmp = []
if item[0] == "PN":
if item[1] == '阳性(+)' or item[1] == '阳性（+）' or item[1] == '阳性' or item[1] == '(+)' or item[
1] == '（+）' or item[1] == '+':
if len(p_list_tmp) == 0:
pass
else:
for p_item in p_list_tmp:
p_list.append(p_item)
for size_item in size_list_tmp:
size_list.append(size_item[1])
p_list_tmp = []
size_list_tmp = []
else:
p_list_tmp = []
size_list_tmp = []
# print(p_list)
# print(size_list)
if len(p_list) != 0:
contains_single_left = False
contains_single_right = False
for i in p_list:
if i[1] == '左':
contains_single_left = True
if i[1] == '右':
contains_single_right = True
if contains_single_left or contains_single_right:
p_list = self._handle_single(p_list)
# for i in p_list:
# print(i)
# print('-------------')
# self.print_y_pred_o()
p_anatomy = ''
for i in range(len(self._y_pre_o)):
if self._y_pre_o[i][0] == 'ANATOMY':
p_anatomy = self._y_pre_o[i][1].strip().strip('区')
if self._y_pre_o[i][0] == 'SIZE':
# print(y_pre_o[i][1])
if p_anatomy != '':
for j in range(0, len(p_list)):
# print(p_list[j][1].strip().strip('区'))
if p_list[j][1].strip().strip('区')[0] == '左' or p_list[j][1].strip().strip('区')[0] == '右':
if p_anatomy[0] != p_list[j][1].strip().strip('区')[0]:
p_anatomy = p_list[j][1].strip().strip('区')[0] + p_anatomy
# print(p_anatomy)
# print(p_list[j][1].strip().strip('区'))
# print(p_anatomy)
if p_list[j][1].strip().strip('区') == p_anatomy:
size_list.append(self._y_pre_o[i][1])
p_anatomy = ''
all_size = []
# print(size_list)
for i in size_list:
for j in tools.exactNumber(i):
all_size.append(j)
for i in range(len(all_size)):
if re.match("^\d+?\.\d+?$", all_size[i]) or all_size[i].isdigit():
all_size[i] = float(all_size[i])
# print(all_size)
if len(all_size) != 0:
return max(all_size)
return 0
# print(len(size_list))
def _handle_single(self, p_list):
rt_list = []
lr = ''
for i in p_list:
if i[1] == '左' or i[1] == '右':
lr = i[1]
else:
rt_list += [[i[0], lr + i[1], i[2], i[3]]]
return rt_list
def get_pN(self):
# if type(self.get_p_max())== int or type(self.get_p_max())== float:
# print(self.get_p_max())
return tools.pN(self.get_p_number(), float(self.get_p_max()), self.getENE())
def findSJ(self):
if '送检淋巴结' in self._Conclusion:
return '是'
else:
return '否'
def get_Info(self):
print("术后病理切缘:")
print(str(self.CuttingEdgePathology).strip())
print("其他术后病理切缘情况:")
print(str(self.CuttingEdgePathologyOther).strip())
print("黏膜上皮异常增生程度:")
print(str(self.Degree).strip())
print("分子结果:")
print(str(self.MolecularResultsContent).strip())
print("免疫组化:")
print(str(self.Immunohistochemistryisornot).strip())
print("免疫组化结果:")
print(str(self.ImmunohistochemistryContent).strip())
print("送检组织大小cm:")
print(str(self.max_size("组织")).strip())
print("肿块大小:")
print(str(self.max_size("肿块")).strip())
print("浸润深度(DOI)mm:")
print(str(self.get_DOI()).strip())
print("pT:")
print(str(self.get_pT()).strip())
print("分化程度")
print(str(self.get_differentiation()).strip())
print("神经侵犯:")
print(str(self.get_invasion('神经')).strip())
print("血管侵犯:")
print(str(self.get_invasion('血管')).strip())
print("淋巴结包膜外ENE(+):")
print(str(self.getENE()).strip())
print("送检淋巴结部位:")
print(str(self.getANATOMY().strip('、')).strip())
print("其他送检淋巴结部位:")
print(str(self.getANATOMY_o().strip('、')).strip())
print("组织学类型:")
print(str(self.get_histological_type()).strip())
print("其他组织学类型:")
print(str(self.get_other_type()).strip())
if self.findSJ() == '是':
print('送检淋巴结数目:')
print(str(self.get_number()).strip())
print("阳性淋巴结数量:")
print(str(self.get_p_number()).strip())
print("阳性淋巴结最大直径cm:")
print(str(self.get_p_max()).strip())
print("pN:")
print(str(self.get_pN()).strip())
def print_original_data(self):
print(self._Finding)
print(self._Conclusion)
def print_y_pred(self):
for i in self._y_pre:
print(i)
def print_y_pred_o(self):
for i in self._y_pre_o:
print(i)
def print_list_item(self, l):
for i in l:
print(i)
def get_json(self):
if self.verbose:
self.print_y_pred()
print("-----------------")
self.print_y_pred_o()
print("-----------------")
self.get_Info()
return {
"送检组织大小cm": str(self.max_size("组织")).strip(),
"肿块大小": str(self.max_size("肿块")).strip(),
"组织学类型": str(self.get_histological_type()).strip(),
"其他组织学类型": str(self.get_other_type()).strip(),
"分化程度": str(self.get_differentiation()).strip(),
"浸润深度(DOI)mm": str(self.get_DOI()).strip(),
"pT": str(self.get_pT()).strip(),
"神经侵犯": str(self.get_invasion('神经')).strip(),
"血管侵犯": str(self.get_invasion('血管')).strip(),
"术后病理切缘": str(self.CuttingEdgePathology).strip(),
"其他术后病理切缘情况": str(self.CuttingEdgePathologyOther).strip(),
"黏膜上皮异常增生程度": str(self.Degree).strip(),
"免疫组化": str(self.Immunohistochemistryisornot).strip(),
"免疫组化结果": str(self.ImmunohistochemistryContent).strip(),
"分子结果": str(self.MolecularResultsContent).strip(),
"是否送检淋巴结": str(self.findSJ()).strip(),
"送检淋巴结部位": str(self.getANATOMY()).strip() if str(self.findSJ()).strip() == '是' else '',
"其他送检淋巴结部位": str(self.getANATOMY_o().strip('、')).strip() if str(self.findSJ()).strip() == '是' else '',
"送检淋巴结数目": str(self.get_number()).strip() if str(self.findSJ()).strip() == '是' else '',
"阳性淋巴结数目": str(self.get_p_number()).strip() if str(self.findSJ()).strip() == '是' else '',
"阳性淋巴结最大直径cm": str(self.get_p_max()).strip() if str(self.findSJ()).strip() == '是' else '',
"淋巴结包膜外ENE(+)": str(self.getENE()).strip() if str(self.findSJ()).strip() == '是' else '',
"pN": str(self.get_pN()).strip() if str(self.findSJ()).strip() == '是' else '',
}
##
if __name__ == '__main__':
Finding = """
" 左颈大块:6*5*4cm,一侧见一腺体3*3*2cm,灰黄分叶,余为脂肪血管。
左I区: 3只直径0.2-1.2cm。
左II区: 3只直径0.5-1.2cm。
左III区: 3只直径0.5-1cm。
左IV区: 3只直径0.5-0.8cm。
右I区:3只直径1-2cm。
右II区: 3只直径1cm。
右III区: 3只直径0.5-0.8cm。
右IV区: 3只直径0.5-0.6cm。
右颈淋巴:7*7*6cm,內见一腺体3*3*2cm,灰黄分叶,余为脂肪血管。"
"""
Conclusion = """
"“左颌下腺”慢性涎腺炎
“右颌下腺”慢性涎腺炎
送检淋巴结:“左”“I区”1/3只、“II区”1/3只、“III区”1/3只(肿瘤位于软组织内)及“右”“I区”2/3只(其中1只肿瘤侵犯至包膜外)、“II区”2/3只(肿瘤侵犯至包膜外)有肿瘤转移(+),余及“左IV区”3只、“右”“III区”3只、“IV区”3只均阴性(-)"
"""
oral = Oral(Finding, Conclusion)
oral.print_y_pred()
print("-----------------")
oral.print_y_pred_o()
print("-----------------")
oral.get_Info()
print(oral.get_json())
M
h
,
>
Z
L
H
6
p
i
G
3
V
F
.
(
W
<
R
O
T
UNK
"
b
t
;
A
:
1
P
S
]
线
K
[
/
I
U
m
a
B
C
%
8
D
-
l
4
E
N
c
Y
)
n
×
*
0
r
2
绿
5
?
9
7
w
+
import csv
import json
import os
from tensorflow.keras.preprocessing.sequence import pad_sequences
from BiLSTMCRF import BiLSTMCRF
model_path = 'model/model.h5'
vocab_path = 'model/vocab.txt'
class_dict = {
"O": 0,
"B-NUMBER": 1,
"I-NUMBER": 2,
"B-SIZE": 3,
"I-SIZE": 4,
"B-ENE": 5,
"I-ENE": 6,
"B-ANATOMY": 7,
"I-ANATOMY": 8,
"B-SQUAMOUS": 9,
"I-SQUAMOUS": 10,
"B-INVASION": 11,
"I-INVASION": 12,
"B-PN": 13,
"I-PN": 14,
"B-LEVEL": 15,
"I-LEVEL": 16,
"B-OTHER": 17,
"I-OTHER": 18,
"B-DOI": 19,
"I-DOI": 20
}
maxLen = 500
classSum = 21
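# BIO scheme illustration: for a SIZE span such as "3*2*2cm", the model is
# expected to emit B-SIZE for the first character and I-SIZE for the rest,
# with O for every character outside any entity; label_dict below inverts
# class_dict to map predicted indices back to these tag names.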
def build_input(text):
x = []
for char in text:
if char not in word_dict:
char = 'UNK'
x.append(word_dict.get(char))
x = pad_sequences([x], padding = 'post', maxlen = maxLen)
return x
def load_worddict():
vocabs = [line.strip()
for line in open(vocab_path, encoding = 'utf-8')]
word_dict = {wd: index for index, wd in enumerate(vocabs)}
return word_dict
def predict(text):
y_pre = []
x = build_input(text)
raw = model.predict(x)[0]
chars = [i for i in text]
tags = [label_dict[i] for i in raw][:len(text)]
res = list(zip(chars, tags))
for i, tag in enumerate(tags):
y_pre.append(tag)
return res, y_pre
def output(txt, cnt):
output = []
flag = 0
start = []
end = []
tags = []
for i, tag in enumerate(cnt):
if tag == 'O':
if flag == 1:
end = i-1
output.append([tags, txt[start:end+1], start, end])
flag = 0
continue
if tag.split("-")[0] == 'B':
if flag == 1:
end = i
output.append([tags, txt[start:end], start, end-1])
flag = 1
start = i
tags = tag.split("-")[1]
continue
return output
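# Illustrative call: with txt = "左I区3只" and
# cnt = ['B-ANATOMY', 'I-ANATOMY', 'I-ANATOMY', 'B-NUMBER', 'I-NUMBER'],
# output(txt, cnt) returns [['ANATOMY', '左I区', 0, 2]]; note that an entity
# still open at the end of the tag sequence is not flushed.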
word_dict = load_worddict()
vocabSize = len(word_dict) + 1
label_dict = {j: i for i, j in class_dict.items()}
model = BiLSTMCRF(vocabSize = vocabSize, maxLen = maxLen,
tagIndexDict = class_dict, tagSum = classSum)
model.load_weights(model_path)
if __name__ == '__main__':
s = """
“右舌”鳞状细胞癌(复发),高-中分化,灶性多核巨细胞浸润,肿瘤侵犯神经。送检淋巴结:“左颌下”1只、“颏下”1只均阴性(-)
"""
a = predict(s)
for i in a[0]:
print(i)
b = output(s, a[1])
print(b)
absl-py==0.14.0
appnope==0.1.2
argcomplete==1.12.3
argon2-cffi==21.1.0
astunparse==1.6.3
attrs==21.2.0
backcall==0.2.0
bleach==4.1.0
cachetools==4.2.2
certifi==2021.10.8
cffi==1.14.6
charset-normalizer==2.0.6
click==8.0.3
cn2an==0.5.11
debugpy==1.4.3
decorator==5.1.0
defusedxml==0.7.1
entrypoints==0.3
Flask==2.0.2
Flask-Login==0.5.0
flatbuffers==1.12
gast==0.3.3
google-auth==1.35.0
google-auth-oauthlib==0.4.6
google-pasta==0.2.0
grpcio==1.32.0
h5py==2.10.0
idna==3.2
importlib-metadata==4.8.1
ipykernel==6.4.1
ipython==7.28.0
ipython-genutils==0.2.0
ipywidgets==7.6.5
itsdangerous==2.0.1
jedi==0.18.0
Jinja2==1.2
jsonschema==3.2.0
jupyter==1.0.0
jupyter-client==7.0.3
jupyter-console==6.4.0
jupyter-core==4.8.1
jupyterlab-pygments==0.1.2
jupyterlab-widgets==1.0.2
Keras-Preprocessing==1.1.2
Markdown==3.3.4
MarkupSafe==2.0.1
matplotlib-inline==0.1.3
mistune==0.8.4
nbclient==0.5.4
nbconvert==6.2.0
nbformat==5.1.3
nest-asyncio==1.5.1
notebook==6.4.4
numpy==1.19.5
oauthlib==3.1.1
opt-einsum==3.3.0
packaging==21.0
pandas==1.3.3
pandocfilters==1.5.0
parso==0.8.2
pexpect==4.8.0
pickleshare==0.7.5
prometheus-client==0.11.0
prompt-toolkit==3.0.20
protobuf==3.18.0
ptyprocess==0.7.0
pyasn1==0.4.8
pyasn1-modules==0.2.8
pycparser==2.20
Pygments==2.10.0
pyparsing==2.4.7
pyrsistent==0.18.0
python-dateutil==2.8.2
pytz==2021.1
PyYAML==5.4.1
pyzmq==22.3.0
qtconsole==5.1.1
QtPy==1.11.2
requests==2.26.0
requests-oauthlib==1.3.0
rsa==4.7.2
Send2Trash==1.8.0
six==1.15.0
tensorboard==2.6.0
tensorboard-data-server==0.6.1
tensorboard-plugin-wit==1.8.0
tensorflow==2.4.0
tensorflow-addons==0.14.0
tensorflow-estimator==2.4.0
termcolor==1.1.0
terminado==0.12.1
testpath==0.5.0
tornado==6.1
traitlets==5.1.0
typeguard==2.12.1
typing-extensions==3.7.4.3
urllib3==1.26.7
wcwidth==0.2.5
webencodings==0.5.1
Werkzeug==2.0.2
widgetsnbextension==3.5.1
wrapt==1.12.1
zipp==3.5.0
# coding:utf-8
def splittxt(ImagingConclusion):
ImagingConclusion = ImagingConclusion + "\n"
ImagingConclusion = ImagingConclusion.replace("\nAE1/AE3", " AE1/AE3")
ImagingConclusion = ImagingConclusion.replace("\nEGFR", " EGFR")
ImagingConclusion = ImagingConclusion.replace("\nCK", " CK")
ImagingConclusion = ImagingConclusion.replace(":\n", ": ")
txt = []
text = []
ImagingConclusionFrist = ""
CuttingEdge = ""
CuttingLymph = ""
MolecularResults = ""
Immunohistochemistry = ""
CuttingEdgeID = -1
CuttingLymphID = -1
MolecularResultsID = -1
ImmunohistochemistryID = -1
delete = []
for i, char in enumerate(ImagingConclusion):
if char != '\n':
txt.append(char)
else:
if txt != []:
s = ''.join(txt)
text.append(s)
txt = []
for i, block in enumerate(text):
if block.find("送检切缘") != -1:
CuttingEdgeID = i
CuttingEdge = CuttingEdge + block + "\n"
delete.append(CuttingEdgeID)
elif block.find("送检淋巴结") != -1:
CuttingLymphID = i
CuttingLymph = CuttingLymph + block + "\n"
delete.append(CuttingLymphID)
elif block.find("分子结果") != -1:
MolecularResultsID = i
MolecularResults = MolecularResults + block + "\n"
delete.append(MolecularResultsID)
elif block.find("免疫组化结果") != -1:
ImmunohistochemistryID = i
Immunohistochemistry = Immunohistochemistry + block + "\n"
delete.append(ImmunohistochemistryID)
j = 0
for i in range(len(text)):
if i in delete:
text.pop(j)
else:
ImagingConclusionFrist = ImagingConclusionFrist + text[j] + "\n"
j = j + 1
MolecularResults = MolecularResults.replace(":", "")
Immunohistochemistry = Immunohistochemistry.replace(":", "")
MolecularResults = MolecularResults[MolecularResults.find("分子结果") + 4:]
Immunohistochemistry = Immunohistochemistry[Immunohistochemistry.find("免疫组化结果") + 6:]
return ImagingConclusionFrist, CuttingEdge, CuttingLymph, MolecularResults, Immunohistochemistry
# print(splittxt(
# "原发灶:一带黏膜组织6*5*3cm,切面见一肿块3*2*2cm,灰白,界不清(1)\n送检切缘:前、后、内、外、底均0.5cm\n左颈大块:6*4*2cm,为脂肪血管及少量腺体,灰黄。\n左I区: 7只直径0.8-1.2cm。\n左II区: 1只直径1cm。\n左III区: 1只直径1.5cm。\n\n“左舌”黏膜鳞状细胞癌,高-中分化,DOI>10mm\n送检切缘:“前、后、内、外、底”均阴性(-)\n“左颌下腺”轻度慢性炎\n送检淋巴结:“左”“I”1/7只有肿瘤转移(+),余及“II”1只(为软组织),“III”1只(为软组织)均阴性(-)\n免疫组化结果NI21-668\nAE1/AE3+ CKH+ CK5/6+ EGFR部分+ Ki67部分+ CD31- S-100- P16-\n北院分子结果(NM2021-0302):EGFR扩增探针 FISH(未见明显扩增(-))\n"))
if __name__ == '__main__':
a = splittxt("""
“右上颌”黏膜鳞状细胞癌,高-中分化,DOI>10mm
“右颌下腺”慢性炎
送检淋巴结:“右I区”1/5只(其中1只为软组织)有肿瘤转移(+),余及“右II区”6只、“右III区”6只、“右IV区”1只(为软组织)、“右V区”10只均阴性(-)
南院分子结果(M2021-1469):EGFR扩增探针 FISH(-)
南院免疫组化结果(I2021-3111):CKH(+),CK5/6(+),P16(-),Ki67(热点区约30-40%+),CD31(-),S100(-),EGFR(+),P53(-)。
""")
for i in a:
print(i.strip())
print('---------------------------')
import decimal
def pN(num, d, ENE):
cnt = ""
if num == 0:
cnt = "pN0"
elif num == 1 and d <= 3 and ENE == '无':
cnt = "pN1"
else:
cnt = "pN2+"
return cnt
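# Mapping sketch: pN(0, 0, '无') -> 'pN0'; pN(1, 2.5, '无') -> 'pN1';
# more than one positive node, a diameter above 3 cm, or extranodal
# extension (ENE == '有') all fall through to 'pN2+'.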
def differentiation(txt):
cnt = [0, 0, 0]
ans = ""
if txt.find("高") != -1:
cnt[0] = 1
ans = ans + "Ⅰ级高分化\n"
if txt.find("中") != -1:
cnt[1] = 1
ans = ans + "Ⅱ级中分化\n"
if txt.find("低") != -1:
cnt[2] = 1
ans = ans + "Ⅲ级低分化\n"
if cnt == [0, 0, 0]:
ans = ans + "Ⅳ级未分化\n"
return cnt, ans
def exactNumber(txt):
cnt = []
number = ""
for i, char in enumerate(txt):
if char in "0123456789./":
number = number + char
else:
cnt.append(number)
number = ""
cnt.append(number)
cnt = [i for i in cnt if i != '']
return cnt
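# Illustrative calls: exactNumber('6*5*4cm') -> ['6', '5', '4'],
# exactNumber('1/3只') -> ['1/3']; digits, '.' and '/' are kept together,
# while any other character terminates the current number.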
def pT(txt):
txt = txt.replace(" ", "")
cnt = ''
if txt.find(">10mm") != -1:
cnt = "pt3"
return cnt
elif txt.find(">5mm") != -1:
cnt = "pt2"
return cnt
score = max([decimal.Decimal(i) for i in exactNumber(txt)])
if score <= 5:
cnt = "pT1"
elif score > 5 and score <= 10:
cnt = "pT2"
elif score > 10:
cnt = "pT3"
return cnt
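# Mapping sketch (depth of invasion in mm): text containing '>10mm' -> 'pT3',
# '>5mm' -> 'pT2'; otherwise the largest number found decides:
# <=5 -> 'pT1', >5 and <=10 -> 'pT2', >10 -> 'pT3'.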
def findDegree(txt):
cnt = [0, 0, 0]
ans = ""
if txt.find("轻") != -1:
cnt[0] = 1
ans = ans + "轻度\n"
if txt.find("中") != -1:
cnt[1] = 1
ans = ans + "中度\n"
if txt.find("重") != -1:
cnt[2] = 1
ans = ans + "重度\n"
if cnt == [0, 0, 0]:
ans = ans + ""
return ans
def findlymph(txt):
if txt.find("淋巴结") != -1:
return 1
else:
return 0
def CuttingEdgePathology(txt):
cnt = ""
if txt.find("阳性") != -1 or txt.find("+") != -1:
cnt = "阳性(+)"
elif txt.find("异常增生") != -1:
cnt = "有黏膜上皮异常增生"
elif txt.find("阴性") != -1 or txt.find("-") != -1:
cnt = "阴性(-)"
else:
cnt = "其他情况"
return cnt
def FindChar(txt):
cnt = []
ans = []
charlist = ["分子结果", "免疫组化结果", "(", "(", ":", ":"]
for i, char in enumerate(charlist):
cnt.append(txt.find(char))
for i, flag in enumerate(cnt):
if flag != -1:
ans.append(flag)
ans.append(-1)
return ans
if __name__ == '__main__':
print(exactNumber('mm'))