Commit 5dd18bcf authored by 陶书衡's avatar 陶书衡

init

parents
# 默认忽略的文件
/shelf/
/workspace.xml
# 基于编辑器的 HTTP 客户端请求
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.7 (tf-latest-base)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
\ No newline at end of file
This diff is collapsed.
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7 (tf-latest-base)" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/OralAPI.iml" filepath="$PROJECT_DIR$/.idea/OralAPI.iml" />
</modules>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>
\ No newline at end of file
import numpy as np
import tensorflow as tf
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.python import keras
from CRF import CRF
# from CRF import CRF
class BiLSTMCRF:
    """Sequence tagger: embedding -> two stacked bidirectional LSTMs -> CRF layer.

    The Keras model is built and compiled in the constructor and stored on
    ``self.net``. The ``CRF`` layer comes from the project-local ``CRF`` module;
    its loss and accuracy callables are used when compiling.
    """

    def __init__(self, vocabSize, maxLen, tagIndexDict, tagSum,
                 sequenceLengths=None, vecSize=100, learning_rate=0.01):
        """Store the hyper-parameters and build the network.

        Args:
            vocabSize: Input dimension of the embedding layer.
            maxLen: Length of the (padded) input sequences.
            tagIndexDict: Mapping from tag name to tag index.
            tagSum: Number of tags; also used as the LSTM unit count and the
                CRF output size.
            sequenceLengths: Optional per-sample lengths; derived from ``y``
                in :meth:`fit` when left as ``None``.
            vecSize: Embedding vector size.
            learning_rate: Learning rate passed to the Adam optimizer.
        """
        # Drop any previous Keras graph/session state before building a new model.
        keras.backend.clear_session()
        self.vocabSize = vocabSize
        self.vecSize = vecSize
        self.maxLen = maxLen
        self.tagSum = tagSum
        self.sequenceLengths = sequenceLengths
        self.tagIndexDict = tagIndexDict
        self.learning_rate = learning_rate
        self.buildBiLSTMCRF()

    def getTransParam(self, y, tagIndexDict):
        """Estimate a row-normalised tag transition matrix from one-hot labels.

        Args:
            y: Array whose last axis is argmax-ed into tag indices; after the
                argmax each row is treated as one tag sequence.
            tagIndexDict: Mapping from tag name to index; its size fixes the
                matrix dimensions.

        Returns:
            A square ``numpy`` array where entry ``[a][b]`` is the observed
            frequency of tag ``b`` directly following tag ``a``. Rows for tags
            that never have a successor are left as all zeros.
        """
        self.trainY = np.argmax(y, axis=-1)
        tag_count = len(tagIndexDict)
        transParam = np.zeros([tag_count, tag_count])
        for sequence in self.trainY.tolist():
            for current, following in zip(sequence, sequence[1:]):
                transParam[current][following] += 1
        # Only normalise rows that actually saw a transition; dividing an
        # all-zero row by its zero sum would fill it with NaN.
        row_totals = transParam.sum(axis=1)
        seen = row_totals > 0
        transParam[seen] /= row_totals[seen][:, np.newaxis]
        return transParam

    def buildBiLSTMCRF(self):
        """Assemble and compile the Keras model and store it on ``self.net``."""
        model = Sequential()
        model.add(tf.keras.layers.Input(shape=(self.maxLen,)))
        model.add(tf.keras.layers.Embedding(self.vocabSize, self.vecSize))
        model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(
            self.tagSum, return_sequences=True, activation="tanh"), merge_mode='sum'))
        model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(
            self.tagSum, return_sequences=True, activation="softmax"), merge_mode='sum'))
        crf = CRF(self.tagSum, name='crf_layer')
        model.add(crf)
        model.compile(Adam(learning_rate=self.learning_rate), loss={
            'crf_layer': crf.get_loss}, metrics=[crf.get_accuracy])
        self.net = model

    def fit(self, X, y, epochs=100, batchsize=32):
        """Train the network and return the Keras ``History`` object.

        Args:
            X: Model inputs, passed straight to ``self.net.fit``.
            y: Labels; a 3-D array is argmax-ed over its last axis first.
            epochs: Maximum number of epochs.
            batchsize: Mini-batch size.
        """
        if len(y.shape) == 3:
            y = np.argmax(y, axis=-1)
        if self.sequenceLengths is None:
            self.sequenceLengths = [row.shape[0] for row in y]
        callbacks_list = [
            tf.keras.callbacks.History(),
            tf.keras.callbacks.ReduceLROnPlateau(monitor='loss', factor=0.5, patience=5,
                                                 verbose=1, mode='auto', min_lr=1e-9),
            # Best weights (by the monitored metric) are written to model/model.h5.
            tf.keras.callbacks.ModelCheckpoint("model/model.h5", monitor='get_accuracy',
                                               verbose=0, save_best_only=True,
                                               save_weights_only=True, mode='auto', period=1),
            tf.keras.callbacks.EarlyStopping(
                monitor='loss', min_delta=1e-5, patience=10),
            TensorBoard(log_dir="logs", histogram_freq=1)
            # WeightsSaver(1)
        ]
        history = self.net.fit(
            X, y, epochs=epochs, callbacks=callbacks_list, batch_size=batchsize)
        return history

    def predict(self, X):
        """Return the raw output of ``self.net.predict`` for ``X``."""
        preYArr = self.net.predict(X)
        return preYArr

    def load_weights(self, model_path):
        """Load previously saved weights from ``model_path`` into the network."""
        self.net.load_weights(model_path)
This diff is collapsed.
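# Entity extraction from lung-nodule CT imaging reports.
# NOTE(review): this header looks copied from another project; the routes below serve the "oral" blueprint — confirm and update.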
import json
import copy
import requests
import uuid
from flask import Flask, request, redirect, url_for, render_template, flash, jsonify, Blueprint
from model import Oral
oral_api = Blueprint('oral', __name__)
@oral_api.route('/')
def show():
    """Return a fixed plain-text banner for the blueprint root."""
    banner = 'This is oral api.'
    return banner
@oral_api.route('/recg/', methods = ['POST'])
def recognize():
    """Run entity recognition on a posted report and return the result as JSON.

    Form fields read from the request:
        finding: Required, non-empty text.
        conclusion: Required, non-empty text.
        verbose: Optional integer (defaults to 0); any non-zero value turns on
            console output here and is forwarded to ``Oral`` as a truthy flag.

    Returns:
        A ``(response, status)`` tuple. On success the JSON body is
        ``{'success': True, 'description': {'data': ...}}`` with status 200;
        validation and processing failures return ``success: False`` with an
        ``'error msg'`` and status 500.
    """
    if request.method != 'POST':
        return jsonify({'success': False, 'description': {'error msg': 'Invalid methods'}}), 404

    finding = request.form.get('finding')
    conclusion = request.form.get('conclusion')
    verbose = request.form.get('verbose', default = 0)
    try:
        verbose = int(verbose)
    except (TypeError, ValueError):
        return jsonify({'success': False, 'description': {'error msg': 'verbose can be only 0 or 1'}}), 500

    if verbose != 0:
        print()
        print(finding)
        print(conclusion)

    if finding is None or conclusion is None:
        return jsonify({'success': False, 'description': {'error msg': 'invalid post body fields'}}), 500
    if finding == '' or conclusion == '':
        return jsonify(
            {'success': False, 'description': {'error msg': 'findings or conclusions cannot be empty'}}), 500

    try:
        print('' if verbose == 0 else 'verbose out:')
        oral = Oral(finding, conclusion, verbose = False if verbose == 0 else 1)
        data = oral.get_json()
        return jsonify({'success': True, 'description': {'data': data}}), 200
    except Exception as e:
        # Top-level boundary: dump the failing inputs to the console for diagnosis.
        print("\n******ERROR SRART******\n")
        print(e)
        print("----------findind----------")
        print(finding)
        print("---------conclusion--------")
        print(conclusion)
        print("\n*******ERROR END*******\n")
        # An exception instance is not JSON-serializable; send its message text
        # so building the error response cannot itself fail.
        return jsonify({'success': False, 'description': {'error msg': str(e)}}), 500
vocabSize:497
maxLen:177
classSum:21
from flask import Flask, request, redirect, url_for, render_template, flash, jsonify, Blueprint
from api import oral_api
import os

app = Flask(__name__)
# The session-signing key should come from the environment; the literal is kept
# only as a fallback so existing deployments keep working.
# NOTE(review): set ORAL_SECRET_KEY in every real deployment.
app.secret_key = os.environ.get('ORAL_SECRET_KEY', '1234567')
app.register_blueprint(oral_api, url_prefix = '/oral')

if __name__ == '__main__':
    # from werkzeug.contrib.fixers import ProxyFix
    # app.wsgi_app = ProxyFix(app.wsgi_app)
    # The server binds to every interface, so the interactive debugger (which
    # allows arbitrary code execution) must be opt-in rather than always on.
    debug = os.environ.get('FLASK_DEBUG', '0').strip().lower() in ('1', 'true', 'yes', 'on')
    app.run(debug = debug, port = 5004, host = '0.0.0.0')
This diff is collapsed.
File added
M
h
,
>
Z
L
H
6
p
i
G
3
V
F
.
(
W
<
R
O
T
UNK
"
b
t
;
A
:
1
P
S
]
线
K
[
/
I
U
m
a
B
C
%
8
D
-
l
4
E
N
c
Y
)
n
×
*
0
r
2
绿
5
?
9
7
w
+
\ No newline at end of file
import csv
import json
import os
from tensorflow.keras.preprocessing.sequence import pad_sequences
from BiLSTMCRF import BiLSTMCRF
# Locations of the trained weights and the vocabulary file (one entry per line).
model_path = 'model/model.h5'
vocab_path = 'model/vocab.txt'

# Entity types in index order; each contributes a "B-" tag followed by an "I-" tag.
_ENTITY_TYPES = (
    "NUMBER",
    "SIZE",
    "ENE",
    "ANATOMY",
    "SQUAMOUS",
    "INVASION",
    "PN",
    "LEVEL",
    "OTHER",
    "DOI",
)

# Tag name -> tag index: "O" is 0, then B-/I- pairs numbered consecutively.
class_dict = {"O": 0}
class_dict.update({
    f"{prefix}-{entity}": 2 * position + offset
    for position, entity in enumerate(_ENTITY_TYPES)
    for offset, prefix in enumerate("BI", start=1)
})

# Padded input length and number of tag classes handed to the model.
maxLen = 500
classSum = 21
def build_input(text):
    """Encode ``text`` character by character and pad it to ``maxLen``.

    Characters missing from ``word_dict`` are looked up under the ``'UNK'``
    entry. The id list is wrapped as a single-row batch and post-padded via
    ``pad_sequences``.
    """
    ids = [
        word_dict.get(ch if ch in word_dict else 'UNK')
        for ch in text
    ]
    return pad_sequences([ids], padding = 'post', maxlen = maxLen)
def load_worddict(path=None):
    """Read the vocabulary file and map each entry to its line index.

    Args:
        path: Vocabulary file to read, one entry per line (UTF-8). Defaults to
            the module-level ``vocab_path``.

    Returns:
        Dict mapping each stripped line to its zero-based line number. If the
        same entry appears more than once, the last occurrence wins.
    """
    if path is None:
        path = vocab_path
    # Context manager so the file handle is closed deterministically.
    with open(path, encoding = 'utf-8') as vocab_file:
        vocabs = [line.strip() for line in vocab_file]
    return {wd: index for index, wd in enumerate(vocabs)}
def predict(text):
    """Tag every character of ``text`` with the module-level model.

    Returns:
        A pair ``(pairs, tags)``: ``pairs`` is a list of ``(char, tag)``
        tuples and ``tags`` is the list of tag names, cut to at most
        ``len(text)`` entries.
    """
    encoded = build_input(text)
    tag_ids = model.predict(encoded)[0]
    # The model output covers the padded length; keep only the text's positions.
    tags = [label_dict[tag_id] for tag_id in tag_ids][:len(text)]
    pairs = list(zip(list(text), tags))
    return pairs, list(tags)
def output(txt, cnt):
    """Group B-/I-/O tags into entity spans.

    Args:
        txt: The tagged text.
        cnt: Sequence of tag names aligned with ``txt`` (``'O'``, ``'B-XXX'``,
            ``'I-XXX'``).

    Returns:
        List of ``[label, text, start, end]`` entries, where ``start`` and
        ``end`` are inclusive character indices. A span opens at a ``B-`` tag
        and closes at the next ``'O'`` or ``B-`` tag, or at the end of the
        tag sequence.
    """
    entities = []
    in_entity = False
    start = 0
    label = ""
    for i, tag in enumerate(cnt):
        if tag == 'O':
            if in_entity:
                entities.append([label, txt[start:i], start, i - 1])
                in_entity = False
            continue
        if tag.split("-")[0] == 'B':
            if in_entity:
                # A new entity starts right after the previous one.
                entities.append([label, txt[start:i], start, i - 1])
            in_entity = True
            start = i
            label = tag.split("-")[1]
    if in_entity:
        # Flush the entity still open when the tags run out; without this the
        # last entity of a sequence would be dropped.
        end = len(cnt) - 1
        entities.append([label, txt[start:end + 1], start, end])
    return entities
# Import-time setup: vocabulary lookup, index -> tag lookup, and the tagger
# with its trained weights loaded.
word_dict = load_worddict()
# NOTE(review): one more than the vocabulary size — presumably headroom for an
# extra id; confirm against how the model was trained.
vocabSize = 1 + len(word_dict)
label_dict = dict(zip(class_dict.values(), class_dict.keys()))
model = BiLSTMCRF(
    vocabSize = vocabSize,
    maxLen = maxLen,
    tagIndexDict = class_dict,
    tagSum = classSum,
)
model.load_weights(model_path)
if __name__ == '__main__':
    # Manual smoke test: tag a sample report, print each (char, tag) pair and
    # then the grouped entity spans.
    sample = """
“右舌”鳞状细胞癌(复发),高-中分化,灶性多核巨细胞浸润,肿瘤侵犯神经。送检淋巴结:“左颌下”1只、“颏下”1只均阴性(-)
"""
    tagged_pairs, tag_names = predict(sample)
    for pair in tagged_pairs:
        print(pair)
    print(output(sample, tag_names))
absl-py==0.14.0
appnope==0.1.2
argcomplete==1.12.3
argon2-cffi==21.1.0
astunparse==1.6.3
attrs==21.2.0
backcall==0.2.0
bleach==4.1.0
cachetools==4.2.2
certifi==2021.10.8
cffi==1.14.6
charset-normalizer==2.0.6
click==8.0.3
cn2an==0.5.11
debugpy==1.4.3
decorator==5.1.0
defusedxml==0.7.1
entrypoints==0.3
Flask==2.0.2
Flask-Login==0.5.0
flatbuffers==1.12
gast==0.3.3
google-auth==1.35.0
google-auth-oauthlib==0.4.6
google-pasta==0.2.0
grpcio==1.32.0
h5py==2.10.0
idna==3.2
importlib-metadata==4.8.1
ipykernel==6.4.1
ipython==7.28.0
ipython-genutils==0.2.0
ipywidgets==7.6.5
itsdangerous==2.0.1
jedi==0.18.0
Jinja2==3.0.2
jsonschema==3.2.0
jupyter==1.0.0
jupyter-client==7.0.3
jupyter-console==6.4.0
jupyter-core==4.8.1
jupyterlab-pygments==0.1.2
jupyterlab-widgets==1.0.2
Keras-Preprocessing==1.1.2
Markdown==3.3.4
MarkupSafe==2.0.1
matplotlib-inline==0.1.3
mistune==0.8.4
nbclient==0.5.4
nbconvert==6.2.0
nbformat==5.1.3
nest-asyncio==1.5.1
notebook==6.4.4
numpy==1.19.5
oauthlib==3.1.1
opt-einsum==3.3.0
packaging==21.0
pandas==1.3.3
pandocfilters==1.5.0
parso==0.8.2
pexpect==4.8.0
pickleshare==0.7.5
prometheus-client==0.11.0
prompt-toolkit==3.0.20
protobuf==3.18.0
ptyprocess==0.7.0
pyasn1==0.4.8
pyasn1-modules==0.2.8
pycparser==2.20
Pygments==2.10.0
pyparsing==2.4.7
pyrsistent==0.18.0
python-dateutil==2.8.2
pytz==2021.1
PyYAML==5.4.1
pyzmq==22.3.0
qtconsole==5.1.1
QtPy==1.11.2
requests==2.26.0
requests-oauthlib==1.3.0
rsa==4.7.2
Send2Trash==1.8.0
six==1.15.0
tensorboard==2.6.0
tensorboard-data-server==0.6.1
tensorboard-plugin-wit==1.8.0
tensorflow==2.4.0
tensorflow-addons==0.14.0
tensorflow-estimator==2.4.0
termcolor==1.1.0
terminado==0.12.1
testpath==0.5.0
tornado==6.1
traitlets==5.1.0
typeguard==2.12.1
typing-extensions==3.7.4.3
urllib3==1.26.7
wcwidth==0.2.5
webencodings==0.5.1
Werkzeug==2.0.2
widgetsnbextension==3.5.1
wrapt==1.12.1
zipp==3.5.0
\ No newline at end of file
# coding:utf-8
def splittxt(ImagingConclusion):
    """Split a report into its free-text part and four keyword-tagged sections.

    Line breaks directly before ``AE1/AE3``, ``EGFR`` or ``CK`` and directly
    after a colon are first turned into spaces, so wrapped result lists stay on
    the line that introduces them. Empty lines are dropped. Each remaining line
    goes to the first section whose keyword it contains (checked in the order
    below); lines matching none form the leading free-text part.

    Args:
        ImagingConclusion: Raw report text.

    Returns:
        A 5-tuple of strings, each a concatenation of its lines terminated by
        ``"\\n"``:
        ``(other_lines, cutting_edge, cutting_lymph, molecular, immunohistochemistry)``.
        The last two start just after the first occurrence of their keyword.
    """
    report = ImagingConclusion + "\n"
    for wrapped, rejoined in (
        ("\nAE1/AE3", " AE1/AE3"),
        ("\nEGFR", " EGFR"),
        ("\nCK", " CK"),
        (":\n", ": "),
    ):
        report = report.replace(wrapped, rejoined)

    blocks = [line for line in report.split("\n") if line]

    # Order matters: a line is filed under the first keyword it contains.
    keywords = ("送检切缘", "送检淋巴结", "分子结果", "免疫组化结果")
    grouped = {keyword: [] for keyword in keywords}
    unmatched = []
    for block in blocks:
        for keyword in keywords:
            if keyword in block:
                grouped[keyword].append(block)
                break
        else:
            unmatched.append(block)

    def _joined(parts):
        # Every line keeps its own trailing newline.
        return "".join(part + "\n" for part in parts)

    ImagingConclusionFrist = _joined(unmatched)
    CuttingEdge = _joined(grouped["送检切缘"])
    CuttingLymph = _joined(grouped["送检淋巴结"])
    MolecularResults = _joined(grouped["分子结果"])
    Immunohistochemistry = _joined(grouped["免疫组化结果"])

    # NOTE(review): the previous code called .replace(":", "") on the two
    # strings below but discarded the result, so colons were never removed.
    # That observable behaviour is kept; confirm whether stripping was intended.
    MolecularResults = MolecularResults[MolecularResults.find("分子结果") + 4:]
    Immunohistochemistry = Immunohistochemistry[Immunohistochemistry.find("免疫组化结果") + 6:]
    return ImagingConclusionFrist, CuttingEdge, CuttingLymph, MolecularResults, Immunohistochemistry
# print(splittxt(
# "原发灶:一带黏膜组织6*5*3cm,切面见一肿块3*2*2cm,灰白,界不清(1)\n送检切缘:前、后、内、外、底均0.5cm\n左颈大块:6*4*2cm,为脂肪血管及少量腺体,灰黄。\n左I区: 7只直径0.8-1.2cm。\n左II区: 1只直径1cm。\n左III区: 1只直径1.5cm。\n\n“左舌”黏膜鳞状细胞癌,高-中分化,DOI>10mm\n送检切缘:“前、后、内、外、底”均阴性(-)\n“左颌下腺”轻度慢性炎\n送检淋巴结:“左”“I”1/7只有肿瘤转移(+),余及“II”1只(为软组织),“III”1只(为软组织)均阴性(-)\n免疫组化结果NI21-668\nAE1/AE3+ CKH+ CK5/6+ EGFR部分+ Ki67部分+ CD31- S-100- P16-\n北院分子结果(NM2021-0302):EGFR扩增探针 FISH(未见明显扩增(-))\n"))
if __name__ == '__main__':
    # Manual smoke test: split a sample report and print each section,
    # followed by a separator line.
    sections = splittxt("""
“右上颌”黏膜鳞状细胞癌,高-中分化,DOI>10mm
“右颌下腺”慢性炎
送检淋巴结:“右I区”1/5只(其中1只为软组织)有肿瘤转移(+),余及“右II区”6只、“右III区”6只、“右IV区”1只(为软组织)、“右V区”10只均阴性(-)
南院分子结果(M2021-1469):EGFR扩增探针 FISH(-)
南院免疫组化结果(I2021-3111):CKH(+),CK5/6(+),P16(-),Ki67(热点区约30-40%+),CD31(-),S100(-),EGFR(+),P53(-)。
""")
    for section in sections:
        print(section.strip())
        print('---------------------------')
import decimal
def pN(num, d, ENE):
    """Return the pN label for the given inputs.

    ``"pN0"`` when ``num`` is 0; ``"pN1"`` when ``num`` is 1, ``d`` is at most
    3 and ``ENE`` equals ``'无'``; ``"pN2+"`` in every other case.
    """
    if num == 0:
        return "pN0"
    single_small_without_ene = num == 1 and d <= 3 and ENE == '无'
    return "pN1" if single_small_without_ene else "pN2+"
def differentiation(txt):
    """Detect which differentiation markers occur in ``txt``.

    Returns:
        ``(flags, summary)``: ``flags`` is a three-item 0/1 list for the
        markers "高", "中", "低" in that order; ``summary`` concatenates the
        label line of every marker found, or holds the single "undifferentiated"
        line when none is found.
    """
    grades = (
        ("高", "Ⅰ级高分化\n"),
        ("中", "Ⅱ级中分化\n"),
        ("低", "Ⅲ级低分化\n"),
    )
    flags = [1 if marker in txt else 0 for marker, _ in grades]
    summary = "".join(
        label for (_, label), hit in zip(grades, flags) if hit
    )
    if not any(flags):
        summary += "Ⅳ级未分化\n"
    return flags, summary
def exactNumber(txt):
    """Return every maximal run of the characters ``0-9``, ``.`` and ``/`` in ``txt``.

    Runs are returned as strings in order of appearance; a text with no such
    characters yields an empty list.
    """
    allowed = "0123456789./"
    # Blank out everything else, then let whitespace splitting cut the runs.
    masked = "".join(ch if ch in allowed else " " for ch in txt)
    return masked.split()
def pT(txt):
    """Derive a pT label from a depth description.

    A literal ``>10mm`` or ``>5mm`` marker (spaces ignored) decides the label
    directly. Otherwise the largest number found in ``txt`` is compared against
    the 5 and 10 thresholds.

    Args:
        txt: Text containing the depth measurement.

    Returns:
        ``"pT1"``, ``"pT2"`` or ``"pT3"``; an empty string when ``txt``
        contains no usable number.
    """
    # str.replace returns a new string, so keep the result; the compacted text
    # is used only for the marker checks, leaving number boundaries untouched.
    compact = txt.replace(" ", "")
    if ">10mm" in compact:
        return "pT3"
    if ">5mm" in compact:
        return "pT2"

    depths = []
    for token in exactNumber(txt):
        try:
            depths.append(decimal.Decimal(token))
        except decimal.InvalidOperation:
            # Runs such as "1/5" or a lone "." are not decimal numbers.
            continue
    if not depths:
        return ''

    score = max(depths)
    if score <= 5:
        return "pT1"
    if score <= 10:
        return "pT2"
    return "pT3"
def findDegree(txt):
    """List the severity markers found in ``txt``.

    Returns a string with one line per marker present, in the fixed order
    "轻", "中", "重"; an empty string when none occurs.
    """
    degrees = (
        ("轻", "轻度\n"),
        ("中", "中度\n"),
        ("重", "重度\n"),
    )
    return "".join(label for marker, label in degrees if marker in txt)
def findlymph(txt):
    """Return 1 when ``txt`` contains "淋巴结", otherwise 0."""
    return 1 if "淋巴结" in txt else 0
def CuttingEdgePathology(txt):
    """Classify a margin description by the first matching marker.

    Checks run in this order: positive ("阳性" or "+"), abnormal hyperplasia
    ("异常增生"), negative ("阴性" or "-"); anything else is reported as
    "other".
    """
    if "阳性" in txt or "+" in txt:
        return "阳性(+)"
    if "异常增生" in txt:
        return "有黏膜上皮异常增生"
    if "阴性" in txt or "-" in txt:
        return "阴性(-)"
    return "其他情况"
def FindChar(txt):
    """Return the first-occurrence index of each marker present in ``txt``.

    Markers are searched in list order and those that are absent are skipped.
    A trailing ``-1`` is always appended to the result.
    """
    markers = ["分子结果", "免疫组化结果", "(", "(", ":", ":"]
    positions = [txt.find(marker) for marker in markers]
    found = [position for position in positions if position != -1]
    found.append(-1)
    return found
if __name__ == '__main__':
    # Quick manual check: a text without digits, '.' or '/'.
    extracted = exactNumber('mm')
    print(extracted)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment