import json import os import sys import traceback from multiprocessing import Pool, cpu_count import SimpleITK as sitk from pydicom import dicomio import lmdb import numpy as np from datetime import datetime from resample import ItkResample BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) # 添加路径 sys.path.append(BASE_DIR) class MyEncoder(json.JSONEncoder): def default(self, obj): if isinstance(obj, np.integer): return int(obj) elif isinstance(obj, np.floating): return float(obj) elif isinstance(obj, np.ndarray): return obj.tolist() elif isinstance(obj, datetime): return obj.__str__() else: return super(MyEncoder, self).default(obj) def load_ct_from_dicom(dcm_path, sort_by_distance=True): class DcmInfo(object): def __init__(self, dcm_path, series_instance_uid, acquisition_number, sop_instance_uid, instance_number, image_orientation_patient, image_position_patient): super(DcmInfo, self).__init__() self.dcm_path = dcm_path self.series_instance_uid = series_instance_uid self.acquisition_number = acquisition_number self.sop_instance_uid = sop_instance_uid self.instance_number = instance_number self.image_orientation_patient = image_orientation_patient self.image_position_patient = image_position_patient self.slice_distance = self._cal_distance() def _cal_distance(self): normal = [self.image_orientation_patient[1] * self.image_orientation_patient[5] - self.image_orientation_patient[2] * self.image_orientation_patient[4], self.image_orientation_patient[2] * self.image_orientation_patient[3] - self.image_orientation_patient[0] * self.image_orientation_patient[5], self.image_orientation_patient[0] * self.image_orientation_patient[4] - self.image_orientation_patient[1] * self.image_orientation_patient[3]] distance = 0 for i in range(3): distance += normal[i] * self.image_position_patient[i] return distance def is_sop_instance_uid_exist(dcm_info, dcm_infos): for item in dcm_infos: if dcm_info.sop_instance_uid == item.sop_instance_uid: return True return False def get_dcm_path(dcm_info): return dcm_info.dcm_path reader = sitk.ImageSeriesReader() if sort_by_distance: dcm_infos = [] files = os.listdir(dcm_path) for file in files: file_path = os.path.join(dcm_path, file) dcm = dicomio.read_file(file_path, force=True) _series_instance_uid = dcm.SeriesInstanceUID _sop_instance_uid = dcm.SOPInstanceUID _instance_number = dcm.InstanceNumber _acquisition_number = dcm.AcquisitionNumber _image_orientation_patient = dcm.ImageOrientationPatient _image_position_patient = dcm.ImagePositionPatient dcm_info = DcmInfo(file_path, _series_instance_uid, _acquisition_number, _sop_instance_uid, _instance_number, _image_orientation_patient, _image_position_patient) if is_sop_instance_uid_exist(dcm_info, dcm_infos): continue dcm_infos.append(dcm_info) dcm_infos.sort(key=lambda x: x.slice_distance) dcm_series = list(map(get_dcm_path, dcm_infos)) else: dcm_series = reader.GetGDCMSeriesFileNames(dcm_path) reader.SetFileNames(dcm_series) sitk_image = reader.Execute() return sitk_image class Trans_Dicom_2_NPY(object): def __init__(self, in_path, out_path, crop_size, diameter_th, given_label_id, datainfos): ''' :param in_path: path of dicom :param out_path: path of nii.gz ''' self.in_path = in_path self.out_path = out_path self.crop_size = 48 self.diameter_th = diameter_th self.given_label_id = given_label_id self.datainfos = datainfos self.suid_list = [] for info in datainfos: self.suid_list.append(info[-2][0].split('/')[-2]) # print('self.suid_list', self.suid_list) # print(self.suid_list) # self.target_spacing = 1 # self.itk_resample = ItkResample() if not os.path.exists(self.out_path): os.makedirs(self.out_path) def __call__(self): uids = os.listdir(self.in_path) # pool = Pool(int(cpu_count() * 0.7)) for uid in uids: if not os.path.exists(os.path.join(self.out_path, uid + '_data.npy')) or not os.path.exists(os.path.join(self.out_path, uid + '_info.npy')): # working_otuput = pool.apply_async(self._single_transform, (uid, )) self._single_transform(uid) # whole_data = working_otuput['data'] # whole_data = np.concatenate(whole_data, axis=0) # whole_info = working_otuput['info'] # whole_info = np.concatenate(whole_info, axis=0) # np.save(os.path.join(self.out_path, uid + '_data.npy'), whole_data) # np.save(os.path.join(self.out_path, uid + '_info.npy'), whole_info) # pool.close() # pool.join() def _single_transform(self, uid): # dcm -> npy x_min_ratio = 0.1491 x_max_ratio = 0.8442 y_min_ratio = 0.2685 y_max_ratio = 0.7606 z_min_ratio = 0.1330 z_max_ratio = 0.9143 print('Processing series uid %s' % uid) dcm_folder = os.path.join(self.in_path, uid) save_dir = os.path.join(self.out_path, uid) print(f'save_dir: {save_dir}') os.makedirs(save_dir, exist_ok=True) # get itkimage try: itk_image = load_ct_from_dicom(dcm_folder) except Exception as err: print('!!!!! Read %s throws exception %s.' % (uid, err)) return # resample spacing # print('Resample uid %s to spacing %.1f.' % (uid, self.target_spacing)) # itk_image_resample = self.itk_resample.resample_to_spacing( # itk_image=itk_image, # target_spacing=(tuple([self.target_spacing] * 3)), # interpolator=sitk.sitkBSpline) # itk_image_resample.SetOrigin(itk_image.GetOrigin()) # itk_image_resample.SetSpacing([self.target_spacing] * 3) output_data = [] output_info = [] # 将itk数据准换位npy格式,并且进行裁切 origin = itk_image.GetOrigin() spacing = itk_image.GetSpacing() array_image = sitk.GetArrayFromImage(itk_image) bz, by, bx = array_image.shape x_min_limit = x_min_ratio * bx x_max_limit = x_max_ratio * bx y_min_limit = y_min_ratio * by y_max_limit = y_max_ratio * by z_min_limit = z_min_ratio * bz z_max_limit = z_max_ratio * bz x_limit = 10/spacing[0] y_limit = 10/spacing[1] z_limit = 10/spacing[2] # print(uid) # print(self.suid_list) work_index = self.suid_list.index(uid) label_infos = json.load(open(self.datainfos[work_index][2][0], 'r'))['annotationSessions'] # outputs = {'data': [], 'info': []} # if len(label_infos) == 1: # infos = label_infos[0]['annotationSet'] # print('infos: ', infos) try: infos = label_infos[0]['annotationSet'] except: return for i, info in enumerate(infos): work_array = np.empty(9, dtype=object) print(info) left_point, right_point = info['coordinates'] left_point = np.array([float(left_point['z']), float(left_point['y']), float(left_point['x'])]) right_point = np.array([float(right_point['z']), float(right_point['y']), float(right_point['x'])]) left_point = ((left_point - origin) / spacing).astype(int) right_point = ((right_point - origin) / spacing).astype(int) center_point = ((left_point + right_point) / 2.).astype(int) max_index_diameter = max(abs(float(left_point[0]) - float(right_point[0])), abs(float(left_point[1]) - float(right_point[1])), abs(float(left_point[2]) - float(right_point[2]))) # 判断是否超出边界, 并进行调整 x_start = max(center_point[2] - self.crop_size // 2, 0) x_end = min(center_point[2] + self.crop_size // 2, bx-1) y_start = max(center_point[1] - self.crop_size // 2, 0) y_end = min(center_point[1] + self.crop_size // 2, by-1) z_start = max(center_point[0] - self.crop_size // 2, 0) z_end = min(center_point[0] + self.crop_size // 2, bz-1) if x_start == 0: center_point[2] = center_point[2] + abs(center_point[2] - self.crop_size // 2) if x_end == bx-1: center_point[2] = center_point[2] - abs(center_point[2] - self.crop_size // 2) if y_start == 0: center_point[1] = center_point[1] + abs(center_point[1] - self.crop_size // 2) if y_end == by-1: center_point[1] = center_point[1] - abs(center_point[1] - self.crop_size // 2) if z_start == 0: center_point[0] = center_point[0] + abs(center_point[0] - self.crop_size // 2) if z_end == bz-1: center_point[0] = center_point[0] - abs(center_point[0] - self.crop_size // 2) new_left = np.array([center_point[0] - self.crop_size // 2, center_point[1] - self.crop_size // 2, center_point[2] - self.crop_size // 2]) new_right = np.array([center_point[0] + self.crop_size // 2, center_point[1] + self.crop_size // 2, center_point[2] + self.crop_size // 2]) output_rect = array_image[int(new_left[0]) : int(new_right[0]), int(new_left[1]) : int(new_right[1]), int(new_left[2]) : int(new_right[2])] output_rect = output_rect[np.newaxis, :] x1 = left_point[0] y1 = left_point[1] z1 = left_point[2] x2 = right_point[0] y2 = right_point[1] z2 = right_point[2] x1 = (x1 - origin[0]) / spacing[0] x2 = (x2 - origin[1]) / spacing[1] y1 = (y1 - origin[2]) / spacing[2] y2 = (y2 - origin[0]) / spacing[0] z1 = (z1 - origin[1]) / spacing[1] z2 = (z2 - origin[2]) / spacing[2] is_limit = False if abs(x1-x_min_limit) npy print('Processing series uid %s' % uid) dcm_folder = os.path.join(self.in_path, uid) # get itkimage try: itk_image = load_ct_from_dicom(dcm_folder) except Exception as err: print('!!!!! Read %s throws exception %s.' % (uid, err)) return # resample spacing # print('Resample uid %s to spacing %.1f.' % (uid, self.target_spacing)) # itk_image_resample = self.itk_resample.resample_to_spacing( # itk_image=itk_image, # target_spacing=(tuple([self.target_spacing] * 3)), # interpolator=sitk.sitkBSpline) # itk_image_resample.SetOrigin(itk_image.GetOrigin()) # itk_image_resample.SetSpacing([self.target_spacing] * 3) output_data = [] output_info = [] # 将itk数据准换位npy格式,并且进行裁切 origin = itk_image.GetOrigin() spacing = itk_image.GetSpacing() array_image = sitk.GetArrayFromImage(itk_image) bz, by, bx = array_image.shape work_index = self.suid_list.index(uid) label_infos = json.load(open(self.datainfos[work_index][2][0], 'r'))['annotationSessions'] outputs = {'data': [], 'info': []} infos = label_infos[0]['annotationSet'] for i, info in enumerate(infos): work_array = np.empty(5, dtype=object) left_point, right_point = info['coordinates'] left_point = np.array([float(left_point['z']), float(left_point['y']), float(left_point['x'])]) right_point = np.array([float(right_point['z']), float(right_point['y']), float(right_point['x'])]) max_index_diameter = max(abs(float(left_point[0]) - float(right_point[0])), abs(float(left_point[1]) - float(right_point[1])), abs(float(left_point[2]) - float(right_point[2]))) left_point = ((left_point - origin) / spacing).astype(int) right_point = ((right_point - origin) / spacing).astype(int) center_point = ((left_point + right_point) / 2.).astype(int) # max_index_diameter = max(abs(float(left_point[0]) - float(right_point[0])), # abs(float(left_point[1]) - float(right_point[1])), # abs(float(left_point[2]) - float(right_point[2]))) # 判断是否超出边界, 并进行调整 x_start = max(center_point[2] - self.crop_size // 2, 0) x_end = min(center_point[2] + self.crop_size // 2, bx-1) y_start = max(center_point[1] - self.crop_size // 2, 0) y_end = min(center_point[1] + self.crop_size // 2, by-1) z_start = max(center_point[0] - self.crop_size // 2, 0) z_end = min(center_point[0] + self.crop_size // 2, bz-1) if x_start == 0: center_point[2] = center_point[2] + abs(center_point[2] - self.crop_size // 2) if x_end == bx-1: center_point[2] = center_point[2] - abs(center_point[2] - self.crop_size // 2) if y_start == 0: center_point[1] = center_point[1] + abs(center_point[1] - self.crop_size // 2) if y_end == by-1: center_point[1] = center_point[1] - abs(center_point[1] - self.crop_size // 2) if z_start == 0: center_point[0] = center_point[0] + abs(center_point[0] - self.crop_size // 2) if z_end == bz-1: center_point[0] = center_point[0] - abs(center_point[0] - self.crop_size // 2) new_left = np.array([center_point[0] - self.crop_size // 2, center_point[1] - self.crop_size // 2, center_point[2] - self.crop_size // 2]) new_right = np.array([center_point[0] + self.crop_size // 2, center_point[1] + self.crop_size // 2, center_point[2] + self.crop_size // 2]) output_rect = array_image[int(new_left[0]) : int(new_right[0]), int(new_left[1]) : int(new_right[1]), int(new_left[2]) : int(new_right[2])] output_rect = output_rect[np.newaxis, :] print(output_rect.shape) # 处理信息 work_array[0] = self.datainfos[work_index][0] ##uid work_array[1] = max_index_diameter work_array[2] = uid ##serise id work_array[3] = i ##index work_array[4] = self.datainfos[work_index][2][0] outputs['data'].append(output_rect) outputs['info'].append(work_array.reshape(-1, 5)) return outputs