import os
import sys
import torch
import math
import warnings
import random
import traceback
import numpy as np
# import horovod.torch as hvd
import pandas as pd
import torch.backends.cudnn as cudnn
from tqdm import tqdm
import json
# from apex import amp
import pdb
from tensorboardX import SummaryWriter
from torch.utils.data import DataLoader
from torch.autograd import Variable
from run_visualize import img2gif
from ..modeling.detector.nodulenet import NoduleNet, make_rpn_windows, get_anchors
from BaseDetector.data.collate import train_collate, test_collate, test_ddp_collate
from BaseDetector.engine import BaseDetection3D
from BaseDetector.engine.BaseDetector import DistributedSampler_new, Metric, save_checkpoint
from ..data import BboxReader_Nodule
from ..data import SubReader
from ..data import load_ct_from_dicom
from k8s_utils import CMetricsWriter
from NoduleDetector.data.split_combine import SplitComb
import torch.nn.functional as F
import copy
import SimpleITK as sitk
from BaseDetector.layers.overlap_3d import overlap_3d
from BaseDetector.modeling.util import box_transform_numpy, convert_xyxyzz, box_transform_inv, clip_boxes, convert_zyxdhw

warnings.filterwarnings("ignore")

this_module = sys.modules[__name__]

# Set seed
SEED = 35202
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)


def pad2factor(image, factor=torch.tensor(16.0), pad_value=torch.tensor(-1000)):
    # Pad depth/height/width up to the next multiple of `factor` with a constant fill value.
    _, _, depth, height, width = image.shape
    d = torch.ceil(depth / factor) * factor
    h = torch.ceil(height / factor) * factor
    w = torch.ceil(width / factor) * factor

    pad = [0, int(w - width), 0, int(h - height), 0, int(d - depth)]
    # pad = [0, int(256 - width), 0, int(192 - height), 0, int(256 - depth)]

    image = F.pad(image, pad, 'constant', value=pad_value)

    return image


class NoduleDetection3D(BaseDetection3D):
    def __init__(self, cfg, mode='train', log_fun=None):
        # super(NoduleDetection3D, self).__init__(cfg)
        """3D detector initialization

        Arguments:
            DetectorBase {[object]} -- detector class providing basic detector functions
            config {[dict]} -- [config dict with hyper parameters]

        Raises:
            ValueError: only valid frames are implemented
        """
        # Initialize network
        if cfg.model['meta_architecture'] == "NoduleNet":
            self.model = NoduleNet(cfg, mode).cuda()
        else:
            raise ValueError("Detector architecture %s not implemented."
                             % cfg.model['meta_architecture'])
        # self.model = getattr(this_module, cfg.MODEL.META_ARCHITECTURE)(cfg).cuda()

        self.cfg = cfg
        self.DataReader = BboxReader_Nodule
        self.DistributedSampler = DistributedSampler_new
        if log_fun is None:
            self.log_fun = print
        else:
            self.log_fun = log_fun

        if cfg.train:
            self.metrics_writer = CMetricsWriter(
                filename=self.cfg.training['saver']['saver_train_metrics'],
                headernames=['epoch', 'f1_score', 'precision', 'recall', 'tpr', 'tnr',
                             'total_pos', 'total_neg',
                             'reg_z', 'reg_y', 'reg_x', 'reg_d', 'reg_h', 'reg_w',
                             'lr', 'rpn_cls_loss', 'rpn_reg_loss', 'lossTotal', 'performanceAccuracy']
            )
            self.best_f1 = -1
            self.best_epoch = 0

        if cfg.test:
            self.sub_reader = SubReader
            self.split_combine = SplitComb(
                self.cfg.testing['test_slide_len'],
                self.cfg.data['data_process']['stride'],
                self.cfg.testing['test_margin']
            )

        cudnn.benchmark = False

    def do_train(self):
        self.model.loss.cuda()
        self._create_dataloader()
        self._create_optimizer()

        verbose = 1 if self.cfg.rank == 0 else 0
        for i in range(self.resume_from_epoch, self.cfg.epochs):
            if self.cfg.training['sheduler']['num_neg_shedule']:
                self.num_neg_shedule(i)
            if self.cfg.training['sheduler']['rand_crop_ratio_shedule']:
                self.rand_crop_ratio_shedule(i)
            if i >= self.cfg.training['sheduler']['switch_roi_epoch']:
                self.model.use_rcnn = True
                self.model.loss.use_rcnn = True
            if self.cfg.data['data_loader']['balanced_sampling']:
                self.train_loader.dataset.list_IDs = self.train_loader.dataset._get_list_ids()
            if i == self.cfg.training['sheduler']['switch_balanced_sampling_epoch']:
                self.cfg.data['data_loader']['balanced_sampling'] = True
                self.train_loader.dataset.balanced_sampling_flag = True
                self.train_loader.dataset.list_IDs = self.train_loader.dataset._get_list_ids()
                self.train_loader.sampler.update_dataset(self.train_loader.dataset)
            if self.cfg.rank == 0:
                self.log_fun('[length of train loader %d, length of valid loader %d]' % (
                    len(self.train_loader), len(self.val_loader)))

            self.train(i, verbose)
            self.validate(i, verbose)

            self.train_loader.dataset.targets_idx = self.train_loader.dataset._get_targets_idx()
            self.train_loader.sampler.update_dataset(self.train_loader.dataset)

            # if hvd.rank() == 0:
            #     save_checkpoint(i, self.cfg.training['saver']['saver_frequency'], self.model, self.optimizer,
            #                     self.model_out_dir)

        if self.cfg.rank == 0:
            json_info = json.load(open(self.cfg.training['saver']['saver_train_result'], 'r'))
            json_info['successFlag'] = 'SUCCESS'
            with open(self.cfg.training['saver']['saver_train_result'], 'w') as file:
                json.dump(json_info, file, indent=4)
            save_checkpoint(self.cfg.epochs, self.model, self.optimizer,
                            self.cfg.training['saver']['saver_train_bestmodel'])

        if self.writer:
            self.writer.close()
        if self.train_writer:
            self.train_writer.close()
        if self.val_writer:
            self.val_writer.close()

    def do_val_ori(self):
        # create dataset
        self.val_dataset = self.DataReader(self.cfg, mode='val', log_fun=self.log_fun)
        # torch.set_num_threads(4)
        kwargs = {'num_workers': self.cfg.num_workers, 'pin_memory': True}
        self.val_loader = DataLoader(self.val_dataset, batch_size=self.cfg.batch_size,
                                     collate_fn=train_collate, shuffle=False, **kwargs)

        # Load the model to validate
        # Horovod: load model only for rank 0
        if self.cfg.pretrain_msg and self.cfg.rank == 0:
            print('[Loading model from %s]' % self.cfg.pretrain_msg)
            checkpoint = torch.load(self.cfg.pretrain_msg)
            state = self.model.state_dict()
            state.update(checkpoint['state_dict'])
            self.model.load_state_dict(state)  # , strict=False

        # Validate
        self.only_validate()

        # Save /output/eval/eval_result.json
        with
open(self.cfg.training['saver']['saver_eval_result'], 'w+') as file: json.dump( { "successFlag": "SUCCESS", # "resultItems": result_item_list, },file, indent=4) def train(self, epoch, verbose): self.train_sampler.set_epoch(epoch) self.model.set_mode('train') if self.cfg.model['backbone']['fpn']: fpn_cls_loss = [] fpn_reg_loss = [] fpn_stats = [] for i in range(3): fpn_cls_loss.append(Metric('fpn_cls_loss_' + str(i))) fpn_reg_loss.append(Metric('fpn_reg_loss_' + str(i))) fpn_stats.append([]) rcnn_cls_loss = Metric('rcnn_cls_loss') rcnn_reg_loss = Metric('rcnn_reg_loss') total_loss = Metric('train_loss') rcnn_stats = [] else: rpn_cls_loss = Metric('rpn_cls_loss') rpn_reg_loss = Metric('rpn_reg_loss') rcnn_cls_loss = Metric('rcnn_cls_loss') rcnn_reg_loss = Metric('rcnn_reg_loss') total_loss = Metric('train_loss') rpn_stats = [] rcnn_stats = [] with tqdm(total=len(self.train_loader), desc='Train Epoch #{}'.format(epoch), disable=not verbose) as t: for j, (input_data, truth_bboxes) in enumerate(self.train_loader): input_data = Variable(input_data).cuda() if self.cfg.training['sheduler']['lr_shedule']: self.lr_shedule(epoch, j) self.optimizer.zero_grad() rpn_logits, rpn_deltas, rpn_labels, rpn_label_weights, rpn_targets, rpn_targets_weights, \ rcnn_logits, rcnn_deltas, rcnn_labels, rcnn_targets = self.model(input_data, truth_bboxes) if self.cfg.model['backbone']['fpn']: loss, [fpn_cls_loss_, fpn_reg_loss_, rcnn_cls_loss_, rcnn_reg_loss_], fpn_stat_, rcnn_stat_ = \ self.model.loss(rpn_logits, rpn_deltas, rpn_labels, rpn_label_weights, rpn_targets, rpn_targets_weights, rcnn_logits, rcnn_deltas, rcnn_labels, rcnn_targets) else: loss, [rpn_cls_loss_, rpn_reg_loss_, rcnn_cls_loss_, rcnn_reg_loss_], rpn_stat_, rcnn_stat_ = \ self.model.loss(rpn_logits, rpn_deltas, rpn_labels, rpn_label_weights, rpn_targets, rpn_targets_weights, rcnn_logits, rcnn_deltas, rcnn_labels, rcnn_targets) if self.cfg.training['amp']: print(f'use amp') with amp.scale_loss(loss, self.optimizer) as scaled_loss: scaled_loss.backward() self.optimizer.synchronize() with self.optimizer.skip_synchronize(): # torch.nn.utils.clip_grad_norm_(amp.master_params(self.optimizer), max_norm=0.01, norm_type=2) # torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=0.001, norm_type=2) self.optimizer.step() else: loss.backward() self.optimizer.step() if self.cfg.model['backbone']['fpn']: for i in range(3): fpn_cls_loss[i].update(fpn_cls_loss_[i]) fpn_reg_loss[i].update(fpn_reg_loss_[i]) fpn_stats[i].append([x.item() if isinstance(x, torch.Tensor) and x.is_cuda else x for x in fpn_stat_[i]]) rcnn_cls_loss.update(rcnn_cls_loss_) rcnn_reg_loss.update(rcnn_reg_loss_) total_loss.update(loss) rcnn_stats.append(rcnn_stat_) t.set_postfix({'total_loss': total_loss.avg.item(), 'rcnn_cls_loss': rcnn_cls_loss.avg.item(), 'rcnn_reg_loss': rcnn_reg_loss.avg.item(), }) t.set_postfix({'fpn_cls_loss_0': fpn_cls_loss[0].avg.item(), 'fpn_reg_loss_0': fpn_reg_loss[0].avg.item(), 'fpn_cls_loss_1': fpn_cls_loss[1].avg.item(), 'fpn_reg_loss_1': fpn_reg_loss[1].avg.item(), 'fpn_cls_loss_2': fpn_cls_loss[2].avg.item(), 'fpn_reg_loss_2': fpn_reg_loss[2].avg.item(), }) # for i in range(3): # t.set_postfix({'fpn_cls_loss_' + str(i): fpn_cls_loss[i].avg.item(), # 'fpn_reg_loss_' + str(i): fpn_reg_loss[i].avg.item(), }) t.update(1) else: rpn_cls_loss.update(rpn_cls_loss_) rpn_reg_loss.update(rpn_reg_loss_) rcnn_cls_loss.update(rcnn_cls_loss_) rcnn_reg_loss.update(rcnn_reg_loss_) total_loss.update(loss) rpn_stats.append([x.item() if isinstance(x, torch.Tensor) and 
x.is_cuda else x for x in rpn_stat_]) rcnn_stats.append(rcnn_stat_) t.set_postfix({'total_loss': total_loss.avg.item(), 'rpn_cls_loss': rpn_cls_loss.avg.item(), 'rpn_reg_loss': rpn_reg_loss.avg.item(), 'rcnn_cls_loss': rcnn_cls_loss.avg.item(), 'rcnn_reg_loss': rcnn_reg_loss.avg.item(), }) t.update(1) if self.train_writer: if self.cfg.model['backbone']['fpn']: self.train_writer.add_scalar('loss', total_loss.avg, epoch) # fpn_stats = np.asarray(fpn_stats, np.float32) self.log_fun('Train Epoch %d, loss %f' % (epoch, total_loss.avg.item())) for i in range(3): self.log_fun('fpn_cls_%s %f, fpn_reg_%s %f, rcnn_cls %f, rcnn_reg %f' % (str(i), fpn_cls_loss[i].avg.item(), str(i), fpn_reg_loss[i].avg.item(), rcnn_cls_loss.avg.item(), rcnn_reg_loss.avg.item())) rpn_stats = np.asarray(fpn_stats[i], np.float32) self.log_fun( 'fpn_stats_%s: tpr %f, tnr %f, total pos %d, total neg %d, reg %.4f, %.4f, %.4f, %.4f, %.4f, %.4f' % ( str(i), 100.0 * np.sum(rpn_stats[:, 0]) / np.sum(rpn_stats[:, 1]), 100.0 * np.sum(rpn_stats[:, 2]) / np.sum(rpn_stats[:, 3]), np.sum(rpn_stats[:, 1]), np.sum(rpn_stats[:, 3]), np.mean(rpn_stats[:, 4]), np.mean(rpn_stats[:, 5]), np.mean(rpn_stats[:, 6]), np.mean(rpn_stats[:, 7]), np.mean(rpn_stats[:, 8]), np.mean(rpn_stats[:, 9]))) # Write to tensorboard self.train_writer.add_scalar('fpn_cls_' + str(i), fpn_cls_loss[i].avg, epoch) self.train_writer.add_scalar('fpn_reg_' + str(i), fpn_reg_loss[i].avg, epoch) self.train_writer.add_scalar('fpn_reg_z_' + str(i), np.mean(rpn_stats[:, 4]), epoch) self.train_writer.add_scalar('fpn_reg_y_' + str(i), np.mean(rpn_stats[:, 5]), epoch) self.train_writer.add_scalar('fpn_reg_x_' + str(i), np.mean(rpn_stats[:, 6]), epoch) self.train_writer.add_scalar('fpn_reg_d_' + str(i), np.mean(rpn_stats[:, 7]), epoch) self.train_writer.add_scalar('fpn_reg_h_' + str(i), np.mean(rpn_stats[:, 8]), epoch) self.train_writer.add_scalar('fpn_reg_w_' + str(i), np.mean(rpn_stats[:, 9]), epoch) self.train_writer.add_scalar('rcnn_cls', rcnn_cls_loss.avg, epoch) self.train_writer.add_scalar('rcnn_reg', rcnn_reg_loss.avg, epoch) if self.model.use_rcnn: rcnn_stats = np.asarray([stat[:-1] for stat in rcnn_stats], np.float32) self.log_fun('rcnn_stats: reg %.4f, %.4f, %.4f, %.4f, %.4f, %.4f' % ( np.mean(rcnn_stats[:, 0]), np.mean(rcnn_stats[:, 1]), np.mean(rcnn_stats[:, 2]), np.mean(rcnn_stats[:, 3]), np.mean(rcnn_stats[:, 4]), np.mean(rcnn_stats[:, 5]))) self.train_writer.add_scalar('rcnn_reg_z', np.mean(rcnn_stats[:, 0]), epoch) self.train_writer.add_scalar('rcnn_reg_y', np.mean(rcnn_stats[:, 1]), epoch) self.train_writer.add_scalar('rcnn_reg_x', np.mean(rcnn_stats[:, 2]), epoch) self.train_writer.add_scalar('rcnn_reg_d', np.mean(rcnn_stats[:, 3]), epoch) self.train_writer.add_scalar('rcnn_reg_h', np.mean(rcnn_stats[:, 4]), epoch) self.train_writer.add_scalar('rcnn_reg_w', np.mean(rcnn_stats[:, 5]), epoch) else: rpn_stats = np.asarray(rpn_stats, np.float32) self.log_fun('Train Epoch %d, loss %f' % (epoch, total_loss.avg.item())) self.log_fun('rpn_cls %f, rpn_reg %f, rcnn_cls %f, rcnn_reg %f' % ( rpn_cls_loss.avg.item(), rpn_reg_loss.avg.item(), rcnn_cls_loss.avg.item(), rcnn_reg_loss.avg.item())) self.log_fun( 'rpn_stats: tpr %f, tnr %f, total pos %d, total neg %d, reg %.4f, %.4f, %.4f, %.4f, %.4f, %.4f' % ( 100.0 * np.sum(rpn_stats[:, 0]) / np.sum(rpn_stats[:, 1]), 100.0 * np.sum(rpn_stats[:, 2]) / np.sum(rpn_stats[:, 3]), np.sum(rpn_stats[:, 1]), np.sum(rpn_stats[:, 3]), np.mean(rpn_stats[:, 4]), np.mean(rpn_stats[:, 5]), np.mean(rpn_stats[:, 6]), np.mean(rpn_stats[:, 
7]), np.mean(rpn_stats[:, 8]), np.mean(rpn_stats[:, 9]))) # Write to tensorboard self.train_writer.add_scalar('loss', total_loss.avg, epoch) self.train_writer.add_scalar('rpn_cls', rpn_cls_loss.avg, epoch) self.train_writer.add_scalar('rpn_reg', rpn_reg_loss.avg, epoch) self.train_writer.add_scalar('rcnn_cls', rcnn_cls_loss.avg, epoch) self.train_writer.add_scalar('rcnn_reg', rcnn_reg_loss.avg, epoch) self.train_writer.add_scalar('rpn_reg_z', np.mean(rpn_stats[:, 4]), epoch) self.train_writer.add_scalar('rpn_reg_y', np.mean(rpn_stats[:, 5]), epoch) self.train_writer.add_scalar('rpn_reg_x', np.mean(rpn_stats[:, 6]), epoch) self.train_writer.add_scalar('rpn_reg_d', np.mean(rpn_stats[:, 7]), epoch) self.train_writer.add_scalar('rpn_reg_h', np.mean(rpn_stats[:, 8]), epoch) self.train_writer.add_scalar('rpn_reg_w', np.mean(rpn_stats[:, 9]), epoch) if torch.distributed.get_rank() == 0: print('metrics results are here') tp = np.sum(rpn_stats[:, 0]) fn = np.sum(rpn_stats[:, 1]) - np.sum(rpn_stats[:, 0]) tn = np.sum(rpn_stats[:, 2]) fp = np.sum(rpn_stats[:, 3]) - np.sum(rpn_stats[:, 2]) precision = tp / (tp + fp) recall = tp / (tp + fn) f1_score = 2*precision*recall / (precision + recall) self.metrics_writer.append_one_line( [epoch, f1_score, precision, recall, 100.0 * np.sum(rpn_stats[:, 0]) / np.sum(rpn_stats[:, 1]), 100.0 * np.sum(rpn_stats[:, 2]) / np.sum(rpn_stats[:, 3]), np.sum(rpn_stats[:, 1]), np.sum(rpn_stats[:, 3]), np.mean(rpn_stats[:, 4]), np.mean(rpn_stats[:, 5]), np.mean(rpn_stats[:, 6]), np.mean(rpn_stats[:, 7]), np.mean(rpn_stats[:, 8]), np.mean(rpn_stats[:, 9]), self.optimizer.param_groups[0]['lr'], rpn_cls_loss.avg.item(), rpn_reg_loss.avg.item(), total_loss.avg.item(), f1_score] ) if self.model.use_rcnn: rcnn_stats = np.asarray([stat[:-1] for stat in rcnn_stats], np.float32) self.log_fun('rcnn_stats: reg %.4f, %.4f, %.4f, %.4f, %.4f, %.4f' % ( np.mean(rcnn_stats[:, 0]), np.mean(rcnn_stats[:, 1]), np.mean(rcnn_stats[:, 2]), np.mean(rcnn_stats[:, 3]), np.mean(rcnn_stats[:, 4]), np.mean(rcnn_stats[:, 5]))) self.train_writer.add_scalar('rcnn_reg_z', np.mean(rcnn_stats[:, 0]), epoch) self.train_writer.add_scalar('rcnn_reg_y', np.mean(rcnn_stats[:, 1]), epoch) self.train_writer.add_scalar('rcnn_reg_x', np.mean(rcnn_stats[:, 2]), epoch) self.train_writer.add_scalar('rcnn_reg_d', np.mean(rcnn_stats[:, 3]), epoch) self.train_writer.add_scalar('rcnn_reg_h', np.mean(rcnn_stats[:, 4]), epoch) self.train_writer.add_scalar('rcnn_reg_w', np.mean(rcnn_stats[:, 5]), epoch) torch.cuda.empty_cache() def validate(self, epoch, verbose): # self.val_sampler.set_epoch(epoch) self.model.set_mode('valid') if self.cfg.model['backbone']['fpn']: fpn_cls_loss = [] fpn_reg_loss = [] fpn_stats = [] for i in range(3): fpn_cls_loss.append(Metric('fpn_cls_loss_' + str(i))) fpn_reg_loss.append(Metric('fpn_reg_loss_' + str(i))) fpn_stats.append([]) rcnn_cls_loss = Metric('rcnn_cls_loss') rcnn_reg_loss = Metric('rcnn_reg_loss') total_loss = Metric('train_loss') rcnn_stats = [] else: rpn_cls_loss = Metric('rpn_cls_loss') rpn_reg_loss = Metric('rpn_reg_loss') rcnn_cls_loss = Metric('rcnn_cls_loss') rcnn_reg_loss = Metric('rcnn_reg_loss') total_loss = Metric('train_loss') rpn_stats = [] rcnn_stats = [] with tqdm(total=len(self.val_loader), desc='Validate Epoch #{}'.format(epoch), disable=not verbose) as t: for j, (input_data, truth_bboxes) in enumerate(self.val_loader): with torch.no_grad(): input_data = Variable(input_data).cuda() rpn_logits, rpn_deltas, rpn_labels, rpn_label_weights, rpn_targets, rpn_targets_weights, 
\ rcnn_logits, rcnn_deltas, rcnn_labels, rcnn_targets = self.model(input_data, truth_bboxes) if self.cfg.model['backbone']['fpn']: loss, [fpn_cls_loss_, fpn_reg_loss_, rcnn_cls_loss_, rcnn_reg_loss_], fpn_stat_, rcnn_stat_ = \ self.model.loss(rpn_logits, rpn_deltas, rpn_labels, rpn_label_weights, rpn_targets, rpn_targets_weights, rcnn_logits, rcnn_deltas, rcnn_labels, rcnn_targets) else: loss, [rpn_cls_loss_, rpn_reg_loss_, rcnn_cls_loss_, rcnn_reg_loss_], rpn_stat_, rcnn_stat_ = \ self.model.loss(rpn_logits, rpn_deltas, rpn_labels, rpn_label_weights, rpn_targets, rpn_targets_weights, rcnn_logits, rcnn_deltas, rcnn_labels, rcnn_targets) if self.cfg.model['backbone']['fpn']: for i in range(3): fpn_cls_loss[i].update(fpn_cls_loss_[i]) fpn_reg_loss[i].update(fpn_reg_loss_[i]) fpn_stats[i].append([x.item() if isinstance(x, torch.Tensor) and x.is_cuda else x for x in fpn_stat_[i]]) rcnn_cls_loss.update(rcnn_cls_loss_) rcnn_reg_loss.update(rcnn_reg_loss_) total_loss.update(loss) rcnn_stats.append(rcnn_stat_) else: rpn_cls_loss.update(rpn_cls_loss_) rpn_reg_loss.update(rpn_reg_loss_) rcnn_cls_loss.update(rcnn_cls_loss_) rcnn_reg_loss.update(rcnn_reg_loss_) total_loss.update(loss) rpn_stats.append([x.item() if isinstance(x, torch.Tensor) and x.is_cuda else x for x in rpn_stat_]) rcnn_stats.append(rcnn_stat_) if self.val_writer: if self.cfg.model['backbone']['fpn']: self.val_writer.add_scalar('loss', total_loss.avg, epoch) # fpn_stats = np.asarray(fpn_stats, np.float32) self.log_fun('Val Epoch %d, loss %f' % (epoch, total_loss.avg.item())) for i in range(3): self.log_fun('fpn_cls_%s %f, fpn_reg_%s %f, rcnn_cls %f, rcnn_reg %f' % (str(i), fpn_cls_loss[i].avg.item(), str(i), fpn_reg_loss[i].avg.item(), rcnn_cls_loss.avg.item(), rcnn_reg_loss.avg.item())) rpn_stats = np.asarray(fpn_stats[i], np.float32) self.log_fun( 'fpn_stats_%s: tpr %f, tnr %f, total pos %d, total neg %d, reg %.4f, %.4f, %.4f, %.4f, %.4f, %.4f' % ( str(i), 100.0 * np.sum(rpn_stats[:, 0]) / np.sum(rpn_stats[:, 1]), 100.0 * np.sum(rpn_stats[:, 2]) / np.sum(rpn_stats[:, 3]), np.sum(rpn_stats[:, 1]), np.sum(rpn_stats[:, 3]), np.mean(rpn_stats[:, 4]), np.mean(rpn_stats[:, 5]), np.mean(rpn_stats[:, 6]), np.mean(rpn_stats[:, 7]), np.mean(rpn_stats[:, 8]), np.mean(rpn_stats[:, 9]))) # Write to tensorboard self.val_writer.add_scalar('fpn_cls_' + str(i), fpn_cls_loss[i].avg, epoch) self.val_writer.add_scalar('fpn_reg_' + str(i), fpn_reg_loss[i].avg, epoch) self.val_writer.add_scalar('fpn_reg_z_' + str(i), np.mean(rpn_stats[:, 4]), epoch) self.val_writer.add_scalar('fpn_reg_y_' + str(i), np.mean(rpn_stats[:, 5]), epoch) self.val_writer.add_scalar('fpn_reg_x_' + str(i), np.mean(rpn_stats[:, 6]), epoch) self.val_writer.add_scalar('fpn_reg_d_' + str(i), np.mean(rpn_stats[:, 7]), epoch) self.val_writer.add_scalar('fpn_reg_h_' + str(i), np.mean(rpn_stats[:, 8]), epoch) self.val_writer.add_scalar('fpn_reg_w_' + str(i), np.mean(rpn_stats[:, 9]), epoch) self.val_writer.add_scalar('rcnn_cls', rcnn_cls_loss.avg, epoch) self.val_writer.add_scalar('rcnn_reg', rcnn_reg_loss.avg, epoch) if self.model.use_rcnn: rcnn_stats = np.asarray([stat[:-1] for stat in rcnn_stats], np.float32) self.log_fun('rcnn_stats: reg %.4f, %.4f, %.4f, %.4f, %.4f, %.4f' % ( np.mean(rcnn_stats[:, 0]), np.mean(rcnn_stats[:, 1]), np.mean(rcnn_stats[:, 2]), np.mean(rcnn_stats[:, 3]), np.mean(rcnn_stats[:, 4]), np.mean(rcnn_stats[:, 5]))) self.val_writer.add_scalar('rcnn_reg_z', np.mean(rcnn_stats[:, 0]), epoch) self.val_writer.add_scalar('rcnn_reg_y', np.mean(rcnn_stats[:, 1]), epoch) 
                    self.val_writer.add_scalar('rcnn_reg_x', np.mean(rcnn_stats[:, 2]), epoch)
                    self.val_writer.add_scalar('rcnn_reg_d', np.mean(rcnn_stats[:, 3]), epoch)
                    self.val_writer.add_scalar('rcnn_reg_h', np.mean(rcnn_stats[:, 4]), epoch)
                    self.val_writer.add_scalar('rcnn_reg_w', np.mean(rcnn_stats[:, 5]), epoch)
            else:
                rpn_stats = np.asarray(rpn_stats, np.float32)
                self.log_fun('Val Epoch %d, loss %f' % (epoch, total_loss.avg.item()))
                self.log_fun('rpn_cls %f, rpn_reg %f, rcnn_cls %f, rcnn_reg %f' % (
                    rpn_cls_loss.avg.item(), rpn_reg_loss.avg.item(), rcnn_cls_loss.avg.item(),
                    rcnn_reg_loss.avg.item()))
                self.log_fun(
                    'rpn_stats: tpr %f, tnr %f, total pos %d, total neg %d, reg %.4f, %.4f, %.4f, %.4f, %.4f, %.4f' % (
                        100.0 * np.sum(rpn_stats[:, 0]) / np.sum(rpn_stats[:, 1]),
                        100.0 * np.sum(rpn_stats[:, 2]) / np.sum(rpn_stats[:, 3]),
                        np.sum(rpn_stats[:, 1]),
                        np.sum(rpn_stats[:, 3]),
                        np.mean(rpn_stats[:, 4]),
                        np.mean(rpn_stats[:, 5]),
                        np.mean(rpn_stats[:, 6]),
                        np.mean(rpn_stats[:, 7]),
                        np.mean(rpn_stats[:, 8]),
                        np.mean(rpn_stats[:, 9])))

                # Write to tensorboard
                self.val_writer.add_scalar('loss', total_loss.avg, epoch)
                self.val_writer.add_scalar('rpn_cls', rpn_cls_loss.avg, epoch)
                self.val_writer.add_scalar('rpn_reg', rpn_reg_loss.avg, epoch)
                self.val_writer.add_scalar('rcnn_cls', rcnn_cls_loss.avg, epoch)
                self.val_writer.add_scalar('rcnn_reg', rcnn_reg_loss.avg, epoch)
                self.val_writer.add_scalar('rpn_reg_z', np.mean(rpn_stats[:, 4]), epoch)
                self.val_writer.add_scalar('rpn_reg_y', np.mean(rpn_stats[:, 5]), epoch)
                self.val_writer.add_scalar('rpn_reg_x', np.mean(rpn_stats[:, 6]), epoch)
                self.val_writer.add_scalar('rpn_reg_d', np.mean(rpn_stats[:, 7]), epoch)
                self.val_writer.add_scalar('rpn_reg_h', np.mean(rpn_stats[:, 8]), epoch)
                self.val_writer.add_scalar('rpn_reg_w', np.mean(rpn_stats[:, 9]), epoch)

                if self.cfg.rank == 0:
                    # Confusion counts derived from the accumulated RPN stats (cols 0-3: tp, total pos, tn, total neg).
                    tp = np.sum(rpn_stats[:, 0])
                    fn = np.sum(rpn_stats[:, 1]) - np.sum(rpn_stats[:, 0])
                    tn = np.sum(rpn_stats[:, 2])
                    fp = np.sum(rpn_stats[:, 3]) - np.sum(rpn_stats[:, 2])
                    precision = tp / (tp + fp)
                    recall = tp / (tp + fn)
                    f1_score = 2 * precision * recall / (precision + recall)
                    if f1_score > self.best_f1:
                        self.best_f1 = f1_score
                        self.best_epoch = epoch
                        # Update train_result.json
                        with open(self.cfg.training['saver']['saver_train_result'], 'w+') as file:
                            json.dump({
                                "successFlag": "TRAINING",
                                "bestModelEpoch": self.best_epoch
                            }, file, indent=4)
                        # Update performance.json under eval
                        with open(self.cfg.training['saver']['saver_eval_pjson'], 'w+') as file:
                            json.dump({
                                "f1_score": self.best_f1,
                            }, file, indent=4)
                        # Update performance.md under eval
                        with open(self.cfg.training['saver']['saver_eval_pmd'], 'w+') as file:
                            file.write('# overall performance \n')
                            file.write('| precision | recall | f1Score | \n')
                            file.write('| --------- | ------ | ------- | \n')
                            file.write(f'| {precision} | {recall} | {f1_score} | \n')
                        # Save the best model
                        print('save best pth')
                        save_checkpoint(epoch, self.model, self.optimizer,
                                        self.cfg.training['saver']['saver_train_bestmodel'])
                        print('save done')

                if self.model.use_rcnn:
                    rcnn_stats = np.asarray([stat[:-1] for stat in rcnn_stats], np.float32)
                    self.log_fun('rcnn_stats: reg %.4f, %.4f, %.4f, %.4f, %.4f, %.4f' % (
                        np.mean(rcnn_stats[:, 0]), np.mean(rcnn_stats[:, 1]), np.mean(rcnn_stats[:, 2]),
                        np.mean(rcnn_stats[:, 3]), np.mean(rcnn_stats[:, 4]), np.mean(rcnn_stats[:, 5])))
                    self.val_writer.add_scalar('rcnn_reg_z', np.mean(rcnn_stats[:, 0]), epoch)
                    self.val_writer.add_scalar('rcnn_reg_y', np.mean(rcnn_stats[:, 1]), epoch)
                    self.val_writer.add_scalar('rcnn_reg_x', np.mean(rcnn_stats[:, 2]), epoch)
                    self.val_writer.add_scalar('rcnn_reg_d', np.mean(rcnn_stats[:, 3]), epoch)
                    self.val_writer.add_scalar('rcnn_reg_h', np.mean(rcnn_stats[:, 4]), epoch)
                    self.val_writer.add_scalar('rcnn_reg_w', np.mean(rcnn_stats[:, 5]), epoch)

        torch.cuda.empty_cache()

    def only_validate(self):
        self.model.set_mode('valid')

        if self.cfg.model['backbone']['fpn']:
            fpn_cls_loss = []
            fpn_reg_loss = []
            fpn_stats = []
            for i in range(3):
                fpn_cls_loss.append(Metric('fpn_cls_loss_' + str(i)))
                fpn_reg_loss.append(Metric('fpn_reg_loss_' + str(i)))
                fpn_stats.append([])
            rcnn_cls_loss = Metric('rcnn_cls_loss')
            rcnn_reg_loss = Metric('rcnn_reg_loss')
            total_loss = Metric('train_loss')
            rcnn_stats = []
        else:
            rpn_cls_loss = Metric('rpn_cls_loss')
            rpn_reg_loss = Metric('rpn_reg_loss')
            rcnn_cls_loss = Metric('rcnn_cls_loss')
            rcnn_reg_loss = Metric('rcnn_reg_loss')
            total_loss = Metric('train_loss')
            rpn_stats = []
            rcnn_stats = []

        with tqdm(total=len(self.val_loader), desc='Only Validate') as t:
            for j, (input_data, truth_bboxes) in enumerate(self.val_loader):
                with torch.no_grad():
                    input_data = Variable(input_data).cuda()
                    rpn_logits, rpn_deltas, rpn_labels, rpn_label_weights, rpn_targets, rpn_targets_weights, \
                        rcnn_logits, rcnn_deltas, rcnn_labels, rcnn_targets = self.model(input_data, truth_bboxes)

                    if self.cfg.model['backbone']['fpn']:
                        loss, [fpn_cls_loss_, fpn_reg_loss_, rcnn_cls_loss_, rcnn_reg_loss_], fpn_stat_, rcnn_stat_ = \
                            self.model.loss(rpn_logits, rpn_deltas, rpn_labels, rpn_label_weights, rpn_targets,
                                            rpn_targets_weights, rcnn_logits, rcnn_deltas, rcnn_labels, rcnn_targets)
                    else:
                        loss, [rpn_cls_loss_, rpn_reg_loss_, rcnn_cls_loss_, rcnn_reg_loss_], rpn_stat_, rcnn_stat_ = \
                            self.model.loss(rpn_logits, rpn_deltas, rpn_labels, rpn_label_weights, rpn_targets,
                                            rpn_targets_weights, rcnn_logits, rcnn_deltas, rcnn_labels, rcnn_targets)

                if self.cfg.model['backbone']['fpn']:
                    for i in range(3):
                        fpn_cls_loss[i].update(fpn_cls_loss_[i])
                        fpn_reg_loss[i].update(fpn_reg_loss_[i])
                        fpn_stats[i].append([x.item() if isinstance(x, torch.Tensor) and x.is_cuda else x
                                             for x in fpn_stat_[i]])
                    rcnn_cls_loss.update(rcnn_cls_loss_)
                    rcnn_reg_loss.update(rcnn_reg_loss_)
                    total_loss.update(loss)
                    rcnn_stats.append(rcnn_stat_)
                else:
                    rpn_cls_loss.update(rpn_cls_loss_)
                    rpn_reg_loss.update(rpn_reg_loss_)
                    rcnn_cls_loss.update(rcnn_cls_loss_)
                    rcnn_reg_loss.update(rcnn_reg_loss_)
                    total_loss.update(loss)
                    rpn_stats.append([x.item() if isinstance(x, torch.Tensor) and x.is_cuda else x
                                      for x in rpn_stat_])
                    rcnn_stats.append(rcnn_stat_)

                if self.cfg.check_cancleflag and self.cfg.val:
                    self.log_fun('cancel flag detected, stopping inference')
                    return

        if self.cfg.model['backbone']['fpn']:
            # fpn_stats = np.asarray(fpn_stats, np.float32)
            self.log_fun('Only Val, loss %f' % (total_loss.avg.item()))
            for i in range(3):
                self.log_fun('fpn_cls_%s %f, fpn_reg_%s %f, rcnn_cls %f, rcnn_reg %f' %
                             (str(i), fpn_cls_loss[i].avg.item(), str(i), fpn_reg_loss[i].avg.item(),
                              rcnn_cls_loss.avg.item(), rcnn_reg_loss.avg.item()))
                rpn_stats = np.asarray(fpn_stats[i], np.float32)
                self.log_fun(
                    'fpn_stats_%s: tpr %f, tnr %f, total pos %d, total neg %d, reg %.4f, %.4f, %.4f, %.4f, %.4f, %.4f' % (
                        str(i),
                        100.0 * np.sum(rpn_stats[:, 0]) / np.sum(rpn_stats[:, 1]),
                        100.0 * np.sum(rpn_stats[:, 2]) / np.sum(rpn_stats[:, 3]),
                        np.sum(rpn_stats[:, 1]),
                        np.sum(rpn_stats[:, 3]),
                        np.mean(rpn_stats[:, 4]),
                        np.mean(rpn_stats[:, 5]),
                        np.mean(rpn_stats[:, 6]),
                        np.mean(rpn_stats[:, 7]),
                        np.mean(rpn_stats[:, 8]),
                        np.mean(rpn_stats[:, 9])))
            if self.model.use_rcnn:
                rcnn_stats = np.asarray([stat[:-1] for stat in rcnn_stats], np.float32)
                self.log_fun('rcnn_stats: reg %.4f, %.4f, %.4f, %.4f, %.4f, %.4f' % (
                    np.mean(rcnn_stats[:, 0]), np.mean(rcnn_stats[:, 1]), np.mean(rcnn_stats[:, 2]),
                    np.mean(rcnn_stats[:, 3]), np.mean(rcnn_stats[:, 4]), np.mean(rcnn_stats[:, 5])))
        else:
            rpn_stats = np.asarray(rpn_stats, np.float32)
            self.log_fun('Only Val, loss %f' % (total_loss.avg.item()))
            self.log_fun('rpn_cls %f, rpn_reg %f, rcnn_cls %f, rcnn_reg %f' % (
                rpn_cls_loss.avg.item(), rpn_reg_loss.avg.item(), rcnn_cls_loss.avg.item(),
                rcnn_reg_loss.avg.item()))
            self.log_fun(
                'rpn_stats: tpr %f, tnr %f, total pos %d, total neg %d, reg %.4f, %.4f, %.4f, %.4f, %.4f, %.4f' % (
                    100.0 * np.sum(rpn_stats[:, 0]) / np.sum(rpn_stats[:, 1]),
                    100.0 * np.sum(rpn_stats[:, 2]) / np.sum(rpn_stats[:, 3]),
                    np.sum(rpn_stats[:, 1]),
                    np.sum(rpn_stats[:, 3]),
                    np.mean(rpn_stats[:, 4]),
                    np.mean(rpn_stats[:, 5]),
                    np.mean(rpn_stats[:, 6]),
                    np.mean(rpn_stats[:, 7]),
                    np.mean(rpn_stats[:, 8]),
                    np.mean(rpn_stats[:, 9])))

            # Confusion counts derived from the accumulated RPN stats (cols 0-3: tp, total pos, tn, total neg).
            tp = np.sum(rpn_stats[:, 0])
            fn = np.sum(rpn_stats[:, 1]) - np.sum(rpn_stats[:, 0])
            tn = np.sum(rpn_stats[:, 2])
            fp = np.sum(rpn_stats[:, 3]) - np.sum(rpn_stats[:, 2])
            precision = tp / (tp + fp)
            recall = tp / (tp + fn)
            f1_score = 2 * precision * recall / (precision + recall)

            # Update performance.json under eval
            with open(self.cfg.training['saver']['saver_eval_pjson'], 'w+') as file:
                json.dump({
                    "f1_score": f1_score,
                }, file, indent=4)
            # Update performance.md under eval
            with open(self.cfg.training['saver']['saver_eval_pmd'], 'w+') as file:
                file.write('# overall performance \n')
                file.write('| precision | recall | f1Score | \n')
                file.write('| --------- | ------ | ------- | \n')
                file.write(f'| {precision} | {recall} | {f1_score} | \n')

            if self.model.use_rcnn:
                rcnn_stats = np.asarray([stat[:-1] for stat in rcnn_stats], np.float32)
                self.log_fun('rcnn_stats: reg %.4f, %.4f, %.4f, %.4f, %.4f, %.4f' % (
                    np.mean(rcnn_stats[:, 0]), np.mean(rcnn_stats[:, 1]), np.mean(rcnn_stats[:, 2]),
                    np.mean(rcnn_stats[:, 3]), np.mean(rcnn_stats[:, 4]), np.mean(rcnn_stats[:, 5])))

        torch.cuda.empty_cache()

    def do_val(self):
        #### load model
        self.log_fun('[Loading model from %s]' % self.cfg.pretrain_msg)
        checkpoint = torch.load(self.cfg.pretrain_msg)
        ckpt_dict = checkpoint['state_dict']
        model_dict = self.model.state_dict()
        # new_state_dict = {}
        # for k, v in ckpt_dict.items():
        #     print(k)
        #     if 'rpn.' in k:
        #         new_k = k.replace('rpn.', 'rpn_head.')
        #         new_state_dict[new_k] = v
        #     elif 'rcnn_head.'
in k: # new_k = k.replace('rcnn_head.', 'box_head.') # new_state_dict[new_k] = model_dict[new_k] # else: # new_state_dict[k] = v # self.model.load_state_dict(new_state_dict) self.model.load_state_dict(ckpt_dict) ####load data self.test_dataset = self.DataReader(self.cfg, mode='test', log_fun=self.log_fun, split_combine=self.split_combine) #pt模型加载 # self.model = torch.jit.load(self.cfg.pretrain_msg) self.model.set_mode('test') self.model.use_rcnn = self.cfg.testing['use_rcnn'] self.log_fun('Total # of eval data %d' % (len(self.test_dataset))) ####infer x_min_ratio = 0.1491 x_max_ratio = 0.8442 y_min_ratio = 0.2685 y_max_ratio = 0.7606 z_min_ratio = 0.1330 z_max_ratio = 0.9143 gts = 0 preds = 0 tps = 0 for i, (inputs, patch_idx, n_zyx, coords, img_size, bboxes) in tqdm(enumerate(self.test_dataset)): img_z, img_y, img_x = img_size pid = self.test_dataset.sample_bboxes[i].get_field("filename") # pid = self.test_dataset.data_nii_gz[i] pid = pid.split('/')[-1].replace('.nii.gz', '') save_dir = os.path.join(self.cfg.testing['test_tmp_rpns'], pid) self.log_fun('[{}] Predicting {}, inputs_shape: {} '.format(i, pid, inputs.shape)) sub_dataset = self.sub_reader(inputs, patch_idx, n_zyx, coords) kwargs = {'num_workers': self.cfg.num_workers, 'pin_memory': True} sub_loader = DataLoader(sub_dataset, batch_size=1, **kwargs) for sub_i, (sub_inputs, sub_idx, sub_zyx, sub_coord) in enumerate(sub_loader): sub_idx = sub_idx sub_zyx = sub_zyx sub_inputs = sub_inputs # num of volumes per run n_per_run = 1 # self.log_fun('length of input: {}'.format(len(sub_inputs))) split_list = list(range(0, len(sub_inputs) + 1, n_per_run)) # self.cfg.training['batch_size'] if split_list[-1] != len(sub_inputs): split_list.append(len(sub_inputs)) output_list = [] for i in range(len(split_list) - 1): s_in = sub_inputs[split_list[i]: split_list[i + 1], :, :, :, :] coord_in = sub_coord[split_list[i]: split_list[i + 1], :, :, :, :] with torch.no_grad(): s_in = s_in.cuda() coord_in = coord_in.cuda() rpns = self.model.forward_test(s_in, coord_in) output_list.append(rpns.cpu().numpy()) ####write temp results if len(output_list): save_dir = os.path.join(self.cfg.testing['test_tmp_rpns'], pid) if not os.path.exists(save_dir): os.makedirs(save_dir) np.save(os.path.join(save_dir, '{}_zyx.npy'.format(pid)), sub_zyx[0].numpy()) for pi, patch_rpns in enumerate(output_list): np.save(os.path.join(save_dir, '{}_rpns_{}_{}_{}.npy'.format(pid, self.cfg.rank, sub_i, pi)), patch_rpns) np.save(os.path.join(save_dir, '{}_idx_{}_{}_{}.npy'.format(pid, self.cfg.rank, sub_i, pi)), sub_idx[pi].numpy()) cut_x, cut_y, cut_z = x_min_ratio*img_x/(x_max_ratio-x_min_ratio), y_min_ratio*img_y/(y_max_ratio-y_min_ratio), z_min_ratio*img_z/(z_max_ratio-z_min_ratio) root = os.path.join(self.cfg.testing['test_tmp_rpns'], pid) rpns_path = [path for path in os.listdir(root) if 'rpns' in path] output_w = [] output_p = [] for r_path in rpns_path: i_path = r_path.replace('rpns', 'idx') rpn_info = np.load(os.path.join(root, r_path)) idx_info = np.load(os.path.join(root, i_path)) output_rpns = self.split_combine.new_combine(rpn_info, idx_info) output_w.append(output_rpns[:, 2:]) output_p.append(output_rpns[:,1].reshape(-1, 1)) output_w = torch.cat(output_w, 0) output_p = torch.cat(output_p, 0) final_rpns = self.test_nms(output_w.cuda(), output_p.unsqueeze(0).cuda()) final_rpns = final_rpns[:, 1:].cpu().detach().numpy() rpns = final_rpns rpns = rpns[rpns[:, 0] >= 0.53] rpns[:, 1] += cut_z rpns[:, 2] += cut_y rpns[:, 3] += cut_x if bboxes.data.shape[0]: print(f'gts: 
{bboxes.data.shape[0]}') print(f'gt data: {bboxes.data}') gts += bboxes.data.shape[0] if rpns.shape[0]: print(f'rpn: {rpns}') preds += rpns.shape[0] print(f'preds: {rpns.shape[0]}') overlap = overlap_3d(convert_xyxyzz(torch.from_numpy(rpns[:, 1:]).float().cuda()), convert_xyxyzz(torch.from_numpy(bboxes.data).float().cuda())) print(f'ious: {overlap}') iou_mask = torch.max((overlap > 0.1).int(), 0) print(f'ious mask: {iou_mask}') tps += iou_mask[0].sum().item() print(f'ALL tps: {tps}, gts: {gts}, preds: {preds}') recall = tps/gts if gts > 0 else 0 precision = tps/preds if preds > 0 else 0 # 更新eval中的performance.json with open(self.cfg.training['saver']['saver_eval_pjson'], 'w+') as file: json.dump( { "recall": recall, "precision": precision }, file, indent=4) # 更新eval中的performance.md with open(self.cfg.training['saver']['saver_eval_pmd'], 'w+') as file: file.write('# overall performance \n') file.write('| precision | recall | \n') file.write('| --------- | ------ | \n') file.write(fr'| {precision} | {recall} | \n') # 保存 /output/eval/eval_result.json with open(self.cfg.training['saver']['saver_eval_result'], 'w+') as file: json.dump( { "successFlag": "SUCCESS", # "resultItems": result_item_list, },file, indent=4) def do_infer(self): ####load moodel self.log_fun('[Loading model from %s]' % self.cfg.pretrain_msg) checkpoint = torch.load(self.cfg.pretrain_msg) ckpt_dict = checkpoint['state_dict'] model_dict = self.model.state_dict() new_state_dict = {} for k, v in ckpt_dict.items(): print(k) if 'rpn.' in k: new_k = k.replace('rpn.', 'rpn_head.') new_state_dict[new_k] = v elif 'rcnn_head.' in k: new_k = k.replace('rcnn_head.', 'box_head.') new_state_dict[new_k] = model_dict[new_k] else: new_state_dict[k] = v self.model.load_state_dict(new_state_dict) ####load data self.test_dataset = self.DataReader(self.cfg, mode='infer', log_fun=self.log_fun, split_combine=self.split_combine) #pt模型加载 # self.model = torch.jit.load(self.cfg.pretrain_msg) self.model.set_mode('test') self.model.use_rcnn = self.cfg.testing['use_rcnn'] self.log_fun('Total # of eval data %d' % (len(self.test_dataset))) ####infer x_min_ratio = 0.1491 x_max_ratio = 0.8442 y_min_ratio = 0.2685 y_max_ratio = 0.7606 z_min_ratio = 0.1330 z_max_ratio = 0.9143 gts = 0 preds = 0 tps = 0 for i, (inputs, patch_idx, n_zyx, coords, img_size) in tqdm(enumerate(self.test_dataset)): img_z, img_y, img_x = img_size # pid = self.test_dataset.sample_bboxes[i].get_field("filename") pid = self.test_dataset.data_nii_gz[i] pid = pid.split('/')[-1].replace('.nii.gz', '') save_dir = os.path.join(self.cfg.testing['test_tmp_rpns'], pid) self.log_fun('[{}] Predicting {}, inputs_shape: {} '.format(i, pid, inputs.shape)) sub_dataset = self.sub_reader(inputs, patch_idx, n_zyx, coords) kwargs = {'num_workers': 1, 'pin_memory': True} sub_loader = DataLoader(sub_dataset, batch_size=1, **kwargs) for sub_i, (sub_inputs, sub_idx, sub_zyx, sub_coord) in enumerate(sub_loader): sub_idx = sub_idx sub_zyx = sub_zyx sub_inputs = sub_inputs # num of volumes per run n_per_run = 1 # self.log_fun('length of input: {}'.format(len(sub_inputs))) split_list = list(range(0, len(sub_inputs) + 1, n_per_run)) # self.cfg.training['batch_size'] if split_list[-1] != len(sub_inputs): split_list.append(len(sub_inputs)) output_list = [] for i in range(len(split_list) - 1): s_in = sub_inputs[split_list[i]: split_list[i + 1], :, :, :, :] coord_in = sub_coord[split_list[i]: split_list[i + 1], :, :, :, :] with torch.no_grad(): s_in = s_in.cuda() coord_in = coord_in.cuda() rpns = 
self.model.forward_test(s_in, coord_in) output_list.append(rpns.cpu().numpy()) ####write temp results if len(output_list): save_dir = os.path.join(self.cfg.testing['test_tmp_rpns'], pid) if not os.path.exists(save_dir): os.makedirs(save_dir) np.save(os.path.join(save_dir, '{}_zyx.npy'.format(pid)), sub_zyx[0].numpy()) for pi, patch_rpns in enumerate(output_list): np.save(os.path.join(save_dir, '{}_rpns_{}_{}_{}.npy'.format(pid, self.cfg.rank, sub_i, pi)), patch_rpns) np.save(os.path.join(save_dir, '{}_idx_{}_{}_{}.npy'.format(pid, self.cfg.rank, sub_i, pi)), sub_idx[pi].numpy()) cut_x, cut_y, cut_z = x_min_ratio*img_x/(x_max_ratio-x_min_ratio), y_min_ratio*img_y/(y_max_ratio-y_min_ratio), z_min_ratio*img_z/(z_max_ratio-z_min_ratio) root = os.path.join(self.cfg.testing['test_tmp_rpns'], pid) rpns_path = [path for path in os.listdir(root) if 'rpns' in path] output_w = [] output_p = [] for r_path in rpns_path: if 'all' in r_path: continue i_path = r_path.replace('rpns', 'idx') rpn_info = np.load(os.path.join(root, r_path)) idx_info = np.load(os.path.join(root, i_path)) output_rpns = self.split_combine.new_combine(rpn_info, idx_info) output_w.append(output_rpns[:, 2:]) output_p.append(output_rpns[:,1].reshape(-1, 1)) output_w = torch.cat(output_w, 0) output_p = torch.cat(output_p, 0) final_rpns = self.test_nms(output_w.cuda(), output_p.unsqueeze(0).cuda()) final_rpns = final_rpns[:, 1:].cpu().detach().numpy() rpns = final_rpns rpns = rpns[rpns[:, 0] >= 0.53] rpns[:, 1] += cut_z rpns[:, 2] += cut_y rpns[:, 3] += cut_x print(os.path.join(save_dir, 'all_rpns.npy')) np.save(os.path.join(save_dir, 'all_rpns.npy'), rpns) # imf2gif img2gif(self.cfg.testing['test_tmp_rpns'], os.path.join(os.path.dirname(os.path.dirname(self.cfg.testing['test_tmp_rpns'])), 'preprocess/preprocess_file_details/nii')) def do_test(self): ####load moodel self.log_fun('[Loading model from %s]' % self.cfg.pretrain_msg) checkpoint = torch.load(self.cfg.pretrain_msg) ckpt_dict = checkpoint['state_dict'] model_dict = self.model.state_dict() new_state_dict = {} for k, v in ckpt_dict.items(): print(k) if 'rpn.' in k: new_k = k.replace('rpn.', 'rpn_head.') new_state_dict[new_k] = v elif 'rcnn_head.' 
in k: new_k = k.replace('rcnn_head.', 'box_head.') new_state_dict[new_k] = model_dict[new_k] else: new_state_dict[k] = v self.model.load_state_dict(new_state_dict) ####load data self.test_dataset = self.DataReader(self.cfg, mode='test', log_fun=self.log_fun, split_combine=self.split_combine) #pt模型加载 # self.model = torch.jit.load(self.cfg.pretrain_msg) self.model.set_mode('test') self.model.use_rcnn = self.cfg.testing['use_rcnn'] self.log_fun('Total # of eval data %d' % (len(self.test_dataset))) ####infer x_min_ratio = 0.1491 x_max_ratio = 0.8442 y_min_ratio = 0.2685 y_max_ratio = 0.7606 z_min_ratio = 0.1330 z_max_ratio = 0.9143 infer_json_dir = '/shared/temp_code/infer_res/json' info_dir = '/shared/temp_code/infer_res/ct_info' # dcm_dir = self.cfg.testing['test_tmp_dir'] # nii_dir = self.cfg.testing['test_nii_dir'] for i, (inputs, patch_idx, n_zyx, coords, img_size) in tqdm(enumerate(self.test_dataset)): img_z, img_y, img_x = img_size # pid = self.test_dataset.sample_bboxes[i].get_field("filename") pid = self.test_dataset.data_nii_gz[i] pid = pid.split('/')[-1].replace('.nii.gz', '') save_dir = os.path.join(self.cfg.testing['test_tmp_rpns'], pid) if os.path.exists(os.path.join(save_dir, 'done.json')): print(f'{pid} already done') continue self.log_fun('[{}] Predicting {}, inputs_shape: {} '.format(i, pid, inputs.shape)) sub_dataset = self.sub_reader(inputs, patch_idx, n_zyx, coords) kwargs = {'num_workers': self.cfg.num_workers, 'pin_memory': True} sub_loader = DataLoader(sub_dataset, batch_size=self.cfg.batch_size, **kwargs) for sub_i, (sub_inputs, sub_idx, sub_zyx, sub_coord) in enumerate(sub_loader): sub_idx = sub_idx sub_zyx = sub_zyx sub_inputs = sub_inputs # num of volumes per run n_per_run = 1 # self.log_fun('length of input: {}'.format(len(sub_inputs))) split_list = list(range(0, len(sub_inputs) + 1, n_per_run)) # self.cfg.training['batch_size'] if split_list[-1] != len(sub_inputs): split_list.append(len(sub_inputs)) output_list = [] for i in range(len(split_list) - 1): s_in = sub_inputs[split_list[i]: split_list[i + 1], :, :, :, :] coord_in = sub_coord[split_list[i]: split_list[i + 1], :, :, :, :] with torch.no_grad(): s_in = s_in.cuda() coord_in = coord_in.cuda() rpns = self.model.forward_test(s_in, coord_in) output_list.append(rpns.cpu().numpy()) ####write temp results if len(output_list): save_dir = os.path.join(self.cfg.testing['test_tmp_rpns'], pid) if not os.path.exists(save_dir): os.makedirs(save_dir) np.save(os.path.join(save_dir, '{}_zyx.npy'.format(pid)), sub_zyx[0].numpy()) for pi, patch_rpns in enumerate(output_list): np.save(os.path.join(save_dir, '{}_rpns_{}_{}_{}.npy'.format(pid, self.cfg.rank, sub_i, pi)), patch_rpns) np.save(os.path.join(save_dir, '{}_idx_{}_{}_{}.npy'.format(pid, self.cfg.rank, sub_i, pi)), sub_idx[pi].numpy()) cut_x, cut_y, cut_z = x_min_ratio*img_x/(x_max_ratio-x_min_ratio), y_min_ratio*img_y/(y_max_ratio-y_min_ratio), z_min_ratio*img_z/(z_max_ratio-z_min_ratio) root = os.path.join(self.cfg.testing['test_tmp_rpns'], pid) rpns_path = [path for path in os.listdir(root) if 'rpns' in path] output_w = [] output_p = [] for r_path in rpns_path: i_path = r_path.replace('rpns', 'idx') rpn_info = np.load(os.path.join(root, r_path)) idx_info = np.load(os.path.join(root, i_path)) output_rpns = self.split_combine.new_combine(rpn_info, idx_info) output_w.append(output_rpns[:, 2:]) output_p.append(output_rpns[:,1].reshape(-1, 1)) output_w = torch.cat(output_w, 0) output_p = torch.cat(output_p, 0) final_rpns = self.test_nms(output_w.cuda(), 
output_p.unsqueeze(0).cuda()) final_rpns = final_rpns[:, 1:].cpu().detach().numpy() infer_json_path = os.path.join(infer_json_dir, pid+'.json') if not os.path.exists(infer_json_path): print('not find', infer_json_path) continue with open(infer_json_path, 'r+') as f: json_data = json.load(f) out_data = copy.deepcopy(json_data) out_data['annotationSessions'] = [{ "annotatorName": "Shanghai lnstitute of Computing Technology", "annotatorOrg": "RJ", "sessionType": "ANNOTATION", "mark": "NORMAL", "annotationSet": [] }] rpns = final_rpns rpns = rpns[rpns[:, 0] >= 0.53] ct_info = pd.read_csv(os.path.join(info_dir, pid+'_info.csv')) origin_x, origin_y, origin_z = ct_info['origin_x'][0], ct_info['origin_y'][0], ct_info['origin_z'][0] new_spacing_x, new_spacing_y, new_spacing_z = 0.5, 0.5, 0.5 if rpns.shape[0]: for i in range(rpns.shape[0]): result_item = { "labelProperties": [ { "labelID": 1, "propertyID": 2 }, { "labelID": 2, "propertyID": 2 }], "shapeType": "Rectangle3D", } x1 = rpns[i,3] - rpns[i,6]/2 y1 = rpns[i,2] - rpns[i,5]/2 z1 = rpns[i,1] - rpns[i,4]/2 x2 = rpns[i,3] + rpns[i,6]/2 y2 = rpns[i,2] + rpns[i,5]/2 z2 = rpns[i,1] + rpns[i,4]/2 x1 = min(max(0, x1), img_x-1) x2 = min(max(0, x2), img_x-1) y1 = min(max(0, y1), img_y-1) y2 = min(max(0, y2), img_y-1) z1 = min(max(0, z1), img_z-1) z2 = min(max(0, z2), img_z-1) coord = [ { "x": origin_x + (x1+cut_x)*new_spacing_x, "y": origin_y + (y1+cut_y)*new_spacing_y, "z": origin_z + (z1+cut_z)*new_spacing_z }, { "x": origin_x + (x2+cut_x)*new_spacing_x, "y": origin_y + (y2+cut_y)*new_spacing_y, "z": origin_z + (z2+cut_z)*new_spacing_z } ] result_item["coordinates"] = coord # if i < 5: # print(coord, float(rpns[i,0])) out_data["annotationSessions"][0]["annotationSet"].append(result_item) with open(os.path.join(self.cfg.testing['saver_pred_details'], pid+'.json'), 'w+') as fp: json.dump(out_data, fp, indent=4) with open(os.path.join(save_dir, 'done.json'), 'w+') as f: json.dump({'done': True}, f) def test_nms(self, window, logits): from BaseDetector.layers.nms_3d import nms_3d from NoduleDetector.modeling.util import convert_xyxyzz, convert_zyxdhw b = 0 device = logits.device # logits = torch.sigmoid(logits) ps = logits[b, :, 0].reshape(-1, 1) p, index = ps.squeeze().topk(min(self.cfg.testing['test_pre_nms_top_k'], ps.shape[0]), dim=0, sorted=True) w = torch.index_select(window, 0, index) box = convert_xyxyzz(w) # print(f'window shape: {window.shape}') # print(f'bbox shape: {box.shape}') keep = nms_3d(box.float(), p.float(), self.cfg.testing['test_nms_overlap_threshold']) res_box = torch.index_select(box, 0, keep) res_p = torch.index_select(p, 0, keep) res_box = convert_zyxdhw(res_box) res_p = torch.unsqueeze(res_p, 1) b_tensor = torch.full((res_box.size()[0], 1), b, device=device) # print(b_tensor.type(),res_p.type(),res_box.type()) prop = torch.cat((b_tensor.half(), res_p.half(), res_box.half()), dim=1) # prop = torch.cat((b_tensor, res_p, res_box), dim=1) return Variable(prop) def deploy(self): self.model.eval() initial_checkpoint = self.cfg.testing['weight'] self.log_fun('[Loading model from %s]' % initial_checkpoint) checkpoint = torch.load(initial_checkpoint) self.model.load_state_dict(checkpoint['state_dict']) set_requires_grad(self.model) self.test_dataset = self.DataReader(self.cfg, mode='test') input_data = self.test_dataset.__getitem__(0)[1] input_data = np.expand_dims(input_data, 0) input_data = np.expand_dims(input_data, 0) self.log_fun("input_data shape", input_data.shape) input_data = torch.from_numpy(input_data).cuda() # pth to 
torchscript input_data = input_data.half() self.model = self.model.half() model_path = self.cfg.deploy['torchscript_save_path'] traced_model = torch.jit.trace(self.model, (input_data)) # print(traced_model.graph) traced_model.save(model_path) def torchscript_test(self): i = 0 self.test_dataset = self.DataReader(self.cfg, mode='test') torch_input_data, torchscript_input = self.test_dataset.__getitem__(i) pid = self.test_dataset.sample_bboxes[i].get_field("filename") pid = pid.split('/')[-1].replace('.nii.gz', '') self.log_fun("input_data shape", torchscript_input.shape) res_dir = self.cfg.deploy['torchscript_compare_res_dir'] initial_checkpoint = self.cfg.testing['weight'] self.log_fun('[Loading model from %s]' % initial_checkpoint) checkpoint = torch.load(initial_checkpoint) self.model.load_state_dict(checkpoint['state_dict']) self.model.set_mode('test') self.model.use_rcnn = self.cfg.testing['use_rcnn'] with torch.no_grad(): input_data = torch_input_data.cuda().unsqueeze(0) torch_res = self.model.forward(input_data, []) torch_res = torch_res.cpu().numpy() torch_res = torch_res[:, 1:] np.save(os.path.join(res_dir, '%s_torch_res.npy' % (pid)), torch_res) self.npy2csv('torch_res', res_dir) input_data = np.expand_dims(torchscript_input, 0) input_data = np.expand_dims(input_data, 0) input_data = torch.from_numpy(input_data).cuda() model_path = self.cfg.deploy['torchscript_save_path'] self.log_fun('[Loading torchscript from %s]' % model_path) torchscript_model = torch.jit.load(model_path) torchscript_res = torchscript_model(input_data) torchscript_res = torchscript_res.cpu().numpy() torchscript_res = torchscript_res[:, 1:] np.save(os.path.join(res_dir, '%s_torchscript_res.npy' % (pid)), torchscript_res) self.npy2csv_fpn('torchscript_res', res_dir) def npy2csv(self, name, save_dir): bbox_border = self.cfg.data['data_process']['bbox_border'] res = [] for bbox in self.test_dataset.sample_bboxes: pid = bbox.get_field("filename").split('/')[-1].replace('.nii.gz', '') if os.path.exists(os.path.join(save_dir, '%s_' % (pid) + name + '.npy')): bboxs = np.load(os.path.join(save_dir, '%s_' % (pid) + name + '.npy')) bboxs[:, 4] = bboxs[:, 4] - bbox_border bboxs[:, 5] = bboxs[:, 5] - bbox_border bboxs[:, 6] = bboxs[:, 6] - bbox_border bboxs = bboxs[:, [3, 2, 1, 6, 5, 4, 0]] names = np.array([[pid]] * len(bboxs)) res.append(np.concatenate([names, bboxs], axis=1)) res = np.concatenate(res, axis=0) col_names = ['uid', 'x_px', 'y_px', 'z_px', 'diameter_x_px', 'diameter_y_px', 'diameter_z_px', 'probability'] submission_path = os.path.join(save_dir, 'submission_' + name + '.csv') df = pd.DataFrame(res, columns=col_names) df.to_csv(submission_path, index=False) def npy2csv_fpn(self, name, save_dir): bbox_border = self.cfg.data['data_process']['bbox_border'] res = [] for bbox in self.test_dataset.sample_bboxes: pid = bbox.get_field("filename").split('/')[-2].replace('.nii.gz', '') for i in range(3): if os.path.exists(os.path.join(save_dir, '%s_' % (pid) + name + '_' + str(i) + '.npy')): bboxs = np.load(os.path.join(save_dir, '%s_' % (pid) + name + '_' + str(i) + '.npy')) bboxs[:, 4] = bboxs[:, 4] - bbox_border bboxs[:, 5] = bboxs[:, 5] - bbox_border bboxs[:, 6] = bboxs[:, 6] - bbox_border bboxs = bboxs[:, [3, 2, 1, 6, 5, 4, 0]] names = np.array([[pid]] * len(bboxs)) features = np.array([[i]] * len(bboxs)) res.append(np.concatenate([names, bboxs, features], axis=1)) res = np.concatenate(res, axis=0) col_names = ['uid', 'x_px', 'y_px', 'z_px', 'diameter_x_px', 'diameter_y_px', 'diameter_z_px', 'probability', 
                     'feature']
        submission_path = os.path.join(save_dir, 'submission_' + name + '.csv')
        df = pd.DataFrame(res, columns=col_names)
        df.to_csv(submission_path, index=False)

    def lr_shedule(self, epoch, batch_idx):
        warmup = self.cfg.training['sheduler']['warmup']
        total = self.cfg.epochs
        if epoch < warmup:
            epoch += float(batch_idx + 1) / len(self.train_loader)
            lr_adj = 1. / self.cfg.world_size * (epoch * (self.cfg.world_size - 1) / warmup + 1)
        elif epoch <= total * 0.4:
            lr_adj = 1.
        elif epoch <= total * 1.0:
            lr_adj = 0.1
        else:
            lr_adj = 0.01

        for param_group in self.optimizer.param_groups:
            param_group['lr'] = self.cfg.lr * self.cfg.world_size * lr_adj

    def num_neg_shedule(self, epoch):
        total = self.cfg.epochs
        if epoch <= total * 0.05:
            self.model.rpn.num_neg = 300
        elif epoch <= total * 0.1:
            self.model.rpn.num_neg = 600
        elif epoch <= total * 0.2:
            self.model.rpn.num_neg = 800
        else:
            self.model.rpn.num_neg = 1000

    def rand_crop_ratio_shedule(self, epoch):
        def _set_dataset_r_rand(fraction):
            self.train_loader.dataset.r_rand = fraction
            self.train_loader.sampler.update_dataset(self.train_loader.dataset)
            if self.cfg.rank == 0:
                print('[RAND_CROP_RATIO_SHEDULE: length of train loader %d, length of valid loader %d]' % (
                    len(self.train_loader), len(self.val_loader)))

        total = self.cfg.epochs
        if epoch <= total * 0.05:
            pass
        elif epoch <= total * 0.1:
            _set_dataset_r_rand(0.3)
        elif epoch <= total * 0.2:
            _set_dataset_r_rand(0.4)
        else:
            _set_dataset_r_rand(0.5)


def set_requires_grad(nets, requires_grad=False):
    """Set requires_grad=False for all the networks to avoid unnecessary computations

    Parameters:
        nets (network list)   -- a list of networks
        requires_grad (bool)  -- whether the networks require gradients or not
    """
    if not isinstance(nets, list):
        nets = [nets]
    for net in nets:
        if net is not None:
            for param in net.parameters():
                param.requires_grad = requires_grad
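

# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original entry points): a minimal
# example of how the NoduleDetection3D engine above is typically driven.
# `build_config()` is a hypothetical placeholder for however the surrounding
# project constructs its config object; the real pipeline wires this up
# elsewhere, so the snippet is left commented out.
# ---------------------------------------------------------------------------
# if __name__ == '__main__':
#     cfg = build_config()                      # hypothetical config loader
#     detector = NoduleDetection3D(cfg, mode='train' if cfg.train else 'test')
#     if cfg.train:
#         detector.do_train()       # training loop with per-epoch validation
#     elif cfg.test:
#         detector.do_test()        # sliding-window inference + JSON export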