import copy
import math
import os
import random
import sys
import traceback
import warnings

# import horovod.torch as hvd
import numpy as np
import pandas as pd
import torch
import torch.backends.cudnn as cudnn
import torch.distributed as dist
from apex import amp
from tensorboardX import SummaryWriter
from torch.autograd import Variable
from torch.utils.data import DataLoader
from tqdm import tqdm

from ..data import BboxReader
from ..data.collate import train_collate
from ..modeling.detector.centernet import CenterNet
from ..modeling.detector.nodulenet import NoduleNet

warnings.filterwarnings("ignore")

this_module = sys.modules[__name__]

# Set seed
SEED = 35202
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)


class DistributedSampler_new(torch.utils.data.distributed.DistributedSampler):
    def update_dataset(self, dataset):
        """Re-point the sampler at a (possibly resized) dataset and recompute the split."""
        self.dataset = dataset
        self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas))
        self.total_size = self.num_samples * self.num_replicas


# Horovod: average metrics from distributed training.
class Metric(object):
    def __init__(self, name):
        self.name = name
        self.sum = torch.tensor(0.)
        self.n = torch.tensor(0.)

    def update(self, val):
        # When launched with torch.distributed (RANK set in the environment),
        # gather the value from every rank before accumulating.
        if os.environ.get('RANK') is not None:
            output_tensors = [val.clone() for _ in range(dist.get_world_size())]
            dist.all_gather(output_tensors, val)
            self.sum += torch.sum(torch.stack(output_tensors)).cpu().detach()
        else:
            self.sum += val.cpu().detach()
        self.n += 1

    @property
    def avg(self):
        return self.sum / self.n


def save_checkpoint(epoch, model, optimizer, model_out_dir):
    state_dict = model.state_dict()
    for key in state_dict.keys():
        state_dict[key] = state_dict[key].cpu()
    torch.save({
        'epoch': epoch,
        'out_dir': model_out_dir,
        'state_dict': state_dict,
        'optimizer': optimizer.state_dict()}, model_out_dir)
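
# A minimal counterpart sketch of restoring a checkpoint written by
# save_checkpoint() above. This helper is illustrative only and not part of
# the original pipeline; its name and return convention are assumptions.
def load_checkpoint(model, optimizer, model_out_dir):
    checkpoint = torch.load(model_out_dir, map_location='cpu')
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    # Resume training from the epoch after the saved one.
    return checkpoint['epoch'] + 1
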
class BaseDetection3D(object):
    def __init__(self, cfg, mode='train', log_fun=None):
        """3D detector initialization.

        Arguments:
            cfg {dict} -- config dict with hyper-parameters
            mode {str} -- 'train', 'valid' or 'test'
            log_fun {callable} -- logging function; defaults to print

        Raises:
            ValueError: if the configured meta-architecture is not implemented
        """
        # Init network
        if cfg.model['meta_architecture'] == 'NoduleNet':
            self.model = NoduleNet(cfg, mode).cuda()
        elif cfg.model['meta_architecture'] == 'CenterNet':
            self.model = CenterNet(cfg, mode).cuda()
        else:
            raise ValueError('Detector architecture %s is not implemented.'
                             % cfg.model['meta_architecture'])
        self.cfg = cfg
        self.DataReader = BboxReader
        self.DistributedSampler = DistributedSampler_new
        if log_fun is None:
            self.log_fun = print
        else:
            self.log_fun = log_fun
        # cudnn.benchmark=True spends a little extra time at startup searching for
        # the fastest convolution algorithm for each layer, which speeds things up
        # when the network structure and input shapes (batch size, image size,
        # channels) stay fixed. If shapes keep changing, the repeated searches cost
        # more than they save, so it is left off here.
        cudnn.benchmark = False

    def _create_dataloader(self):
        self.train_dataset = self.DataReader(self.cfg, mode='train', log_fun=self.log_fun)
        self.val_dataset = self.DataReader(self.cfg, mode='val', log_fun=self.log_fun)
        # Horovod: limit # of CPU threads to be used per worker.
        torch.set_num_threads(4)
        kwargs = {'num_workers': self.cfg.num_workers, 'pin_memory': True}
        # When supported, use 'forkserver' to spawn dataloader workers instead of 'fork' to prevent
        # issues with Infiniband implementations that are not fork-safe
        # if (kwargs.get('num_workers', 0) > 0 and hasattr(mp, '_supports_context') and
        #         mp._supports_context and 'forkserver' in mp.get_all_start_methods()):
        #     kwargs['multiprocessing_context'] = 'forkserver'

        # Partition dataset among workers using DistributedSampler
        self.train_sampler = self.DistributedSampler(self.train_dataset,
                                                     num_replicas=self.cfg.world_size,
                                                     rank=self.cfg.rank)
        self.val_sampler = self.DistributedSampler(self.val_dataset,
                                                   num_replicas=self.cfg.world_size,
                                                   rank=self.cfg.rank)
        self.train_loader = DataLoader(self.train_dataset, batch_size=self.cfg.batch_size,
                                       collate_fn=train_collate, sampler=self.train_sampler, **kwargs)
        self.val_loader = DataLoader(self.val_dataset, batch_size=self.cfg.batch_size,
                                     collate_fn=train_collate, sampler=self.val_sampler, **kwargs)
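
    # A minimal worked example (hypothetical sizes) of what the sampler split
    # above computes:
    #
    #   num_samples = int(math.ceil(106 / 4))   # -> 27 indices per rank
    #   total_size  = 27 * 4                    # -> 108, so 2 indices repeat
    #
    # This is also why DistributedSampler_new.update_dataset() must recompute
    # both fields whenever balanced sampling resizes the dataset mid-training.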
    def _create_optimizer(self):
        optimizer_cls = getattr(torch.optim, self.cfg.training['solver']['optimizer'])
        # Horovod: lr = base_lr * hvd.size
        # special_lr_layers = ['feature_net.back3.1.conv1.conv_offset.weight',
        #                      'feature_net.back3.1.conv1.conv_offset.bias',
        #                      'feature_net.back3.1.conv1.conv_mask.weight',
        #                      'feature_net.back3.1.conv1.conv_mask.bias',
        #                      'feature_net.back3.1.conv2.conv_offset.weight',
        #                      'feature_net.back3.1.conv2.conv_offset.bias',
        #                      'feature_net.back3.1.conv2.conv_mask.weight',
        #                      'feature_net.back3.1.conv2.conv_mask.bias']
        # special_params = list(map(lambda x: x[1], filter(lambda kv: kv[0] in special_lr_layers, self.model.named_parameters())))
        # base_params = list(map(lambda x: x[1], filter(lambda kv: kv[0] not in special_lr_layers, self.model.named_parameters())))
        # self.optimizer = optimizer_cls([{"params": base_params},
        #                                 {"params": special_params, "lr": self.cfg.TRAINING.SOLVER.BASE_LR * hvd.size() * 0.05}],
        #                                lr=self.cfg.TRAINING.SOLVER.BASE_LR * hvd.size(),
        #                                weight_decay=self.cfg.TRAINING.SOLVER.WRIGHT_DECAY,
        #                                momentum=self.cfg.TRAINING.SOLVER.MOMENTUM)
        if self.cfg.training['solver']['optimizer'] == 'Adam':
            self.optimizer = optimizer_cls(self.model.parameters(),
                                           lr=self.cfg.lr * self.cfg.world_size,
                                           weight_decay=self.cfg.training['solver']['wright_decay'])
        elif self.cfg.training['solver']['optimizer'] == 'SGD':
            self.optimizer = optimizer_cls(self.model.parameters(),
                                           lr=self.cfg.lr * self.cfg.world_size,
                                           weight_decay=self.cfg.training['solver']['wright_decay'],
                                           momentum=self.cfg.training['solver']['momentum'])
        else:
            raise ValueError("Optimizer %s is not implemented."
                             % self.cfg.training['solver']['optimizer'])

        # Horovod: (optional) compression algorithm.
        # compression = hvd.Compression.fp16 if self.cfg.training['fp16_allreduce'] else hvd.Compression.none
        # Horovod: Add Horovod Distributed Optimizer
        # self.optimizer = hvd.DistributedOptimizer(self.optimizer, named_parameters=self.model.named_parameters(),
        #                                           compression=compression)
        # self.optimizer._requires_update = set()

        # Horovod: load model only for rank 0
        self.resume_from_epoch = 0
        if self.cfg.pretrain_msg and self.cfg.rank == 0:
            print('[Loading model from %s]' % self.cfg.pretrain_msg)
            checkpoint = torch.load(self.cfg.pretrain_msg)
            ckpt_dict = checkpoint['state_dict']
            model_dict = self.model.state_dict()
            # Remap legacy checkpoint keys to the current module names. Note the
            # box head ('rcnn_head.' -> 'box_head.') deliberately keeps the
            # freshly initialised weights instead of the checkpoint values.
            new_state_dict = {}
            for k, v in ckpt_dict.items():
                if 'rpn.' in k:
                    new_k = k.replace('rpn.', 'rpn_head.')
                    new_state_dict[new_k] = v
                elif 'rcnn_head.' in k:
                    new_k = k.replace('rcnn_head.', 'box_head.')
                    new_state_dict[new_k] = model_dict[new_k]
                else:
                    new_state_dict[k] = v
            self.model.load_state_dict(new_state_dict)
            self.model_init = copy.deepcopy(self.model)
            print('Load success')
            # self.optimizer.load_state_dict(checkpoint['optimizer'])
            # self.resume_from_epoch = checkpoint['epoch'] + 1

        # Horovod: broadcast resume_from_epoch from rank 0 (which will have
        # checkpoints) to other ranks.
        # self.resume_from_epoch = hvd.broadcast(torch.tensor(self.resume_from_epoch), root_rank=0,
        #                                        name='resume_from_epoch').item()
        # Horovod: Broadcast parameters from rank 0 to all other processes.
        # hvd.broadcast_parameters(self.model.state_dict(), root_rank=0)
        # hvd.broadcast_optimizer_state(self.optimizer, root_rank=0)

        if self.cfg.training['amp']:
            # Apex
            amp.register_float_function(torch, 'sigmoid')
            self.model, self.optimizer = amp.initialize(self.model, self.optimizer,
                                                        opt_level="O1")  # keep_batchnorm_fp32=True

        # Horovod: print output only on first rank.
        if self.cfg.rank == 0:
            # model_out_dir is needed by save_checkpoint() in do_train().
            self.model_out_dir = os.path.join(self.cfg.training['saver']['saver_dir'], 'checkpoints')
            tb_out_dir = os.path.join(self.cfg.training['saver']['saver_root'], 'tensorboards')
            if not os.path.exists(self.model_out_dir):
                os.makedirs(self.model_out_dir)
            if not os.path.exists(tb_out_dir):
                os.makedirs(tb_out_dir)
            self.log_fun('[start_epoch %d, out_dir %s]'
                         % (self.resume_from_epoch, self.cfg.training['saver']['saver_root']))
            self.log_fun('[length of train loader %d, length of valid loader %d]'
                         % (len(self.train_loader), len(self.val_loader)))

        # Write graph to tensorboard for visualization
        self.writer = SummaryWriter(tb_out_dir) if self.cfg.rank == 0 else None
        self.train_writer = SummaryWriter(os.path.join(tb_out_dir, 'train')) if self.cfg.rank == 0 else None
        self.val_writer = SummaryWriter(os.path.join(tb_out_dir, 'val')) if self.cfg.rank == 0 else None
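
    # Illustration of the checkpoint key remapping performed above (module
    # names are from this repo; the mappings restate the loop's behavior):
    #
    #   'rpn.conv.weight'      -> loaded into 'rpn_head.conv.weight'
    #   'rcnn_head.fc.weight'  -> renamed to 'box_head.fc.weight' but filled
    #                             from the freshly initialised model, i.e. the
    #                             box head is NOT restored from the file
    #   'feature_net.*'        -> loaded unchanged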
    def do_train(self):
        self.model.loss.cuda()
        self._create_dataloader()
        self._create_optimizer()
        verbose = 1 if self.cfg.rank == 0 else 0
        for i in range(self.resume_from_epoch, self.cfg.epochs):
            if self.cfg.training['sheduler']['num_neg_shedule']:
                self.num_neg_shedule(i)
            if self.cfg.training['sheduler']['rand_crop_ratio_shedule']:
                self.rand_crop_ratio_shedule(i)
            if i >= self.cfg.training['sheduler']['switch_roi_epoch']:
                self.model.use_rcnn = True
                self.model.loss.use_rcnn = True
            if self.cfg.data['data_loader']['balanced_sampling']:
                self.train_loader.dataset.list_IDs = self.train_loader.dataset._get_list_ids()
                self.train_loader.sampler.update_dataset(self.train_loader.dataset)
            if i == self.cfg.training['sheduler']['switch_balanced_sampling_epoch']:
                self.cfg.data['data_loader']['balanced_sampling'] = True
                self.train_loader.dataset.balanced_sampling_flag = True
                self.train_loader.dataset.list_IDs = self.train_loader.dataset._get_list_ids()
                self.train_loader.sampler.update_dataset(self.train_loader.dataset)
                if self.cfg.rank == 0:
                    print('[length of train loader %d, length of valid loader %d]'
                          % (len(self.train_loader), len(self.val_loader)))

            self.train(i, verbose)
            self.validate(i, verbose)

            # Save every `saver_frequency` epochs (the original call passed
            # saver_frequency as an extra positional argument, which did not
            # match save_checkpoint's signature).
            if self.cfg.rank == 0 and (i + 1) % self.cfg.training['saver']['saver_frequency'] == 0:
                save_checkpoint(i, self.model, self.optimizer,
                                os.path.join(self.model_out_dir, '%03d.ckpt' % i))

        if self.writer:
            self.writer.close()
        if self.train_writer:
            self.train_writer.close()
        if self.val_writer:
            self.val_writer.close()
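
    # Timeline sketch of the epoch-driven switches in do_train() (assuming
    # epochs=100, switch_roi_epoch=40, switch_balanced_sampling_epoch=60;
    # these numbers are illustrative, not from any real config):
    #
    #   epoch  0-39 : RPN-only training, plain sampling
    #   epoch 40-59 : use_rcnn=True, so the RCNN head and its losses join in
    #   epoch 60-99 : balanced sampling on; the sampler is re-partitioned via
    #                 update_dataset() at the start of every epoch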
    def train(self, epoch, verbose):
        self.train_sampler.set_epoch(epoch)
        self.model.set_mode('train')
        rpn_cls_loss = Metric('rpn_cls_loss')
        rpn_reg_loss = Metric('rpn_reg_loss')
        rcnn_cls_loss = Metric('rcnn_cls_loss')
        rcnn_reg_loss = Metric('rcnn_reg_loss')
        total_loss = Metric('train_loss')
        rpn_stats = []
        rcnn_stats = []

        with tqdm(total=len(self.train_loader), desc='Train Epoch #{}'.format(epoch),
                  disable=not verbose) as t:
            for j, (input_data, truth_bboxes) in enumerate(self.train_loader):
                input_data = Variable(input_data).cuda()
                if self.cfg.training['sheduler']['lr_shedule']:
                    self.lr_shedule(epoch, j)
                self.optimizer.zero_grad()
                rpn_logits, rpn_deltas, rpn_labels, rpn_label_weights, rpn_targets, rpn_targets_weights, \
                    rcnn_logits, rcnn_deltas, rcnn_labels, rcnn_targets = self.model(input_data, truth_bboxes)
                loss, [rpn_cls_loss_, rpn_reg_loss_, rcnn_cls_loss_, rcnn_reg_loss_], rpn_stat_, rcnn_stat_ = \
                    self.model.loss(rpn_logits, rpn_deltas, rpn_labels, rpn_label_weights,
                                    rpn_targets, rpn_targets_weights,
                                    rcnn_logits, rcnn_deltas, rcnn_labels, rcnn_targets)
                if self.cfg.training['amp']:
                    with amp.scale_loss(loss, self.optimizer) as scaled_loss:
                        scaled_loss.backward()
                    # The Horovod-specific synchronize()/skip_synchronize() calls
                    # only exist on hvd.DistributedOptimizer, which is disabled
                    # above; they are kept here as a reference.
                    # self.optimizer.synchronize()
                    # with self.optimizer.skip_synchronize():
                    #     torch.nn.utils.clip_grad_norm_(amp.master_params(self.optimizer), max_norm=1.0, norm_type=2)
                    self.optimizer.step()
                else:
                    loss.backward()
                    self.optimizer.step()
                rpn_cls_loss.update(rpn_cls_loss_)
                rpn_reg_loss.update(rpn_reg_loss_)
                rcnn_cls_loss.update(rcnn_cls_loss_)
                rcnn_reg_loss.update(rcnn_reg_loss_)
                total_loss.update(loss)
                rpn_stats.append(rpn_stat_)
                rcnn_stats.append(rcnn_stat_)
                t.set_postfix({'total_loss': total_loss.avg.item(),
                               'rpn_cls_loss': rpn_cls_loss.avg.item(),
                               'rpn_reg_loss': rpn_reg_loss.avg.item(),
                               'rcnn_cls_loss': rcnn_cls_loss.avg.item(),
                               'rcnn_reg_loss': rcnn_reg_loss.avg.item()})
                t.update(1)

        if self.train_writer:
            rpn_stats = np.asarray(rpn_stats, np.float32)
            print('Train Epoch %d, loss %f' % (epoch, total_loss.avg.item()))
            print('rpn_cls %f, rpn_reg %f, rcnn_cls %f, rcnn_reg %f' % (
                rpn_cls_loss.avg.item(), rpn_reg_loss.avg.item(),
                rcnn_cls_loss.avg.item(), rcnn_reg_loss.avg.item()))
            print('rpn_stats: tpr %f, tnr %f, total pos %d, total neg %d, '
                  'reg %.4f, %.4f, %.4f, %.4f, %.4f, %.4f' % (
                      100.0 * np.sum(rpn_stats[:, 0]) / np.sum(rpn_stats[:, 1]),
                      100.0 * np.sum(rpn_stats[:, 2]) / np.sum(rpn_stats[:, 3]),
                      np.sum(rpn_stats[:, 1]), np.sum(rpn_stats[:, 3]),
                      np.mean(rpn_stats[:, 4]), np.mean(rpn_stats[:, 5]),
                      np.mean(rpn_stats[:, 6]), np.mean(rpn_stats[:, 7]),
                      np.mean(rpn_stats[:, 8]), np.mean(rpn_stats[:, 9])))
            # Write to tensorboard
            self.train_writer.add_scalar('loss', total_loss.avg, epoch)
            self.train_writer.add_scalar('rpn_cls', rpn_cls_loss.avg, epoch)
            self.train_writer.add_scalar('rpn_reg', rpn_reg_loss.avg, epoch)
            self.train_writer.add_scalar('rcnn_cls', rcnn_cls_loss.avg, epoch)
            self.train_writer.add_scalar('rcnn_reg', rcnn_reg_loss.avg, epoch)
            self.train_writer.add_scalar('rpn_reg_z', np.mean(rpn_stats[:, 4]), epoch)
            self.train_writer.add_scalar('rpn_reg_y', np.mean(rpn_stats[:, 5]), epoch)
            self.train_writer.add_scalar('rpn_reg_x', np.mean(rpn_stats[:, 6]), epoch)
            self.train_writer.add_scalar('rpn_reg_d', np.mean(rpn_stats[:, 7]), epoch)
            self.train_writer.add_scalar('rpn_reg_h', np.mean(rpn_stats[:, 8]), epoch)
            self.train_writer.add_scalar('rpn_reg_w', np.mean(rpn_stats[:, 9]), epoch)
            if self.model.use_rcnn:
                rcnn_stats = np.asarray([stat[:-1] for stat in rcnn_stats], np.float32)
                print('rcnn_stats: reg %.4f, %.4f, %.4f, %.4f, %.4f, %.4f' % (
                    np.mean(rcnn_stats[:, 0]), np.mean(rcnn_stats[:, 1]),
                    np.mean(rcnn_stats[:, 2]), np.mean(rcnn_stats[:, 3]),
                    np.mean(rcnn_stats[:, 4]), np.mean(rcnn_stats[:, 5])))
                self.train_writer.add_scalar('rcnn_reg_z', np.mean(rcnn_stats[:, 0]), epoch)
                self.train_writer.add_scalar('rcnn_reg_y', np.mean(rcnn_stats[:, 1]), epoch)
                self.train_writer.add_scalar('rcnn_reg_x', np.mean(rcnn_stats[:, 2]), epoch)
                self.train_writer.add_scalar('rcnn_reg_d', np.mean(rcnn_stats[:, 3]), epoch)
                self.train_writer.add_scalar('rcnn_reg_h', np.mean(rcnn_stats[:, 4]), epoch)
                self.train_writer.add_scalar('rcnn_reg_w', np.mean(rcnn_stats[:, 5]), epoch)
        torch.cuda.empty_cache()

    def validate(self, epoch, verbose):
        # self.val_sampler.set_epoch(epoch)
        self.model.set_mode('valid')
        rpn_cls_loss = Metric('rpn_cls_loss')
        rpn_reg_loss = Metric('rpn_reg_loss')
        rcnn_cls_loss = Metric('rcnn_cls_loss')
        rcnn_reg_loss = Metric('rcnn_reg_loss')
        total_loss = Metric('val_loss')
        rpn_stats = []
        rcnn_stats = []

        with tqdm(total=len(self.val_loader), desc='Validate Epoch #{}'.format(epoch),
                  disable=not verbose) as t:
            for j, (input_data, truth_bboxes) in enumerate(self.val_loader):
                with torch.no_grad():
                    input_data = Variable(input_data).cuda()
                    rpn_logits, rpn_deltas, rpn_labels, rpn_label_weights, rpn_targets, rpn_targets_weights, \
                        rcnn_logits, rcnn_deltas, rcnn_labels, rcnn_targets = self.model(input_data, truth_bboxes)
                    loss, [rpn_cls_loss_, rpn_reg_loss_, rcnn_cls_loss_, rcnn_reg_loss_], rpn_stat_, rcnn_stat_ = \
                        self.model.loss(rpn_logits, rpn_deltas, rpn_labels, rpn_label_weights,
                                        rpn_targets, rpn_targets_weights,
                                        rcnn_logits, rcnn_deltas, rcnn_labels, rcnn_targets)
                rpn_cls_loss.update(rpn_cls_loss_)
                # if torch.is_nonzero(rpn_reg_loss_):
                rpn_reg_loss.update(rpn_reg_loss_)
                rcnn_cls_loss.update(rcnn_cls_loss_)
                rcnn_reg_loss.update(rcnn_reg_loss_)
                total_loss.update(loss)
                rpn_stats.append(rpn_stat_)
                rcnn_stats.append(rcnn_stat_)
                t.update(1)

        if self.val_writer:
            rpn_stats = np.asarray(rpn_stats, np.float32)
            print('Val Epoch %d, loss %f' % (epoch, total_loss.avg.item()))
            print('rpn_cls %f, rpn_reg %f, rcnn_cls %f, rcnn_reg %f' % (
                rpn_cls_loss.avg.item(), rpn_reg_loss.avg.item(),
                rcnn_cls_loss.avg.item(), rcnn_reg_loss.avg.item()))
            print('rpn_stats: tpr %f, tnr %f, total pos %d, total neg %d, '
                  'reg %.4f, %.4f, %.4f, %.4f, %.4f, %.4f' % (
                      100.0 * np.sum(rpn_stats[:, 0]) / np.sum(rpn_stats[:, 1]),
                      100.0 * np.sum(rpn_stats[:, 2]) / np.sum(rpn_stats[:, 3]),
                      np.sum(rpn_stats[:, 1]), np.sum(rpn_stats[:, 3]),
                      np.mean(rpn_stats[:, 4]), np.mean(rpn_stats[:, 5]),
                      np.mean(rpn_stats[:, 6]), np.mean(rpn_stats[:, 7]),
                      np.mean(rpn_stats[:, 8]), np.mean(rpn_stats[:, 9])))
            # Write to tensorboard
            self.val_writer.add_scalar('loss', total_loss.avg, epoch)
            self.val_writer.add_scalar('rpn_cls', rpn_cls_loss.avg, epoch)
            self.val_writer.add_scalar('rpn_reg', rpn_reg_loss.avg, epoch)
            self.val_writer.add_scalar('rcnn_cls', rcnn_cls_loss.avg, epoch)
            self.val_writer.add_scalar('rcnn_reg', rcnn_reg_loss.avg, epoch)
            self.val_writer.add_scalar('rpn_reg_z', np.mean(rpn_stats[:, 4]), epoch)
            self.val_writer.add_scalar('rpn_reg_y', np.mean(rpn_stats[:, 5]), epoch)
            self.val_writer.add_scalar('rpn_reg_x', np.mean(rpn_stats[:, 6]), epoch)
            self.val_writer.add_scalar('rpn_reg_d', np.mean(rpn_stats[:, 7]), epoch)
            self.val_writer.add_scalar('rpn_reg_h', np.mean(rpn_stats[:, 8]), epoch)
            self.val_writer.add_scalar('rpn_reg_w', np.mean(rpn_stats[:, 9]), epoch)
            if self.model.use_rcnn:
                rcnn_stats = np.asarray([stat[:-1] for stat in rcnn_stats], np.float32)
                print('rcnn_stats: reg %.4f, %.4f, %.4f, %.4f, %.4f, %.4f' % (
                    np.mean(rcnn_stats[:, 0]), np.mean(rcnn_stats[:, 1]),
                    np.mean(rcnn_stats[:, 2]), np.mean(rcnn_stats[:, 3]),
                    np.mean(rcnn_stats[:, 4]), np.mean(rcnn_stats[:, 5])))
                self.val_writer.add_scalar('rcnn_reg_z', np.mean(rcnn_stats[:, 0]), epoch)
                self.val_writer.add_scalar('rcnn_reg_y', np.mean(rcnn_stats[:, 1]), epoch)
                self.val_writer.add_scalar('rcnn_reg_x', np.mean(rcnn_stats[:, 2]), epoch)
                self.val_writer.add_scalar('rcnn_reg_d', np.mean(rcnn_stats[:, 3]), epoch)
                self.val_writer.add_scalar('rcnn_reg_h', np.mean(rcnn_stats[:, 4]), epoch)
                self.val_writer.add_scalar('rcnn_reg_w', np.mean(rcnn_stats[:, 5]), epoch)
        torch.cuda.empty_cache()
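
    # Box layout note (inferred from the column reorder in npy2csv() below and
    # the rpn_reg_{z,y,x,d,h,w} tensorboard tags): each saved row appears to be
    # [prob, z, y, x, d_z, d_y, d_x] in pixels, which npy2csv() reorders to
    # [x, y, z, d_x, d_y, d_z, prob] for the CSV. A hypothetical row:
    #
    #   raw npy row: [0.93, 41.0, 120.5, 88.2, 6.0, 6.5, 6.5]
    #   csv row:     uid, 88.2, 120.5, 41.0, 6.5, 6.5, 6.0, 0.93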
    def do_test(self):
        initial_checkpoint = self.cfg.testing['weight']
        save_dir = self.cfg.testing['saver_dir']
        if initial_checkpoint:
            print('[Loading model from %s]' % initial_checkpoint)
            checkpoint = torch.load(initial_checkpoint)
            self.model.load_state_dict(checkpoint['state_dict'])
            epoch = checkpoint['epoch']
        else:
            print('No model weight file specified')
            return

        self.model.set_mode('test')
        self.model.use_rcnn = self.cfg.testing['use_rcnn']
        # model_path = self.cfg.DEPLOY.TORCHSCRIPT_SAVE_PATH
        # print('[Loading torchscript from %s]' % model_path)
        # torchscript_model = torch.jit.load(model_path)
        self.test_dataset = self.DataReader(self.cfg, mode='test')
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        res_dir = os.path.join(save_dir, str(epoch))
        if not os.path.exists(res_dir):
            os.makedirs(res_dir)

        print('Total # of eval data %d' % (len(self.test_dataset)))
        for i, (input_data, image) in enumerate(self.test_dataset):
            try:
                D, H, W = image.shape
                pid = self.test_dataset.sample_bboxes[i].get_field("filename")
                pid = pid.split('/')[-2].replace('.nii.gz', '')
                print('[%d] Predicting %s' % (i, pid), image.shape)
                with torch.no_grad():
                    input_data = input_data.cuda().unsqueeze(0)
                    if self.cfg.testing['use_rcnn']:
                        rpns, detections, ensembles = self.model.forward(input_data, [])
                    else:
                        rpns = self.model.forward(input_data, [])
                # Alternative torchscript path:
                # image = np.expand_dims(np.expand_dims(image, 0), 0)
                # image = torch.from_numpy(image).cuda()
                # rpns = torchscript_model(image.half())
                rpns = rpns.cpu().numpy()
                if len(rpns):
                    rpns = rpns[:, 1:]
                    np.save(os.path.join(res_dir, '%s_rpns.npy' % pid), rpns)
                if self.cfg.testing['use_rcnn']:
                    detections = detections.cpu().numpy()
                    ensembles = ensembles.cpu().numpy()
                    if len(detections):
                        detections = detections[:, 0:-1]
                        np.save(os.path.join(res_dir, '%s_rcnns.npy' % pid), detections)
                    if len(ensembles):
                        ensembles = ensembles[:, 1:]
                        np.save(os.path.join(res_dir, '%s_ensembles.npy' % pid), ensembles)
                # Clear gpu memory
                torch.cuda.empty_cache()
            except Exception:
                torch.cuda.empty_cache()
                traceback.print_exc()

        self.npy2csv('rpns', res_dir)
        if self.cfg.testing['use_rcnn']:
            self.npy2csv('rcnns', res_dir)
            self.npy2csv('ensembles', res_dir)

    def deploy(self):
        self.model.eval()
        initial_checkpoint = self.cfg.testing['weight']
        print('[Loading model from %s]' % initial_checkpoint)
        checkpoint = torch.load(initial_checkpoint)
        self.model.load_state_dict(checkpoint['state_dict'])
        set_requires_grad(self.model)

        self.test_dataset = self.DataReader(self.cfg, mode='test')
        input_data = self.test_dataset.__getitem__(0)[1]
        input_data = np.expand_dims(input_data, 0)
        input_data = np.expand_dims(input_data, 0)
        print("input_data shape", input_data.shape)
        # Crop to a fixed shape for tracing.
        input_data = torch.from_numpy(input_data[:, :, :228, :226, :234]).cuda()

        # pth to torchscript
        input_data = input_data.half()
        self.model = self.model.half()
        traced_model = torch.jit.trace(self.model, (input_data,))
        print(traced_model.graph)
        model_path = self.cfg.deploy['torchscript_save_path']
        os.makedirs(os.path.dirname(model_path), exist_ok=True)
        traced_model.save(model_path)

    def torchscript_test(self):
        i = 0
        self.test_dataset = self.DataReader(self.cfg, mode='test')
        torch_input_data, torchscript_input = self.test_dataset.__getitem__(i)
        pid = self.test_dataset.sample_bboxes[i].get_field("filename")
        pid = pid.split('/')[-1].replace('.nii.gz', '')
        print("input_data shape", torchscript_input.shape)
        res_dir = self.cfg.deploy['torchscript_compare_res_dir']
        os.makedirs(res_dir, exist_ok=True)

        initial_checkpoint = self.cfg.testing['weight']
        print('[Loading model from %s]' % initial_checkpoint)
        checkpoint = torch.load(initial_checkpoint)
        self.model.load_state_dict(checkpoint['state_dict'])
        self.model.set_mode('test')
        self.model.use_rcnn = self.cfg.testing['use_rcnn']
        with torch.no_grad():
            input_data = torch_input_data.cuda().unsqueeze(0)
            torch_res = self.model.forward(input_data, [])
        torch_res = torch_res.cpu().numpy()
        torch_res = torch_res[:, 1:]
        np.save(os.path.join(res_dir, '%s_torch_res.npy' % pid), torch_res)
        self.npy2csv('torch_res', res_dir)

        input_data = np.expand_dims(torchscript_input, 0)
        input_data = np.expand_dims(input_data, 0)
        input_data = torch.from_numpy(input_data).cuda()
        model_path = self.cfg.deploy['torchscript_save_path']
        print('[Loading torchscript from %s]' % model_path)
        torchscript_model = torch.jit.load(model_path)
        torchscript_res = torchscript_model(input_data)
        torchscript_res = torchscript_res.cpu().numpy()
        torchscript_res = torchscript_res[:, 1:]
        np.save(os.path.join(res_dir, '%s_torchscript_res.npy' % pid), torchscript_res)
        self.npy2csv('torchscript_res', res_dir)

    def npy2csv(self, name, save_dir):
        bbox_border = self.cfg.data['data_process']['bbox_border']
        res = []
        for bbox in self.test_dataset.sample_bboxes:
            pid = bbox.get_field("filename").split('/')[-2].replace('.nii.gz', '')
            npy_path = os.path.join(save_dir, '%s_%s.npy' % (pid, name))
            if os.path.exists(npy_path):
                bboxs = np.load(npy_path)
                # Remove the padding border from the three diameters.
                bboxs[:, 4] = bboxs[:, 4] - bbox_border
                bboxs[:, 5] = bboxs[:, 5] - bbox_border
                bboxs[:, 6] = bboxs[:, 6] - bbox_border
                # [p, z, y, x, d_z, d_y, d_x] -> [x, y, z, d_x, d_y, d_z, p]
                bboxs = bboxs[:, [3, 2, 1, 6, 5, 4, 0]]
                names = np.array([[pid]] * len(bboxs))
                res.append(np.concatenate([names, bboxs], axis=1))
        res = np.concatenate(res, axis=0)
        col_names = ['uid', 'x_px', 'y_px', 'z_px',
                     'diameter_x_px', 'diameter_y_px', 'diameter_z_px', 'probability']
        submission_path = os.path.join(save_dir, 'submission_' + name + '.csv')
        df = pd.DataFrame(res, columns=col_names)
        df.to_csv(submission_path, index=False)

    def lr_shedule(self, epoch, batch_idx):
        warmup = self.cfg.training['sheduler']['warmup']
        total = self.cfg.epochs
        # Linear warmup from lr/world_size to lr over `warmup` epochs, then a
        # step decay: x1 until 50% of epochs, x0.1 until 80%, x0.01 afterwards.
        if epoch < warmup:
            epoch += float(batch_idx + 1) / len(self.train_loader)
            lr_adj = 1. / self.cfg.world_size * (epoch * (self.cfg.world_size - 1) / warmup + 1)
        elif epoch <= total * 0.5:
            lr_adj = 1.
        elif epoch <= total * 0.8:
            lr_adj = 0.1
        else:
            lr_adj = 0.01
        for param_group in self.optimizer.param_groups:
            param_group['lr'] = self.cfg.lr * self.cfg.world_size * lr_adj

    def num_neg_shedule(self, epoch):
        total = self.cfg.epochs
        if epoch <= total * 0.2:
            self.model.rpn.num_neg = 800
        elif epoch <= total * 0.5:
            self.model.rpn.num_neg = 1600
        elif epoch <= total * 0.8:
            self.model.rpn.num_neg = 2000
        else:
            self.model.rpn.num_neg = 3000

    def rand_crop_ratio_shedule(self, epoch):
        def _set_dataset_r_rand(fraction):
            self.train_loader.dataset.r_rand = fraction
            self.train_loader.sampler.update_dataset(self.train_loader.dataset)
            if self.cfg.rank == 0:
                print('[RAND_CROP_RATIO_SHEDULE: length of train loader %d, length of valid loader %d]'
                      % (len(self.train_loader), len(self.val_loader)))

        total = self.cfg.epochs
        if epoch <= total * 0.05:
            pass
        elif epoch <= total * 0.1:
            _set_dataset_r_rand(0.3)
        elif epoch <= total * 0.2:
            _set_dataset_r_rand(0.4)
        else:
            _set_dataset_r_rand(0.5)


def set_requires_grad(nets, requires_grad=False):
    """Set requires_grad=False for all the networks to avoid unnecessary computations.

    Parameters:
        nets (network list)  -- a list of networks
        requires_grad (bool) -- whether the networks require gradients or not
    """
    if not isinstance(nets, list):
        nets = [nets]
    for net in nets:
        if net is not None:
            for param in net.parameters():
                param.requires_grad = requires_grad
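

# A minimal self-contained check of set_requires_grad() on a throwaway module
# (illustrative only, not part of the training pipeline):
if __name__ == '__main__':
    import torch.nn as nn

    net = nn.Linear(4, 2)
    set_requires_grad(net, requires_grad=False)
    assert all(not p.requires_grad for p in net.parameters())
    set_requires_grad([net, None], requires_grad=True)  # lists and None entries are handled
    assert all(p.requires_grad for p in net.parameters())
    print('set_requires_grad OK')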