import math
import os
import random
import sys
import traceback
import warnings

import horovod.torch as hvd
import numpy as np
import pandas as pd
import torch
import torch.backends.cudnn as cudnn
from apex import amp
from tensorboardX import SummaryWriter
from torch.autograd import Variable
from torch.utils.data import DataLoader
from tqdm import tqdm

from ..data import BboxReader
from ..data.collate import train_collate
from ..modeling.detector.centernet import CenterNet
from ..modeling.detector.nodulenet import NoduleNet

warnings.filterwarnings("ignore")

this_module = sys.modules[__name__]

# Set seed
SEED = 35202
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)


class DistributedSampler_new(torch.utils.data.distributed.DistributedSampler):
    """DistributedSampler that can be re-pointed at a dataset whose length has changed."""

    def update_dataset(self, dataset):
        self.dataset = dataset
        self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas))
        self.total_size = self.num_samples * self.num_replicas


# Horovod: average metrics from distributed training.
class Metric(object):
    def __init__(self, name):
        self.name = name
        self.sum = torch.tensor(0.)
        self.n = torch.tensor(0.)

    def update(self, val):
        val_cpu = val.detach().cpu()
        self.sum += hvd.allreduce(val_cpu, name=self.name)
        self.n += 1

    @property
    def avg(self):
        return self.sum / self.n


def save_checkpoint(epoch, frequency, model, optimizer, model_out_dir):
    state_dict = model.state_dict()
    for key in state_dict.keys():
        state_dict[key] = state_dict[key].cpu()
    if epoch % frequency == 0:
        torch.save({
            'epoch': epoch,
            'out_dir': model_out_dir,
            'state_dict': state_dict,
            'optimizer': optimizer.state_dict()},
            os.path.join(model_out_dir, '%03d.pth' % epoch))


class BaseDetection3D(object):
    def __init__(self, cfg, mode='train'):
        """3D detector initialization.

        Arguments:
            cfg {object} -- config with hyper parameters
            mode {str} -- 'train', 'val' or 'test'

        Raises:
            ValueError: if the requested architecture is not implemented
        """
        # Init network
        if cfg.MODEL.META_ARCHITECTURE == 'NoduleNet':
            self.model = NoduleNet(cfg, mode).cuda()
        elif cfg.MODEL.META_ARCHITECTURE == 'CenterNet':
            self.model = CenterNet(cfg, mode).cuda()
        else:
            raise ValueError('Detector architecture %s is not implemented.' % cfg.MODEL.META_ARCHITECTURE)
        self.cfg = cfg
        self.DataReader = BboxReader
        self.DistributedSampler = DistributedSampler_new
        # Setting torch.backends.cudnn.benchmark=True makes the program spend a little extra time
        # at startup searching for the fastest convolution algorithm for every conv layer, which
        # then speeds up the network. It pays off when the architecture is fixed (not dynamic) and
        # the input shape (batch size, image size, channels) does not change, which is the usual
        # case. Conversely, if the convolution configuration keeps changing, the program keeps
        # re-tuning and ends up wasting time instead.
        cudnn.benchmark = False

    def _create_dataloader(self):
        self.train_dataset = self.DataReader(self.cfg, mode='train')
        self.val_dataset = self.DataReader(self.cfg, mode='val')
        # Horovod: limit # of CPU threads to be used per worker.
        torch.set_num_threads(4)
        kwargs = {'num_workers': self.cfg.DATA.DATA_LOADER.NUM_WORKERS, 'pin_memory': True}
        # When supported, use 'forkserver' to spawn dataloader workers instead of 'fork' to prevent
        # issues with Infiniband implementations that are not fork-safe
        # if (kwargs.get('num_workers', 0) > 0 and hasattr(mp, '_supports_context') and
        #         mp._supports_context and 'forkserver' in mp.get_all_start_methods()):
        #     kwargs['multiprocessing_context'] = 'forkserver'

        # Partition dataset among workers using DistributedSampler
        self.train_sampler = self.DistributedSampler(self.train_dataset, num_replicas=hvd.size(), rank=hvd.rank())
        self.val_sampler = self.DistributedSampler(self.val_dataset, num_replicas=hvd.size(), rank=hvd.rank())
        self.train_loader = DataLoader(self.train_dataset, batch_size=self.cfg.TRAINING.BATCH_SIZE,
                                       collate_fn=train_collate, sampler=self.train_sampler, **kwargs)
        self.val_loader = DataLoader(self.val_dataset, batch_size=self.cfg.TRAINING.BATCH_SIZE,
                                     collate_fn=train_collate, sampler=self.val_sampler, **kwargs)

    def _create_optimizer(self):
        self.optimizer = getattr(torch.optim, self.cfg.TRAINING.SOLVER.OPTIMIZER)
        # Horovod: lr = base_lr * hvd.size
        # special_lr_layers = ['feature_net.back3.1.conv1.conv_offset.weight',
        #                      'feature_net.back3.1.conv1.conv_offset.bias',
        #                      'feature_net.back3.1.conv1.conv_mask.weight',
        #                      'feature_net.back3.1.conv1.conv_mask.bias',
        #                      'feature_net.back3.1.conv2.conv_offset.weight',
        #                      'feature_net.back3.1.conv2.conv_offset.bias',
        #                      'feature_net.back3.1.conv2.conv_mask.weight',
        #                      'feature_net.back3.1.conv2.conv_mask.bias']
        # special_params = list(map(lambda x: x[1], list(filter(lambda kv: kv[0] in special_lr_layers, self.model.named_parameters()))))
        # base_params = list(map(lambda x: x[1], list(filter(lambda kv: kv[0] not in special_lr_layers, self.model.named_parameters()))))
        # self.optimizer = self.optimizer([{"params": base_params},
        #                                  {"params": special_params, "lr": self.cfg.TRAINING.SOLVER.BASE_LR * hvd.size() * 0.05}],
        #                                 lr=self.cfg.TRAINING.SOLVER.BASE_LR * hvd.size(),
        #                                 weight_decay=self.cfg.TRAINING.SOLVER.WRIGHT_DECAY, momentum=self.cfg.TRAINING.SOLVER.MOMENTUM)
        # Note: 'WRIGHT_DECAY' is the weight-decay key as spelled in the project config.
        if self.cfg.TRAINING.SOLVER.OPTIMIZER == 'Adam':
            self.optimizer = self.optimizer(self.model.parameters(),
                                            lr=self.cfg.TRAINING.SOLVER.BASE_LR * hvd.size(),
                                            weight_decay=self.cfg.TRAINING.SOLVER.WRIGHT_DECAY)
        elif self.cfg.TRAINING.SOLVER.OPTIMIZER == 'SGD':
            self.optimizer = self.optimizer(self.model.parameters(),
                                            lr=self.cfg.TRAINING.SOLVER.BASE_LR * hvd.size(),
                                            weight_decay=self.cfg.TRAINING.SOLVER.WRIGHT_DECAY,
                                            momentum=self.cfg.TRAINING.SOLVER.MOMENTUM)
        else:
            raise ValueError("Optimizer %s is not implemented." % self.cfg.TRAINING.SOLVER.OPTIMIZER)

        # Horovod: (optional) compression algorithm.
        compression = hvd.Compression.fp16 if self.cfg.TRAINING.FP16_ALLREDUCE else hvd.Compression.none

        # Horovod: Add Horovod Distributed Optimizer
        self.optimizer = hvd.DistributedOptimizer(self.optimizer, named_parameters=self.model.named_parameters(),
                                                  compression=compression)
        self.optimizer._requires_update = set()

        # Horovod: load model only for rank 0
        self.resume_from_epoch = 0
        if self.cfg.MODEL.WEIGHT and hvd.rank() == 0:
            print('[Loading model from %s]' % self.cfg.MODEL.WEIGHT)
            checkpoint = torch.load(self.cfg.MODEL.WEIGHT)
            self.resume_from_epoch = checkpoint['epoch']
            state = self.model.state_dict()
            state.update(checkpoint['state_dict'])
            self.model.load_state_dict(state)  # , strict=False
            self.optimizer.load_state_dict(checkpoint['optimizer'])
            self.resume_from_epoch += 1
        # Horovod: broadcast resume_from_epoch from rank 0 (which will have
        # checkpoints) to other ranks.
        self.resume_from_epoch = hvd.broadcast(torch.tensor(self.resume_from_epoch), root_rank=0,
                                               name='resume_from_epoch').item()

        # Horovod: Broadcast parameters from rank 0 to all other processes.
        hvd.broadcast_parameters(self.model.state_dict(), root_rank=0)
        hvd.broadcast_optimizer_state(self.optimizer, root_rank=0)

        if self.cfg.TRAINING.AMP:
            # Apex
            amp.register_float_function(torch, 'sigmoid')
            self.model, self.optimizer = amp.initialize(self.model, self.optimizer,
                                                        opt_level="O1")  # , keep_batchnorm_fp32=True

        # Horovod: print output only on first rank.
        if hvd.rank() == 0:
            self.model_out_dir = os.path.join(self.cfg.TRAINING.SAVER.SAVER_DIR, 'checkpoints')
            tb_out_dir = os.path.join(self.cfg.TRAINING.SAVER.SAVER_DIR, 'tensorboards')
            if not os.path.exists(self.model_out_dir):
                os.makedirs(self.model_out_dir)
            if not os.path.exists(tb_out_dir):
                os.makedirs(tb_out_dir)
            print('[start_epoch %d, out_dir %s]' % (self.resume_from_epoch, self.cfg.TRAINING.SAVER.SAVER_DIR))
            print('[length of train loader %d, length of valid loader %d]' % (
                len(self.train_loader), len(self.val_loader)))

        # Write graph to tensorboard for visualization
        self.writer = SummaryWriter(tb_out_dir) if hvd.rank() == 0 else None
        self.train_writer = SummaryWriter(os.path.join(tb_out_dir, 'train')) if hvd.rank() == 0 else None
        self.val_writer = SummaryWriter(os.path.join(tb_out_dir, 'val')) if hvd.rank() == 0 else None

    def do_train(self):
        self.model.loss.cuda()
        self._create_dataloader()
        self._create_optimizer()
        verbose = 1 if hvd.rank() == 0 else 0
        for i in range(self.resume_from_epoch, self.cfg.TRAINING.SHEDULER.TOTAL_EPOCHS):
            if self.cfg.TRAINING.SHEDULER.NUM_NEG_SHEDULE:
                self.num_neg_shedule(i)
            if self.cfg.TRAINING.SHEDULER.RAND_CROP_RATIO_SHEDULE:
                self.rand_crop_ratio_shedule(i)
            if i >= self.cfg.TRAINING.SHEDULER.SWITCH_ROI_EPOCH:
                self.model.use_rcnn = True
                self.model.loss.use_rcnn = True
                if self.cfg.DATA.DATA_LOADER.BALANCED_SAMPLING:
                    self.train_loader.dataset.list_IDs = self.train_loader.dataset._get_list_ids()
                    self.train_loader.sampler.update_dataset(self.train_loader.dataset)
            if i == self.cfg.TRAINING.SHEDULER.SWITCH_BALANCED_SAMPLING_EPOCH:
                self.cfg.DATA.DATA_LOADER.BALANCED_SAMPLING = True
                self.train_loader.dataset.balanced_sampling_flag = True
                self.train_loader.dataset.list_IDs = self.train_loader.dataset._get_list_ids()
                self.train_loader.sampler.update_dataset(self.train_loader.dataset)
                if hvd.rank() == 0:
                    print('[length of train loader %d, length of valid loader %d]' % (
                        len(self.train_loader), len(self.val_loader)))
            self.train(i, verbose)
            self.validate(i, verbose)
            if hvd.rank() == 0:
                save_checkpoint(i, self.cfg.TRAINING.SAVER.SAVER_FREQUENCY, self.model, self.optimizer,
                                self.model_out_dir)
        if self.writer:
            self.writer.close()
        if self.train_writer:
            self.train_writer.close()
        if self.val_writer:
            self.val_writer.close()

    def train(self, epoch, verbose):
        self.train_sampler.set_epoch(epoch)
        self.model.set_mode('train')
        rpn_cls_loss = Metric('rpn_cls_loss')
        rpn_reg_loss = Metric('rpn_reg_loss')
        rcnn_cls_loss = Metric('rcnn_cls_loss')
        rcnn_reg_loss = Metric('rcnn_reg_loss')
        total_loss = Metric('train_loss')
        rpn_stats = []
        rcnn_stats = []
        with tqdm(total=len(self.train_loader), desc='Train Epoch #{}'.format(epoch), disable=not verbose) as t:
            for j, (input_data, truth_bboxes) in enumerate(self.train_loader):
                input_data = Variable(input_data).cuda()
                if self.cfg.TRAINING.SHEDULER.LR_SHEDULE:
                    self.lr_shedule(epoch, j)
                self.optimizer.zero_grad()
                rpn_logits, rpn_deltas, rpn_labels, rpn_label_weights, rpn_targets, rpn_targets_weights, \
                    rcnn_logits, rcnn_deltas, rcnn_labels, rcnn_targets = self.model(input_data, truth_bboxes)
                loss, [rpn_cls_loss_, rpn_reg_loss_, rcnn_cls_loss_, rcnn_reg_loss_], rpn_stat_, rcnn_stat_ = \
                    self.model.loss(rpn_logits, rpn_deltas, rpn_labels, rpn_label_weights,
                                    rpn_targets, rpn_targets_weights,
                                    rcnn_logits, rcnn_deltas, rcnn_labels, rcnn_targets)
                if self.cfg.TRAINING.AMP:
                    with amp.scale_loss(loss, self.optimizer) as scaled_loss:
                        scaled_loss.backward()
                    self.optimizer.synchronize()
                    with self.optimizer.skip_synchronize():
                        # torch.nn.utils.clip_grad_norm_(amp.master_params(self.optimizer), max_norm=1.0, norm_type=2)
                        self.optimizer.step()
                else:
                    loss.backward()
                    self.optimizer.step()
                rpn_cls_loss.update(rpn_cls_loss_)
                rpn_reg_loss.update(rpn_reg_loss_)
                rcnn_cls_loss.update(rcnn_cls_loss_)
                rcnn_reg_loss.update(rcnn_reg_loss_)
                total_loss.update(loss)
                rpn_stats.append(rpn_stat_)
                rcnn_stats.append(rcnn_stat_)
                t.set_postfix({'total_loss': total_loss.avg.item(),
                               'rpn_cls_loss': rpn_cls_loss.avg.item(),
                               'rpn_reg_loss': rpn_reg_loss.avg.item(),
                               'rcnn_cls_loss': rcnn_cls_loss.avg.item(),
                               'rcnn_reg_loss': rcnn_reg_loss.avg.item(),
                               })
                t.update(1)

        if self.train_writer:
            rpn_stats = np.asarray(rpn_stats, np.float32)
            print('Train Epoch %d, loss %f' % (epoch, total_loss.avg.item()))
            print('rpn_cls %f, rpn_reg %f, rcnn_cls %f, rcnn_reg %f' % (
                rpn_cls_loss.avg.item(), rpn_reg_loss.avg.item(), rcnn_cls_loss.avg.item(), rcnn_reg_loss.avg.item()))
            print('rpn_stats: tpr %f, tnr %f, total pos %d, total neg %d, reg %.4f, %.4f, %.4f, %.4f, %.4f, %.4f' % (
                100.0 * np.sum(rpn_stats[:, 0]) / np.sum(rpn_stats[:, 1]),
                100.0 * np.sum(rpn_stats[:, 2]) / np.sum(rpn_stats[:, 3]),
                np.sum(rpn_stats[:, 1]),
                np.sum(rpn_stats[:, 3]),
                np.mean(rpn_stats[:, 4]),
                np.mean(rpn_stats[:, 5]),
                np.mean(rpn_stats[:, 6]),
                np.mean(rpn_stats[:, 7]),
                np.mean(rpn_stats[:, 8]),
                np.mean(rpn_stats[:, 9])))
            # Write to tensorboard
            self.train_writer.add_scalar('loss', total_loss.avg, epoch)
            self.train_writer.add_scalar('rpn_cls', rpn_cls_loss.avg, epoch)
            self.train_writer.add_scalar('rpn_reg', rpn_reg_loss.avg, epoch)
            self.train_writer.add_scalar('rcnn_cls', rcnn_cls_loss.avg, epoch)
            self.train_writer.add_scalar('rcnn_reg', rcnn_reg_loss.avg, epoch)
            self.train_writer.add_scalar('rpn_reg_z', np.mean(rpn_stats[:, 4]), epoch)
            self.train_writer.add_scalar('rpn_reg_y', np.mean(rpn_stats[:, 5]), epoch)
            self.train_writer.add_scalar('rpn_reg_x', np.mean(rpn_stats[:, 6]), epoch)
            self.train_writer.add_scalar('rpn_reg_d', np.mean(rpn_stats[:, 7]), epoch)
            self.train_writer.add_scalar('rpn_reg_h', np.mean(rpn_stats[:, 8]), epoch)
            self.train_writer.add_scalar('rpn_reg_w', np.mean(rpn_stats[:, 9]), epoch)
            if self.model.use_rcnn:
                rcnn_stats = np.asarray([stat[:-1] for stat in rcnn_stats], np.float32)
                print('rcnn_stats: reg %.4f, %.4f, %.4f, %.4f, %.4f, %.4f' % (
                    np.mean(rcnn_stats[:, 0]),
                    np.mean(rcnn_stats[:, 1]),
                    np.mean(rcnn_stats[:, 2]),
                    np.mean(rcnn_stats[:, 3]),
                    np.mean(rcnn_stats[:, 4]),
                    np.mean(rcnn_stats[:, 5])))
                self.train_writer.add_scalar('rcnn_reg_z', np.mean(rcnn_stats[:, 0]), epoch)
                self.train_writer.add_scalar('rcnn_reg_y', np.mean(rcnn_stats[:, 1]), epoch)
                self.train_writer.add_scalar('rcnn_reg_x', np.mean(rcnn_stats[:, 2]), epoch)
                self.train_writer.add_scalar('rcnn_reg_d', np.mean(rcnn_stats[:, 3]), epoch)
                self.train_writer.add_scalar('rcnn_reg_h', np.mean(rcnn_stats[:, 4]), epoch)
                self.train_writer.add_scalar('rcnn_reg_w', np.mean(rcnn_stats[:, 5]), epoch)

        torch.cuda.empty_cache()

    def validate(self, epoch, verbose):
        # self.val_sampler.set_epoch(epoch)
        self.model.set_mode('valid')
        rpn_cls_loss = Metric('rpn_cls_loss')
        rpn_reg_loss = Metric('rpn_reg_loss')
        rcnn_cls_loss = Metric('rcnn_cls_loss')
        rcnn_reg_loss = Metric('rcnn_reg_loss')
        total_loss = Metric('train_loss')
        rpn_stats = []
        rcnn_stats = []
        with tqdm(total=len(self.val_loader), desc='Validate Epoch #{}'.format(epoch), disable=not verbose) as t:
            for j, (input_data, truth_bboxes) in enumerate(self.val_loader):
                with torch.no_grad():
                    input_data = Variable(input_data).cuda()
                    rpn_logits, rpn_deltas, rpn_labels, rpn_label_weights, rpn_targets, rpn_targets_weights, \
                        rcnn_logits, rcnn_deltas, rcnn_labels, rcnn_targets = self.model(input_data, truth_bboxes)
                    loss, [rpn_cls_loss_, rpn_reg_loss_, rcnn_cls_loss_, rcnn_reg_loss_], rpn_stat_, rcnn_stat_ = \
                        self.model.loss(rpn_logits, rpn_deltas, rpn_labels, rpn_label_weights,
                                        rpn_targets, rpn_targets_weights,
                                        rcnn_logits, rcnn_deltas, rcnn_labels, rcnn_targets)
                rpn_cls_loss.update(rpn_cls_loss_)
                # if torch.is_nonzero(rpn_reg_loss_):
                rpn_reg_loss.update(rpn_reg_loss_)
                rcnn_cls_loss.update(rcnn_cls_loss_)
                rcnn_reg_loss.update(rcnn_reg_loss_)
                total_loss.update(loss)
                rpn_stats.append(rpn_stat_)
                rcnn_stats.append(rcnn_stat_)
                t.update(1)

        if self.val_writer:
            rpn_stats = np.asarray(rpn_stats, np.float32)
            print('Val Epoch %d, loss %f' % (epoch, total_loss.avg.item()))
            print('rpn_cls %f, rpn_reg %f, rcnn_cls %f, rcnn_reg %f' % (
                rpn_cls_loss.avg.item(), rpn_reg_loss.avg.item(), rcnn_cls_loss.avg.item(), rcnn_reg_loss.avg.item()))
            print('rpn_stats: tpr %f, tnr %f, total pos %d, total neg %d, reg %.4f, %.4f, %.4f, %.4f, %.4f, %.4f' % (
                100.0 * np.sum(rpn_stats[:, 0]) / np.sum(rpn_stats[:, 1]),
                100.0 * np.sum(rpn_stats[:, 2]) / np.sum(rpn_stats[:, 3]),
                np.sum(rpn_stats[:, 1]),
                np.sum(rpn_stats[:, 3]),
                np.mean(rpn_stats[:, 4]),
                np.mean(rpn_stats[:, 5]),
                np.mean(rpn_stats[:, 6]),
                np.mean(rpn_stats[:, 7]),
                np.mean(rpn_stats[:, 8]),
                np.mean(rpn_stats[:, 9])))
            # Write to tensorboard
            self.val_writer.add_scalar('loss', total_loss.avg, epoch)
            self.val_writer.add_scalar('rpn_cls', rpn_cls_loss.avg, epoch)
            self.val_writer.add_scalar('rpn_reg', rpn_reg_loss.avg, epoch)
            self.val_writer.add_scalar('rcnn_cls', rcnn_cls_loss.avg, epoch)
            self.val_writer.add_scalar('rcnn_reg', rcnn_reg_loss.avg, epoch)
            self.val_writer.add_scalar('rpn_reg_z', np.mean(rpn_stats[:, 4]), epoch)
            self.val_writer.add_scalar('rpn_reg_y', np.mean(rpn_stats[:, 5]), epoch)
            self.val_writer.add_scalar('rpn_reg_x', np.mean(rpn_stats[:, 6]), epoch)
            self.val_writer.add_scalar('rpn_reg_d', np.mean(rpn_stats[:, 7]), epoch)
            self.val_writer.add_scalar('rpn_reg_h', np.mean(rpn_stats[:, 8]), epoch)
            self.val_writer.add_scalar('rpn_reg_w', np.mean(rpn_stats[:, 9]), epoch)
            if self.model.use_rcnn:
                rcnn_stats = np.asarray([stat[:-1] for stat in rcnn_stats], np.float32)
                print('rcnn_stats: reg %.4f, %.4f, %.4f, %.4f, %.4f, %.4f' % (
                    np.mean(rcnn_stats[:, 0]),
                    np.mean(rcnn_stats[:, 1]),
                    np.mean(rcnn_stats[:, 2]),
                    np.mean(rcnn_stats[:, 3]),
                    np.mean(rcnn_stats[:, 4]),
                    np.mean(rcnn_stats[:, 5])))
                self.val_writer.add_scalar('rcnn_reg_z', np.mean(rcnn_stats[:, 0]), epoch)
                self.val_writer.add_scalar('rcnn_reg_y', np.mean(rcnn_stats[:, 1]), epoch)
                self.val_writer.add_scalar('rcnn_reg_x', np.mean(rcnn_stats[:, 2]), epoch)
                self.val_writer.add_scalar('rcnn_reg_d', np.mean(rcnn_stats[:, 3]), epoch)
                self.val_writer.add_scalar('rcnn_reg_h', np.mean(rcnn_stats[:, 4]), epoch)
                self.val_writer.add_scalar('rcnn_reg_w', np.mean(rcnn_stats[:, 5]), epoch)

        torch.cuda.empty_cache()

    def do_test(self):
        initial_checkpoint = self.cfg.TESTING.WEIGHT
        save_dir = self.cfg.TESTING.SAVER_DIR
        if initial_checkpoint:
            print('[Loading model from %s]' % initial_checkpoint)
            checkpoint = torch.load(initial_checkpoint)
            self.model.load_state_dict(checkpoint['state_dict'])
            # state = self.model.state_dict()
            # state.update(checkpoint['state_dict'])
            # self.model.load_state_dict(state)  # , strict=False
            epoch = checkpoint['epoch']
        else:
            print('No model weight file specified')
            return
        self.model.set_mode('test')
        self.model.use_rcnn = self.cfg.TESTING.USE_RCNN
        # model_path = self.cfg.DEPLOY.TORCHSCRIPT_SAVE_PATH
        # print('[Loading torchscript from %s]' % model_path)
        # torchscript_model = torch.jit.load(model_path)
        self.test_dataset = self.DataReader(self.cfg, mode='test')
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        res_dir = os.path.join(save_dir, str(epoch))
        if not os.path.exists(res_dir):
            os.makedirs(res_dir)
        print('Total # of eval data %d' % (len(self.test_dataset)))
        for i, (input, image) in enumerate(self.test_dataset):
            # if i == 10:
            #     break
            try:
                D, H, W = image.shape
                pid = self.test_dataset.sample_bboxes[i].get_field("filename")
                pid = pid.split('/')[-2].replace('.nii.gz', '')
                print('[%d] Predicting %s' % (i, pid), image.shape)
                with torch.no_grad():
                    input = input.cuda().unsqueeze(0)
                    if self.cfg.TESTING.USE_RCNN:
                        rpns, detections, ensembles = self.model.forward(input, [])
                    else:
                        rpns = self.model.forward(input, [])
                    # image = np.expand_dims(image, 0)
                    # image = np.expand_dims(image, 0)
                    # image = torch.from_numpy(image).cuda()
                    # rpns = torchscript_model(image.half())
                rpns = rpns.cpu().numpy()
                if len(rpns):
                    rpns = rpns[:, 1:]
                    np.save(os.path.join(res_dir, '%s_rpns.npy' % (pid)), rpns)
                if self.cfg.TESTING.USE_RCNN:
                    detections = detections.cpu().numpy()
                    ensembles = ensembles.cpu().numpy()
                    if len(detections):
                        detections = detections[:, 0:-1]
                        np.save(os.path.join(res_dir, '%s_rcnns.npy' % (pid)), detections)
                    if len(ensembles):
                        ensembles = ensembles[:, 1:]
                        np.save(os.path.join(res_dir, '%s_ensembles.npy' % (pid)), ensembles)
                # Clear gpu memory
                torch.cuda.empty_cache()
            except Exception as e:
                torch.cuda.empty_cache()
                traceback.print_exc()
        self.npy2csv('rpns', res_dir)
        if self.cfg.TESTING.USE_RCNN:
            self.npy2csv('rcnns', res_dir)
            self.npy2csv('ensembles', res_dir)

    def deploy(self):
        self.model.eval()
        initial_checkpoint = self.cfg.TESTING.WEIGHT
        print('[Loading model from %s]' % initial_checkpoint)
        checkpoint = torch.load(initial_checkpoint)
        self.model.load_state_dict(checkpoint['state_dict'])
        set_requires_grad(self.model)
        self.test_dataset = self.DataReader(self.cfg, mode='test')
        input_data = self.test_dataset.__getitem__(0)[1]
        input_data = np.expand_dims(input_data, 0)
        input_data = np.expand_dims(input_data, 0)
        print("input_data shape", input_data.shape)
        input_data = torch.from_numpy(input_data[:, :, :228, :226, :234]).cuda()
        # print(input_data.shape)
        # pth to torchscript
        input_data = input_data.half()
        self.model = self.model.half()
        print('1')
        traced_model = torch.jit.trace(self.model, (input_data))
        print(traced_model.graph)
        model_path = self.cfg.DEPLOY.TORCHSCRIPT_SAVE_PATH
        os.makedirs(os.path.dirname(model_path), exist_ok=True)
        traced_model.save(model_path)

    def torchscript_test(self):
        i = 0
        self.test_dataset = self.DataReader(self.cfg, mode='test')
        torch_input_data, torchscript_input = self.test_dataset.__getitem__(i)
        pid = self.test_dataset.sample_bboxes[i].get_field("filename")
        pid = pid.split('/')[-1].replace('.nii.gz', '')
        print("input_data shape", torchscript_input.shape)
        res_dir = self.cfg.DEPLOY.TORCHSCRIPT_COMPARE_RES_DIR
        initial_checkpoint = self.cfg.TESTING.WEIGHT
        print('[Loading model from %s]' % initial_checkpoint)
        checkpoint = torch.load(initial_checkpoint)
        self.model.load_state_dict(checkpoint['state_dict'])
        self.model.set_mode('test')
        self.model.use_rcnn = self.cfg.TESTING.USE_RCNN
        with torch.no_grad():
            input_data = torch_input_data.cuda().unsqueeze(0)
            torch_res = self.model.forward(input_data, [])
        torch_res = torch_res.cpu().numpy()
        torch_res = torch_res[:, 1:]
        np.save(os.path.join(res_dir, '%s_torch_res.npy' % (pid)), torch_res)
        self.npy2csv('torch_res', res_dir)

        input_data = np.expand_dims(torchscript_input, 0)
        input_data = np.expand_dims(input_data, 0)
        input_data = torch.from_numpy(input_data).cuda()
        model_path = self.cfg.DEPLOY.TORCHSCRIPT_SAVE_PATH
        print('[Loading torchscript from %s]' % model_path)
        torchscript_model = torch.jit.load(model_path)
        torchscript_res = torchscript_model(input_data)
        torchscript_res = torchscript_res.cpu().numpy()
        torchscript_res = torchscript_res[:, 1:]
        np.save(os.path.join(res_dir, '%s_torchscript_res.npy' % (pid)), torchscript_res)
        self.npy2csv('torchscript_res', res_dir)

    def npy2csv(self, name, save_dir):
        bbox_border = self.cfg.DATA.DATA_PROCESS.BBOX_BORDER
        res = []
        for bbox in self.test_dataset.sample_bboxes:
            pid = bbox.get_field("filename").split('/')[-2].replace('.nii.gz', '')
            if os.path.exists(os.path.join(save_dir, '%s_' % (pid) + name + '.npy')):
                bboxs = np.load(os.path.join(save_dir, '%s_' % (pid) + name + '.npy'))
                bboxs[:, 4] = bboxs[:, 4] - bbox_border
                bboxs[:, 5] = bboxs[:, 5] - bbox_border
                bboxs[:, 6] = bboxs[:, 6] - bbox_border
                bboxs = bboxs[:, [3, 2, 1, 6, 5, 4, 0]]
                names = np.array([[pid]] * len(bboxs))
                res.append(np.concatenate([names, bboxs], axis=1))
        res = np.concatenate(res, axis=0)
        col_names = ['uid', 'x_px', 'y_px', 'z_px', 'diameter_x_px', 'diameter_y_px', 'diameter_z_px', 'probability']
        submission_path = os.path.join(save_dir, 'submission_' + name + '.csv')
        df = pd.DataFrame(res, columns=col_names)
        df.to_csv(submission_path, index=False)

    def lr_shedule(self, epoch, batch_idx):
        warmup = self.cfg.TRAINING.SHEDULER.WARMUP
        total = self.cfg.TRAINING.SHEDULER.TOTAL_EPOCHS
        if epoch < warmup:
            epoch += float(batch_idx + 1) / len(self.train_loader)
            lr_adj = 1. / hvd.size() * (epoch * (hvd.size() - 1) / warmup + 1)
        elif epoch <= total * 0.5:
            lr_adj = 1.
        elif epoch <= total * 0.8:
            lr_adj = 0.1
        else:
            lr_adj = 0.01
        for param_group in self.optimizer.param_groups:
            param_group['lr'] = self.cfg.TRAINING.SOLVER.BASE_LR * hvd.size() * lr_adj

    def num_neg_shedule(self, epoch):
        total = self.cfg.TRAINING.SHEDULER.TOTAL_EPOCHS
        if epoch <= total * 0.2:
            self.model.rpn.num_neg = 800
        elif epoch <= total * 0.5:
            self.model.rpn.num_neg = 1600
        elif epoch <= total * 0.8:
            self.model.rpn.num_neg = 2000
        else:
            self.model.rpn.num_neg = 3000

    def rand_crop_ratio_shedule(self, epoch):
        def _set_dataset_r_rand(fraction):
            self.train_loader.dataset.r_rand = fraction
            self.train_loader.sampler.update_dataset(self.train_loader.dataset)
            if hvd.rank() == 0:
                print('[RAND_CROP_RATIO_SHEDULE: length of train loader %d, length of valid loader %d]' % (
                    len(self.train_loader), len(self.val_loader)))

        total = self.cfg.TRAINING.SHEDULER.TOTAL_EPOCHS
        if epoch <= total * 0.05:
            pass
        elif epoch <= total * 0.1:
            _set_dataset_r_rand(0.3)
        elif epoch <= total * 0.2:
            _set_dataset_r_rand(0.4)
        else:
            _set_dataset_r_rand(0.5)


def set_requires_grad(nets, requires_grad=False):
    """Set requires_grad=False for all the networks to avoid unnecessary computations

    Parameters:
        nets (network list)  -- a list of networks
        requires_grad (bool) -- whether the networks require gradients or not
    """
    if not isinstance(nets, list):
        nets = [nets]
    for net in nets:
        if net is not None:
            for param in net.parameters():
                param.requires_grad = requires_grad
            # for name, param in net.named_parameters():
            #     print(name, param.size())
            # raise
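# ---------------------------------------------------------------------------
# Example usage (a minimal sketch, not part of the pipeline). The actual driver
# script in this repository may differ; `load_config` is a hypothetical helper
# standing in for however the cfg object referenced above is built.
#
#   import horovod.torch as hvd
#   import torch
#
#   hvd.init()
#   torch.cuda.set_device(hvd.local_rank())
#   cfg = load_config('configs/nodulenet.yaml')   # hypothetical config loader
#   detector = BaseDetection3D(cfg, mode='train')
#   detector.do_train()
#
# Launched across GPUs with, e.g.:
#   horovodrun -np 4 python -m <package>.train
# ---------------------------------------------------------------------------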