import torch
import copy
import random
import numpy as np
import torch.nn.functional as F
from torch.autograd import Variable

from util import box_transform_inv_numpy, clip_boxes_numpy, box_transform_numpy, convert_xyxyzz
from layers.overlap_3d import overlap_3d


class BoxModule(object):
    """RCNN box-head helper for a 3-D detector.

    Provides the classification/regression loss, proposal-to-ground-truth
    target assignment (with class-balanced sampling), and post-softmax box
    decoding.

    Box rows are 8 columns: [batch_idx, score, z, y, x, dz, dy, dx]; the six
    geometric entries live in columns 2:8 (see `add_truth_box_to_proposal`
    and the `proposal[:, 2:8]` slices below).
    """

    def __init__(self, cfg):
        self.cfg = cfg
        # Score threshold applied before NMS at test time.
        self.pre_nms_score_threshold = cfg.MODEL.ROI_BOX_HEAD.TEST_PRE_NMS_SCORE_THRESH
        self.num_class = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASS
        self.box_reg_weight = cfg.MODEL.BBOX_REG_WEIGHT
        self.batch_size = cfg.MODEL.ROI_BOX_HEAD.ROI_BATCH_SIZE
        self.box_train_bg_thresh_high = cfg.MODEL.ROI_BOX_HEAD.BG_THRESH_HIGH
        self.box_train_fg_thresh_low = cfg.MODEL.ROI_BOX_HEAD.FG_THRESH_LOW
        self.fg_fraction = cfg.MODEL.ROI_BOX_HEAD.FG_FRACTION

    def loss(self, logits, deltas, labels, targets, deltas_sigma=1.0):
        """Compute RCNN classification and box-regression losses.

        Args:
            logits: (batch, num_class) raw class scores.
            deltas: (batch, num_class * 6) predicted box deltas.
            labels: (batch,) long tensor of class labels (0 = background).
            targets: (num_fg, 6) regression targets for foreground rows.
            deltas_sigma: unused; kept for interface compatibility.

        Returns:
            (cls_loss, reg_loss, stats) where stats is a 7-element list of the
            six per-coordinate smooth-L1 losses followed by the confusion
            matrix (num_class x num_class numpy array).
        """
        batch_size, num_class = logits.size(0), logits.size(1)

        # Weighted cross entropy to counter the imbalanced class distribution:
        # each class is weighted inversely to its frequency in this batch.
        weight = torch.ones(num_class).cuda()
        total = len(labels)
        for i in range(num_class):
            num_pos = float((labels == i).sum())
            num_pos = max(num_pos, 1)  # avoid divide-by-zero for absent classes
            weight[i] = total / num_pos
        weight = weight / weight.sum()
        rcnn_cls_loss = F.cross_entropy(logits, labels, weight=weight, size_average=True)

        # Confusion matrix over the foreground rows only (labels.nonzero()),
        # to understand which classes are being mistaken for which.
        confusion_matrix = np.zeros((num_class, num_class))
        probs = F.softmax(logits, dim=1)
        v, cat = torch.max(probs, dim=1)
        for i in labels.nonzero():
            i = i.item()
            confusion_matrix[labels.long().detach()[i].item()][cat[i].detach().item()] += 1
        num_pos = len(labels.nonzero())

        if num_pos > 0:
            # One-hot select the predicted deltas of each row's ground-truth
            # class; background (class 0) is zeroed so it contributes nothing.
            select = Variable(torch.zeros((batch_size, num_class))).cuda()
            select.scatter_(1, labels.view(-1, 1), 1)
            select[:, 0] = 0
            select = select.view(batch_size, num_class, 1) \
                           .expand((batch_size, num_class, 6)).contiguous().bool()
            deltas = deltas.view(batch_size, num_class, 6)
            deltas = deltas[select].view(-1, 6)

            rcnn_reg_loss = 0
            reg_losses = []
            for i in range(6):
                l = F.smooth_l1_loss(deltas[:, i], targets[:, i])
                rcnn_reg_loss += l
                reg_losses.append(l.data.item())
        else:
            # No foreground: zero regression loss, keeps the graph valid.
            rcnn_reg_loss = Variable(torch.cuda.FloatTensor(1).zero_()).sum()
            reg_losses = [Variable(torch.cuda.FloatTensor(1).zero_()) for i in range(6)]

        return rcnn_cls_loss, rcnn_reg_loss, list(reg_losses) + [confusion_matrix]

    def get_probability(self, inputs, proposals, logits, deltas):
        """Decode RCNN outputs into scored, clipped detections.

        Args:
            inputs: input volume; only `inputs.shape[2:]` (spatial extent) is
                used, for clipping boxes.
            proposals: (N, 8) proposal tensor, box coords in columns 2:8.
            logits: (N, num_class) class scores.
            deltas: (N, num_class * 6) regression deltas.

        Returns:
            (M, 8) float cuda tensor of rows [prob, 6 box coords, class_id],
            empty when nothing passes the score threshold.
        """
        if logits is None:
            raise NotImplementedError

        probs = F.softmax(logits, dim=1).cpu().data.numpy()
        deltas = deltas.cpu().data.numpy().reshape(-1, self.num_class, 6)
        proposals = proposals.cpu().data.numpy()

        # Collect detections from every foreground class. The original code
        # overwrote `output` per class (keeping only the last class) and
        # crashed with UnboundLocalError when no class passed the threshold.
        outputs = []
        for j in range(1, self.num_class):  # skip background class 0
            idx = np.where(probs[:, j] > self.pre_nms_score_threshold)[0]
            if len(idx) > 0:
                p = probs[idx, j].reshape(-1, 1)
                d = deltas[idx, j]
                box = box_transform_inv_numpy(proposals[idx, 2:8], d, self.box_reg_weight)
                box = clip_boxes_numpy(box, inputs.shape[2:])
                js = np.expand_dims(np.array([j] * len(p)), axis=-1)
                outputs.append(np.concatenate((p, box, js), 1))

        if outputs:
            output = np.concatenate(outputs, 0)
        else:
            output = np.zeros((0, 8), np.float32)
        return torch.from_numpy(output).cuda().float()

    def make_one_box_target(self, proposal, truth_box):
        """Sample a class-balanced proposal set for one image and build its
        labels and regression targets.

        NOTE(review): parts of this method were truncated in the recovered
        source; the overlap computation, the no-ground-truth return path and
        the fg/bg fallback cases were reconstructed from the surviving
        fragments and this file's own imports — confirm against the original.

        Args:
            proposal: (N, 8) numpy proposals for this image.
            truth_box: (T, 6) numpy ground-truth boxes.

        Returns:
            (sampled_proposal, sampled_label, sampled_assign, sampled_target)
            as cuda tensors / numpy assignment indices.
        """
        sampled_proposal = torch.zeros((0, 8)).float().cuda()
        sampled_label = torch.zeros((0)).long().cuda()
        sampled_assign = np.zeros((0, 1), dtype=np.int32) - 1
        sampled_target = torch.zeros((0, 6)).float().cuda()

        # No proposals at all: nothing to sample.
        if len(proposal) == 0:
            return sampled_proposal, sampled_label, sampled_assign, sampled_target

        if len(truth_box) == 0:
            # No ground truth in this image: everything sampled is background.
            num_bg = min(len(proposal), self.batch_size)
            bg_length = len(proposal)
            bg_index = np.arange(len(proposal))
            # replace=True only when there are fewer candidates than requested.
            bg_index = bg_index[
                np.random.choice(bg_length, size=num_bg, replace=bg_length < num_bg)]
            sampled_proposal = Variable(torch.from_numpy(proposal[bg_index])).cuda()
            sampled_label = torch.zeros((num_bg)).long().cuda()
            return sampled_proposal, sampled_label, sampled_assign, sampled_target

        # 3-D IoU between every proposal and every truth box.
        num_proposal = len(proposal)
        box = proposal[:, 2:8]
        overlap = overlap_3d(
            convert_xyxyzz(torch.from_numpy(box).float().cuda()),
            convert_xyxyzz(torch.from_numpy(truth_box).float().cuda()),
        ).cpu().data.numpy()
        argmax_overlap = np.argmax(overlap, 1)
        max_overlap = overlap[np.arange(num_proposal), argmax_overlap]

        fg_index = np.where(max_overlap >= self.box_train_fg_thresh_low)[0]
        bg_index = np.where(max_overlap < self.box_train_bg_thresh_high)[0]

        # Sampling for class balance.
        num = self.batch_size
        num_fg = int(np.round(self.fg_fraction * num))
        fg_length = len(fg_index)
        bg_length = len(bg_index)

        # Four corner cases: both fg and bg, only fg, only bg, neither.
        if fg_length > 0 and bg_length > 0:
            idx = random.sample(range(len(fg_index)), min(num_fg, len(fg_index)))
            fg_index = fg_index[idx]
            num_fg = len(fg_index)
            num_bg = num - num_fg
            bg_index = bg_index[
                np.random.choice(bg_length, size=num_bg, replace=bg_length < num_bg)]
        elif fg_length > 0:  # no bgs
            idx = random.sample(range(len(fg_index)), min(num_fg, len(fg_index)))
            fg_index = fg_index[idx]
            num_fg = len(fg_index)
            num = num_fg
            num_bg = 0
        elif bg_length > 0:  # no fgs
            num_fg = 0
            num_bg = num
            bg_index = bg_index[
                np.random.choice(bg_length, size=num_bg, replace=bg_length < num_bg)]
        else:
            # Neither fg nor bg survived the thresholds: fall back to sampling
            # uniformly from all proposals as background.
            num_fg = 0
            num_bg = num
            bg_index = np.random.choice(num_proposal, size=num_bg,
                                        replace=num_proposal < num_bg)

        assert (num_fg + num_bg) == num

        # Recompute the assignment AFTER fg subsampling so that
        # sampled_assign[i] is the truth box of the i-th *kept* fg proposal;
        # the original computed it before subsampling, misaligning the
        # `sampled_assign[:num_fg]` slice below.
        sampled_assign = argmax_overlap[fg_index]

        index = np.concatenate([fg_index, bg_index], 0)
        sampled_proposal = proposal[index]

        # Labels: 1 for foreground, 0 for background.
        # NOTE(review): reconstructed — truth boxes here carry no class column
        # (columns 2:8 are pure geometry), so binary labels; confirm for
        # multi-class configurations.
        sampled_label = np.zeros((num_fg + num_bg), np.int64)
        sampled_label[:num_fg] = 1

        if num_fg > 0:
            target_truth_box = truth_box[sampled_assign[:num_fg], :]
            if len(target_truth_box.shape) < 2:
                # One dimension lost after slicing with a scalar index.
                target_truth_box = target_truth_box[np.newaxis, ...]
            target_box = sampled_proposal[:num_fg, :][:, 2:8]
            sampled_target = box_transform_numpy(target_box, target_truth_box,
                                                 self.box_reg_weight)
            sampled_target = Variable(torch.from_numpy(sampled_target)).float().cuda()

        sampled_label = Variable(torch.from_numpy(sampled_label)).long().cuda()
        sampled_proposal = Variable(torch.from_numpy(sampled_proposal)).cuda()
        return sampled_proposal, sampled_label, sampled_assign, sampled_target

    def make_box_target(self, proposals, truth_boxes):
        """Build sampled proposals, labels and regression targets for a batch.

        Args:
            proposals: (N, 8) cuda tensor of proposals for the whole batch;
                column 0 is the batch index.
            truth_boxes: list (length batch) of (T_b, 6) numpy truth boxes.

        Returns:
            (sampled_proposals, sampled_labels, sampled_targets) concatenated
            over the batch.
        """
        # Deep-copy so per-image processing can never mutate the caller's
        # ground truth.
        truth_boxes = copy.deepcopy(truth_boxes)
        proposals = proposals.cpu().data.numpy()

        sampled_proposals = []
        sampled_labels = []
        sampled_targets = []
        for b in range(len(truth_boxes)):
            truth_box = truth_boxes[b]
            if len(proposals) == 0:
                proposal = np.zeros((0, 8), np.float32)
            else:
                proposal = proposals[proposals[:, 0] == b]

            # Add ground truth boxes to the proposals so that even if the RPN
            # branch fails to find anything, the classification branch still
            # gets positive training samples.
            # proposal = select_proposal_by_diameter(proposal, 23)
            proposal = add_truth_box_to_proposal(self.cfg, proposal, b, truth_box)

            sampled_proposal, sampled_label, _, sampled_target = \
                self.make_one_box_target(proposal, truth_box)
            sampled_proposals.append(sampled_proposal)
            sampled_labels.append(sampled_label)
            sampled_targets.append(sampled_target)

        sampled_proposals = torch.cat(sampled_proposals, 0)
        sampled_labels = torch.cat(sampled_labels, 0)
        sampled_targets = torch.cat(sampled_targets, 0)
        return sampled_proposals, sampled_labels, sampled_targets


def select_proposal_by_diameter(proposal, diameter_thre):
    """Keep only proposals whose largest diameter (max over columns 6:,
    i.e. max(diameter_y, diameter_x)) is at least `diameter_thre`."""
    if len(proposal) > 0:
        keep = np.amax(proposal[:, 6:], 1) >= diameter_thre
        proposal = proposal[keep]
    return proposal


def add_truth_box_to_proposal(cfg, proposal, b, truth_box, score=1):
    """Append the ground-truth boxes of image `b` to its proposal list.

    Each appended row is [b, score, 6 box coords]. `cfg` is unused but kept
    for interface compatibility with existing callers.
    """
    if len(truth_box) != 0:
        truth = np.zeros((len(truth_box), 8), np.float32)
        truth[:, 0] = b
        truth[:, 1] = score
        # NOTE(review): this assignment was ambiguous (possibly commented out)
        # in the mangled source; without it the appended rows carry all-zero
        # geometry, which defeats the function's purpose — confirm.
        truth[:, 2:8] = truth_box
    else:
        truth = np.zeros((0, 8), np.float32)
    return np.vstack([proposal, truth])