import os
import sys
import glob
import numpy as np
import pandas as pd
import keras
import tensorflow as tf
from keras import Model
from keras import backend as K
from keras.engine import Layer, InputSpec
from keras.layers.merge import concatenate
from InstanceNorm import InstanceNormalization
from keras.callbacks import Callback, ModelCheckpoint, LearningRateScheduler, ReduceLROnPlateau, TensorBoard
from keras.layers.advanced_activations import LeakyReLU
from keras.preprocessing.image import ImageDataGenerator
from keras.layers.normalization import BatchNormalization
from keras import initializers, regularizers, constraints, optimizers
from keras.layers import Add, Input, Conv3D, Convolution3D, Dropout, UpSampling3D, Concatenate, MaxPooling3D, \
    GlobalAveragePooling3D, GlobalMaxPooling3D, Dense, Lambda, Activation, Reshape, Permute, PReLU, \
    Deconvolution3D, Multiply

sys.path.append('../baseLayers/')
from CommonLayers import NormActi, ConvUnit


def BasicBlock(x, **kwargs):
    '''
    Standard two-convolution residual block: the identity (or projected)
    shortcut is added to the output of two stacked ConvUnits, followed by
    a LeakyReLU.
    '''
    block_pre = kwargs.get('block_pre')
    block_number = kwargs.get('block_number')
    num_filters = kwargs.get('num_filters')
    kernel_size = kwargs.get('kernel_size')
    downsample = kwargs.get('downsample')
    leaky_ratio = kwargs.get('leaky_ratio', 0.01)
    norm_func = kwargs.get('norm_func', BatchNormalization)
    expansion = kwargs.get('expansion', 1)
    stride = kwargs.get('stride', 1)
    activation_func = kwargs.get('activation_func')
    atrous_rate = kwargs.get('atrous_rate', 1)
    padding = kwargs.get('padding', 'same')
    kernel_initializer = kwargs.get('kernel_initializer', 'he_normal')
    kernel_regularizer = kwargs.get('kernel_regularizer', None)

    residual = x
    block_prefix = block_pre + '_block%02d' % block_number
    kernel_sizes = [kernel_size, kernel_size]
    num_filter_list = [num_filters, num_filters]
    strides = [1, stride]
    out_result = x
    for idx in range(2):
        out_result = ConvUnit(out_result, norm_func=norm_func, activation_func=activation_func,
                              num_filters=num_filter_list[idx], conv_stride=strides[idx],
                              kernel_size=kernel_sizes[idx], atrous_rate=atrous_rate, padding=padding,
                              block_prefix=block_prefix,
                              kernel_initializer=kernel_initializer, kernel_regularizer=kernel_regularizer,
                              layer_idx=idx + 1, conv_first=True)
    if len(downsample) > 0:
        # project the shortcut so its shape matches the residual branch
        for idx in range(len(downsample)):
            x = downsample[idx](x)
        residual = x
    residual = keras.layers.add([residual, out_result])
    residual = LeakyReLU(alpha=leaky_ratio, name='%s_relu%d_2' % (block_pre, block_number))(residual)
    return residual


def Bottleneck(x, **kwargs):
    '''
    Bottleneck residual block (1x1 -> kxk -> 1x1 convolutions).
    The output tensor has num_filters * expansion channels (expansion = 4 by
    default), so the input must either already have that many channels or a
    projection must be supplied via `downsample`.
    '''
    block_pre = kwargs.get('block_pre')
    block_number = kwargs.get('block_number')
    num_filters = kwargs.get('num_filters')
    kernel_size = kwargs.get('kernel_size')
    downsample = kwargs.get('downsample')
    leaky_ratio = kwargs.get('leaky_ratio', 0.01)
    norm_func = kwargs.get('norm_func', BatchNormalization)
    expansion = kwargs.get('expansion', 4)
    stride = kwargs.get('stride', 1)
    activation_func = kwargs.get('activation_func')
    atrous_rate = kwargs.get('atrous_rate', 1)
    padding = kwargs.get('padding', 'same')
    kernel_initializer = kwargs.get('kernel_initializer', 'he_normal')
    kernel_regularizer = kwargs.get('kernel_regularizer', None)

    residual = x
    block_prefix = block_pre + '_block%02d' % block_number
    kernel_sizes = [1, kernel_size, kernel_size]
    num_filter_list = [num_filters, num_filters, num_filters * expansion]
    strides = [1, stride, 1]
    out_result = x
    for idx in range(3):
        out_result = ConvUnit(out_result, norm_func=norm_func, activation_func=activation_func,
                              num_filters=num_filter_list[idx], conv_stride=strides[idx],
                              kernel_size=kernel_sizes[idx], atrous_rate=atrous_rate, padding=padding,
                              block_prefix=block_prefix,
                              kernel_initializer=kernel_initializer, kernel_regularizer=kernel_regularizer,
                              layer_idx=idx + 1, conv_first=True)
    if len(downsample) > 0:
        # project the shortcut so its shape matches the residual branch
        for idx in range(len(downsample)):
            x = downsample[idx](x)
        residual = x
    residual = keras.layers.add([residual, out_result])
    residual = LeakyReLU(alpha=leaky_ratio, name='%s_relu%d_4' % (block_pre, block_number))(residual)
    return residual


blocks_dict = {
    'BASIC': BasicBlock,
    'BOTTLENECK': Bottleneck
}

expansion_dict = {
    'BASIC': 1,
    'BOTTLENECK': 4
}
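
# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only, not part of the network build).
# It applies one BasicBlock to a dummy 3D feature map; the input shape, the
# filter count, and the InstanceNormalization choice are assumptions for the
# demo. The kwargs mirror how _make_one_branch calls the block below, and the
# sketch relies on CommonLayers.ConvUnit behaving as it is used in this file.
# ---------------------------------------------------------------------------
def _demo_basic_block(demo_shape=(32, 32, 32, 8), demo_filters=8):
    demo_input = Input(shape=demo_shape, name='demo_input')
    out = blocks_dict['BASIC'](demo_input,
                               block_pre='demo', block_number=1,
                               num_filters=demo_filters, kernel_size=3,
                               downsample=[],  # input channels == num_filters, no projection needed
                               leaky_ratio=0.01,
                               norm_func=InstanceNormalization,
                               activation_func=LeakyReLU)
    return Model(demo_input, out)
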

class HighResolutionModule():
    def __init__(self, **kwargs):
        self.num_inchannels = kwargs.get('num_inchannels')
        self.fuse_method = kwargs.get('fuse_method')
        self.num_branches = kwargs.get('num_branches')
        self.multi_scale_output = kwargs.get('multi_scale_output')
        self.num_blocks = kwargs.get('num_blocks')
        self.block = kwargs.get('block')
        self.stage_num = kwargs.get('stage_num')
        self.kernel_size = kwargs.get('kernel_size', 3)
        self.leaky_ratio = kwargs.get('leaky_ratio', 0.01)
        self.norm_func = kwargs.get('norm_func', BatchNormalization)
        self.num_channels = kwargs.get('num_channels')
        self.activation_func = LeakyReLU
        self.padding = 'same'
        self.kernel_initializer = 'he_normal'
        self.kernel_regularizer = None
        self.branches = []

    def _make_one_branch(self, input_tensor, branch_index, block, num_blocks, num_channels, stride=1):
        '''
        Build a single branch by repeating `block` num_blocks[branch_index]
        times; the output channel count is num_channels[branch_index].
        :param input_tensor: tensor
        :param branch_index: int. Index of the current branch.
        :param block: func. Block function the tensor passes through.
        :param num_blocks: list of int.
        :param num_channels: list of int. Output channels per branch.
        :param stride: int.
        :return: tensor
        '''
        downsample = []
        prestr = 'HRmakestage_stage%02d' % self.stage_num
        if stride != 1 or self.num_inchannels[branch_index] != num_channels[branch_index]:
            # Project the shortcut whenever the spatial or channel dimensions
            # of the input do not match the branch output.
            block_number = 1
            downsample.append(Conv3D(filters=num_channels[branch_index],
                                     kernel_size=1,
                                     strides=stride,
                                     padding='same',
                                     name='%s_downsample_conv%d_1' % (prestr, branch_index),
                                     kernel_initializer='he_normal'))
            downsample.append(self.norm_func(name='%s_downsample_bn%d_1' % (prestr, block_number)))
        result = input_tensor
        result = block(result, block_pre=prestr + '_blockfunc_%d' % branch_index,
                       block_number=1, num_filters=num_channels[branch_index],
                       kernel_size=self.kernel_size, downsample=downsample, leaky_ratio=self.leaky_ratio,
                       norm_func=self.norm_func, stride=stride)
        self.num_inchannels[branch_index] = num_channels[branch_index]
        for block_idx in range(1, num_blocks[branch_index]):
            result = block(result, block_pre=prestr + '_blockfunc_%d' % branch_index,
                           block_number=block_idx + 1, num_filters=num_channels[branch_index],
                           kernel_size=self.kernel_size, downsample=[], leaky_ratio=self.leaky_ratio,
                           norm_func=self.norm_func)
        return result
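
    # Note on the projection above (assumed example numbers): with
    # num_inchannels[b] = 32 and num_channels[b] = 64, the 1x1 Conv3D + norm
    # pair is applied to the shortcut inside the first block so both addends
    # have 64 channels; the remaining num_blocks[b]-1 blocks then use a plain
    # identity shortcut.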
    def make_branches(self, input_tensors, num_branches, block, num_blocks, num_channels):
        '''
        Populate self.branches with the output tensor of each branch by
        calling _make_one_branch once per branch. The resulting tensors
        differ only in their channel dimension.
        :param input_tensors: list of tensors
        :param num_branches: int
        :param block: func. Block function each tensor passes through.
        :param num_blocks: list of int.
        :param num_channels: list of int. Output channels per branch.
        '''
        self.branches = []
        for branch_index in range(num_branches):
            self.branches.append(self._make_one_branch(input_tensors[branch_index], branch_index,
                                                       block, num_blocks, num_channels))

    def make_fuse_layers(self):
        '''
        Generate a list of lists of tensors with shape
        [self.branch_index_limit, self.num_branches]; tensors in the same row
        share the same size and can be added directly.
        '''
        fuse_result = []
        ### todo : check this part
        if self.num_branches == 1:
            return fuse_result
        num_branches = self.num_branches
        num_inchannels = self.num_inchannels
        input_tensors = self.branches
        for output_branch_index in range(self.branch_index_limit):
            fuse_tensors = []
            for input_branch_index in range(num_branches):
                ##### for each input tensor
                block_pre = 'HRmakestage_fuse_stage%02d_%02d_%02d' % (self.stage_num, input_branch_index,
                                                                      output_branch_index)
                current_input_tensor = input_tensors[input_branch_index]
                if input_branch_index > output_branch_index:
                    # 1x1 conv to match the output channel count, then
                    # upsample to match the spatial size
                    current_tensor = ConvUnit(current_input_tensor, norm_func=self.norm_func,
                                              activation_func=self.activation_func,
                                              num_filters=num_inchannels[output_branch_index],
                                              kernel_size=1, atrous_rate=1, padding=self.padding,
                                              block_prefix=block_pre,
                                              kernel_initializer=self.kernel_initializer,
                                              kernel_regularizer=self.kernel_regularizer,
                                              layer_idx=1, conv_first=True)
                    current_tensor = UpSampling3D(size=2 ** (input_branch_index - output_branch_index),
                                                  name='%s_upsample' % block_pre)(current_tensor)
                elif output_branch_index == input_branch_index:
                    # same branch: pass through unchanged
                    current_tensor = current_input_tensor
                else:
                    # downsample with strided convolutions
                    current_tensor = current_input_tensor
                    for branch_distance in range(output_branch_index - input_branch_index):
                        # only change the channel count at the last step
                        if branch_distance == output_branch_index - input_branch_index - 1:
                            num_outchannels_conv3x3 = num_inchannels[output_branch_index]
                        else:
                            num_outchannels_conv3x3 = num_inchannels[input_branch_index]
                        current_tensor = ConvUnit(current_tensor, norm_func=self.norm_func,
                                                  activation_func=self.activation_func,
                                                  num_filters=num_outchannels_conv3x3, conv_stride=2,
                                                  kernel_size=self.kernel_size, atrous_rate=1,
                                                  padding=self.padding, block_prefix=block_pre,
                                                  kernel_initializer=self.kernel_initializer,
                                                  kernel_regularizer=self.kernel_regularizer,
                                                  layer_idx=branch_distance, conv_first=True)
                # exactly one tensor per input branch, so each row of
                # fuse_result has length num_branches
                fuse_tensors.append(current_tensor)
            fuse_result.append(fuse_tensors)
        return fuse_result
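
    # Worked example of the fuse geometry (assumed sizes): with 3 branches of
    # spatial size 32/16/8 and 16/32/64 channels, fusing into branch 0 takes
    # branch 1 through a 1x1 ConvUnit to 16 channels plus UpSampling3D(2**1),
    # and branch 2 to 16 channels plus UpSampling3D(2**2); fusing into branch
    # 2 takes branch 0 through two stride-2 ConvUnits (16 then 64 channels).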
    def get_module(self, input_tensors):
        '''
        Generate self.branch_index_limit output tensors from input_tensors.

        self.make_branches runs the blocks of each branch in parallel:
            input0 -- convBlock*N0 --- output0
            input1 -- convBlock*N1 --- output1
            input2 -- convBlock*N2 --- output2
        and the fuse layers then merge the branch outputs across scales.
        '''
        self.make_branches(input_tensors, self.num_branches, self.block, self.num_blocks, self.num_channels)
        if self.num_branches == 1:
            # keep the (tensors, channels) contract expected by make_stage
            return [self.branches[0]], self.num_inchannels
        ### self.branch_index_limit defines how many tensors are fed to the next stage
        self.branch_index_limit = self.num_branches
        # if not self.multi_scale_output:
        #     self.branch_index_limit = 1
        fuse_result = self.make_fuse_layers()
        output_tensors = []
        for output_branch_index in range(self.branch_index_limit):
            if output_branch_index == 0:
                current_tensor = self.branches[0]
            else:
                current_tensor = fuse_result[output_branch_index][0]
            for input_branch_index in range(1, self.num_branches):
                current_tensor = keras.layers.add([current_tensor,
                                                   fuse_result[output_branch_index][input_branch_index]])
            output_tensors.append(current_tensor)
        return output_tensors, self.num_inchannels


class HRSegNet():
    def __init__(self, **kwargs):
        '''
        Store the configuration used while building the network.
        '''
        self.config = kwargs
        self.inchannels = kwargs.get('inchannels', 64)
        self.norm_func = kwargs.get('norm_func')
        self.leaky_ratio = kwargs.get('leaky_ratio', 0.15)
        self.kernel_size = kwargs.get('kernel_size', 3)
        self.activation_func = kwargs.get('activation_func')
        self.padding = kwargs.get('padding', 'same')
        self.kernel_initializer = kwargs.get('kernel_initializer', 'he_normal')
        self.kernel_regularizer = kwargs.get('kernel_regularizer', None)
        self.num_classes = kwargs.get('num_classes')
        self.classification_layers = kwargs.get('classification_layers')
        self.dropout_rate = kwargs.get('dropout_rate', 0)
        self.initial_strides = kwargs.get('initial_strides')

    def make_transition_layer(self, input_tensors, block, num_channels_pre_layer, num_channels_cur_layer, stage_num):
        '''
        Generate num_branches_cur output tensors.
        :param input_tensors: list of tensors, one per branch of the previous stage.
        :param block: func. Unused here; kept for interface symmetry.
        :param num_channels_pre_layer: list of int. Channel counts of the previous stage's tensors.
        :param num_channels_cur_layer: list of int. Channel counts of the current stage's tensors.
        :param stage_num: int. Index of the current stage.
        '''
        num_branches_cur = len(num_channels_cur_layer)
        num_branches_pre = len(num_channels_pre_layer)
        transition_tensors = []
        prestr = 'HRSegModel_stage_%d_transition' % stage_num
        for branch_idx_cur in range(num_branches_cur):
            if branch_idx_cur < num_branches_pre:
                ## branch already exists in the previous stage: no downsampling
                if num_channels_cur_layer[branch_idx_cur] != num_channels_pre_layer[branch_idx_cur]:
                    current_tensor = ConvUnit(input_tensors[branch_idx_cur], norm_func=self.norm_func,
                                              activation_func=self.activation_func,
                                              num_filters=num_channels_cur_layer[branch_idx_cur],
                                              kernel_size=self.kernel_size, atrous_rate=1, padding=self.padding,
                                              block_prefix=prestr,
                                              kernel_initializer=self.kernel_initializer,
                                              kernel_regularizer=self.kernel_regularizer,
                                              layer_idx=branch_idx_cur, conv_first=True)
                    transition_tensors.append(current_tensor)
                else:
                    #### channel counts already match: keep the tensor as-is
                    transition_tensors.append(input_tensors[branch_idx_cur])
            else:
                ### new branch: downsample the smallest tensor of the previous stage
                current_tensor = input_tensors[-1]
                for branch_distance in range(branch_idx_cur + 1 - num_branches_pre):
                    ### Do not expand the channel dimension until the last layer of the new branch
                    inchannels = num_channels_pre_layer[-1]
                    outchannels = num_channels_cur_layer[branch_idx_cur] \
                        if branch_distance == branch_idx_cur - num_branches_pre else inchannels
                    current_tensor = ConvUnit(current_tensor, norm_func=self.norm_func,
                                              activation_func=self.activation_func,
                                              num_filters=outchannels, conv_stride=2,
                                              kernel_size=self.kernel_size, atrous_rate=1, padding=self.padding,
                                              block_prefix=prestr,
                                              kernel_initializer=self.kernel_initializer,
                                              kernel_regularizer=self.kernel_regularizer,
                                              layer_idx=branch_idx_cur, conv_first=True)
                transition_tensors.append(current_tensor)
        return transition_tensors
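
    # Transition example (assumed values): going from 2 branches with channels
    # [16, 32] to 3 branches with [16, 32, 64], branches 0 and 1 pass through
    # unchanged, while the new branch 2 is built from the 32-channel tensor
    # with one stride-2 ConvUnit that expands it to 64 channels.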
    def make_layer(self, x, num_channels, block, blocks, stride=1, expansion=4):
        '''
        Build the initial convolution stage: `blocks` residual blocks, with a
        conv (with stride) + norm projection on the first block's shortcut if
        any dimension does not match.
        '''
        prestr = 'HRSegModel_pre'
        downsample = []
        if stride != 1 or self.inchannels != num_channels * expansion:
            # downsample the input tensor to match the spatial or channel size
            block_number, branch_index = 1, 1
            ### Todo: change downsample to a single block (why not conv-bn-relu)
            downsample.append(Conv3D(filters=num_channels * expansion,
                                     kernel_size=1,
                                     strides=stride,
                                     padding='same',
                                     name='%s_downsample_conv%d_3' % (prestr, branch_index),
                                     kernel_initializer='he_normal'))
            downsample.append(self.norm_func(name='%s_downsample_bn%d_3' % (prestr, block_number)))
        result = x
        # only the first block receives the projection (and the stride)
        downsample_func = [downsample] + [[] for _ in range(1, blocks)]
        for block_idx in range(blocks):
            result = block(result, block_pre=prestr + '_blockfunc',
                           block_number=block_idx + 1, num_filters=num_channels,
                           kernel_size=self.kernel_size, downsample=downsample_func[block_idx],
                           leaky_ratio=self.leaky_ratio,
                           norm_func=self.norm_func,
                           stride=stride if block_idx == 0 else 1)
        return result

    def make_stage(self, input_tensors, layer_config, num_inchannels, multi_scale_output=True, stage_num=1,
                   merge_choice=False):
        '''
        Build one stage out of num_modules HighResolutionModules.
        :param input_tensors: list of tensors.
        :param layer_config: dict. Config of the current stage.
        :param num_inchannels: list of int. Channel counts of the input tensors.
        :param multi_scale_output: bool.
        :return: (result, return_num_inchannels), where result is the list of
            output-tensor lists (one per module) and return_num_inchannels
            holds the channel counts of the last module's outputs.
        '''
        num_modules = layer_config['NUM_MODULES']
        num_branches = layer_config['NUM_BRANCHES']
        num_blocks = layer_config['NUM_BLOCKS']
        num_channels = layer_config['NUM_CHANNELS']
        block = blocks_dict[layer_config['BLOCK']]
        fuse_method = layer_config['FUSE_METHOD']
        result = []
        for module_idx in range(num_modules):
            ##### multi-scale output is only disabled for the last module of a not-multi-scale stage
            if not multi_scale_output and module_idx == num_modules - 1:
                reset_multi_scale_output = False
            else:
                reset_multi_scale_output = True
            current_parameter_dict = {
                'num_inchannels': num_inchannels, 'fuse_method': fuse_method,
                'num_branches': num_branches, 'multi_scale_output': reset_multi_scale_output,
                'num_blocks': num_blocks, 'block': block,
                'stage_num': stage_num, 'kernel_size': self.kernel_size,
                'leaky_ratio': self.leaky_ratio, 'norm_func': self.norm_func,
                'num_channels': num_channels, 'activation_func': self.activation_func,
                'padding': self.padding, 'kernel_initializer': self.kernel_initializer,
                'kernel_regularizer': self.kernel_regularizer
            }
            hr_obj = HighResolutionModule(**current_parameter_dict)
            current_tensor_list, return_num_inchannels = hr_obj.get_module(input_tensors)
            result.append(current_tensor_list)
            # chain modules: the next module consumes this module's outputs
            input_tensors = current_tensor_list
        return result, return_num_inchannels

    def generate_layer_config(self, stage_idx):
        layer_config = {'NUM_MODULES': self.config['num_modules'],
                        'NUM_BRANCHES': self.config['num_branches'][stage_idx],
                        'NUM_BLOCKS': [self.config['block_per_stage'][stage_idx]
                                       for _ in range(self.config['num_branches'][stage_idx])],
                        'NUM_CHANNELS': [self.inchannels * (2 ** layer_idx)
                                         for layer_idx in range(self.config['num_branches'][stage_idx])],
                        'BLOCK': self.config['block_func'],
                        'FUSE_METHOD': self.config['FUSE_METHOD']}
        return layer_config
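
    # Example (assumed config): with inchannels=16, num_modules=1,
    # num_branches=[1, 2, 3, 4], block_per_stage=[2, 2, 2, 2],
    # block_func='BASIC' and FUSE_METHOD='SUM', generate_layer_config(2)
    # returns {'NUM_MODULES': 1, 'NUM_BRANCHES': 3, 'NUM_BLOCKS': [2, 2, 2],
    # 'NUM_CHANNELS': [16, 32, 64], 'BLOCK': 'BASIC', 'FUSE_METHOD': 'SUM'}.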
    def get_model(self, input_shape, stages=4, downsample_stride=1, choice='transpose', merge_choice=False):
        main_input = Input(shape=input_shape, name='main_input')
        prestr = 'HRSegModel_pre'
        num_filters = self.inchannels
        x = main_input
        for layer_idx in range(1, 3):
            x = ConvUnit(x, norm_func=self.norm_func, activation_func=self.activation_func,
                         num_filters=num_filters, conv_stride=self.initial_strides[layer_idx - 1],
                         kernel_size=self.kernel_size, atrous_rate=1, padding=self.padding, block_prefix=prestr,
                         kernel_initializer=self.kernel_initializer, kernel_regularizer=self.kernel_regularizer,
                         layer_idx=layer_idx, conv_first=True)
        # print ('input shape is ', x.shape)
        x = self.make_layer(x, num_channels=num_filters, block=BasicBlock, blocks=2, expansion=1)
        '''
        Each stage consists of 2 parts:
            make_transition_layer: match branch count and channels to the new stage
            make_stage: add conv blocks to each branch in parallel & fuse tensors to get the outputs
        '''
        start_idx = 2
        for stage_idx in range(start_idx, stages + 1):
            layer_config = self.generate_layer_config(stage_idx - 2)
            # print ('stage_idx', stage_idx)
            # print (layer_config)
            if stage_idx == start_idx:
                y_list, pre_stage_channels = [[x for _ in range(layer_config['NUM_BRANCHES'])]], [self.inchannels]
            input_tensors = y_list[-1]
            multi_scale_output = False if stage_idx == stages else True
            num_channels, expansion_rate = layer_config['NUM_CHANNELS'], expansion_dict[layer_config['BLOCK']]
            x_list = self.make_transition_layer(input_tensors, block=blocks_dict[layer_config['BLOCK']],
                                                num_channels_pre_layer=pre_stage_channels,
                                                num_channels_cur_layer=[num_channels[i] * expansion_rate
                                                                        for i in range(len(num_channels))],
                                                stage_num=stage_idx - 1)
            y_list, pre_stage_channels = self.make_stage(x_list, layer_config, num_channels, multi_scale_output,
                                                         stage_num=stage_idx - 1)
        '''
        Fuse feature maps from different levels: bring every branch to full
        resolution with a ConvUnit + upsampling, then add them together.
        '''
        result_tensors = []
        prestr = 'HRSegNet_Stage%02d_FinalFeatureFuse' % stages
        for idx in range(len(y_list[0])):
            upsample_rate = 2 ** idx
            result_tensor = ConvUnit(y_list[0][idx], norm_func=self.norm_func,
                                     activation_func=self.activation_func, block_prefix=prestr,
                                     num_filters=self.inchannels, kernel_size=self.kernel_size, atrous_rate=1,
                                     padding=self.padding,
                                     kernel_initializer=self.kernel_initializer,
                                     kernel_regularizer=self.kernel_regularizer,
                                     layer_idx=idx + 1, conv_first=True)
            result_tensor = UpSampling3D(size=upsample_rate,
                                         name='%s_upsample%02d' % (prestr, idx + 1))(result_tensor)
            result_tensors.append(result_tensor)
        result_tensor = Add(name='%s_add' % prestr)(result_tensors)
        '''
        Classification part
        '''
        final_feature = self.classification_layers(name='HRSegNet_classification')(result_tensor)
        if self.num_classes == 1:
            final_acti = 'sigmoid'
        else:
            final_acti = 'softmax'
        final_output = Dense(self.num_classes, activation=final_acti, name='HRSegNet_final_output')(final_feature)
        model = Model(main_input, final_output)
        return model, [main_input] + y_list[0], []
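

# ---------------------------------------------------------------------------
# Usage sketch (illustrative assumptions throughout): every config value
# below, the input shape, and GlobalAveragePooling3D as the classification
# head are example choices, not settings shipped with this repo. The config
# keys mirror what HRSegNet.__init__ and generate_layer_config read; for
# stages=4, generate_layer_config consumes indices 0..2 of the per-stage
# lists, so three entries suffice.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    seg_config = {
        'inchannels': 16,
        'norm_func': InstanceNormalization,
        'leaky_ratio': 0.15,
        'kernel_size': 3,
        'activation_func': LeakyReLU,
        'num_classes': 2,
        'classification_layers': GlobalAveragePooling3D,
        'initial_strides': [1, 1],
        'num_modules': 1,
        'num_branches': [2, 3, 4],
        'block_per_stage': [2, 2, 2],
        'block_func': 'BASIC',
        'FUSE_METHOD': 'SUM',
    }
    seg_net = HRSegNet(**seg_config)
    model, _, _ = seg_net.get_model(input_shape=(64, 64, 64, 1), stages=4)
    model.summary()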