import concurrent.futures
import glob
import json
import math
import os
import re
import shutil
from collections import OrderedDict

import cv2 as cv
import numpy as np
import pandas
import pydicom
import SimpleITK as sitk
from tqdm import tqdm

sitk.ProcessObject_SetGlobalWarningDisplay(False)

LEARNING_RATE = 0.0001
EPOCHS = 30
INPUT_IMAGE_SIZE = 224
BATCH_SIZE = 32
LABEL_COUNT = 22

labels = {
    "Abdomen": 0,
    "Ankle": 1,
    "CervicalSpine": 2,
    "Chest": 3,
    "Clavicles": 4,
    "Elbow": 5,
    "Feet": 6,
    "Finger": 7,
    "Forearm": 8,
    "Hand": 9,
    "Hip": 10,
    "Knee": 11,
    "LowerLeg": 12,
    "LumbarSpine": 13,
    "Others": 14,
    "Pelvis": 15,
    "Shoulder": 16,
    "Sinus": 17,
    "Skull": 18,
    "Thigh": 19,
    "ThoracicSpine": 20,
    "Wrist": 21,
}

EXTRA_Tags = OrderedDict({
    "0002|0000": "File Meta Information Group Length",
    "0002|0001": "File Meta Information Version",
    "0002|0002": "Media Storage SOP Class UID",
    "0002|0003": "Media Storage SOP Instance UID",
    "0002|0010": "Transfer Syntax UID",
    "0002|0012": "Implementation Class UID",
    "0002|0013": "Implementation Version Name",
    "0008|0005": "Specific Character Set",
    "0008|0008": "Image Type",
    "0008|0016": "SOP Class UID",
    "0008|0018": "SOP Instance UID",
    "0008|0020": "Study Date",
    "0008|0021": "Series Date",
    "0008|0022": "Acquisition Date",
    "0008|0023": "Content Date",
    "0008|002a": "Acquisition DateTime",
    "0008|0030": "Study Time",
    "0008|0031": "Series Time",
    "0008|0032": "Acquisition Time",
    "0008|0033": "Content Time",
    "0008|0050": "Accession Number",
    "0008|0060": "Modality",
    "0008|0070": "Manufacturer",
    "0008|0080": "Institution Name",
    "0008|0081": "Institution Address",
    "0008|0090": "Referring Physicians Name",
    "0008|1010": "Station Name",
    "0008|1030": "Study Description",
    "0008|103e": "Series Description",
    "0008|1050": "Performing Physicians Name",
    "0008|1090": "Manufacturers Model Name",
    "0008|1140": "Referenced Image Sequence",
    "0008|1150": "Referenced SOP Class UID",
    "0008|1155": "Referenced SOP Instance UID",
    "0008|2112": "Source Image Sequence",
    "0008|3010": "Irradiation Event UID",
    "0009|0010": "Private Creator",
    "0010|0010": "Patients Name",
    "0010|0020": "Patient ID",
    "0010|0030": "Patients Birth Date",
    "0010|0040": "Patients Sex",
    "0010|1002": "Other Patient IDs Sequence",
    "fffe|e000": "Item",
    "0010|1010": "Patients Age",
    "0018|0015": "Body Part Examined",
    "0018|0050": "Slice Thickness",
    "0018|0060": "KVP",
    "0018|0090": "Data Collection Diameter",
    "0018|1000": "Device Serial Number",
    "0018|1020": "Software Version(s)",
    "0018|1030": "Protocol Name",
    "0018|1100": "Reconstruction Diameter",
    "0018|1110": "Distance Source to Detector",
    "0018|1111": "Distance Source to Patient",
    "0018|1120": "Gantry/Detector Tilt",
    "0018|1130": "Table Height",
    "0018|1140": "Rotation Direction",
    "0018|1150": "Exposure Time",
    "0018|1151": "X-Ray Tube Current",
    "0018|1152": "Exposure",
    "0018|1160": "Filter Type",
    "0018|1170": "Generator Power",
    "0018|1190": "Focal Spot(s)",
    "0018|1200": "Date of Last Calibration",
    "0018|1201": "Time of Last Calibration",
    "0018|1210": "Convolution Kernel",
    "0018|5100": "Patient Position",
    "0018|9306": "Single Collimation Width",
    "0018|9307": "Total Collimation Width",
    "0018|9309": "Table Speed",
    "0018|9310": "Table Feed per Rotation",
    "0018|9311": "Spiral Pitch Factor",
    "0018|9313": "Data Collection Center (Patient)",
    "0018|9318": "Reconstruction Target Center (Patient)",
    "0018|9323": "Exposure Modulation Type",
    "0018|9324": "Estimated Dose Saving",
    "0018|9345": "CTDIvol",
    "0018|9346": "CTDI Phantom Type Code Sequence",
    "0008|0100": "Code Value",
    "0008|0102": "Coding Scheme Designator",
    "0008|0104": "Code Meaning",
    "fffe|e00d": "Item Delimitation Item",
    "fffe|e0dd": "Sequence Delimitation Item",
    "0018|9352": "Calcium Scoring Mass Factor Device",
    "0019|0010": "Private Creator",
    "0020|000d": "Study Instance UID",
    "0020|000e": "Series Instance UID",
    "0020|0010": "Study ID",
    "0020|0011": "Series Number",
    "0020|0012": "Acquisition Number",
    "0020|0013": "Instance Number",
    "0020|0032": "Image Position (Patient)",
    "0020|0037": "Image Orientation (Patient)",
    "0020|0052": "Frame of Reference UID",
    "0020|1040": "Position Reference Indicator",
    "0020|1041": "Slice Location",
    "0020|4000": "Image Comments",
    "0021|0010": "Private Creator",
    "0028|0002": "Samples per Pixel",
    "0028|0004": "Photometric Interpretation",
    "0028|0010": "Rows",
    "0028|0011": "Columns",
    "0028|0030": "Pixel Spacing",
    "0028|0100": "Bits Allocated",
    "0028|0101": "Bits Stored",
    "0028|0102": "High Bit",
    "0028|0103": "Pixel Representation",
    "0028|0106": "Smallest Image Pixel Value",
    "0028|0107": "Largest Image Pixel Value",
    "0028|1050": "Window Center",
    "0028|1051": "Window Width",
    "0028|1052": "Rescale Intercept",
    "0028|1053": "Rescale Slope",
    "0028|1054": "Rescale Type",
    "0028|1055": "Window Center & Width Explanation",
})

labels_reversed = dict((v, k) for k, v in labels.items())


def label_as_string(label_id):
    return labels_reversed[label_id]


def load_dcm_image(path, use_pydicom=False):
    """Load a single-slice DICOM file as a float32 array scaled to [0, 255]."""
    if use_pydicom:
        dicom = pydicom.dcmread(path)
        img = dicom.pixel_array
        img = img / img.max() * 255
        if dicom.PhotometricInterpretation == "MONOCHROME2":
            # np.invert on int32 computes -(x + 1), so this maps x -> 254 - x,
            # i.e. an (approximate) intensity inversion
            img = img.astype("int32")
            img = np.invert(img) + 255
        img = img.astype("float32")
    else:
        dicom = sitk.ReadImage(path)
        protocol = dicom.GetMetaData('0028|0004')  # Photometric Interpretation
        img = sitk.GetArrayFromImage(dicom)
        img = img[0, :, :]
        img = img / img.max() * 255
        if protocol == "MONOCHROME2":
            img = img.astype("int32")
            img = np.invert(img) + 255
        img = img.astype("float32")
    return img


def compress_dcm_image(inpath, outpath):
    """Read a DICOM slice as a float32 array scaled to [0, 255].

    Note: `outpath` is currently unused; no compressed file is written yet.
    """
    dicom = sitk.ReadImage(inpath)
    protocol = dicom.GetMetaData('0028|0004')  # Photometric Interpretation
    img = sitk.GetArrayFromImage(dicom)
    img = img[0, :, :]
    img = img / img.max() * 255
    if protocol == "MONOCHROME2":
        img = img.astype("int32")
        img = np.invert(img) + 255
    img = img.astype("float32")
    return img


def _load_scan_with_givennames(abs_image_names):
    series_uuid_meta_key = "0020|000e"  # Series Instance UID
    meta_data_slice = 0
    series_reader = sitk.ImageSeriesReader()
    series_reader.SetFileNames(abs_image_names)
    # read the per-slice meta data as well
    series_reader.MetaDataDictionaryArrayUpdateOn()
    series_reader.LoadPrivateTagsOn()
    image = series_reader.Execute()

    try:
        seriesUID = series_reader.GetMetaData(meta_data_slice, series_uuid_meta_key)
    except Exception:
        seriesUID = None
    if seriesUID is not None:
        seriesUID = str(seriesUID)

    # copy the slice-0 tags onto the loaded image
    supported_meta_keys = series_reader.GetMetaDataKeys(meta_data_slice)
    for meta_k in supported_meta_keys:
        try:
            meta_val = series_reader.GetMetaData(meta_data_slice, meta_k)
            try:
                meta_val.encode().decode()
            except Exception:
                print("metakey {}, non-standard encoding metaval, skipping".format(meta_k))
                continue
            image.SetMetaData(meta_k, meta_val)
        except Exception:
            print("_load_scan_with_givennames, reading meta ({}) failed".format(meta_k))
    return image, seriesUID


def get_dcm_filenames_dict(root_dir):
    """Map series UID -> absolute DICOM file path for every file under root_dir."""
    filenames = {}
    for dir_path, dir_names, file_names in os.walk(root_dir):
        for file_name in file_names:
            if file_name != '':
                fullpath = os.path.join(dir_path, file_name)
                img, suid = _load_scan_with_givennames([fullpath])
                filenames[suid] = fullpath
    return filenames


def preprocess_train_df(df: pandas.DataFrame):
    """
    Replace the Target column by one category column per available class
    [Abdomen, Ankle, CervicalSpine, ...].

    :param df: dataframe to preprocess
    :return: preprocessed dataframe
    """
    for class_id in range(LABEL_COUNT):
        df[class_id] = 0

    def fill_classes(row):
        targets = row['Target'].split(" ")
        for target in targets:
            if target != "":
                row[int(target)] = 1
        return row

    df = df.apply(fill_classes, axis=1)
    df = df.drop(columns=["Target"])
    # Rename class_id columns to class names: 1 -> Ankle etc.
    df.rename(columns=labels_reversed, inplace=True)
    return df


def get_dcm_seriesUID_dict(dcms):
    seriesNames = {}
    for dcm in dcms:
        img, suid = _load_scan_with_givennames([dcm])
        seriesNames[suid] = dcm
    return seriesNames


def convert_dcm_2_nii(input_path, save_path, preprocess_workers=0, log_func=None, raw_dict_dir=None):
    """
    Convert DICOM files into NIfTIs. The algorithm walks through input_path and
    processes all the files. Directory structure is not preserved; final images
    are stored directly in save_path.

    :param input_path: directory containing the input DICOM files
    :param save_path: directory the NIfTI files are written to
    :param raw_dict_dir: directory holding the per-series raw-tag JSON dicts
    """
    if log_func is None:
        log_func = print
    from trans_dicom_2_nii import Trans_Dicom_2_NII

    trans_dicom_2_nii = Trans_Dicom_2_NII(input_path, save_path, raw_dict_dir)
    trans_dicom_2_nii()

    # collect the raw tag dictionaries written during conversion
    raw_dicts = {}
    for i in glob.glob(os.path.join(raw_dict_dir, '*.json')):
        with open(i, 'r') as f:
            raw_dict = json.load(f)
        raw_dicts.update(raw_dict)
    return raw_dicts


def convert_dcm_dataset_to_jpg(src_dir, dest_dir, max_image_size=800, preprocess_workers=0, log_func=None):
    """
    Convert DICOM files into JPGs. The algorithm walks through the src_dir
    hierarchy and processes all the files. Directory structure is not preserved;
    final images are stored directly in dest_dir. The image aspect ratio is
    preserved.

    :param src_dir: source directory
    :param dest_dir: destination directory
    :param max_image_size: maximal exported image size
    """
    if log_func is None:
        log_func = print
    dcm_uid_to_path = get_dcm_filenames_dict(src_dir)
    os.makedirs(dest_dir, exist_ok=True)
    filelist = list(dcm_uid_to_path.values())
    dcm_uid_list = list(dcm_uid_to_path.keys())
    parallel = preprocess_workers > 0
    log_func(fr'convert_dcm_dataset_to_jpg parallel status {parallel}, workers {preprocess_workers}')
    if parallel:
        with tqdm(total=len(filelist)) as pbar:
            with concurrent.futures.ProcessPoolExecutor(max_workers=preprocess_workers) as executor:
                futures = {
                    executor.submit(process_one_image, f,
                                    dcm_uid_list=dcm_uid_list, filepath_list=filelist,
                                    dest_dir=dest_dir, max_image_size=max_image_size): f
                    for f in range(len(filelist))
                }
                for future in concurrent.futures.as_completed(futures):
                    filename = filelist[futures[future]]
                    try:
                        _ = future.result()
                        pbar.update(1)
                    except Exception:
                        log_func('{} failed.'.format(filename))
    else:
        for f in tqdm(range(len(filelist))):
            process_one_image(f, dcm_uid_list=dcm_uid_list, filepath_list=filelist,
                              dest_dir=dest_dir, max_image_size=max_image_size)
    return dcm_uid_to_path


def load_image_and_info_by_given_path_and_id(series_file_names, SOP_suid, logfunc=None):
    print(fr'loading from {series_file_names}, {SOP_suid}')
    if logfunc is None:
        logfunc = print
    series_reader = sitk.ImageSeriesReader()
    try:
        series_reader.MetaDataDictionaryArrayUpdateOn()
        series_reader.LoadPrivateTagsOn()
        series_reader.SetFileNames(series_file_names)
        image = series_reader.Execute()
    except Exception as e:
        logfunc("error loading images from suid {}, msg {}".format(SOP_suid, e))
        return None, None, None

    # use the slice-0 meta data as the overall meta data
    output_meta_map = {}
    for meta_key, meta_desc in EXTRA_Tags.items():
        meta_val = ""
        if series_reader.HasMetaDataKey(0, meta_key):
            meta_val = series_reader.GetMetaData(0, meta_key)
            # unicode escapes are not supported downstream
            if "\\u" in repr(meta_val):
                meta_val = eval(repr(meta_val).replace("\\u", "_"))
        meta_val = str(meta_val)
        meta_val = meta_val.replace('\r', '')
        meta_val = meta_val.replace('\n', '')
        meta_val = meta_val.replace(',', ' ')
        meta_val = meta_val.strip()
        output_meta_map[meta_key] = meta_val

    # extract the meta data of every slice
    slice_meta = []
    for ii in range(len(series_file_names)):
        this_meta_dict = {}
        for meta_key in series_reader.GetMetaDataKeys(ii):
            meta_val = ""
            if series_reader.HasMetaDataKey(ii, meta_key):
                meta_val = series_reader.GetMetaData(ii, meta_key)
                # unicode escapes are not supported downstream
                if "\\u" in repr(meta_val):
                    meta_val = eval(repr(meta_val).replace("\\u", "_"))
            meta_val = str(meta_val)
            meta_val = meta_val.replace('\r', '')
            meta_val = meta_val.replace('\n', '')
            meta_val = meta_val.replace(',', ' ')
            meta_val = meta_val.strip()
            this_meta_dict[meta_key] = meta_val
        slice_meta.append(this_meta_dict)
    return image, output_meta_map, slice_meta


def write_simage_to_dicom(simage, slices_meta, full_dcm_output_path, logfunc=None):
    if logfunc is None:
        logfunc = print
    writer = sitk.ImageFileWriter()
    # Use the study/series/frame-of-reference information given in the meta-data
    # dictionary, not the automatically generated information from the file IO.
    writer.KeepOriginalImageUIDOn()
    writer.SetUseCompression(True)
    i = 0
    image_slice = simage[:, :, i]
    # Tags shared by the series.
    this_meta_dict = slices_meta[i]
    for this_k, this_val in this_meta_dict.items():
        try:
            image_slice.SetMetaData(this_k, this_val)
        except Exception as ex:
            logfunc("exception {} occurred when writing tag {}".format(ex, this_k))
            continue
    writer.SetFileName(full_dcm_output_path)
    writer.Execute(image_slice)


def one_dcm2jpg(dcm_uid, filepath, dest_dir, max_image_size):
    tarpath = os.path.join(dest_dir, dcm_uid + ".jpg")
    if os.path.isfile(tarpath):
        return
    img = load_dcm_image(filepath)
    h, w = img.shape
    # scale the longer side to max_image_size, preserving the aspect ratio
    if h > w:
        ratio = w / h
        h = max_image_size
        w = math.floor(h * ratio)
    else:
        ratio = h / w
        w = max_image_size
        h = math.floor(w * ratio)
    resized = cv.resize(img, dsize=(w, h), interpolation=cv.INTER_AREA)
    cv.imwrite(tarpath, resized)


def process_one_image(idd, dcm_uid_list, filepath_list, dest_dir, max_image_size):
    dcm_uid = dcm_uid_list[idd]
    filepath = filepath_list[idd]
    one_dcm2jpg(dcm_uid=dcm_uid, filepath=filepath, dest_dir=dest_dir,
                max_image_size=max_image_size)


def filter_data(input_csv_path, dataroot, datatar):
    print(dataroot)
    dcmfiles = glob.glob(fr'{dataroot}/**/*.dcm', recursive=True)
    df = pandas.read_csv(input_csv_path)
    map_classid2SOPsuids = {}
    NUM_SAMPLES_PER_CLASS = 40
    for _, dfd in df.iterrows():
        SOPsuid = dfd.iloc[0]
        taglabel = dfd.iloc[1]
        sptag = taglabel.split(' ')
        # keep only single-label samples; multi-label rows cannot be mapped to
        # a single class directory below
        if len(sptag) > 1:
            continue
        if taglabel in map_classid2SOPsuids:
            map_classid2SOPsuids[taglabel].append(SOPsuid)
        else:
            map_classid2SOPsuids[taglabel] = [SOPsuid]

    filteredmap_classname2suids = {}
    for k, v in map_classid2SOPsuids.items():
        if len(v) > NUM_SAMPLES_PER_CLASS:
            v = v[:NUM_SAMPLES_PER_CLASS]
        filteredmap_classname2suids[labels_reversed[int(k)]] = v

    for classname, suids in tqdm(filteredmap_classname2suids.items(),
                                 total=len(filteredmap_classname2suids.keys())):
        realtar = fr'{datatar}/{classname}'
        if not os.path.exists(realtar):
            os.makedirs(realtar)
        for SOPsuid in tqdm(suids):
            for dcmf in dcmfiles:
                if SOPsuid in dcmf:
                    # alternatively, re-write the slice via
                    # load_image_and_info_by_given_path_and_id + write_simage_to_dicom
                    shutil.copyfile(dcmf, fr'{realtar}/{SOPsuid}.dcm')
                    break


def copy_filtered_data(map_name2dicoms, roottar):
    for k, v in map_name2dicoms.items():
        subroottar = fr'{roottar}/{k}/'
        if not os.path.exists(subroottar):
            os.makedirs(subroottar)
        for vv in tqdm(v):
            # dirs_exist_ok (Python 3.8+): the destination already exists after
            # the first copy
            shutil.copytree(vv, subroottar, dirs_exist_ok=True)


if __name__ == '__main__':
    dataroot = '/home/liuxinglong/data2/downloads/unifesp-x-ray-body-part-classifier'
    datatar = '/home/liuxinglong/data2/unifesp-x-ray-body-part-classifier_filtered'
    filter_data(fr'{dataroot}/train.csv', fr'{dataroot}/train', fr'{datatar}/dicom')
    # classpaths = os.listdir(fr'{datatar}/dicom')
    # for clp in classpaths:
    #     convert_dcm_dataset_to_jpg(fr'{datatar}/dicom/{clp}', fr"{datatar}/jpg/{clp}")