import base64
import os

import pytest

from galaxy.exceptions import RequestParameterInvalidException
from galaxy.model.dataset_collections.auto_identifiers import FillIdentifiers
from galaxy.model.dataset_collections.rule_target_columns import column_titles_to_headers
from galaxy.model.dataset_collections.rule_target_models import target_models
from galaxy.model.dataset_collections.workbook_util import read_column_header_titles
from galaxy.tools.fetch.workbooks import (
    _infer_fetch_workbook_collection_type,
    _validate_parsed_column_headers,
    DEFAULT_WORKBOOK_TITLE,
    EXCEPTION_NO_URIS_FOUND,
    EXCEPTION_TOO_MANY_URI_COLUMNS,
    generate,
    GenerateFetchWorkbookRequest,
    parse,
    ParsedFetchWorkbook,
    ParseFetchWorkbook,
)
from galaxy.util.resources import resource_path

# Debugging switch: when set to True, the workbook generation tests that check
# this flag also save the generated workbook to an .xlsx file under the user's
# home directory so it can be inspected manually.
WRITE_TEST_WORKBOOKS = False


def test_fetch_datasets_workbook():
    """Generate a workbook from a request built with no arguments.

    Checks the active sheet's title, its first cell and the full list of
    column header titles.
    """
    generated = generate(GenerateFetchWorkbookRequest())
    assert generated
    sheet = generated.active
    assert sheet.title == DEFAULT_WORKBOOK_TITLE
    first_cell = sheet.cell(1, 1)
    assert first_cell.value == "URI"

    expected_titles = ["URI", "Name"]
    assert read_column_header_titles(sheet) == expected_titles

    # Optionally keep a copy of the workbook for manual inspection.
    if WRITE_TEST_WORKBOOKS:
        generated.save(os.path.expanduser("~/fetch_workbook.xlsx"))


def test_fetch_list_workbook():
    """Generate a workbook for a single ``list`` collection and check its headers."""
    list_request = GenerateFetchWorkbookRequest(type="collection", collection_type="list")
    generated = generate(list_request)
    assert generated
    sheet = generated.active

    expected_titles = ["URI", "List Identifier"]
    assert read_column_header_titles(sheet) == expected_titles


def test_fetch_multiple_lists_workbook():
    """Generate a workbook for the ``collections`` request type with ``list`` collections.

    The expected headers include a "Collection Name" column in addition to
    the URI and list identifier columns.
    """
    lists_request = GenerateFetchWorkbookRequest(type="collections", collection_type="list")
    generated = generate(lists_request)
    assert generated
    sheet = generated.active

    expected_titles = ["URI", "List Identifier", "Collection Name"]
    assert read_column_header_titles(sheet) == expected_titles


def test_fetch_list_paired_workbook():
    """Generate a workbook for a ``list:paired`` collection and check its headers."""
    paired_request = GenerateFetchWorkbookRequest(type="collection", collection_type="list:paired")
    generated = generate(paired_request)
    assert generated
    sheet = generated.active

    expected_titles = ["URI 1 (Forward)", "URI 2 (Reverse)", "List Identifier"]
    assert read_column_header_titles(sheet) == expected_titles

    # Optionally keep a copy of the workbook for manual inspection.
    if WRITE_TEST_WORKBOOKS:
        generated.save(os.path.expanduser("~/fetch_workbook_paired.xlsx"))


def test_fetch_list_paired_or_unpaired_workbook():
    """Generate a workbook for a ``list:paired_or_unpaired`` collection and check its headers."""
    mixed_request = GenerateFetchWorkbookRequest(
        type="collection",
        collection_type="list:paired_or_unpaired",
    )
    generated = generate(mixed_request)
    assert generated
    sheet = generated.active

    expected_titles = ["URI 1 (Forward)", "URI 2 (Optional/Reverse)", "List Identifier"]
    assert read_column_header_titles(sheet) == expected_titles


def test_parse_datasets():
    """Parse the ``fetch_workbook.xlsx`` fixture and check it against the simple example."""
    encoded = unittest_file_to_base64("fetch_workbook.xlsx")
    result = parse(ParseFetchWorkbook(content=encoded))
    assert_is_simple_example_parsed(result)


def assert_is_simple_example_parsed(parsed: ParsedFetchWorkbook):
    """Assert that ``parsed`` matches the simple single-dataset example.

    The result must hold exactly one row, with the expected ``url`` and
    ``name`` values, and exactly two columns.
    """
    rows = parsed.rows
    assert len(rows) == 1
    only_row = rows[0]
    assert only_row["url"] == "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/4.bed"
    assert only_row["name"] == "4.bed"

    column_count = len(parsed.columns)
    assert column_count == 2


def test_parse_datasets_csv():
    """Parse the ``fetch_workbook.csv`` fixture and check it against the simple example."""
    encoded = unittest_file_to_base64("fetch_workbook.csv")
    result = parse(ParseFetchWorkbook(content=encoded))
    assert_is_simple_example_parsed(result)


def test_parse_datasets_tsv():
    """Parse the ``fetch_workbook.tsv`` fixture and check it against the simple example."""
    encoded = unittest_file_to_base64("fetch_workbook.tsv")
    result = parse(ParseFetchWorkbook(content=encoded))
    assert_is_simple_example_parsed(result)


def test_parse_paired_list():
    """Parse the paired workbook fixture and check the rows it is broken out into."""
    # The workbook has URI 1 and URI 2 columns - make sure they are broken out
    # into separate rows that each carry a paired_identifier value for the
    # rule builder.
    encoded = unittest_file_to_base64("fetch_workbook_paired.xlsx")
    result = parse(ParseFetchWorkbook(content=encoded))

    # (url, paired_identifier) expected for each parsed row, in order.
    expected_rows = [
        ("https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/1.bed", "1"),
        ("https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/4.bed", "2"),
    ]
    assert len(result.rows) == 2
    for position, (expected_url, expected_paired_identifier) in enumerate(expected_rows):
        row = result.rows[position]
        assert row["url"] == expected_url
        assert row["list_identifiers"] == "sample1"
        assert row["paired_identifier"] == expected_paired_identifier

    assert len(result.columns) == 3


def test_parsed_paired_with_hashes():
    """Parse the paired workbook fixture that also carries hash values.

    Each of the two parsed rows must expose its URL, list identifier, paired
    identifier and both its ``hash_md5`` and ``hash_sha512`` values.
    """
    content = unittest_file_to_base64("fetch_workbook_paired_with_hashes.xlsx")
    parse_request = ParseFetchWorkbook(
        content=content,
    )
    parsed = parse(parse_request)
    assert len(parsed.rows) == 2

    # First row: paired identifier "1".
    row0 = parsed.rows[0]
    assert row0["url"] == "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/4.bed"
    assert row0["list_identifiers"] == "sample1"
    assert row0["paired_identifier"] == "1"
    assert row0["hash_md5"] == "37b59762b59fff860460522d271bc111"
    assert (
        row0["hash_sha512"]
        == "b83327251deae7e8c865948573d325a6657eaef10b274ba98d8ba6835f073f787c5621a4a9afd6db894f87bbd4299770f062369a3a39dea8b0263860559ff939"
    )

    # Second row: paired identifier "2", same list identifier as the first.
    row1 = parsed.rows[1]
    assert row1["url"] == "https://raw.githubusercontent.com/galaxyproject/galaxy/dev/test-data/1.bed"
    assert row1["list_identifiers"] == "sample1"
    assert row1["paired_identifier"] == "2"
    assert row1["hash_md5"] == "29e9dd693af0a946e67cd1861b987d13"
    assert (
        row1["hash_sha512"]
        == "9be59f9dc30bc2b35016dc45f5726a249baef2cdee2eb840665d3cdf4d0a0539c50eea1aa96bfd84f3a619782fc9321e5817e005a8a30c318d45c6759c9601bd"
    )


def test_parsed_list_with_auto_identifiers():
    """Parse a list workbook fixture with ``fill_inner_list_identifiers`` enabled.

    Both parsed rows must come back with the expected URL and list identifier.
    """
    encoded = unittest_file_to_base64("fetch_workbook_list_without_ids_example.xlsx")
    fill_options = FillIdentifiers(fill_inner_list_identifiers=True)
    result = parse(ParseFetchWorkbook(content=encoded, fill_identifiers=fill_options))

    # (url, list_identifiers) expected for each parsed row, in order.
    expected_rows = [
        ("https://ftp.sra.ebi.ac.uk/vol1/fastq/DRR000/DRR000770/DRR000770.fastq.gz", "DRR000770"),
        ("https://ftp.sra.ebi.ac.uk/vol1/fastq/DRR000/DRR000771/DRR000771.fastq.gz", "DRR000771"),
    ]
    assert len(result.rows) == 2
    for position, (expected_url, expected_identifier) in enumerate(expected_rows):
        row = result.rows[position]
        assert row["url"] == expected_url
        assert row["list_identifiers"] == expected_identifier


def test_parsed_list_of_pairs_with_auto_identifiers():
    """Parse a list-of-pairs workbook fixture with ``fill_inner_list_identifiers`` enabled.

    Checks the total row count and the URL and list identifier of the first
    row only.
    """
    encoded = unittest_file_to_base64("fetch_workbook_list_pairs_without_ids_example.xlsx")
    fill_options = FillIdentifiers(fill_inner_list_identifiers=True)
    result = parse(ParseFetchWorkbook(content=encoded, fill_identifiers=fill_options))

    assert len(result.rows) == 4
    first_row = result.rows[0]
    assert first_row["url"] == "https://ftp.sra.ebi.ac.uk/vol1/fastq/DRR039/DRR039919/DRR039919_1.fastq.gz"
    assert first_row["list_identifiers"] == "DRR039919"


def test_read_column_headers_from_titles():
    """Check the headers ``column_titles_to_headers`` builds for several title layouts.

    Each scenario converts a list of column titles and inspects the first
    element of the returned value: the header count, and each inspected
    header's ``type``, ``title`` and (where checked) ``type_index``.
    """
    # datasets...
    headers = column_titles_to_headers(["URI", "Name", "Genome"])[0]
    assert len(headers) == 3
    uri_header, name_header, genome_header = headers
    assert (uri_header.type, uri_header.title) == ("url", "URI")
    assert (name_header.type, name_header.title) == ("name", "Name")
    assert (genome_header.type, genome_header.title) == ("dbkey", "Genome")

    # simple list...
    headers = column_titles_to_headers(["URI", "List Identifier"])[0]
    assert len(headers) == 2
    uri_header, identifier_header = headers
    assert (uri_header.type, uri_header.title) == ("url", "URI")
    assert (identifier_header.type, identifier_header.title) == ("list_identifiers", "List Identifier")

    # paired list with two URIs per row...
    headers = column_titles_to_headers(["URI 1 (Forward)", "URI 2 (Reverse)", "List Identifier"])[0]
    assert len(headers) == 3
    forward_header, reverse_header, identifier_header = headers
    assert (forward_header.type, forward_header.title, forward_header.type_index) == ("url", "URI 1 (Forward)", 0)
    assert (reverse_header.type, reverse_header.title, reverse_header.type_index) == ("url", "URI 2 (Reverse)", 1)
    assert (identifier_header.type, identifier_header.title) == ("list_identifiers", "List Identifier")

    # paired list with the paired identifier as its own column (the middle
    # "List Identifier" header is not inspected here)...
    headers = column_titles_to_headers(["URI", "List Identifier", "Paired Identifier"])[0]
    assert len(headers) == 3
    uri_header, _, paired_header = headers
    assert (uri_header.type, uri_header.title, uri_header.type_index) == ("url", "URI", 0)
    assert (paired_header.type, paired_header.title, paired_header.type_index) == (
        "paired_identifier",
        "Paired Identifier",
        0,
    )

    # nested list
    headers = column_titles_to_headers(["URI", "Outer List Identifier", "Inner List Identifier"])[0]
    assert len(headers) == 3
    uri_header, outer_header, inner_header = headers
    assert (uri_header.type, uri_header.title, uri_header.type_index) == ("url", "URI", 0)
    assert (outer_header.type, outer_header.title, outer_header.type_index) == (
        "list_identifiers",
        "Outer List Identifier",
        0,
    )
    assert (inner_header.type, inner_header.title, inner_header.type_index) == (
        "list_identifiers",
        "Inner List Identifier",
        1,
    )


def test_infer_fetch_workbook_collection_type():
    """Check the collection type inferred from a range of column title layouts.

    Every case converts the titles to headers and compares the first element
    of the inference result against the expected collection type string.
    """
    # (column titles, expected collection type), checked in order.
    cases = [
        (["URI", "List Identifier", "Genome"], "list"),
        (["URI", "List Identifier 1", "List Identifier 2", "Genome"], "list:list"),
        (["URI", "List Identifier", "Paired Identifier", "Genome"], "list:paired"),
        # probably more usable - two URI style list:paired
        (["URI 1", "URI 2", "List Identifier", "Genome"], "list:paired"),
        (
            ["URI", "List Identifier 1", "List Identifier 2", "Paired Identifier", "Genome"],
            "list:list:paired",
        ),
        (["URI 1", "URI 2", "List Identifier 1", "List Identifier 2", "Genome"], "list:list:paired"),
        # paired/unpaired sheets - optional second URI column
        (["URI 1", "URI 2 (Optional)", "List Identifier", "Genome"], "list:paired_or_unpaired"),
        # paired/unpaired sheets - optional paired identifier column
        (["URI", "List Identifier", "Paired Identifier (Optional)", "Genome"], "list:paired_or_unpaired"),
    ]
    for titles, expected_collection_type in cases:
        headers = column_titles_to_headers(titles)[0]
        inferred = _infer_fetch_workbook_collection_type(headers)[0]
        assert inferred == expected_collection_type


def test_column_target_model_parsing():
    """Smoke test: calling ``target_models()`` must complete without raising."""
    # The return value is deliberately ignored; only a clean call is checked.
    target_models()


def test_validate_parsed_column_headers():
    """Check that invalid header sets are rejected with the expected message.

    Each case must raise ``RequestParameterInvalidException`` whose string
    form contains the associated message constant.
    """
    # (column titles, message expected in the raised exception), checked in order.
    invalid_cases = [
        (["URI 1", "URI 2", "URI 3"], EXCEPTION_TOO_MANY_URI_COLUMNS),
        (["Name", "Paired Indicator"], EXCEPTION_NO_URIS_FOUND),
    ]
    for titles, expected_message in invalid_cases:
        parsed_headers = column_titles_to_headers(titles)[0]
        with pytest.raises(RequestParameterInvalidException) as raised:
            _validate_parsed_column_headers(parsed_headers)
        assert expected_message in str(raised.value)


def unittest_file_to_base64(filename: str) -> str:
    """Return the contents of a ``galaxy.app_unittest_utils`` resource as base64 text.

    The file is read as bytes, base64-encoded, and the encoded bytes are
    decoded to ``str`` using UTF-8.
    """
    raw_bytes = resource_path("galaxy.app_unittest_utils", filename).read_bytes()
    return base64.b64encode(raw_bytes).decode("utf-8")