#!/usr/bin/env python
"""
Runs SRMA on a SAM/BAM file;
TODO: more documentation

usage: srma_wrapper.py [options]

See below for options
"""

import optparse
import os
import shutil
import subprocess
import sys
import tempfile


def stop_err(msg):
    sys.stderr.write("%s\n" % msg)
    sys.exit()


def parseRefLoc(refLoc, refUID):
    for line in open(refLoc):
        if not line.startswith("#"):
            fields = line.strip().split("\t")
            if len(fields) >= 3:
                if fields[0] == refUID:
                    return fields[1]
    return None


def __main__():
    parser = optparse.OptionParser()
    parser.add_option("-r", "--ref", dest="ref", help="The reference genome to index and use")
    parser.add_option("-u", "--refUID", dest="refUID", help="The pre-index reference genome unique Identifier")
    parser.add_option("-i", "--input", dest="input", help="The SAM/BAM input file")
    parser.add_option("-I", "--inputIndex", dest="inputIndex", help="The SAM/BAM input index file")
    parser.add_option("-o", "--output", dest="output", help="The SAM/BAM output file")
    parser.add_option("-O", "--offset", dest="offset", help="The alignment offset")
    parser.add_option("-Q", "--minMappingQuality", dest="minMappingQuality", help="The minimum mapping quality")
    parser.add_option(
        "-P",
        "--minAlleleProbability",
        dest="minAlleleProbability",
        help="The minimum allele probability conditioned on coverage (for the binomial quantile).",
    )
    parser.add_option(
        "-C", "--minAlleleCoverage", dest="minAlleleCoverage", help="The minimum haploid coverage for the consensus"
    )
    parser.add_option("-R", "--range", dest="range", help="A range to examine")
    parser.add_option("-c", "--correctBases", dest="correctBases", help="Correct bases ")
    parser.add_option("-q", "--useSequenceQualities", dest="useSequenceQualities", help="Use sequence qualities ")
    parser.add_option(
        "-M",
        "--maxHeapSize",
        dest="maxHeapSize",
        help="The maximum number of nodes on the heap before re-alignment is ignored",
    )
    parser.add_option(
        "-s",
        "--fileSource",
        dest="fileSource",
        help="Whether to use a previously indexed reference sequence or one from history (indexed or history)",
    )
    parser.add_option("-p", "--params", dest="params", help="Parameter setting to use (pre_set or full)")
    parser.add_option("-j", "--jarBin", dest="jarBin", default="", help="The path to where jars are stored")
    parser.add_option("-f", "--jarFile", dest="jarFile", help="The file name of the jar file to use")
    options, args = parser.parse_args()

    # make temp directory for srma
    tmp_dir = tempfile.mkdtemp()
    buffsize = 1048576

    # set up reference filenames
    reference_filepath_name = None
    # need to create SRMA dict and Samtools fai files for custom genome
    if options.fileSource == "history":
        try:
            reference_filepath = tempfile.NamedTemporaryFile(dir=tmp_dir, suffix=".fa")
            reference_filepath_name = reference_filepath.name
            reference_filepath.close()
            dict_filepath_name = reference_filepath_name.replace(".fa", ".dict")
            os.symlink(options.ref, reference_filepath_name)
            # create fai file using Samtools
            index_fai_cmd = "samtools faidx %s" % reference_filepath_name
            try:
                tmp = tempfile.NamedTemporaryFile(dir=tmp_dir).name
                tmp_stderr = open(tmp, "wb")
                proc = subprocess.Popen(args=index_fai_cmd, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno())
                returncode = proc.wait()
                tmp_stderr.close()
                # get stderr, allowing for case where it's very large
                tmp_stderr = open(tmp, "rb")
                stderr = ""
                try:
                    while True:
                        stderr += tmp_stderr.read(buffsize)
                        if not stderr or len(stderr) % buffsize != 0:
                            break
                except OverflowError:
                    pass
                tmp_stderr.close()
                if returncode != 0:
                    raise Exception(stderr)
            except Exception as e:
                # clean up temp dir
                if os.path.exists(tmp_dir):
                    shutil.rmtree(tmp_dir)
                stop_err("Error creating Samtools index for custom genome file: %s\n" % str(e))
            # create dict file using SRMA
            dict_cmd = 'java -cp "%s" net.sf.picard.sam.CreateSequenceDictionary R=%s O=%s' % (
                os.path.join(options.jarBin, options.jarFile),
                reference_filepath_name,
                dict_filepath_name,
            )
            try:
                tmp = tempfile.NamedTemporaryFile(dir=tmp_dir).name
                tmp_stderr = open(tmp, "wb")
                proc = subprocess.Popen(args=dict_cmd, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno())
                returncode = proc.wait()
                tmp_stderr.close()
                # get stderr, allowing for case where it's very large
                tmp_stderr = open(tmp, "rb")
                stderr = ""
                try:
                    while True:
                        stderr += tmp_stderr.read(buffsize)
                        if not stderr or len(stderr) % buffsize != 0:
                            break
                except OverflowError:
                    pass
                tmp_stderr.close()
                if returncode != 0:
                    raise Exception(stderr)
            except Exception as e:
                # clean up temp dir
                if os.path.exists(tmp_dir):
                    shutil.rmtree(tmp_dir)
                stop_err("Error creating index for custom genome file: %s\n" % str(e))
        except Exception as e:
            # clean up temp dir
            if os.path.exists(tmp_dir):
                shutil.rmtree(tmp_dir)
            stop_err("Problem handling SRMA index (dict file) for custom genome file: %s\n" % str(e))
    # using built-in dict/index files
    else:
        if options.ref:
            reference_filepath_name = options.ref
        else:
            reference_filepath_name = parseRefLoc(options.refLocation, options.refUID)
    if reference_filepath_name is None:
        raise ValueError("A valid genome reference was not provided.")

    # set up aligning and generate aligning command options
    if options.params == "pre_set":
        srma_cmds = ""
    else:
        ranges = "null"
        if options.range == "None":
            range = "null"
        else:
            range = options.range
        srma_cmds = (
            "OFFSET=%s MIN_MAPQ=%s MINIMUM_ALLELE_PROBABILITY=%s MINIMUM_ALLELE_COVERAGE=%s RANGES=%s RANGE=%s CORRECT_BASES=%s USE_SEQUENCE_QUALITIES=%s MAX_HEAP_SIZE=%s"
            % (
                options.offset,
                options.minMappingQuality,
                options.minAlleleProbability,
                options.minAlleleCoverage,
                ranges,
                range,
                options.correctBases,
                options.useSequenceQualities,
                options.maxHeapSize,
            )
        )

    srma_cmds = "%s VALIDATION_STRINGENCY=LENIENT" % srma_cmds

    # perform alignments
    buffsize = 1048576
    try:
        # symlink input bam and index files due to the naming conventions required by srma here
        input_bam_filename = os.path.join(tmp_dir, "%s.bam" % os.path.split(options.input)[-1])
        os.symlink(options.input, input_bam_filename)
        input_bai_filename = "%s.bai" % os.path.splitext(input_bam_filename)[0]
        os.symlink(options.inputIndex, input_bai_filename)

        # create a temp output name, ending in .bam due to required naming conventions? unkown if required
        output_bam_filename = os.path.join(tmp_dir, "%s.bam" % os.path.split(options.output)[-1])
        # generate commandline
        java_opts = ""
        if "_JAVA_OPTIONS" not in os.environ:
            java_opts = "-Xmx2048m"
        cmd = "java %s -jar %s I=%s O=%s R=%s %s" % (
            java_opts,
            os.path.join(options.jarBin, options.jarFile),
            input_bam_filename,
            output_bam_filename,
            reference_filepath_name,
            srma_cmds,
        )
        # need to nest try-except in try-finally to handle 2.4
        try:
            try:
                tmp = tempfile.NamedTemporaryFile(dir=tmp_dir).name
                tmp_stderr = open(tmp, "wb")
                proc = subprocess.Popen(args=cmd, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno())
                returncode = proc.wait()
                tmp_stderr.close()
                # get stderr, allowing for case where it's very large
                tmp_stderr = open(tmp, "rb")
                stderr = ""
                try:
                    while True:
                        stderr += tmp_stderr.read(buffsize)
                        if not stderr or len(stderr) % buffsize != 0:
                            break
                except OverflowError:
                    pass
                tmp_stderr.close()
                if returncode != 0:
                    raise Exception(stderr)
            except Exception as e:
                raise Exception("Error executing SRMA. " + str(e))
            # move file from temp location (with .bam name) to provided path
            shutil.move(output_bam_filename, options.output)
            # check that there are results in the output file
            if os.path.getsize(options.output) <= 0:
                raise Exception(
                    "The output file is empty. You may simply have no matches, or there may be an error with your input file or settings."
                )
        except Exception as e:
            stop_err("The re-alignment failed.\n" + str(e))
    finally:
        # clean up temp dir
        if os.path.exists(tmp_dir):
            shutil.rmtree(tmp_dir)


if __name__ == "__main__":
    __main__()