#!/usr/bin/env python
from __future__ import print_function

import re
import sys
import tempfile

try:
    from rpy2.rpy_classic import (
        BASIC_CONVERSION,
        NO_CONVERSION,
        r,
        RException,
        set_default_mode,
    )
except ImportError:
    # RPy isn't maintained, and doesn't work with R>3.0, use it as a fallback
    from rpy import (
        BASIC_CONVERSION,
        NO_CONVERSION,
        r,
        RException,
        set_default_mode,
    )


def stop_err(msg):
    """Write *msg* to standard error, then terminate with exit status 1.

    The message is written as given; no newline is appended.
    """
    sys.stderr.write(msg)
    raise SystemExit(1)


def S3_METHODS(all="key"):
    """Return the function names and operator tokens an expression may use.

    With the default ``all="key"`` the result is a dict holding two freshly
    built lists: ``"Math"`` (function names) and ``"Ops"`` (operator and
    punctuation tokens).  Any other value of *all* yields ``None``.
    """
    if all != "key":
        return None

    math_names = [
        "abs", "sign", "sqrt", "floor", "ceiling", "trunc", "round", "signif",
        "exp", "log",
        "cos", "sin", "tan",
        "acos", "asin", "atan",
        "cosh", "sinh", "tanh",
        "acosh", "asinh", "atanh",
        "lgamma", "gamma", "gammaCody", "digamma", "trigamma",
        "cumsum", "cumprod", "cummax", "cummin",
        "c",
    ]
    op_tokens = [
        "+", "-", "*", "/", "^", "%%", "%/%",
        "&", "|", "!",
        "==", "!=", "<", "<=", ">=", ">",
        "(", ")", "~", ",",
    ]
    return {"Math": math_names, "Ops": op_tokens}


def _validate_expression(expression):
    """Exit via ``stop_err`` unless *expression* uses only whitelisted tokens.

    Every run of letters must appear in the "Math" list returned by
    ``S3_METHODS`` and every run of other non-alphanumeric, non-whitespace
    characters must appear in its "Ops" list.  An expression whose only
    operator is a comma is rejected as a column list.
    """
    allowed = S3_METHODS()
    math_allowed = allowed["Math"]
    ops_allowed = allowed["Ops"]

    for word in re.findall(r"[a-zA-Z]+", expression):
        if word not in math_allowed:
            stop_err("Invalid expression '%s': term '%s' is not recognized or allowed" % (expression, word))

    symbols = set()
    # Upper-case letters are excluded from the operator scan: they are already
    # vetted by the term check above, and treating them as operators made the
    # whitelisted mixed-case name "gammaCody" impossible to use.
    # NOTE: adjacent operator characters are matched as one run, so "(-" is
    # looked up as a single token and must be separated by whitespace.
    for symbol in re.findall(r"[^a-zA-Z0-9\s]+", expression):
        if symbol not in ops_allowed:
            stop_err("Invalid expression '%s': operator '%s' is not recognized or allowed" % (expression, symbol))
        symbols.add(symbol)

    if symbols == {","}:
        # User may have entered a comma-separated list of columns
        stop_err("Invalid columns '%s': this tool requires a single column or expression" % expression)


def _referenced_columns(expression):
    """Return the distinct ``cN`` names found in *expression*, in order of first use."""
    names = []
    for name in re.findall(r"c[0-9]+", expression):
        if name not in names:
            names.append(name)
    return names


def _write_r_table(datafile, col_names, cols, tmp_file):
    """Copy the usable rows of *datafile* into *tmp_file* as a tab-separated table.

    *col_names* become the header row and *cols* are the matching zero-based
    field indices.  Blank lines and lines starting with ``#`` are ignored.  A
    row is skipped when any requested field is missing or is not accepted by
    ``float``.

    Returns a tuple ``(data_lines, skipped_lines, first_invalid_line)`` where
    *data_lines* counts the non-blank, non-comment lines seen and
    *first_invalid_line* is the 1-based number of the first skipped line
    (0 when nothing was skipped).
    """
    # The header uses the names exactly as written in the expression so that
    # they match the R variables assigned from the data frame later on.
    tmp_file.write("%s\n" % "\t".join(col_names))
    data_lines = 0
    skipped_lines = 0
    first_invalid_line = 0
    with open(datafile) as handle:
        for line_number, line in enumerate(handle, 1):
            line = line.rstrip("\r\n")
            if not line or line.startswith("#"):
                continue
            data_lines += 1
            fields = line.split("\t")
            try:
                values = [fields[col] for col in cols]
                for value in values:
                    float(value)
            except (IndexError, ValueError):
                skipped_lines += 1
                if not first_invalid_line:
                    first_invalid_line = line_number
                continue
            tmp_file.write("%s\n" % "\t".join(values))
    # Make sure everything is on disk before R opens the file by name.
    tmp_file.flush()
    return data_lines, skipped_lines, first_invalid_line


def main():
    """Summarize a column expression of a tabular file using R.

    Command-line arguments (``sys.argv[1:4]``): the input file, the output
    file and the expression.  The expression is validated against the
    ``S3_METHODS`` whitelist, the referenced ``cN`` columns are copied to a
    temporary table, and the sum, mean, standard deviation and quantiles of
    the evaluated expression are written to the output file as one header
    line and one tab-separated data line.

    Exits through ``stop_err`` on bad usage, a rejected expression, an
    invalid column reference, input with no usable row, or an R error.
    """
    try:
        datafile, outfile_name, expression = sys.argv[1:4]
    except ValueError:
        stop_err("Usage: python gsummary.py input_file ouput_file expression")

    # Check for invalid expressions
    _validate_expression(expression)

    # Find all column references in the expression
    col_names = _referenced_columns(expression)
    cols = []
    for name in col_names:
        index = int(name[1:]) - 1
        if index < 0:
            # "c0" would otherwise wrap around to the last field of each row.
            stop_err("Invalid column '%s': column numbers start at 1" % name)
        cols.append(index)

    with tempfile.NamedTemporaryFile("w+") as tmp_file:
        data_lines, skipped_lines, first_invalid_line = _write_r_table(datafile, col_names, cols, tmp_file)

        # Compare against the number of data lines, not the total number of
        # lines: comment and blank lines are never counted as skipped.
        if data_lines and skipped_lines == data_lines:
            stop_err(
                "Invalid column or column data values invalid for computation.  See tool tips and syntax for data requirements."
            )

        # summary function and return labels
        set_default_mode(NO_CONVERSION)
        summary_func = r(
            "function( x ) { c( sum=sum( as.numeric( x ), na.rm=T ), mean=mean( as.numeric( x ), na.rm=T ), stdev=sd( as.numeric( x ), na.rm=T ), quantile( as.numeric( x ), na.rm=TRUE ) ) }"
        )
        headings = ["sum", "mean", "stdev", "0%", "25%", "50%", "75%", "100%"]
        headings_str = "\t".join(headings)

        r_data_frame = r.read_table(tmp_file.name, header=True, sep="\t")

        with open(outfile_name, "w") as outfile:
            for col in col_names:
                r.assign(col, r["$"](r_data_frame, col))
            try:
                # The expression is evaluated by R; it has been restricted to
                # the whitelisted terms and operators by the validation above.
                summary = summary_func(r(expression))
            except RException as s:
                stop_err("Computation resulted in the following error: %s" % str(s))
            summary = summary.as_py(BASIC_CONVERSION)
            outfile.write("#%s\n" % headings_str)
            if isinstance(summary, dict):
                # using rpy
                outfile.write("%s\n" % "\t".join("%g" % summary[k] for k in headings))
            else:
                # using rpy2
                outfile.write("%s\n" % "\t".join("%g" % k for k in summary))

    if skipped_lines:
        print(
            "Skipped %d invalid lines beginning with line #%d.  See tool tips for data requirements."
            % (skipped_lines, first_invalid_line)
        )


# Run the tool only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()