#! /usr/bin/env python3

"""\
%(prog)s: prepare a HEPData submission tarball from a Rivet routine and auxiliary files

Usage: %(prog)s [--help|-h] <analysisname>
"""

import rivet, yoda, sys, os, glob
from rivet.plotting import plot2yaml
rivet.util.set_process_name(os.path.basename(__file__))
import logging

## hepdata_lib is an optional third-party dependency: bail out with a hint if it is missing
try:
    import hepdata_lib as hdlib
except ImportError:
    # Only a failed import is reported as "not installed"; any other
    # exception (e.g. a keyboard interrupt) is allowed to propagate.
    logging.error("Couldn't import hepdata_lib. Have you pip-installed the module?")
    sys.exit(1)


## Handle command line
import argparse

# The module docstring doubles as the usage text shown by argparse
parser = argparse.ArgumentParser(usage=__doc__)
parser.add_argument("ANANAMES", nargs="+", help="names of analyses to make")
parser.add_argument("-o", "--outputdir", dest="OUTPUTDIR",
                    default="", help="directory for YAML submission output")

## Histogram selection: both options may be repeated to build up a list of regexes
parser.add_argument("-m", "--match", action="append", dest="PATHPATTERNS", default=[],
                    help="only convert histograms whose $path/$name string matches any of these regexes")
parser.add_argument("-M", "--unmatch", action="append", dest="PATHUNPATTERNS", default=[],
                    help="exclude histograms whose $path/$name string matches any of these regexes")

## Bin selection
parser.add_argument("--include-overflows", help="also include under-/overflow bins", dest="INCLUDE_OVERFLOWS",
                    action="store_true", default=False)
parser.add_argument("--include-masked", help="also include masked bins", dest="INCLUDE_MASKED",
                    action="store_true", default=False)

## Submission-level settings
parser.add_argument("--related-record", dest="RELATED_RECORDS", action="append", default=[],
                    help="add Inspire IDs corresponding to related HEPData records")
parser.add_argument("--no-validation", help="disable HEPData validator", dest="VALIDATE",
                    action="store_false", default=True)

## Verbosity: -q and -v store into the same LOGLEVEL destination (default INFO)
parser.add_argument("-q", "--quiet", dest="LOGLEVEL", default=logging.INFO,
                    action="store_const", const=logging.WARNING, help="only write out warning and error messages")
parser.add_argument("-v", "--verbose", dest="LOGLEVEL", default=logging.INFO,
                    action="store_const", const=logging.DEBUG, help="provide extra debugging messages")

args = parser.parse_args()
# Use the fully-formatted record text ("message") rather than the raw "msg"
# attribute, so that logging calls with lazy %-style arguments print correctly.
logging.basicConfig(format="%(message)s", level=args.LOGLEVEL)
ANANAMES = args.ANANAMES

## Work out the output root: the requested directory, or the current working directory if none was given
ANAROOT = os.path.abspath(args.OUTPUTDIR or os.getcwd())
if not os.access(ANAROOT, os.W_OK):
    logging.error("Can't write to source root directory %s" % ANAROOT)
    sys.exit(1)

## Add the current directory to Rivet's analysis library and data paths
_here = os.path.abspath(".")
rivet.addAnalysisLibPath(_here)
rivet.addAnalysisDataPath(_here)

## Get list of directories containing plot files
ANAPLOTDIRS = rivet.getAnalysisPlotPaths()

## Scrape the Rivet ref-data paths for files named <analysis>.yoda*,
## collecting them in a set so that each file path appears only once
_refset = set()
for _ana in ANANAMES:
    for _refdir in rivet.getAnalysisRefPaths():
        _refset.update(glob.glob(os.path.join(_refdir, _ana + '.yoda*')))
ANAREFDIRS = list(_refset)

## Standard Rivet analysis name pattern: <prefix>_<4 digits>_<I|S><digits>
## (group 3 is the ID type letter, group 4 the numeric ID)
import re
import shutil
re_stdana = re.compile(r"^(\w+)_(\d{4})_(I|S)(\d+)$")

## Now make a submission for each analysis
for ANANAME in ANANAMES:
    logging.debug(f"Processing analysis {ANANAME}")
    ANAOUTDIR = f"{ANAROOT}/HEPData_YAML_{ANANAME}"
    logging.info(f"Writing YAML templates to {ANAOUTDIR}")

    ## Extract some metadata from the info file (if available)
    ## or the name if it matches the standard pattern
    match = re_stdana.match(ANANAME)
    ANADESC = "<Insert the abstract>"
    ANAINSPIREID = "<Insert the Inspire ID>"
    ANATAGS = dict()
    ANASQRTS = []
    anainfo = rivet.AnalysisLoader.getAnalysis(ANANAME)
    if anainfo:
        logging.debug("AnalysisInfo found!")
        ANADESC = anainfo.description()
        ANAINSPIREID = anainfo.inspireId()
        ## Only record a CoM energy if the analysis declares exactly one beam-energy entry
        beam_energies = anainfo.requiredBeamEnergies()
        if beam_energies and len(beam_energies) == 1:
            ANASQRTS.append( sum(beam_energies[0]) )
        if ANASQRTS:
            ANATAGS['cmenergies'] = ANASQRTS
        if anainfo.keywords():
            ANATAGS['phrases'] = anainfo.keywords()
    elif match:
        ## No analysis info: fall back to the ID embedded in the name, if it is an "I" type ID
        if match.group(3) == "I":
            ANAINSPIREID = match.group(4)

    submission = hdlib.Submission()
    submission.comment = ANADESC

    ## The placeholder string is not numeric, so it is never registered as a record ID
    if ANAINSPIREID.isdigit():
        submission.add_record_id(ANAINSPIREID, "inspire")

    for relID in args.RELATED_RECORDS:
        submission.add_related_recid(int(relID))

    ## Require exactly one ref-data file whose basename (up to the first dot) is the analysis name
    ref_candidates = [ anaref for anaref in ANAREFDIRS if ANANAME == os.path.basename(anaref).split('.')[0] ]
    if len(ref_candidates) != 1:
        logging.error(f"Unexpected number of reference data files encountered: {len(ref_candidates)} (expected 1)")
        sys.exit(1)

    ## Load ref-data objects and convert them to HEPData tables
    refAOS = yoda.read(ref_candidates.pop(), patterns=args.PATHPATTERNS, unpatterns=args.PATHUNPATTERNS)
    # TODO - add support for multiple dependent variables?
    for table_id, (refkey, refdata) in enumerate(refAOS.items()):
        logging.debug(f"Processing table {table_id+1} for {refkey}")
        aop = rivet.AOPath(refdata.path())
        plot_configs = plot2yaml.get_plot_configs(aop.basepath(), plotdirs=ANAPLOTDIRS)

        table = yoda.hepdata.to_table(refdata, args.INCLUDE_OVERFLOWS, args.INCLUDE_MASKED)

        ## add some more metadata
        table.name = f'Table {table_id+1}'
        table.description = plot_configs.get('Title', '<Insert table description>')

        ## Label at most the first three variables as X, Y, Z (zip stops at the shorter sequence)
        for axis, variable in zip('XYZ', table.variables):
            variable.name = plot_configs.get(f'{axis}Label', f'<Insert {axis}-axis label>')

        #if ANASQRTS:
        #    table.variables[-1].add_qualifier('SQRTS(S)', ANASQRTS[0], 'GEV')

        for k,v in ANATAGS.items():
            table.keywords[k] = v

        submission.add_table(table)

    TARNAME = f'{ANAOUTDIR}.tar.gz'
    if os.path.exists(TARNAME):
        logging.debug(f"Output tarball {TARNAME} already exists, removing")
        os.remove(TARNAME)

    logging.debug(f"Creating submission {TARNAME}")
    submission.create_files(ANAOUTDIR, validate=args.VALIDATE, remove_old=True)
    ## The tarball is picked up as 'submission.tar.gz' relative to the current working
    ## directory; shutil.move (unlike os.rename) also works when the output directory
    ## lives on a different filesystem.
    shutil.move('submission.tar.gz', TARNAME)

