import pandas as pd
import os

from datetime import datetime

from nsys_recipe.lib import exceptions
from nsys_recipe.lib import recipe

def mapper_func(files):
    """Load one pair of parquet files that are to be diffed.

    Args:
        files: A two-element sequence ``(path_a, path_b)``. Both paths are
            expected to share the same basename.

    Returns:
        A tuple ``(basename, df_a, df_b)`` where ``basename`` is the file name
        of ``path_a`` and the dataframes are the contents of each file.
    """
    left_path, right_path = files
    # Only the first path is used for the name, since both share a basename.
    shared_name = os.path.basename(left_path)
    left_df = pd.read_parquet(left_path)
    right_df = pd.read_parquet(right_path)
    return shared_name, left_df, right_df

def parse_diff_dir(diff_dirs):
    """Pair up the stats files found in the directories to compare.

    Every directory in ``diff_dirs`` must contain both 'per-rank.parquet' and
    'all-ranks.parquet'.

    Args:
        diff_dirs: Sequence of directory paths. All of them are validated, but
            only the first two are paired in the result.

    Returns:
        A list of ``(path_in_first_dir, path_in_second_dir)`` tuples, one per
        expected file name, in the order per-rank then all-ranks.

    Raises:
        exceptions.ValueError: If a directory lacks one of the expected files.
    """
    expected_names = ('per-rank.parquet', 'all-ranks.parquet')
    paths_per_dir = []

    for directory in diff_dirs:
        candidates = [os.path.join(directory, name) for name in expected_names]
        for candidate in candidates:
            if os.path.exists(candidate):
                continue
            raise exceptions.ValueError("{} is not a valid stats recipe directory."
                .format(directory))
        paths_per_dir.append(candidates)

    # Match files with the same name across the first two directories.
    first_dir_paths = paths_per_dir[0]
    second_dir_paths = paths_per_dir[1]
    return list(zip(first_dir_paths, second_dir_paths))

def diff_df(df1, df2):
    """Return ``df1 - df2`` computed over the numeric columns.

    The input dataframes are left unmodified.

    Args:
        df1: Dataframe providing the minuend values and the row order of the
            result.
        df2: Dataframe providing the subtrahend values. It must have the same
            columns and index name as ``df1``.

    Returns:
        A dataframe holding the difference of every numeric column, with the
        index (including 'Report', when present) restored as regular columns.
        Rows of ``df1`` that have no counterpart in ``df2`` yield NaN.

    Raises:
        exceptions.ValueError: If the columns or index names differ.
    """
    if not df1.columns.equals(df2.columns) or df1.index.name != df2.index.name:
        raise exceptions.ValueError("Incompatible dataframes. "
            "Please verify that both directories were created using the same recipe.")

    if 'Report' in df1.columns:
        # Handle case where the two dataframes have different report file
        # names for the same rank. We assume that both dataframes contain
        # an equal number of ranks, and if we sort them, the same rank in
        # each dataframe will correspond to the same index.
        # NOTE: if the rank counts differ, zip() stops at the shorter list and
        # the unmatched reports of df2 keep their original names.
        report_dict = dict(zip(sorted(df2['Report'].unique()), sorted(df1['Report'].unique())))
        df2 = df2.replace({"Report": report_dict})

        # Use the non-inplace form so that the caller's dataframes keep their
        # 'Report' column; an inplace set_index would strip it from df1 and
        # make a second call with the same frame fail the check above.
        df1 = df1.set_index('Report', append=True)
        df2 = df2.set_index('Report', append=True)

    # Select only numerical columns.
    df1 = df1.select_dtypes(include='number')
    df2 = df2.select_dtypes(include='number')

    # Align df2 on df1's rows so the subtraction follows df1's order.
    df2 = df2.reindex(df1.index)
    return df1.subtract(df2).reset_index()

def reducer_func(obj, mapper_res):
    """Diff every loaded dataframe pair and write the result as parquet.

    Args:
        obj: Recipe object; its ``add_output_file`` is called with each
            basename and the returned value is used as the parquet target.
        mapper_res: Iterable of ``(basename, df1, df2)`` tuples as produced by
            ``mapper_func``.
    """
    # Results may arrive in a different order than the inputs were submitted,
    # so each output is named after the basename carried in the result itself.
    for result in mapper_res:
        name, left_df, right_df = result
        delta = diff_df(left_df, right_df)
        delta.to_parquet(obj.add_output_file(name))

def save_metadata(obj):
    """Record the diff run in the analysis metadata and write it out.

    Updates ``obj._analysis_dict`` with the diff display name, the current
    local time, the compared directories and the produced output files, then
    calls ``obj.create_analysis_file()``.

    Args:
        obj: Recipe object providing ``_analysis_dict``, ``display_name``,
            ``_parsed_args.diff``, ``_output_files`` and
            ``create_analysis_file``.
    """
    analysis = obj._analysis_dict
    diff_metadata = dict(
        Name=obj.display_name + " Diff",
        EndTime=str(datetime.now()),
        DiffDirs=obj._parsed_args.diff,
        Outputs=obj._output_files,
    )
    analysis.update(diff_metadata)
    obj.create_analysis_file()

def run(obj, context):
    """Run the diff variant of a recipe.

    Args:
        obj: Recipe object whose ``_parsed_args.diff`` holds the directories
            to compare.
        context: Execution context providing ``map`` and ``wait``
            (presumably dispatching the mapper calls as futures; confirm
            against the context implementation).
    """
    # Present this run as '<recipe>-diff', located next to this module.
    diff_script_name = obj.get_script_name() + '-diff'
    obj._parsed_args.script_name = diff_script_name
    obj._parsed_args.script_dir = os.path.dirname(__file__)

    file_pairs = parse_diff_dir(obj._parsed_args.diff)
    pending = context.map(mapper_func, file_pairs)
    loaded_pairs = context.wait(pending)
    reducer_func(obj, loaded_pairs)

    obj.create_notebook('diff.ipynb', 'nsys_display.py')
    save_metadata(obj)
