import os
import pandas as pd

from datetime import datetime

from nsys_recipe.lib import helpers
from nsys_recipe.lib import recipe
from nsys_recipe.lib.args import Option
from nsys_recipe.lib.recipe import Context

class GpuTimeUtil(recipe.Recipe):
    # Recipe that runs the 'gpu_time_util' stats class on every input report
    # (map step), merges the per-report results into 'analysis.parquet'
    # (reduce step), and then emits a notebook and an analysis metadata file.
    display_name = "GPU Time Utilization"
    description = """
    This recipe identifies time regions with low GPU utilization. For each
    process, each GPU device is examined, and a time range is created that
    starts with the beginning of the first GPU operation on that device and
    ends with the end of the last GPU operation on that device. This time range
    is then divided into equal chunks, and the GPU utilization is calculated
    for each chunk. The utilization includes all GPU operations as well as
    profiling overheads that the user cannot address.

    Note that the utilization refers to the "time" utilization and not the
    "resource" utilization. This script does not take into account how many GPU
    resources are being used. Therefore, a single running memcpy is considered
    the same amount of "utilization" as a huge kernel that takes over all the
    cores. If multiple operations run concurrently in the same chunk, their
    utilization will be added up and may exceed 100%.

    Chunks with an in-use percentage less than the threshold value are
    displayed. If consecutive chunks have a low in-use percentage, the
    individual chunks are coalesced into a single display record, keeping the
    weighted average of percentages. This is why returned chunks may have
    different durations.
"""

    @staticmethod
    def mapper_func(nsysrep, parsed_args):
        """Produce the 'gpu_time_util' stats result for a single report.

        Args:
            nsysrep: Report handed to ``helpers.nsysrep_to_sqlite``.
            parsed_args: Parsed recipe arguments, forwarded unchanged to
                ``helpers.stats_cls_to_df``.

        Returns:
            Whatever ``helpers.stats_cls_to_df`` returns (presumably a pandas
            DataFrame, since the reducer concatenates these results), or
            ``None`` when no SQLite file could be obtained for the report.
        """
        db_path = helpers.nsysrep_to_sqlite(nsysrep)
        if db_path is not None:
            report_cls = helpers.get_stats_cls('gpu_time_util', 'GpuTimeUtil')
            return helpers.stats_cls_to_df(db_path, parsed_args, report_cls)

        # No SQLite export available: signal "nothing for this report" so the
        # reducer can drop it via helpers.filter_none().
        return None

    def reducer_func(self, dfs):
        """Merge the mapper results and write them to 'analysis.parquet'.

        Args:
            dfs: Mapper results; ``None`` entries are dropped through
                ``helpers.filter_none`` before concatenation.

        NOTE(review): pandas.concat raises ValueError on an empty sequence, so
        this will fail if every mapper result was None -- confirm whether the
        framework guarantees at least one valid result.
        """
        merged = pd.concat(helpers.filter_none(dfs))

        # Column names may carry a ':<tag>' suffix and columns starting with
        # '_' are internal-only; strip the former and drop the latter.
        merged.columns = merged.columns.str.replace('(:).*', '', regex=True)
        is_public = ~merged.columns.str.startswith('_')
        merged = merged.loc[:, is_public]

        parquet_path = self.add_output_file('analysis.parquet')
        merged.to_parquet(parquet_path)

    def save_metadata(self):
        """Record end time, inputs and outputs, then create the analysis file."""
        metadata = {
            'EndTime': str(datetime.now()),
            'InputReports': self._parsed_args.dir,
            'Outputs': self._output_files,
        }
        self._analysis_dict.update(metadata)
        self.create_analysis_file()

    def run(self, context):
        """Execute the recipe: map over the inputs, reduce, then emit outputs.

        Args:
            context: Execution context providing ``map`` and ``wait``.
        """
        super().run(context)

        # Fan the mapper out over the parsed 'dir' argument and block until
        # every result is available.
        pending = context.map(
            self.mapper_func,
            self._parsed_args.dir,
            parsed_args=self._parsed_args,
        )
        per_report_results = context.wait(pending)
        self.reducer_func(per_report_results)

        self.create_notebook('analysis.ipynb', 'nsys_display.py')
        self.save_metadata()

    @classmethod
    def get_argument_parser(cls):
        """Return the base recipe parser extended with this recipe's options."""
        parser = super().get_argument_parser()

        # Shared, predefined options; registration order is preserved.
        shared_options = (
            Option.OUTPUT,
            Option.FORCE_OVERWRITE,
            Option.START,
            Option.END,
            Option.NVTX,
            Option.ROWS,
        )
        for shared_option in shared_options:
            parser.add_recipe_argument(shared_option)

        # Recipe-specific tuning knobs.
        parser.add_recipe_argument(
            '--threshold',
            metavar='percent',
            type=int,
            default=50,
            help="Maximum percentage of time the GPU is being used",
        )
        parser.add_recipe_argument(
            '--chunks',
            metavar='number',
            type=int,
            default=30,
            help="Number of equal-duration chunks",
        )

        parser.add_recipe_argument(Option.REPORT_DIR, required=True)

        return parser
