/*******************************<GINKGO LICENSE>******************************
Copyright (c) 2017-2023, the Ginkgo authors
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************<GINKGO LICENSE>*******************************/

#ifndef GKO_PUBLIC_CORE_BASE_TIMER_HPP_
#define GKO_PUBLIC_CORE_BASE_TIMER_HPP_


#include <chrono>


#include <ginkgo/core/base/executor.hpp>


namespace gko {


/**
 * An opaque wrapper for a time point generated by a timer.
 *
 * The type is move-only: copy construction and copy assignment are deleted.
 * Its default constructor is private, so new instances can only be created by
 * the friend classes listed below (Timer and the timer implementations). User
 * code obtains instances via Timer::create_time_point.
 */
class time_point {
public:
    /** Destroys the time point (defined out of line). */
    ~time_point();

    /** Move-constructs a time point from another one. */
    time_point(time_point&&);

    /** Move-assigns another time point to this one. */
    time_point& operator=(time_point&&);

    /** Time points cannot be copy-constructed. */
    time_point(const time_point&) = delete;

    /** Time points cannot be copy-assigned. */
    time_point& operator=(const time_point&) = delete;

private:
    /** Default constructor, only accessible to the friend classes below. */
    time_point();

    // The timer classes are friends so they can use the private constructor
    // and access the private members type_ and data_.
    friend class Timer;
    friend class CpuTimer;
    friend class CudaTimer;
    friend class HipTimer;
    friend class DpcppTimer;

    /** What kind of timer was used to generate the time point? */
    enum class type {
        /** std::chrono-based timer */
        cpu,
        /** cudaEvent-based timer */
        cuda,
        /** hipEvent-based timer */
        hip,
        /** sycl::event-based timer */
        dpcpp,
    };

    /**
     * The kind of timer that generated this time point.
     * NOTE(review): presumably identifies which member of data_ is in use;
     * confirm against the out-of-line implementation.
     */
    type type_;
    /**
     * Storage for the backend-specific representation of the time point.
     * The members presumably correspond to the `type` enumerators (cuda_event
     * to cuda, hip_event to hip, dpcpp_event to dpcpp, chrono to cpu) - TODO
     * confirm. The event types are not declared in this header; they are
     * expected to be provided by an included header.
     */
    union data_union {
        /** Pointer to an event of a cudaEvent-based timer. */
        CUevent_st* cuda_event;
        /** Pointer to an event of a hipEvent-based timer. */
        GKO_HIP_EVENT_STRUCT* hip_event;
        /** Pointer to an event of a sycl::event-based timer. */
        sycl::event* dpcpp_event;
        /** Time point of a std::chrono-based timer. */
        std::chrono::steady_clock::time_point chrono;

        /**
         * User-declared default constructor (defined out of line).
         * NOTE(review): likely needed because the chrono member is not
         * trivially default-constructible - confirm.
         */
        data_union();
    } data_;
};


/**
 * Represents a generic timer that can be used to record time points and measure
 * time differences on host or device streams.
 * To keep the runtime overhead of timing minimal, time points need to be
 * allocated beforehand using Timer::create_time_point:
 * ```
 * auto begin = timer->create_time_point();
 * auto end = timer->create_time_point();
 * // ...
 * timer->record(begin);
 * run_expensive_operation();
 * timer->record(end);
 * auto elapsed = timer->difference(begin, end);
 * ```
 */
class Timer {
public:
    /** Virtual destructor, so timers can be destroyed via a Timer pointer. */
    virtual ~Timer() = default;

    /**
     * Returns a newly created time point.
     * Time points may only be used with the timer they were created with.
     *
     * @return the newly created time point.
     */
    time_point create_time_point();

    /**
     * Records a time point at the current time.
     *
     * @param time  the time point to record into
     */
    virtual void record(time_point& time) = 0;

    /**
     * Waits until all kernels in-process when recording the time point are
     * finished.
     *
     * @param time  the time point to wait for
     */
    virtual void wait(time_point& time) = 0;

    /**
     * Computes the difference between the two time points in nanoseconds.
     * The function synchronizes with `stop` before computing the difference.
     *
     * @param start  the first time point (earlier)
     * @param stop  the second time point (later)
     *
     * @return the difference between the time points in nanoseconds.
     */
    std::chrono::nanoseconds difference(time_point& start, time_point& stop);

    /**
     * Computes the difference between the two time points in nanoseconds.
     * This asynchronous version does not synchronize itself, so the time points
     * need to have been synchronized with, i.e. `timer->wait(stop)` needs to
     * have been called. The version is intended for more advanced users who
     * want to measure the overhead of timing functionality separately.
     *
     * @param start  the first time point (earlier)
     * @param stop  the second time point (later)
     *
     * @return the difference between the time points in nanoseconds.
     */
    virtual std::chrono::nanoseconds difference_async(
        const time_point& start, const time_point& stop) = 0;

    /**
     * Creates the timer type most suitable for recording accurate timings of
     * kernels on the given executor.
     *
     * @param exec  the executor to create a Timer for
     *
     * @return CpuTimer for ReferenceExecutor and OmpExecutor, CudaTimer for
     *         CudaExecutor, HipTimer for HipExecutor or DpcppTimer for
     *         DpcppExecutor.
     */
    static std::unique_ptr<Timer> create_for_executor(
        std::shared_ptr<const Executor> exec);

protected:
    /**
     * Initializes a new time_point instance for this timer.
     *
     * @param time  the time point to initialize
     */
    virtual void init_time_point(time_point& time) = 0;
};


/**
 * A timer using std::chrono::steady_clock for timing.
 *
 * Implements the Timer interface; all member functions are defined out of
 * line.
 */
class CpuTimer : public Timer {
public:
    /**
     * Records a time point at the current time, see Timer::record.
     *
     * @param time  the time point to record into
     */
    void record(time_point& time) override;

    /**
     * Implements Timer::wait for this timer.
     *
     * @param time  the time point to wait for
     */
    void wait(time_point& time) override;

    /**
     * Computes the difference between the two time points without
     * synchronizing, see Timer::difference_async.
     *
     * @param start  the first time point (earlier)
     * @param stop  the second time point (later)
     *
     * @return the difference between the time points in nanoseconds.
     */
    std::chrono::nanoseconds difference_async(const time_point& start,
                                              const time_point& stop) override;

protected:
    /**
     * Initializes a new time_point instance for this timer.
     *
     * @param time  the time point to initialize
     */
    void init_time_point(time_point& time) override;
};


/**
 * A timer using events for timing on a CudaExecutor.
 *
 * Implements the Timer interface; all member functions are defined out of
 * line.
 */
class CudaTimer : public Timer {
public:
    /**
     * Records a time point at the current time, see Timer::record.
     *
     * @param time  the time point to record into
     */
    void record(time_point& time) override;

    /**
     * Waits until all kernels in-process when recording the time point are
     * finished, see Timer::wait.
     *
     * @param time  the time point to wait for
     */
    void wait(time_point& time) override;

    /**
     * Computes the difference between the two time points without
     * synchronizing, see Timer::difference_async.
     *
     * @param start  the first time point (earlier)
     * @param stop  the second time point (later)
     *
     * @return the difference between the time points in nanoseconds.
     */
    std::chrono::nanoseconds difference_async(const time_point& start,
                                              const time_point& stop) override;

    /**
     * Creates a timer for the given CudaExecutor.
     *
     * NOTE(review): this single-argument constructor is not `explicit`, so a
     * `std::shared_ptr<const CudaExecutor>` converts implicitly to a
     * CudaTimer.
     *
     * @param exec  the executor to create the timer for
     */
    CudaTimer(std::shared_ptr<const CudaExecutor> exec);

protected:
    /**
     * Initializes a new time_point instance for this timer.
     *
     * @param time  the time point to initialize
     */
    void init_time_point(time_point& time) override;

private:
    /**
     * The executor associated with this timer.
     * NOTE(review): presumably set from the constructor argument; the
     * constructor is defined out of line - confirm.
     */
    std::shared_ptr<const CudaExecutor> exec_;
};


/**
 * A timer using events for timing on a HipExecutor.
 *
 * Implements the Timer interface; all member functions are defined out of
 * line.
 */
class HipTimer : public Timer {
public:
    /**
     * Records a time point at the current time, see Timer::record.
     *
     * @param time  the time point to record into
     */
    void record(time_point& time) override;

    /**
     * Waits until all kernels in-process when recording the time point are
     * finished, see Timer::wait.
     *
     * @param time  the time point to wait for
     */
    void wait(time_point& time) override;

    /**
     * Computes the difference between the two time points without
     * synchronizing, see Timer::difference_async.
     *
     * @param start  the first time point (earlier)
     * @param stop  the second time point (later)
     *
     * @return the difference between the time points in nanoseconds.
     */
    std::chrono::nanoseconds difference_async(const time_point& start,
                                              const time_point& stop) override;

    /**
     * Creates a timer for the given HipExecutor.
     *
     * NOTE(review): this single-argument constructor is not `explicit`, so a
     * `std::shared_ptr<const HipExecutor>` converts implicitly to a HipTimer.
     *
     * @param exec  the executor to create the timer for
     */
    HipTimer(std::shared_ptr<const HipExecutor> exec);

protected:
    /**
     * Initializes a new time_point instance for this timer.
     *
     * @param time  the time point to initialize
     */
    void init_time_point(time_point& time) override;

private:
    /**
     * The executor associated with this timer.
     * NOTE(review): presumably set from the constructor argument; the
     * constructor is defined out of line - confirm.
     */
    std::shared_ptr<const HipExecutor> exec_;
};


/**
 * A timer using kernels for timing on a DpcppExecutor in profiling mode.
 *
 * Implements the Timer interface; all member functions are defined out of
 * line.
 */
class DpcppTimer : public Timer {
public:
    /**
     * Records a time point at the current time, see Timer::record.
     *
     * @param time  the time point to record into
     */
    void record(time_point& time) override;

    /**
     * Waits until all kernels in-process when recording the time point are
     * finished, see Timer::wait.
     *
     * @param time  the time point to wait for
     */
    void wait(time_point& time) override;

    /**
     * Computes the difference between the two time points without
     * synchronizing, see Timer::difference_async.
     *
     * @param start  the first time point (earlier)
     * @param stop  the second time point (later)
     *
     * @return the difference between the time points in nanoseconds.
     */
    std::chrono::nanoseconds difference_async(const time_point& start,
                                              const time_point& stop) override;

    /**
     * Creates a timer for the given DpcppExecutor.
     *
     * NOTE(review): this single-argument constructor is not `explicit`, so a
     * `std::shared_ptr<const DpcppExecutor>` converts implicitly to a
     * DpcppTimer.
     *
     * @param exec  the executor to create the timer for
     */
    DpcppTimer(std::shared_ptr<const DpcppExecutor> exec);

protected:
    /**
     * Initializes a new time_point instance for this timer.
     *
     * @param time  the time point to initialize
     */
    void init_time_point(time_point& time) override;

private:
    /**
     * The executor associated with this timer.
     * NOTE(review): presumably set from the constructor argument; the
     * constructor is defined out of line - confirm.
     */
    std::shared_ptr<const DpcppExecutor> exec_;
};


}  // namespace gko


#endif  // GKO_PUBLIC_CORE_BASE_TIMER_HPP_
