///////////////////////////////////////////////////////////////////////////////
//
// File: TimeIntegrationSchemeFIT.cpp
//
// For more information, please see: http://www.nektar.info
//
// The MIT License
//
// Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA),
// Department of Aeronautics, Imperial College London (UK), and Scientific
// Computing and Imaging Institute, University of Utah (USA).
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.
//
// Description: implementation of time integration scheme FIT class
//
///////////////////////////////////////////////////////////////////////////////

// Note: The file is named TimeIntegrationSchemeFIT to parallel the
// TimeIntegrationSchemeGLM file, but the class is named
// FractionalInTimeIntegrationScheme to keep with the factory naming
// convention.

#include <LibUtilities/TimeIntegration/TimeIntegrationSchemeFIT.h>

namespace Nektar
{
namespace LibUtilities
{
/**
 * @class FractionalInTimeIntegrationScheme
 *
 * A fast convolution algorithm for computing solutions to (Caputo)
 * time-fractional differential equations. This is an explicit solver
 * that expresses the solution as an integral over a Talbot curve,
 * which is discretized with quadrature.
 *
 * @param variant     Scheme variant, forwarded to the base class.
 * @param order       Order of the scheme; the constructor asserts that
 *                    it lies between one and four.
 * @param freeParams  Optional settings. Accepted lengths are zero (keep
 *                    the defaults), one <alpha>, two <alpha, base> or
 *                    six <alpha, base, nQuadPts, sigma, mu0, nu>.
 */
FractionalInTimeIntegrationScheme::FractionalInTimeIntegrationScheme(
    std::string variant, size_t order, std::vector<NekDouble> freeParams)
    : TimeIntegrationScheme(variant, order, freeParams),
      m_name("FractionalInTime")
{
    m_variant    = variant;
    m_order      = order;
    m_freeParams = freeParams;

    const size_t nParams = freeParams.size();

    // Only orders one to four have coefficient tables.
    ASSERTL1(order >= 1 && order <= 4,
             "FractionalInTime Time integration scheme bad order: " +
                 std::to_string(order));

    ASSERTL1(nParams == 0 || nParams == 1 || nParams == 2 || nParams == 6,
             "FractionalInTime Time integration scheme invalid number "
             "of free parameters, expected zero, one <alpha>, "
             "two <alpha, base>, or "
             "six <alpha, base, nQuadPts, sigma, mu0, nu> received  " +
                 std::to_string(freeParams.size()));

    // First entry: alpha, the value used for the exponential integration.
    if (nParams > 0)
    {
        m_alpha = freeParams[0];
    }

    // Second entry: the "base" of the algorithm.
    if (nParams > 1)
    {
        m_base = freeParams[1];
    }

    // The Talbot quadrature settings are only read when all six
    // entries are present.
    if (nParams == 6)
    {
        m_nQuadPts = freeParams[2]; // Number of quadrature points
        m_sigma    = freeParams[3];
        m_mu0      = freeParams[4];
        m_nu       = freeParams[5];
    }
}

/**
 * @brief Worker method to initialize the integration scheme.
 */
void FractionalInTimeIntegrationScheme::v_InitializeScheme(
    const NekDouble deltaT, ConstDoubleArray &y_0, const NekDouble time,
    const TimeIntegrationSchemeOperators &op)

{
    m_op      = op;
    m_nvars   = y_0.size();
    m_npoints = y_0[0].size();

    m_deltaT = deltaT;

    m_T            = time; // Finial time;
    m_maxTimeSteps = m_T / m_deltaT;

    // The +2 below is a buffer, and keeps +2 extra rectangle groups
    // in case T needs to be increased later.
    m_Lmax = computeL(m_base, m_maxTimeSteps) + 2;

    // Demarcation integers - one array that is re-used
    m_qml = Array<OneD, size_t>(m_Lmax - 1, size_t(0));

    // Demarcation interval markers - one array that is re-used
    m_taus = Array<OneD, size_t>(m_Lmax + 1, size_t(0));

    // Storage of the initial values.
    m_u0 = y_0;

    // Storage for the exponential factor in the integral
    // contribution. One array that is re-used
    m_expFactor = ComplexSingleArray(m_nQuadPts, 0.0);

    // Storage of previous states and associated timesteps.
    m_u = TripleArray(m_order + 1);

    for (size_t m = 0; m <= m_order; ++m)
    {
        m_u[m] = DoubleArray(m_nvars);

        for (size_t i = 0; i < m_nvars; ++i)
        {
            m_u[m][i] = SingleArray(m_npoints, 0.0);

            for (size_t j = 0; j < m_npoints; ++j)
            {
                // Store the initial values as the first previous state.
                if (m == 0)
                    m_u[m][i][j] = m_u0[i][j];
                else
                    m_u[m][i][j] = 0;
            }
        }
    }

    // Storage for the stage derivative as the data will be re-used to
    // update the solution.
    m_F = DoubleArray(m_nvars);
    // Storage of the next solution from the final increment.
    m_uNext = DoubleArray(m_nvars);
    // Storage for the integral contribution.
    m_uInt = ComplexDoubleArray(m_nvars);

    for (size_t i = 0; i < m_nvars; ++i)
    {
        m_F[i]     = SingleArray(m_npoints, 0.0);
        m_uNext[i] = SingleArray(m_npoints, 0.0);
        m_uInt[i]  = ComplexSingleArray(m_npoints, 0.0);
    }

    // J
    m_J = SingleArray(m_order, 0.0);

    m_J[0] = pow(m_deltaT, m_alpha) / tgamma(m_alpha + 1.);

    for (size_t m = 1, m_1 = 0; m < m_order; ++m, ++m_1)
    {
        m_J[m] = m_J[m_1] * NekDouble(m) / (NekDouble(m) + m_alpha);
    }

    // Ahat array, one for each order.
    // These are elements in a multi-step exponential integrator tableau
    m_Ahats = TripleArray(m_order + 1);

    for (size_t m = 1; m <= m_order; ++m)
    {
        m_Ahats[m] = DoubleArray(m);

        for (size_t n = 0; n < m; ++n)
        {
            m_Ahats[m][n] = SingleArray(m, 0.0);
        }

        switch (m)
        {
            case 1:
                m_Ahats[m][0][0] = 1.;
                break;

            case 2:
                m_Ahats[m][0][0] = 1.;
                m_Ahats[m][0][1] = 0.;
                m_Ahats[m][1][0] = 1.;
                m_Ahats[m][1][1] = -1.;
                break;

            case 3:
                m_Ahats[m][0][0] = 1.;
                m_Ahats[m][0][1] = 0.;
                m_Ahats[m][0][2] = 0;
                m_Ahats[m][1][0] = 3. / 2.;
                m_Ahats[m][1][1] = -2.;
                m_Ahats[m][1][2] = 1. / 2.;
                m_Ahats[m][2][0] = 1. / 2.;
                m_Ahats[m][2][1] = -1.;
                m_Ahats[m][2][2] = 1. / 2.;
                break;

            case 4:
                m_Ahats[m][0][0] = 1.;
                m_Ahats[m][0][1] = 0.;
                m_Ahats[m][0][2] = 0.;
                m_Ahats[m][0][3] = 0.;

                m_Ahats[m][1][0] = 11. / 6.;
                m_Ahats[m][1][1] = -3;
                m_Ahats[m][1][2] = 3. / 2.;
                m_Ahats[m][1][3] = -1. / 3.;

                m_Ahats[m][2][0] = 1.;
                m_Ahats[m][2][1] = -5. / 2.;
                m_Ahats[m][2][2] = 2.;
                m_Ahats[m][2][3] = -1. / 2.;

                m_Ahats[m][3][0] = 1. / 6.;
                m_Ahats[m][3][1] = -1. / 2.;
                m_Ahats[m][3][2] = 1. / 2.;
                m_Ahats[m][3][3] = -1. / 6.;
                break;

            default:

                m_Ahats[m][0][0] = 1;

                for (size_t j = 2; j <= m; ++j)
                {
                    for (size_t i = 0; i < m; ++i)
                    {
                        m_Ahats[m][j - 1][i] = pow((1 - j), i);
                    }
                }

                ASSERTL1(false, "No matrix inverse.");

                // Future code: m_Ahats[m] = inv(m_Ahats[m]);

                break;
        }
    }

    // Mulitply the last Ahat array, transposed, by J
    m_AhattJ = SingleArray(m_order, 0.0);

    for (size_t i = 0; i < m_order; ++i)
    {
        for (size_t j = 0; j < m_order; ++j)
        {
            m_AhattJ[i] += m_Ahats[m_order][j][i] * m_J[j];
        }
    }

    m_integral_classes = Array<OneD, Instance>(m_Lmax);

    for (size_t l = 0; l < m_Lmax; ++l)
    {
        integralClassInitialize(l + 1, m_integral_classes[l]);
    }
}

/**
 * @brief Worker method that performs the time integration.
 *
 * Runs one step: stages the integral classes, accumulates the final
 * increment and the integral contributions into m_uNext, shifts the
 * solution history by one slot and finally advances the sandboxes.
 *
 * @param timestep  Index of the step; the routine works with
 *                  timestep + 1 internally.
 * @param delta_t   Step size, which must match the one given at
 *                  initialization.
 * @return The newest solution, m_u[0].
 */
ConstDoubleArray &FractionalInTimeIntegrationScheme::v_TimeIntegrate(
    const size_t timestep, const NekDouble delta_t)
{
    boost::ignore_unused(delta_t);

    ASSERTL1(delta_t == m_deltaT,
             "Delta T has changed which is not permitted.");

    // Internal step counter: the incoming index plus one.
    const size_t step = timestep + 1;

    // Stage the data each integral class needs for this update.
    for (size_t k = 0; k < m_Lmax; ++k)
    {
        updateStage(step, m_integral_classes[k]);
    }

    // Local increment up to time step * m_deltaT, accumulated in m_uNext.
    finalIncrement(step);

    // Add the history integrals, one interval at a time.
    const size_t nIntervals = computeTaus(m_base, step);

    for (size_t k = 0; k < nIntervals; ++k)
    {
        // Leaves the contribution of interval k in m_uInt.
        integralContribution(step, m_taus[k], m_integral_classes[k]);

        for (size_t v = 0; v < m_nvars; ++v)
        {
            for (size_t p = 0; p < m_npoints; ++p)
            {
                m_uNext[v][p] += m_uInt[v][p].real();
            }
        }
    }

    // Age the stored solutions by one slot, oldest first.
    for (size_t slot = m_order; slot >= 1; --slot)
    {
        for (size_t v = 0; v < m_nvars; ++v)
        {
            for (size_t p = 0; p < m_npoints; ++p)
            {
                m_u[slot][v][p] = m_u[slot - 1][v][p];
            }
        }
    }

    // New solution = accumulated update + initial values. The
    // accumulator is cleared so the next step starts from zero.
    for (size_t v = 0; v < m_nvars; ++v)
    {
        for (size_t p = 0; p < m_npoints; ++p)
        {
            m_u[0][v][p]  = m_uNext[v][p] + m_u0[v][p];
            m_uNext[v][p] = 0;
        }
    }

    // Bring the sandboxes and stashes of every integral class up to
    // time step * m_deltaT.
    for (size_t k = 0; k < m_Lmax; ++k)
    {
        advanceSandbox(step, m_integral_classes[k]);
    }

    return m_u[0];
}

/**
 * @brief Method that increments the counter then performs mod
 * calculation.
 *
 * @param counter  Current counter value.
 * @param base     Modulus applied after the increment.
 * @return (counter + 1) modulo base.
 */
size_t FractionalInTimeIntegrationScheme::modIncrement(const size_t counter,
                                                       const size_t base) const
{
    const size_t next = counter + 1;

    return next % base;
}

/**
 * @brief Method to compute the smallest integer L such that
 * l < 2 * base^L.
 *
 * @param base  Base of the algorithm (expected to be at least two).
 * @param l     Number of time steps.
 * @return The smallest L with l < 2 * base^L; zero when l < 2.
 */
size_t FractionalInTimeIntegrationScheme::computeL(const size_t base,
                                                   const size_t l) const
{
    // For l < 2 the bound already holds with L = 0. Returning here also
    // avoids the logarithm below, which is negative (or -infinity for
    // l == 0) in that range and cannot be converted to an unsigned
    // integer.
    if (l < 2)
    {
        return 0;
    }

    // Smallest L with l <= 2 * base^L ...
    size_t L = ceil(log(l / 2.0) / log(base));

    // ... bumped by one when l hits the bound exactly, because the
    // inequality is strict.
    if (l % (size_t)(2 * pow(base, L)) == 0)
    {
        ++L;
    }

    return L;
}

/**
 * @brief Method to compute the demarcation integers q_{m, ell}.
 *
 * Fills the first L-1 entries of m_qml, where L = computeL(base, m).
 * The step size h is not needed for this computation. m_qml itself is
 * allocated in v_InitializeScheme with the largest length expected.
 *
 * @param base  Base of the algorithm.
 * @param m     Time-step count the partition is built for.
 * @return L, one more than the number of entries written.
 */
size_t FractionalInTimeIntegrationScheme::computeQML(const size_t base,
                                                     const size_t m)
{
    const size_t nLevels = computeL(base, m);

    size_t entry = 0;

    while (entry < nLevels - 1)
    {
        const size_t exponent = entry + 1;

        m_qml[entry] = floor(m / pow(base, exponent)) - 1;

        ++entry;
    }

    return nLevels;
}

/**
 * @brief Method to compute the demarcation interval marker tau_{m, ell}.
 *
 * Fills the first L+1 entries of m_taus so that h*taus are interval
 * boundaries for a partition of [0, m h]. The step size h is not needed
 * for this computation. m_taus itself is allocated in
 * v_InitializeScheme with the largest length expected.
 *
 * @param base  Base of the algorithm.
 * @param m     Time-step count the partition is built for.
 * @return L, the number of intervals (zero when m is one).
 */
size_t FractionalInTimeIntegrationScheme::computeTaus(const size_t base,
                                                      const size_t m)
{
    // The very first step has no history intervals.
    if (m == 1)
    {
        m_taus[0] = 0;

        return 0;
    }

    const size_t nIntervals = computeQML(base, m);

    // Markers run from m - 1 down to zero.
    m_taus[0] = m - 1;

    for (size_t k = 1; k < nIntervals; ++k)
    {
        m_taus[k] = m_qml[k - 1] * pow(base, k);
    }

    m_taus[nIntervals] = 0;

    return nIntervals;
}

/**
 * @brief Method to compute the quadrature rule over Talbot contour
 *
 * Returns a quadrature rule over the Talbot contour defined by the
 * parameterization.
 *
 * gamma(th) = sigma + mu * ( th*cot(th) + i*nu*th ),  -pi < th < pi
 *
 * An N-point rule is returned, equidistant in the parameter theta. The
 * returned quadrature rule approximates an integral over the contour.
 *
 * @param nQuadPts  Number of quadrature points N.
 * @param mu        Contour parameter mu.
 * @param nu        Contour parameter nu.
 * @param sigma     Contour parameter sigma.
 * @param lamb      Output: the N quadrature nodes (reallocated here).
 * @param w         Output: the N quadrature weights (reallocated here).
 */
void FractionalInTimeIntegrationScheme::talbotQuadrature(
    const size_t nQuadPts, const NekDouble mu, const NekDouble nu,
    const NekDouble sigma, ComplexSingleArray &lamb,
    ComplexSingleArray &w) const
{
    lamb = ComplexSingleArray(nQuadPts, 0.0);
    w    = ComplexSingleArray(nQuadPts, 0.0);

    for (size_t q = 0; q < nQuadPts; ++q)
    {
        // With an odd number of points the middle node, index
        // (nQuadPts - 1) / 2, sits at th = 0 where cot(th) is singular.
        // Use the limits th*cot(th) -> 1 and
        // cot(th) - th/sin^2(th) -> 0 there instead of evaluating the
        // general formulas.
        if (2 * q + 1 == nQuadPts)
        {
            lamb[q] = std::complex<NekDouble>(sigma + mu, 0);

            w[q] = std::complex<NekDouble>(nu * mu / nQuadPts, 0);

            continue;
        }

        NekDouble th =
            (NekDouble(q) + 0.5) / NekDouble(nQuadPts) * 2.0 * M_PI - M_PI;

        lamb[q] = sigma + mu * th * std::complex<NekDouble>(1. / tan(th), nu);

        w[q] = std::complex<NekDouble>(0, -1. / NekDouble(nQuadPts)) * mu *
               std::complex<NekDouble>(1. / tan(th) - th / (sin(th) * sin(th)),
                                       nu);
    }
}

/**
 * @brief Method to initialize the integral class
 */
void FractionalInTimeIntegrationScheme::integralClassInitialize(
    const size_t index, Instance &instance) const
{
    /**
     * /brief
     *
     * This object stores information for performing integration over
     * an interval [a, b]. (Defined by taus in the parent calling
     * function.)
     *
     * The "main" object stores information about [a,b]. In
     * particular, main.ind identifies [a,b] via multiples of h.
     *
     * Periodically the values of [a,b] need to be incremented. The
     * necessary background storage to accomplish this increment
     * depends whether a or b is being incremented.
     *
     * The objects with "f" ("Floor") modifiers are associated with
     * increments of the interval floor a.
     *
     * The objects with "c" ("Ceiling") modifiers are associated with
     * increments of the interval ceiling b.
     *
     * Items on the "stage" are stored for use in computing u at the
     * current time.  Items in the "stash" are stored for use for
     * future staging. Items in the "sandbox" are being actively
     * updated at the current time for future stashing. Only items in
     * the sandbox are time-stepped. the stage and stash locations are
     * for storage only.
     *
     * This is the same for all integral classes, so there's probably
     * a better way to engineer this. And technically, all that's
     * needed is the array K(instance.z) anyway.
     */

    instance.base          = m_base;
    instance.index         = index; // Index of this instance
    instance.active        = false; // Used to determine if active
    instance.activecounter = 0;     // Counter used to flip active bit
    instance.activebase    = 2. * pow(m_base, (index - 1));

    // Storage for values of y currently used to update u
    instance.stage_y    = ComplexTripleArray(m_nvars);
    instance.cstash_y   = ComplexTripleArray(m_nvars);
    instance.csandbox_y = ComplexTripleArray(m_nvars);
    instance.fstash_y   = ComplexTripleArray(m_nvars);
    instance.fsandbox_y = ComplexTripleArray(m_nvars);

    for (size_t q = 0; q < m_nvars; ++q)
    {
        instance.stage_y[q]    = ComplexDoubleArray(m_npoints);
        instance.cstash_y[q]   = ComplexDoubleArray(m_npoints);
        instance.csandbox_y[q] = ComplexDoubleArray(m_npoints);
        instance.fstash_y[q]   = ComplexDoubleArray(m_npoints);
        instance.fsandbox_y[q] = ComplexDoubleArray(m_npoints);

        for (size_t i = 0; i < m_npoints; ++i)
        {
            instance.stage_y[q][i]    = ComplexSingleArray(m_nQuadPts, 0.0);
            instance.cstash_y[q][i]   = ComplexSingleArray(m_nQuadPts, 0.0);
            instance.csandbox_y[q][i] = ComplexSingleArray(m_nQuadPts, 0.0);
            instance.fstash_y[q][i]   = ComplexSingleArray(m_nQuadPts, 0.0);
            instance.fsandbox_y[q][i] = ComplexSingleArray(m_nQuadPts, 0.0);
        }
    }

    // Major storage for auxilliary ODE solutions.
    instance.stage_ind =
        std::pair<size_t, size_t>(0, 0); // Time-step counters
                                         // indicating the interval
                                         // ymain is associated with

    // Staging allocation
    instance.stage_active   = false;
    instance.stage_ccounter = 0;
    instance.stage_cbase    = pow(m_base, index - 1); // This base is halved
                                                      // after the first cycle
    instance.stage_fcounter = 0;
    instance.stage_fbase    = pow(m_base, index); // This base is halved
                                                  // after the first cycle

    // Ceiling stash allocation
    instance.cstash_counter = 0; // Counter used to determine
                                 // when to stash

    instance.cstash_base = pow(m_base, index - 1); // base for counter ind(1)
    instance.cstash_ind =
        std::pair<size_t, size_t>(0, 0); // is never used: it always
                                         // matches main.ind(1)

    // Ceiling sandbox allocation
    instance.csandbox_active = false; // Flag to determine when stash 2
                                      // is utilized
    instance.csandbox_counter = 0;
    instance.csandbox_ind     = std::pair<size_t, size_t>(0, 0);

    // Floor stash
    instance.fstash_base = 2 * pow(m_base, index);
    instance.fstash_ind  = std::pair<size_t, size_t>(0, 0);

    // Floor sandbox
    instance.fsandbox_active         = false;
    instance.fsandbox_activebase     = pow(m_base, index);
    instance.fsandbox_stashincrement = (m_base - 1) * pow(m_base, index - 1);
    instance.fsandbox_ind            = std::pair<size_t, size_t>(0, 0);

    // Defining parameters of the Talbot contour quadrature rule
    NekDouble Tl =
        m_deltaT * (2. * pow(m_base, index) - 1. - pow(m_base, index - 1));
    NekDouble mu = m_mu0 / Tl;

    // Talbot quadrature rule
    talbotQuadrature(m_nQuadPts, mu, m_nu, m_sigma, instance.z, instance.w);

    /**
     * /brief
     *
     * With sigma == 0, the dependence of z and w on index is just a
     * multiplicative scaling factor (mu). So technically we'll only
     * need one instance of this N-point rule and can scale it
     * accordingly inside each integral_class instance. Not sure if
     * this optimization is worth it. Cumulative memory savings would
     * only be about 4*N*Lmax floats.

     * Below: precomputation for time integration of auxiliary
     * variables.  Everything below here is independent of the
     * instance index index. Therefore, we could actually just
     * generate and store one copy of this stuff and use it
     * everywhere.
     */

    // 'As' array - one for each order.
    TripleArray &As = instance.As;

    As = TripleArray(m_order + 2);

    for (size_t m = 1; m <= m_order + 1; ++m)
    {
        As[m] = DoubleArray(m);

        for (size_t n = 0; n < m; ++n)
        {
            As[m][n] = SingleArray(m, 0.0);
        }

        switch (m)
        {
            case 1:
                As[m][0][0] = 1.;
                break;

            case 2:
                As[m][0][0] = 0.;
                As[m][0][1] = 1.;
                As[m][1][0] = 1.;
                As[m][1][1] = -1.;
                break;

            case 3:
                As[m][0][0] = 0.;
                As[m][0][1] = 1.;
                As[m][0][2] = 0;
                As[m][1][0] = 1. / 2.;
                As[m][1][1] = 0.;
                As[m][1][2] = -1. / 2.;
                As[m][2][0] = 1. / 2.;
                As[m][2][1] = -1.;
                As[m][2][2] = 1. / 2.;
                break;

            case 4:
                As[m][0][0] = 0.;
                As[m][0][1] = 1.;
                As[m][0][2] = 0.;
                As[m][0][3] = 0.;

                As[m][1][0] = 1. / 3.;
                As[m][1][1] = 1. / 2.;
                As[m][1][2] = -1.;
                As[m][1][3] = 1. / 6.;

                As[m][2][0] = 1. / 2.;
                As[m][2][1] = -1.;
                As[m][2][2] = 1. / 2.;
                As[m][2][3] = 0.;

                As[m][3][0] = 1. / 6.;
                As[m][3][1] = -1. / 2.;
                As[m][3][2] = 1. / 2.;
                As[m][3][3] = -1. / 6.;
                break;

            case 5:
                As[m][0][0] = 0.;
                As[m][0][1] = 1.;
                As[m][0][2] = 0.;
                As[m][0][3] = 0.;
                As[m][0][4] = 0.;

                As[m][1][0] = 1. / 4.;
                As[m][1][1] = 5. / 6.;
                As[m][1][2] = -3. / 2.;
                As[m][1][3] = 1. / 2.;
                As[m][1][4] = -1. / 12.;

                As[m][2][0] = 11. / 24.;
                As[m][2][1] = -5. / 6.;
                As[m][2][2] = 1. / 4.;
                As[m][2][3] = 1. / 6.;
                As[m][2][4] = -1. / 24.;

                As[m][3][0] = 1. / 4.;
                As[m][3][1] = -5. / 6.;
                As[m][3][2] = 1.;
                As[m][3][3] = -1. / 2.;
                As[m][3][4] = 1. / 12.;

                As[m][4][0] = 1. / 24.;
                As[m][4][1] = -1. / 6.;
                As[m][4][2] = 1. / 4.;
                As[m][4][3] = -1. / 6.;
                As[m][4][4] = 1. / 24.;
                break;

                // The default is a general formula, but the matrix inversion
                // involved is ill-conditioned, so the special cases above are
                // epxlicitly given to combat roundoff error in the most-used
                // scenarios.
            default:
                ASSERTL1(false, "No matrix inverse.");
                break;
        }
    }

    // Initialize the exponenetial integrators.
    instance.E = ComplexSingleArray(m_nQuadPts, 0.0);

    for (size_t q = 0; q < m_nQuadPts; ++q)
    {
        instance.E[q] = exp(instance.z[q] * m_deltaT);
    }

    instance.Eh = ComplexDoubleArray(m_order + 1);

    for (size_t m = 0; m < m_order + 1; ++m)
    {
        instance.Eh[m] = ComplexSingleArray(m_nQuadPts, 0.0);

        for (size_t q = 0; q < m_nQuadPts; ++q)
        {
            if (m == 0)
                instance.Eh[0][q] =
                    1. / instance.z[q] * (exp(instance.z[q] * m_deltaT) - 1.0);
            else
                instance.Eh[m][q] = -1. / instance.z[q] +
                                    NekDouble(m) / (instance.z[q] * m_deltaT) *
                                        instance.Eh[m - 1][q];
        }
    }

    // 'AtEh' is set for the primary order. If a lower order method is
    // needed for initializing it will be changed in time_advance then
    // restored.
    instance.AtEh = ComplexDoubleArray(m_order + 1);

    for (size_t m = 0; m <= m_order; ++m)
    {
        instance.AtEh[m] = ComplexSingleArray(m_nQuadPts, 0.0);

        for (size_t q = 0; q < m_nQuadPts; ++q)
        {
            for (size_t i = 0; i <= m_order; ++i)
            {
                instance.AtEh[m][q] +=
                    instance.As[m_order + 1][m][i] * instance.Eh[i][q];
            }
        }
    }
}

/**
 * @brief Method to rearrange of staging/stashing for current time
 *
 * (1) activates ceiling staging
 * (2) moves ceiling stash ---> stage
 * (3) moves floor stash --> stage (+ updates all ceiling data)
 *
 * @param timeStep  Current time-step counter.
 * @param instance  Integral class whose stage is updated.
 */
void FractionalInTimeIntegrationScheme::updateStage(const size_t timeStep,
                                                    Instance &instance)
{
    // Once switched on, an instance stays active.
    instance.active =
        instance.active || (timeStep % instance.activebase == 0);

    // Nothing to stage while the instance is inactive.
    if (!instance.active)
    {
        return;
    }

    // Floor staging takes precedence over ceiling staging.
    if (timeStep % instance.fstash_base == 0)
    {
        // Swapping is enough here: the stash is refilled by a later
        // copy and the floor sandbox is wiped just below.
        std::swap(instance.stage_y, instance.fstash_y);
        std::swap(instance.csandbox_y, instance.fsandbox_y);

        instance.stage_ind    = instance.fstash_ind;
        instance.csandbox_ind = instance.fsandbox_ind;

        // After the first floor staging the base becomes base^index.
        instance.fstash_base = pow(instance.base, instance.index);

        // Restart the floor sandbox from scratch.
        instance.fsandbox_ind    = std::pair<size_t, size_t>(0, 0);
        instance.fsandbox_active = false;

        for (size_t v = 0; v < m_nvars; ++v)
        {
            for (size_t p = 0; p < m_npoints; ++p)
            {
                for (size_t q = 0; q < m_nQuadPts; ++q)
                {
                    instance.fsandbox_y[v][p][q] = 0;
                }
            }
        }

        return;
    }

    // Ceiling staging: the stash is refilled by a later copy, so a
    // swap suffices here too.
    if (timeStep % instance.stage_cbase == 0)
    {
        instance.stage_ind = instance.cstash_ind;

        std::swap(instance.stage_y, instance.cstash_y);
    }
}

/**
 * @brief Method to approximate the integral
 *
 *   \int_{(m-1) h}^{m h} k(m*h -s) f(u, s) dx{s}
 *
 * Using a time-stepping scheme of a particular order. Here, k depends
 * on alpha, the derivative order.
 */
void FractionalInTimeIntegrationScheme::finalIncrement(const size_t timeStep)
{
    // Note: m_uNext is initialized to zero and then reset to zero
    // after it is used to update the current solution in TimeIntegrate.
    for (size_t m = 0; m < m_order; ++m)
    {
        m_op.DoOdeRhs(m_u[m], m_F, m_deltaT * (timeStep - m));

        for (size_t i = 0; i < m_nvars; ++i)
        {
            for (size_t j = 0; j < m_npoints; ++j)
            {
                m_uNext[i][j] += m_F[i][j] * m_AhattJ[m];
            }
        }
    }
}

/**
 * @brief Method to get the integral contribution over [taus(i+1)
 * taus(i)]. Stored in m_uInt.
 */
void FractionalInTimeIntegrationScheme::integralContribution(
    const size_t timeStep, const size_t tauml, const Instance &instance)
{
    // Assume y has already been updated to time level m
    for (size_t q = 0; q < m_nQuadPts; ++q)
    {
        m_expFactor[q] =
            exp(instance.z[q] * m_deltaT * NekDouble(timeStep - tauml)) *
            pow(instance.z[q], -m_alpha) * instance.w[q];
    }

    for (size_t i = 0; i < m_nvars; ++i)
    {
        for (size_t j = 0; j < m_npoints; ++j)
        {
            m_uInt[i][j] = 0;

            for (size_t q = 0; q < m_nQuadPts; ++q)
            {
                m_uInt[i][j] += instance.stage_y[i][j][q] * m_expFactor[q];
            }

            if (m_uInt[i][j].real() < 1e8)
            {
                m_uInt[i][j] = m_uInt[i][j].real();
            }
        }
    }
}

/**
 * @brief Method to get the solution to y' = z*y + f(u), using an
 * exponential integrator with implicit order (m_order + 1)
 * interpolation of the f(u) term.
 */
void FractionalInTimeIntegrationScheme::timeAdvance(const size_t timeStep,
                                                    Instance &instance,
                                                    ComplexTripleArray &y)
{
    size_t order;

    // Try automated high-order method.
    if (timeStep <= m_order)
    {
        // Not enough history. For now, demote to lower-order method.
        // TODO: use multi-stage method.
        order = timeStep;

        // Prep for the time step.
        for (size_t m = 0; m <= order; ++m)
        {
            for (size_t q = 0; q < m_nQuadPts; ++q)
            {
                instance.AtEh[m][q] = 0;

                for (size_t i = 0; i <= order; ++i)
                {
                    instance.AtEh[m][q] +=
                        instance.As[order + 1][m][i] * instance.Eh[i][q];
                }
            }
        }
    }
    else
    {
        order = m_order;
    }

    // y = y * instance.E + F * instance.AtEh;
    for (size_t m = 0; m <= order; ++m)
    {
        m_op.DoOdeRhs(m_u[m], m_F, m_deltaT * (timeStep - m));

        for (size_t i = 0; i < m_nvars; ++i)
        {
            for (size_t j = 0; j < m_npoints; ++j)
            {
                for (size_t q = 0; q < m_nQuadPts; ++q)
                {
                    // y * instance.E
                    if (m == 0)
                        y[i][j][q] *= instance.E[q];

                    // F * instance.AtEh
                    y[i][j][q] += m_F[i][j] * instance.AtEh[m][q];
                }
            }
        }
    }
}

/**
 * @brief Method to update sandboxes to the current time.
 *
 * (1) advances ceiling sandbox
 * (2) moves ceiling sandbox ---> stash
 * (3) activates floor sandboxing
 * (4) advances floor sandbox
 * (5) moves floor sandbox ---> stash
 */
void FractionalInTimeIntegrationScheme::advanceSandbox(const size_t timeStep,
                                                       Instance &instance)
{
    // (1)
    // update(instance.csandbox.y)
    timeAdvance(timeStep, instance, instance.csandbox_y);
    instance.csandbox_ind.second = timeStep;

    // (2)
    // Determine if ceiling stashing is necessary
    instance.cstash_counter =
        modIncrement(instance.cstash_counter, instance.cstash_base);

    if (timeStep % instance.cstash_base == 0)
    {
        // Then need to stash
        // instance.cstash_y   = instance.csandbox_y;
        instance.cstash_ind = instance.csandbox_ind;

        // Stash the c sandbox value. This step has to be a deep copy
        // because the values in the sandbox are still needed for the
        // time advance.
        for (size_t i = 0; i < m_nvars; ++i)
        {
            for (size_t j = 0; j < m_npoints; ++j)
            {
                for (size_t q = 0; q < m_nQuadPts; ++q)
                {
                    instance.cstash_y[i][j][q] = instance.csandbox_y[i][j][q];
                }
            }
        }
    }

    if (instance.fsandbox_active)
    {
        // (4)
        timeAdvance(timeStep, instance, instance.fsandbox_y);

        instance.fsandbox_ind.second = timeStep;

        // (5) Move floor sandbox to stash
        if ((instance.fsandbox_ind.second - instance.fsandbox_ind.first) %
                instance.fsandbox_stashincrement ==
            0)
        {
            // instance.fstash_y   = instance.fsandbox_y;
            instance.fstash_ind = instance.fsandbox_ind;

            // Stash the f sandbox values. This step has to be a deep
            // copy because the values in the sandbox are still needed
            // for the time advance.
            for (size_t i = 0; i < m_nvars; ++i)
            {
                for (size_t j = 0; j < m_npoints; ++j)
                {
                    for (size_t q = 0; q < m_nQuadPts; ++q)
                    {
                        instance.fstash_y[i][j][q] =
                            instance.fsandbox_y[i][j][q];
                    }
                }
            }
        }
    }
    else // Determine if advancing floor sandbox is necessary at next time
    {
        // (3)
        if (timeStep % instance.fsandbox_activebase == 0)
        {
            instance.fsandbox_active = true;
            instance.fsandbox_ind =
                std::pair<size_t, size_t>(timeStep, timeStep);
        }
    }
}

/**
 * @brief Worker method to print details on the integration scheme
 *
 * Writes the scheme name followed by alpha, the base, the number of
 * instances and the Talbot quadrature settings, one item per line.
 *
 * @param os  Stream to write to.
 */
void FractionalInTimeIntegrationScheme::v_print(std::ostream &os) const
{
    os << "Time Integration Scheme: " << GetFullName() << std::endl;

    os << "       Alpha " << m_alpha << std::endl;
    os << "       Base " << m_base << std::endl;
    os << "       Number of instances " << m_Lmax << std::endl;
    os << "       Number of quadature points " << m_nQuadPts << std::endl;

    os << "             Talbot Parameter: sigma " << m_sigma << std::endl;
    os << "             Talbot Parameter: mu0 " << m_mu0 << std::endl;
    os << "             Talbot Parameter: nu " << m_nu << std::endl;
}

/**
 * @brief Worker method to print the full details on the integration
 * scheme
 *
 * Writes the scheme name followed by alpha, the base, the number of
 * instances and the Talbot quadrature settings, one item per line.
 *
 * @param os  Stream to write to.
 */
void FractionalInTimeIntegrationScheme::v_printFull(std::ostream &os) const
{
    // Header line
    os << "Time Integration Scheme: " << GetFullName() << std::endl;

    // Algorithm settings
    os << "       Alpha " << m_alpha << std::endl
       << "       Base " << m_base << std::endl
       << "       Number of instances " << m_Lmax << std::endl
       << "       Number of quadature points " << m_nQuadPts << std::endl;

    // Talbot contour settings
    os << "             Talbot Parameter: sigma " << m_sigma << std::endl
       << "             Talbot Parameter: mu0 " << m_mu0 << std::endl
       << "             Talbot Parameter: nu " << m_nu << std::endl;
}

// Friend Operators

/**
 * @brief Streams a scheme by calling its print method.
 *
 * @param os   Stream to write to.
 * @param rhs  Scheme to print.
 * @return The stream, to allow chaining.
 */
std::ostream &operator<<(std::ostream &os,
                         const FractionalInTimeIntegrationScheme &rhs)
{
    rhs.print(os);
    return os;
}

std::ostream &operator<<(std::ostream &os,
                         const FractionalInTimeIntegrationSchemeSharedPtr &rhs)
{
    os << *rhs.get();

    return os;
}

} // end namespace LibUtilities
} // namespace Nektar
