/*
 * RiskScape™ Copyright New Zealand Institute for Earth Science Limited
 * (Earth Sciences New Zealand) is distributed for research purposes only
 * under the terms of AGPLv3.
 *
 * RiskScape™ Copyright 2025 New Zealand Institute for Earth Science
 * Limited (Earth Sciences New Zealand). All rights reserved. Source code
 * available under the AGPLv3.
 * 
 * This program is free software: you can redistribute it and/or modify it under
 *  the terms of the GNU Affero General Public License as published by the Free
 *  Software Foundation, either version 3 of the License, or (at your option) any
 *  later version.
 * 
 * This program is distributed for RESEARCH PURPOSES ONLY, in the hope that it will
 * be useful for research and education initiatives.
 * 
 * If you are not a researcher, or you are a researcher who wishes to use this
 * program on terms other than AGPLv3 (including those who wish to restrict the
 * distribution of any source code created using this program), please contact:
 * https://riskscape.org.nz
 * 
 * This program is distributed WITHOUT ANY WARRANTY; without even the implied
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Affero General Public License for more details.  You should have received a copy
 * of the GNU Affero General Public License along with this program.  If not, see
 * <http://www.gnu.org/licenses/>.
 * 
 * By way of summary only, under the AGPLv3:
 *     • Permissions of this strongest copyleft license are conditioned
 *       on making available complete source code of licensed works and
 *       modifications, which include larger works using a licensed work,
 *       under the same license.
 *     • Copyright and license notices must be preserved.
 *     • Contributors provide an express grant of patent rights.
 *     • When a modified version is used to provide a service over a
 *       network, the complete source code of the modified version must be made
 *       available.
 */
package nz.org.riskscape.engine.defaults.function;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.commons.math3.stat.descriptive.rank.Percentile;

import nz.org.riskscape.engine.ArgsProblems;
import nz.org.riskscape.engine.bind.ParameterField;
import nz.org.riskscape.engine.function.ArgumentList;
import nz.org.riskscape.engine.function.BaseRealizableFunction;
import nz.org.riskscape.engine.function.FunctionArgument;
import nz.org.riskscape.engine.function.RiskscapeFunction;
import nz.org.riskscape.engine.rl.RealizationContext;
import nz.org.riskscape.engine.types.Integer;
import nz.org.riskscape.engine.types.RSList;
import nz.org.riskscape.engine.types.Type;
import nz.org.riskscape.engine.types.Types;
import nz.org.riskscape.engine.util.FunctionCallOptions;
import nz.org.riskscape.problem.ProblemException;
import nz.org.riskscape.problem.ResultOrProblems;
import nz.org.riskscape.rl.ast.FunctionCall;

/**
 * Calculates loss metrics such as the Probable Maximum Loss (PML), Occurrence
 * Exceedance Probability (OEP), and Aggregate Exceedance Probability (AEP).
 * These loss metrics are all calculated in a similar way: return the nth
 * largest loss out of the list. E.g. a 500-year RP for a 10,000 year
 * investigation period will be the 20th largest loss (i.e. it's a 20-in-10000-year
 * event or 10000 / 20 = 500 RP). The difference between PML, AEP, and OEP is
 * how the list of losses gets aggregated. PML has no aggregation by year, AEP
 * is the sum of losses per year, and OEP is the max loss per year.
 *
 * <p>The function takes a list of losses, a list of return periods and an
 * investigation time, plus an optional {@link Options} argument that selects
 * the {@link Mode} used to turn a return period into a loss.
 */
public class LossesByPeriod extends BaseRealizableFunction {

  /**
   * Selects how the loss for a requested return period is derived from the losses.
   */
  public enum Mode {
    /**
     * Return the loss of the event whose return period is closest to the
     * requested one. No interpolation is done.
     */
    RANKED_CLOSEST,
    /**
     * Convert the return period to a percentile and sample the losses at that
     * percentile, which gives an interpolated loss.
     */
    PERCENTILE
  }

  /**
   * Optional function-call options. The mode defaults to {@link Mode#PERCENTILE}.
   */
  public static class Options {
    @ParameterField
    Mode mode = Mode.PERCENTILE;
  }

  @FunctionalInterface
  interface LossFunction {

    /**
     * return the loss for the given return period
     */
    Number sample(double returnPeriod);
  }

  @FunctionalInterface
  interface LossFunctionBuilder {

    /**
     * Build a LossFunction for the given losses and investigation time.
     */
    LossFunction build(List<Number> losses, long investigationTime);
  }

  /**
   * Declares the arguments (losses, return-periods, investigation-time and the
   * options) and the default return type of a list of floating values.
   */
  public LossesByPeriod() {
    super(ArgumentList.fromArray(
        new FunctionArgument("losses",   RSList.create(Types.FLOATING)),
        new FunctionArgument("return-periods",  RSList.create(Types.FLOATING)),
        new FunctionArgument("investigation-time", Types.INTEGER),
        FunctionCallOptions.options(Options.class)
      ),
      RSList.create(Types.FLOATING));
  }

  /**
   * Checks the given argument types and builds the function that does the work.
   * The realized function returns one loss per requested return period, in the
   * same order as the return periods were given.
   *
   * <p>The realized function throws an {@link IllegalArgumentException} when a
   * requested return period is greater than the investigation time.
   */
  @Override
  public ResultOrProblems<RiskscapeFunction> realize(RealizationContext context, FunctionCall fc,
      List<Type> givenTypes) {

    return ProblemException.catching(() -> {
      // sanity-check args - the first three are required, the options are optional
      if (givenTypes.size() > arguments.size() || givenTypes.size() < 3) {
        throw new ProblemException(ArgsProblems.get().wrongNumberRange(3, arguments.size(),
            givenTypes.size()));
      }
      RSList lossesType = arguments.getRequiredAs(givenTypes, 0, RSList.class).getOrThrow();
      RSList periodsType = arguments.getRequiredAs(givenTypes, 1, RSList.class).getOrThrow();

      // both lists can contain any numeric type, not just floating
      if (!lossesType.getContainedType().isNumeric()) {
        throw new ProblemException(ArgsProblems.mismatch(arguments.get(0), givenTypes.get(0)));
      }
      if (!periodsType.getContainedType().isNumeric()) {
        throw new ProblemException(ArgsProblems.mismatch(arguments.get(1), givenTypes.get(1)));
      }
      arguments.getRequiredAs(givenTypes, 2, Integer.class).getOrThrow();

      Options options = FunctionCallOptions.bindOptionsOrThrow(Options.class, context, arguments, fc);

      LossFunctionBuilder builder;
      RSList theReturnType;
      if (options.mode == Mode.RANKED_CLOSEST) {
        // ranked closest will return the losses values, whatever type they are
        theReturnType = lossesType;
        builder = closestRankedBuilder();
      } else {
        // interpolation will always result in floating losses
        theReturnType = RSList.create(Types.FLOATING);
        builder = percentileBuilder();
      }

      return RiskscapeFunction.create(this, givenTypes, theReturnType, (args) -> {
        // copy the losses, as the loss function builder is allowed to re-order them
        @SuppressWarnings("unchecked")
        List<Number> losses = new ArrayList<>((List<Number>) args.get(0));
        @SuppressWarnings("unchecked")
        List<Number> requestedPeriods = (List<Number>) args.get(1);
        long investigationTime = (Long) args.get(2);

        LossFunction lossFunction = builder.build(losses, investigationTime);

        List<Object> results = new ArrayList<>(requestedPeriods.size());

        // now find the event losses that correspond to the requested RPs
        for (Number rp : requestedPeriods) {
          // compare as a double so that a fractional RP (e.g. 10000.5) is not
          // truncated before being checked against the investigation time
          if (rp.doubleValue() > investigationTime) {
            // TODO make this a proper i18n error or return NaN in this case?
            throw new IllegalArgumentException(
                "Return period " + rp + " exceeds investigation-time " + investigationTime);
          }

          Number loss = lossFunction.sample(rp.doubleValue());

          // we return zero if the requested loss is not present, so make sure
          // everything is in the same/expected type
          results.add(theReturnType.getContainedType().coerce(loss));
        }
        return results;
      });
    });
  }

  /**
   * Returns a builder for {@link Mode#RANKED_CLOSEST} loss functions. Note that
   * the built function sorts the given losses list in place (largest first).
   */
  LossFunctionBuilder closestRankedBuilder() {
    return (losses, investigationTime) -> {
      // make sure the largest losses/biggest events come first
      losses.sort(Collections.reverseOrder());

      // calculate the RP that corresponds to each event loss we have
      double[] eventRPs = calculateReturnPeriodsForEvents(investigationTime, losses.size());

      // and return the loss function
      return (rp) -> findLossClosestToReturnPeriod(rp, eventRPs, losses);
    };
  }

  /**
   * For a given RP, we find the event with the corresponding RP and return its
   * loss. E.g. if you want the 2,500-year RP for investigation-time=10,000, this
   * will return the 4th largest loss (i.e. a 4 in 10,000 or 10,000 / 4 = 2,500
   * event). Note that we find the closest event, rather than doing interpolation.
   * E.g. if you ask for RP=2475, you will get the RP=2500 event.
   *
   * @param rp      the requested return period
   * @param periods the return period of each event, in the same order as losses
   * @param losses  the event losses
   * @return the loss of the event closest to rp (the earliest entry wins a tie),
   *         or zero if there are no events or rp is smaller than the last period
   */
  // package scope for testing
  Number findLossClosestToReturnPeriod(double rp, double[] periods, List<? extends Number> losses) {
    // if we have no events at all, or not enough events for the requested RP, return zero
    if (periods.length == 0 || rp < periods[periods.length - 1]) {
      return 0;
    }

    // find the return period that's closest to what was requested
    int index = 0;
    double closest = Double.MAX_VALUE;
    for (int i = 0; i < periods.length; i++) {
      double diff = Math.abs(rp - periods[i]);
      // strictly less than, so the first of two equally close events is kept
      if (diff < closest) {
        closest = diff;
        index = i;
      }
    }
    // return the loss that corresponds to that RP/index
    return losses.get(index);
  }

  /**
   * Returns a builder for {@link Mode#PERCENTILE} loss functions. The
   * investigation time is not used by the built function.
   */
  LossFunctionBuilder percentileBuilder() {
    return (losses, investigationTime) -> {
      // we build the lossPercentile up front for the given losses
      Percentile lossPercentile = buildPercentile(losses);

      // and return the loss function
      return (rp) -> percentileLossForReturnPeriod(rp, lossPercentile);
    };
  }

  /**
   * Build a percentile that has its data set with the losses.
   *
   * @param losses the losses to sample from, each is converted to a double
   * @return a percentile using the R_7 estimation type that is ready to evaluate
   */
  // package scope for testing
  Percentile buildPercentile(List<Number> losses) {
    Percentile lossPercentile = new Percentile();
    // The method R_7 implements Microsoft Excel style computation
    lossPercentile = lossPercentile.withEstimationType(Percentile.EstimationType.R_7);

    double[] l = losses.stream()
        .mapToDouble(Number::doubleValue)
        .toArray();

    lossPercentile.setData(l);
    return lossPercentile;
  }

  /**
   * Converts the given rp to a percentile that is then used to extract a loss
   * from lossPercentile.
   *
   * @param rp             the requested return period
   * @param lossPercentile a percentile that already has the losses set as its data
   * @return the interpolated loss, or zero when rp converts to a percentile of
   *         zero or less (e.g. rp is between zero and one inclusive)
   */
  // package scope for testing
  Number percentileLossForReturnPeriod(double rp, Percentile lossPercentile) {
    // convert return period to a percentile.
    // NB we are going to come back to this to confirm it's valid in GL#1153
    double rpAsPercentile = (1D - (1D / rp)) * 100;

    if (rpAsPercentile <= 0D) {
      // don't evaluate the percentile with zero (or a negative value, which is what
      // an rp below one converts to), that upsets it. just return zero.
      return 0D;
    }
    // now sample our percentile distribution to get an interpolated loss
    return lossPercentile.evaluate(rpAsPercentile);
  }

  /**
   * Calculates a RP that corresponds to each event loss that we have. E.g. for a
   * 10,000 year period, the largest event is a 10,000-year event, the second
   * biggest is a 2 in 10,000-year event (5,000-year RP), the third is 3 in 10,000
   * (3,333-year RP), and so on.
   *
   * @param investigationTime the period that the events cover
   * @param numEvents         the number of events to calculate a return period for
   * @return the return periods, largest first; empty if numEvents is zero
   */
  // package scope for testing
  double[] calculateReturnPeriodsForEvents(long investigationTime, int numEvents) {
    double[] periods = new double[numEvents];
    for (int n = 0; n < numEvents; n++) {
      // n should be one-based when calculating the return period
      // E.g. the 2nd largest event in a 10,000 period is 10,000 / 2 == 5,000
      periods[n] = investigationTime / (double) (n + 1);
    }
    return periods;
  }
}
