/*
 * RiskScape™ Copyright New Zealand Institute for Earth Science Limited
 * (Earth Sciences New Zealand) is distributed for research purposes only
 * under the terms of AGPLv3.
 *
 * RiskScape™ Copyright 2025 New Zealand Institute for Earth Science
 * Limited (Earth Sciences New Zealand). All rights reserved. Source code
 * available under the AGPLv3.
 * 
 * This program is free software: you can redistribute it and/or modify it under
 *  the terms of the GNU Affero General Public License as published by the Free
 *  Software Foundation, either version 3 of the License, or (at your option) any
 *  later version.
 * 
 * This program is distributed for RESEARCH PURPOSES ONLY, in the hope that it will
 * be useful for research and education initiatives.
 * 
 * If you are not a researcher, or you are a researcher who wishes to use this
 * program on terms other than AGPLv3 (including those who wish to restrict the
 * distribution of any source code created using this program), please contact:
 * https://riskscape.org.nz
 * 
 * This program is distributed WITHOUT ANY WARRANTY; without even the implied
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Affero General Public License for more details.  You should have received a copy
 * of the GNU Affero General Public License along with this program.  If not, see
 * <http://www.gnu.org/licenses/>.
 * 
 * By way of summary only, under the AGPLv3:
 *     • Permissions of this strongest copyleft license are conditioned
 *       on making available complete source code of licensed works and
 *       modifications, which include larger works using a licensed work,
 *       under the same license.
 *     • Copyright and license notices must be preserved.
 *     • Contributors provide an express grant of patent rights.
 *     • When a modified version is used to provide a service over a
 *       network, the complete source code of the modified version must be made
 *       available.
 */
package nz.org.riskscape.engine.cli.tests;

import static org.hamcrest.Matchers.*;
import static org.junit.Assert.*;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;

import nz.org.riskscape.engine.RowMatchers;
import nz.org.riskscape.engine.function.ArgumentList;
import nz.org.riskscape.engine.function.BaseRealizableFunction;
import nz.org.riskscape.engine.function.IdentifiedFunction.Category;
import nz.org.riskscape.engine.function.RiskscapeFunction;
import nz.org.riskscape.engine.rl.RealizationContext;
import nz.org.riskscape.engine.test.EngineTestSettings;
import nz.org.riskscape.engine.types.RSList;
import nz.org.riskscape.engine.types.Type;
import nz.org.riskscape.engine.types.Types;
import nz.org.riskscape.problem.ResultOrProblems;
import nz.org.riskscape.rl.ast.FunctionCall;

/**
 * Covers off simple cases of using the core pipeline language and execution behaviour
 *
 * * selecting and filtering
 * * groupings
 * * joins
 * * un-nesting
 * * sorting
 * * save step naming behaviour
 */
@EngineTestSettings("pipeline-threads=6")
public class PipelineBasicsTest extends BaseModelRunCommandTest {

  /**
   * Test-only function that returns the elements of its single list argument sorted into their natural
   * order. It is registered with the project by {@code addSortFunction()} so pipelines can call it.
   */
  public static class Sort extends BaseRealizableFunction {

    public Sort() {
      super(ArgumentList.create("list", RSList.LIST_ANYTHING), Types.ANYTHING);
    }

    /**
     * Realizes the function against the argument types of a particular call.
     *
     * @param context not used by this implementation
     * @param functionCall not used by this implementation
     * @param argumentTypes types of the arguments in the call; the first one (the list) is also used as the
     *        return type of the realized function
     * @return a function that yields a sorted copy of the list it is given
     */
    @SuppressWarnings({"unchecked", "rawtypes"})
    @Override
    public ResultOrProblems<RiskscapeFunction> realize(RealizationContext context, FunctionCall functionCall,
        List<Type> argumentTypes) {
      return ResultOrProblems.of(RiskscapeFunction.create(this, argumentTypes, argumentTypes.get(0), (args) -> {
        // Sort a copy rather than the argument itself: the incoming list belongs to the caller, so sorting
        // it in place would change the caller's value and would fail outright on an unmodifiable list.
        List sorted = new ArrayList((List) args.get(0));
        Collections.sort(sorted);
        return sorted;
      }));
    }

  }

  // text of the pipeline the current test wants to run; evaluate() hands it to the eval command
  String pipeline;
  // rows most recently read back from an output CSV file by the current test
  List<List<String>> rows;

  /**
   * Adds the test-only {@link Sort} function to the project's function set as {@code sort} before each
   * test, so that pipelines in this class can call it.
   */
  @Before
  public void addSortFunction() {
    var functions = project.getFunctionSet();
    functions.add(new Sort().builtin("sort", Category.MISC));
  }

  /**
   * Runs one straight chain of input, select, filter and save steps, then checks that {@code bears.csv}
   * contains exactly the six bear rows (in any order).
   */
  @Test
  public void canExecuteASimpleLinearPipeline() throws Exception {
    pipeline = String.join("",
        "input('animals') ",
        "-> ",
        "select({\"Species Common Name\" as name, \"Overall MLE\" as life_expectancy})",
        "->",
        "filter(starts_with(name, 'Bear'))",
        "->",
        "save(name: 'bears')");

    evaluate();

    rows = openCsv("bears.csv", "name", "life_expectancy");

    // only the set of rows is checked, not the order they were written in
    assertThat(rows, containsInAnyOrder(
        contains("Bear, Andean Spectacled", "26.1"),
        contains("Bear, Asiatic Black", "25"),
        contains("Bear, Brown", "24.3"),
        contains("Bear, Polar", "22.7"),
        contains("Bear, Sloth", "17.8"),
        contains("Bear, Sun", "22.6")));
  }

  /**
   * Forks the {@code all_species} step into several chains and checks the file each chain produces,
   * including the names given to outputs when {@code save()} has no name or there is no save step at all.
   */
  @Test
  public void canProduceManyOutputsFromASingleInput() throws Exception {
    String allSpeciesChain = String.join("",
        "input('animals') ",
        "-> ",
        "select({",
        "   \"Species Common Name\" as name, ",
        "   TaxonClass as class ",
        "}) as all_species",
        " ->",
        "save() "); // no name, should come from previous step

    String mammalsChain = String.join("",
        "all_species ",
        "->",
        "filter(class = 'Mammalia') as mammals ",
        "-> ",
        "select(name) ", // no braces required for a single attribute
        "->",
        "save(name: 'mammals') ");

    String reptilesChain = String.join("",
        "all_species ",
        "->",
        "filter(class = 'Reptilia') ",
        "-> ",
        "select(name) as reptiles "); // nothing comes after this, will be saved using step name

    String monkeysChain = String.join("",
        "mammals ",
        "-> ",
        "filter(starts_with(name, 'Monkey'))"); // no alias, name will be filter_1 or something naff like that

    pipeline = allSpeciesChain + mammalsChain + reptilesChain + monkeysChain;

    evaluate();

    rows = openCsv("all_species.csv", "name", "class");
    assertThat(rows, hasSize(330));

    rows = openCsv("mammals.csv", "name");
    assertThat(rows, hasSize(175));

    rows = openCsv("reptiles.csv", "name");
    assertThat(rows, hasSize(21));

    // NOTE(review): the replacement character in the "Schmidt" row looks like an encoding artefact. It
    // presumably mirrors how the animals data gets decoded - confirm before "fixing" the literal.
    rows = openCsv("filter_1.csv", "name", "class");
    assertThat(rows, containsInAnyOrder(
        contains("Monkey, Bolivian Gray Titi", "Mammalia"),
        contains("Monkey, Central American Spider (Geoffroy's)", "Mammalia"),
        contains("Monkey, Common Squirrel", "Mammalia"),
        contains("Monkey, DeBrazza's", "Mammalia"),
        contains("Monkey, Patas", "Mammalia"),
        contains("Monkey, Robust Black Spider", "Mammalia"),
        contains("Monkey, Schmidt�s Red-tailed", "Mammalia"),
        contains("Monkey, Southern Black Howler", "Mammalia")));
  }

  /**
   * Computes the mean life expectancy per taxon class and across all animals from the same
   * {@code le_float} step, then checks both output files.
   */
  @Test
  public void canAggregateData() throws Exception {
    // blank life expectancies are swapped for a null text value before the float conversion
    String perClassChain = String.join("",
        "input('animals') ",
        "-> ",
        "select({",
        "   TaxonClass as class,",
        "   \"Overall MLE\" as life_expectancy ",
        "}) as all_species",
        "->",
        "select({class, if_then_else(life_expectancy = '', null_of('text'), life_expectancy) as life_expectancy}) ",
        "-> ",
        "select({class, float(life_expectancy) as life_expectancy}) as le_float",
        "-> ",
        "group(by: class, select: {class, mean(life_expectancy)}) as average_class ");

    String overallChain = String.join("",
        "le_float ",
        "-> ",
        "group(select: {mean(life_expectancy) as avg_le}) as average_all ");

    pipeline = perClassChain + overallChain;

    evaluate();

    final double tolerance = 0.001;

    rows = openCsv("average_class.csv", "class", "mean_life_expectancy");
    assertThat(rows, containsInAnyOrder(
        contains(equalTo("Mammalia"), RowMatchers.numberWithDelta(14.711, tolerance)),
        // this class is expected to have an empty mean
        contains(equalTo("Arachnida"), equalTo("")),
        contains(equalTo("Reptilia"), RowMatchers.numberWithDelta(11.628, tolerance)),
        contains(equalTo("Amphibia"), RowMatchers.numberWithDelta(5.366, tolerance)),
        contains(equalTo("Chondrichthyes"), RowMatchers.numberWithDelta(16.4, tolerance)),
        contains(equalTo("Aves"), RowMatchers.numberWithDelta(12.202, tolerance))));

    rows = openCsv("average_all.csv", "avg_le");
    assertThat(rows, contains(contains(RowMatchers.numberWithDelta(13.404, tolerance))));
  }

  /**
   * Forks {@code le_float} into a per-class average and joins that average back onto the original rows.
   * Currently ignored, see the {@code @Ignore} reason.
   */
  @Test
  @Ignore("there's a bug in cycle detection")
  public void canForkAndThenJoin() throws Exception {
    // compute stddev manually by forking and joining - can cause deadlocks on larger datasets, but works OK for small
    // ones
    String averageThenJoinChain = String.join("",
        "input('animals') ",
        "-> ",
        "select({",
        "   TaxonClass as class,",
        "   \"Overall MLE\" as life_expectancy ",
        "}) as all_species",
        "->",
        "select({class, if_then_else(life_expectancy = '', null_of('text'), life_expectancy) as life_expectancy}) ",
        "-> ",
        "select({class, float(life_expectancy) as life_expectancy}) as le_float",
        "-> ",
        "group(by: class, select: {{class, mean(life_expectancy) as life_expectancy} as average}) ",
        "-> ",
        "join(on: average.class = class).rhs",
        "->",
        "select({class, life_expectancy - average.life_expectancy as sigma}) ");

    String forkIntoJoinChain = "le_float -> select(*) -> join.lhs";

    pipeline = averageThenJoinChain + forkIntoJoinChain;

    evaluate();

    // NOTE(review): no step in this pipeline is aliased average_class, and nothing is asserted on the rows
    // read here - revisit the file name and add assertions when this test is un-ignored.
    rows = openCsv("average_class.csv", "class", "sigma");
  }

  /**
   * Un-nests {@code range(0, 40)} into one row per value, left-outer joins the animals onto those rows by
   * life expectancy, and groups by value so that every value gets a row in {@code sort.csv}.
   */
  @Test
  public void unnestAndThenJoinForSeriesGeneration() throws Exception {
    String bucketsChain = String.join("",
        "input(value: range(0, 40)) ",
        "-> ",
        "unnest(value) ",
        "-> ",
        "join.lhs ");

    String animalsChain = String.join("",
        "input('animals') ",
        "-> ",
        "select({",
        "   \"Species Common Name\" as name, ",
        "   \"Overall MLE\" as life_expectancy ",
        "}) as all_species",
        "->",
        "select({name, if_then_else(life_expectancy = '', null_of('text'), life_expectancy) as life_expectancy}) ",
        "-> ",
        "select({name, float(life_expectancy) as life_expectancy}) ",
        "->",
        "join(on: life_expectancy >= value && life_expectancy < (value + 1), join-type: 'left_outer').rhs ",
        "-> ",
        "group(",
        "  by: value, ",
        "  select: {value as age_bucket, count(life_expectancy) as count, sort(to_list(name)) as species}",
        ")",
        "->",
        "sort(by: age_bucket)");

    pipeline = bucketsChain + animalsChain;

    evaluate();

    rows = openCsv("sort.csv", "age_bucket", "count", "species");

    // we did a left join, so there should be a row for each value in the range
    assertThat(rows, hasSize(40));

    // just check a few rows
    String speciesInBucket19 = "[Anteater, Giant, Bat, Straw-Colored Fruit, Lemur, Red Ruffed, Tapir, Baird's "
        + "(Central American)]";
    assertThat(rows, hasItems(
        contains("0", "0", "[null]"),
        contains("2", "2", "[Gecko, Mossy Leaf-tailed, Gecko, Satanic Leaf-tailed]"),
        contains("19", "4", speciesInBucket19)));
  }

  /**
   * Left-outer joins the filtered animals against the un-nested {@code range(0, 78)} values, which are fed
   * to the join step's {@code rhs} input, and checks that unmatched animal rows survive the join.
   */
  @Test
  public void canDoBasicLeftOuterJoin() throws Exception {
    // this sanity-checks the join's named inputs are processed in the correct order
    pipeline = """
        input(value: range(0, 78))
        ->
        unnest(value)
        ->
        join_sample_size.rhs

        input('animals')
        ->
        select({ *, "Overall Sample Size" as sample_size })
        ->
        select({ *, if_then_else(sample_size = '', maxint(), () -> int(sample_size)) as sample_size })
        ->
        filter(sample_size < 80)
        ->
        join(on: sample_size = value, join-type: 'left_outer') as join_sample_size
        ->
        # NB: this will blow up if the LHS ends up null
        select({ "Species Common Name" as name, if_null(value, 0 - sample_size) as value })
        ->
        sort(value) as cool_animals_bro
        """;

    evaluate();

    // the output file is named after the alias of the final step
    rows = openCsv("cool_animals_bro.csv", "name", "value");

    // 2 rows matched, plus another 2 unmatched rows on LHS
    // (if join LHS/RHS got flipped, we'd end up with 78 rows)
    assertThat(rows, hasSize(4));

    // NB: negative values are the unmatched LHS
    // (the pipeline's if_null() substitutes 0 - sample_size when the joined value is null)
    assertThat(rows, hasItems(
        contains("Duiker, Black", "-79"),
        contains("Buttonquail, Madagascar", "-78"),
        contains("Quoll, Tiger", "50"),
        contains("Seal, Grey", "77")
    ));
  }

  /**
   * Runs the eval command against the current {@link #pipeline} text, failing the test if the command
   * returns anything other than null or if any sink problems were collected.
   */
  private void evaluate() {
    evalCommand.pipelineFile = pipeline;
    // echo the pipeline to stderr before it is run
    System.err.println(pipeline);

    Object commandResult = evalCommand.run();
    assertNull(commandResult);

    assertThat(collectedSinkProblems, empty());
  }

}
