/*
 * RiskScape™ Copyright New Zealand Institute for Earth Science Limited
 * (Earth Sciences New Zealand) is distributed for research purposes only
 * under the terms of AGPLv3.
 *
 * RiskScape™ Copyright 2025 New Zealand Institute for Earth Science
 * Limited (Earth Sciences New Zealand). All rights reserved. Source code
 * available under the AGPLv3.
 * 
 * This program is free software: you can redistribute it and/or modify it under
 *  the terms of the GNU Affero General Public License as published by the Free
 *  Software Foundation, either version 3 of the License, or (at your option) any
 *  later version.
 * 
 * This program is distributed for RESEARCH PURPOSES ONLY, in the hope that it will
 * be useful for research and education initiatives.
 * 
 * If you are not a researcher, or you are a researcher who wishes to use this
 * program on terms other than AGPLv3 (including those who wish to restrict the
 * distribution of any source code created using this program), please contact:
 * https://riskscape.org.nz
 * 
 * This program is distributed WITHOUT ANY WARRANTY; without even the implied
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Affero General Public License for more details.  You should have received a copy
 * of the GNU Affero General Public License along with this program.  If not, see
 * <http://www.gnu.org/licenses/>.
 * 
 * By way of summary only, under the AGPLv3:
 *     • Permissions of this strongest copyleft license are conditioned
 *       on making available complete source code of licensed works and
 *       modifications, which include larger works using a licensed work,
 *       under the same license.
 *     • Copyright and license notices must be preserved.
 *     • Contributors provide an express grant of patent rights.
 *     • When a modified version is used to provide a service over a
 *       network, the complete source code of the modified version must be made
 *       available.
 */
package nz.org.riskscape.engine.steps;

import static nz.org.riskscape.engine.Matchers.*;
import static nz.org.riskscape.engine.problem.ProblemMatchers.isProblem;
import static org.hamcrest.Matchers.*;
import static org.junit.Assert.*;

import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;

import org.hamcrest.Description;
import org.hamcrest.Matcher;
import org.hamcrest.TypeSafeDiagnosingMatcher;
import org.junit.Before;
import org.junit.Test;

import nz.org.riskscape.engine.ProjectTest;
import nz.org.riskscape.engine.Tuple;
import nz.org.riskscape.engine.function.IdentifiedFunction;
import nz.org.riskscape.engine.function.IdentifiedFunction.Category;
import nz.org.riskscape.engine.function.StringFunctions;
import nz.org.riskscape.engine.function.UnaryFunction;
import nz.org.riskscape.engine.pipeline.Collector;
import nz.org.riskscape.engine.pipeline.RealizationInputImpl;
import nz.org.riskscape.engine.pipeline.Realized;
import nz.org.riskscape.engine.pipeline.RealizedPipeline;
import nz.org.riskscape.engine.pipeline.RealizedStep;
import nz.org.riskscape.engine.problem.GeneralProblems;
import nz.org.riskscape.engine.resource.Resource;
import nz.org.riskscape.engine.rl.DefaultOperators;
import nz.org.riskscape.engine.rl.LanguageFunctions;
import nz.org.riskscape.engine.rl.LogicFunctions;
import nz.org.riskscape.engine.rl.MathsFunctions;
import nz.org.riskscape.engine.steps.GroupByStep.Params;
import nz.org.riskscape.engine.types.Struct;
import nz.org.riskscape.engine.types.TypeProblems;
import nz.org.riskscape.engine.types.Types;
import nz.org.riskscape.pipeline.ast.PipelineDeclaration;
import nz.org.riskscape.pipeline.ast.StepDefinition;
import nz.org.riskscape.problem.ResultOrProblems;
import nz.org.riskscape.rl.ast.ExpressionProblems;
import nz.org.riskscape.rl.ast.PropertyAccess;

@SuppressWarnings("unchecked")
public class GroupByStepTest extends ProjectTest {

  // Nested person/address struct type and the sample tuples of that type; both are built in setup().
  Struct personAndAddress;
  List<Tuple> peopleAndAddresses;

  // The step under test; a new instance is created in setup().
  private GroupByStep step;
  // Type of the tuples being fed to the step; realize() uses it to build the step's input.
  private Struct inputType;
  // The tuples that process() accumulates into the collector.
  private List<Tuple> inputTuples;
  // The tuples produced by the most recent process() call.
  private List<Tuple> outputTuples;

  // Parameters handed to the step; tests set select/by on this before calling realize().
  Params input;
  // Outcome of the most recent realize() call; the failure tests assert against it.
  ResultOrProblems<? extends Realized> result;

  // Test-only upper-casing function; setup() adds it to the project's function set.
  IdentifiedFunction upper = new UnaryFunction<>(Types.TEXT, Types.TEXT, str -> ((String) str).toUpperCase())
    .identified("upper", "Returns an upper case version of the given string",
        Resource.UNKNOWN_URI, Category.STRINGS);

  /**
   * Creates the step under test, registers the functions and operators that the test expressions rely
   * on, and builds the two input fixtures: the default foo/type rows and the person/address rows.
   */
  @Before
  public void setup() {
    step = new GroupByStep(engine);

    // function registration - order is left exactly as it was
    project.getFunctionSet().addAll(MathsFunctions.FUNCTIONS);
    project.getFunctionSet().addAll(LanguageFunctions.FUNCTIONS);
    project.getFunctionSet().addAll(LogicFunctions.LOGIC_FUNCTIONS);
    project.getFunctionSet().addAll(StringFunctions.FUNCTIONS);
    project.getFunctionSet().add(upper);
    project.getFunctionSet().insertFirst(DefaultOperators.INSTANCE);

    // default fixture: two 'cat' rows and two 'dog' rows
    inputType = Struct.of("foo", Types.INTEGER, "type", Types.TEXT);
    inputTuples = tuples(inputType, "[1, 'cat']", "[2, 'dog']", "[3, 'cat']", "[5, 'dog']");

    // by default there is no group-by expression
    input = new Params();
    input.by = Optional.empty();

    // nested fixture: a person and an address per row
    String personAndAddressDefinition = "struct(person: struct(name: text, age: integer, gender: text), "
        + "address: struct(street: text, suburb: text))";
    Struct builtType = project.getTypeBuilder().build(personAndAddressDefinition).find(Struct.class).get();
    personAndAddress = norm(builtType);

    norm((Struct) personAndAddress.getEntry("person").getType());
    norm((Struct) personAndAddress.getEntry("address").getType());

    peopleAndAddresses = tuples(
        personAndAddress,
        "[{name: 'bob', age: 30, gender: 'male'}, {street: 'queen street', suburb: 'auckland central'}]",
        "[{name: 'linda', age: 79, gender: 'female'}, {street: 'willis street', suburb: 'te aro'}]",
        "[{name: 'petey', age: 9, gender: 'male'}, {street: 'devon street', suburb: 'te aro'}]"
    );
  }

  /**
   * With no group-by expression (the default from setup) a single tuple holding the mean of every
   * {@code foo} value is expected.
   */
  @Test
  public void canGroupByEverything() {
    input.select = expressionParser.parse("mean(foo)");
    Collector<?> grouped = realize();

    // (1 + 2 + 3 + 5) / 4
    Tuple expected = Tuple.ofValues(Struct.of("mean_foo", Types.FLOATING), 2.75D);

    assertThat(process(grouped), contains(equalTo(expected)));
  }

  /**
   * Groups by {@code type} and selects only {@code sum(foo)}; one sum per type is expected.
   */
  @Test
  public void canGroupByASinglePropertyAndSelectASingleThing() throws Exception {
    input.by = Optional.of(expressionParser.parse("type"));
    input.select = expressionParser.parse("sum(foo)");

    Collector<?> grouped = realize();
    Struct outputType = norm(Struct.of("sum_foo", Types.INTEGER));

    // cat: 1 + 3, dog: 2 + 5
    assertThat(
        process(grouped),
        isTuples(outputType, Arrays.asList(4L), Arrays.asList(7L))
    );
  }

  /**
   * Groups by {@code type} and selects both the grouping attribute and an aliased sum.
   */
  @Test
  public void canGroupByASinglePropertyAndSelectTheGroup() throws Exception {
    input.by = Optional.of(expressionParser.parse("type"));
    input.select = expressionParser.parse("{type, sum(foo) as sum}");

    Collector<?> grouped = realize();
    Struct outputType = norm(Struct.of("type", Types.TEXT, "sum", Types.INTEGER));

    assertThat(
        process(grouped),
        isTuples(outputType, Arrays.asList("cat", 4L), Arrays.asList("dog", 7L))
    );
  }

  /**
   * Groups by {@code type} and selects nothing but the grouping attribute itself.
   */
  @Test
  public void canGroupByASinglePropertyAndSelectJustTheGroup() throws Exception {
    input.by = Optional.of(expressionParser.parse("type"));
    input.select = expressionParser.parse("{type}");

    Collector<?> grouped = realize();
    Struct outputType = norm(Struct.of("type", Types.TEXT));

    assertThat(
        process(grouped),
        isTuples(outputType, Arrays.asList("cat"), Arrays.asList("dog"))
    );
  }

  /**
   * Groups by {@code type} and selects the grouping attribute under the alias {@code lol}.
   */
  @Test
  public void canGroupByASinglePropertyAndSelectJustTheGroupWithANewName() throws Exception {
    input.by = Optional.of(expressionParser.parse("type"));
    input.select = expressionParser.parse("{type as lol}");

    Collector<?> grouped = realize();
    Struct outputType = norm(Struct.of("lol", Types.TEXT));

    assertThat(
        process(grouped),
        isTuples(outputType, Arrays.asList("cat"), Arrays.asList("dog"))
    );
  }

  /**
   * Groups the person/address fixture by an aliased nested property ({@code address.suburb as suburb})
   * and selects that alias along with the mean age.
   */
  @Test
  public void canGroupByAPropertyOfAStruct() throws Exception {
    inputType = personAndAddress;
    inputTuples = peopleAndAddresses;

    input.by = Optional.of(expressionParser.parse("{address.suburb as suburb}"));
    input.select = expressionParser.parse("{suburb, mean(person.age) as avg_age}");

    Collector<?> grouped = realize();
    Struct outputType = norm(Struct.of("suburb", Types.TEXT, "avg_age", Types.FLOATING));

    // te aro: (79 + 9) / 2
    assertThat(
        process(grouped),
        isTuples(outputType,
            Arrays.asList("auckland central", 30.0D),
            Arrays.asList("te aro", 44.0D))
    );
  }

  /**
   * Same grouping as the aliased variant, but neither the group-by member nor the aggregate carries
   * an alias; the expected attribute names are {@code suburb} and {@code mean_age}.
   */
  @Test
  public void canGroupByAPropertyOfAStructWithoutAliases() throws Exception {
    inputType = personAndAddress;
    inputTuples = peopleAndAddresses;

    input.by = Optional.of(expressionParser.parse("{address.suburb}"));
    input.select = expressionParser.parse("{suburb, mean(person.age)}");

    Collector<?> grouped = realize();
    Struct outputType = norm(Struct.of("suburb", Types.TEXT, "mean_age", Types.FLOATING));

    assertThat(
        process(grouped),
        isTuples(outputType,
            Arrays.asList("auckland central", 30.0D),
            Arrays.asList("te aro", 44.0D))
    );
  }

  /**
   * Groups by the result of a function call ({@code round(person.age / 10)}) and repeats that call,
   * wrapped in {@code str(...)}, in the select expression.
   */
  @Test
  public void canGroupByAFunction() throws Exception {
    inputType = personAndAddress;
    inputTuples = peopleAndAddresses;

    input.by = Optional.of(expressionParser.parse("{round(person.age / 10)}"));
    input.select = expressionParser.parse("{count(*), str(round(person.age / 10)) as decades}");

    Collector<?> grouped = realize();
    Struct outputType = norm(Struct.of("count", Types.INTEGER, "decades", Types.TEXT));

    // one person per decade bucket
    assertThat(
        process(grouped),
        isTuples(outputType,
            Arrays.asList(1L, "3"),
            Arrays.asList(1L, "8"),
            Arrays.asList(1L, "1"))
    );
  }

  /**
   * Uses the same aggregate ({@code mean(person.age)}) twice in one select: once on its own and once
   * as an argument to {@code if_then_else}.
   */
  @Test
  public void canRepeatAnAggregateExpression() throws Exception {
    inputType = personAndAddress;
    inputTuples = peopleAndAddresses;

    input.by = Optional.of(expressionParser.parse("{address.suburb }"));
    input.select = expressionParser.parse("{"
        + "address.suburb, "
        + "mean(person.age) as avg_age, "
        + "if_then_else(mean(person.age) > 35, 'middle-aged', 'youthful') as character"
        + "}");

    Collector<?> grouped = realize();
    Struct outputType = norm(
        Struct.of("suburb", Types.TEXT, "avg_age", Types.FLOATING, "character", Types.TEXT));

    assertThat(
        process(grouped),
        isTuples(outputType,
            Arrays.asList("auckland central", 30.0D, "youthful"),
            Arrays.asList("te aro", 44.0D, "middle-aged"))
    );
  }

  /**
   * Groups by {@code address.suburb} and selects it through the same un-aliased property path
   * rather than by the short name {@code suburb}.
   */
  @Test
  public void canGroupByAPropertyOfAStructWithoutAlias() throws Exception {
    inputType = personAndAddress;
    inputTuples = peopleAndAddresses;

    input.by = Optional.of(expressionParser.parse("{address.suburb }"));
    input.select = expressionParser.parse("{address.suburb, mean(person.age)}");

    Collector<?> grouped = realize();
    Struct outputType = norm(Struct.of("suburb", Types.TEXT, "mean_age", Types.FLOATING));

    assertThat(
        process(grouped),
        isTuples(outputType,
            Arrays.asList("auckland central", 30.0D),
            Arrays.asList("te aro", 44.0D))
    );
  }

  /**
   * Groups by a bare function call ({@code upper(address.suburb)}) and selects the identical call
   * without an alias; the expected attribute name is {@code upper_suburb}.
   */
  @Test
  public void canGroupByAFunctionCallAndSelectItWithoutAlias() throws Exception {
    inputType = personAndAddress;
    inputTuples = peopleAndAddresses;

    input.by = Optional.of(expressionParser.parse("upper(address.suburb)"));
    input.select = expressionParser.parse("{upper(address.suburb), mean(person.age) as avg_age}");

    Collector<?> grouped = realize();
    Struct outputType = norm(Struct.of("upper_suburb", Types.TEXT, "avg_age", Types.FLOATING));

    assertThat(
        process(grouped),
        isTuples(outputType,
            Arrays.asList("AUCKLAND CENTRAL", 30.0D),
            Arrays.asList("TE ARO", 44.0D))
    );
  }

  /**
   * Groups by two aliased expressions (upper-cased suburb and decade of birth relative to 2020) and
   * selects them all back with a wildcard, plus a count.
   *
   * <p>Both the collector's declared target type and the produced tuples are checked.
   */
  @Test
  public void canGroupByABunchOfExpressionsAndThenSelectWildcard() throws Exception {
    inputTuples = peopleAndAddresses;
    inputType = personAndAddress;

    input.by = Optional.of(expressionParser.parse("{"
        + "upper(address.suburb) as sub, "
        + "floor((2020 - person.age) / 10) * 10 as decade"
        + "}"));

    input.select = expressionParser.parse("{*, count(person) as count}");

    Collector<?> collector = realize();

    Struct outputType = norm(Struct.of("sub", Types.TEXT, "decade", Types.INTEGER, "count", Types.INTEGER));

    assertEquals(outputType, collector.getTargetType());

    process(collector);
    // decade = floor((2020 - age) / 10) * 10:
    //   bob   (30): floor(1990 / 10) * 10 = 1990
    //   petey  (9): floor(2011 / 10) * 10 = 2010
    //   linda (79): floor(1941 / 10) * 10 = 1940
    assertThat(
        outputTuples,
        isTuples(outputType,
            Arrays.asList("AUCKLAND CENTRAL", 1990L, 1L),
            Arrays.asList("TE ARO", 2010L, 1L),
            Arrays.asList("TE ARO", 1940L, 1L)
        )
    );
  }

  /**
   * Groups by the whole {@code address} struct and then selects a single member of it
   * ({@code address.suburb}) alongside the mean age.
   *
   * <p>The grouping key is the complete address (street and suburb), and every row in the fixture has
   * a different street, so each person is expected to land in their own group. The two te aro
   * addresses therefore produce two separate rows rather than one averaged row.
   */
  @Test
  public void canSelectAPropertyOfAGroupedStruct() throws Exception {
    inputTuples = peopleAndAddresses;
    inputType = personAndAddress;
    input.select = expressionParser.parse("{address.suburb as suburb, mean(person.age) as avg_age}");
    input.by = Optional.of(expressionParser.parse("address"));

    Collector<?> collector = realize();

    Struct outputType = norm(Struct.of("suburb", Types.TEXT, "avg_age", Types.FLOATING));

    process(collector);
    assertThat(
        outputTuples,
        isTuples(outputType,
            Arrays.asList("auckland central", 30.0D), // queen street
            Arrays.asList("te aro", 79.0D),           // willis street
            Arrays.asList("te aro", 9.0D)             // devon street
        )
    );
  }

  /**
   * Groups by {@code {address.*}} and selects the splatted members by their short names
   * ({@code street}, {@code suburb}) together with the mean age.
   */
  @Test
  public void canUseASplatInGroupToGroupByAllMembersOfAStruct() throws Exception {
    inputType = personAndAddress;
    inputTuples = peopleAndAddresses;

    input.by = Optional.of(expressionParser.parse("{address.*}"));
    input.select = expressionParser.parse("{street, suburb, mean(person.age) as avg_age}");

    Collector<?> grouped = realize();
    Struct outputType = norm(
        Struct.of("street", Types.TEXT, "suburb", Types.TEXT, "avg_age", Types.FLOATING));

    // every street is distinct, so each row is a group of one
    assertThat(
        process(grouped),
        isTuples(outputType,
            Arrays.asList("queen street", "auckland central", 30.0D),
            Arrays.asList("willis street", "te aro", 79.0D),
            Arrays.asList("devon street", "te aro", 9.0D))
    );
  }

  /**
   * Combines the grouping attribute and an aggregate result inside one larger (string concatenation)
   * expression; the expected single output attribute is named {@code text}.
   */
  @Test
  public void canGroupedByASinglePropertyUsingResultsInAnExpression() throws Exception {
    input.by = Optional.of(expressionParser.parse("type"));
    input.select = expressionParser.parse("type + ' was $' + str(if_null(sum(foo), 0))");

    Collector<?> grouped = realize();
    Struct outputType = norm(Struct.of("text", Types.TEXT));

    assertThat(
        process(grouped),
        isTuples(outputType, Arrays.asList("cat was $4"), Arrays.asList("dog was $7"))
    );
  }

  /**
   * Groups by two members at once - a plain attribute and a boolean expression - using a fixture
   * that is local to this test.
   */
  @Test
  public void canGroupByManyProperties() throws Exception {
    inputType = Struct.of("name", Types.TEXT, "gender", Types.TEXT, "age", Types.INTEGER);
    inputTuples = tuples(inputType,
        "['pete', 'male', 32]",
        "['jane', 'female', 57]",
        "['robert', 'male', 21]",
        "['gerald', 'male', 12]",
        "['lauren', 'female', 22]"
    );

    // also shows grouping by an arbitrary expression
    input.by = Optional.of(expressionParser.parse("{gender, age > 17 as adult}"));
    input.select = expressionParser.parse("{gender, adult, count(*) as count}");

    Collector<?> grouped = realize();
    Struct outputType = norm(
        Struct.of("gender", Types.TEXT, "adult", Types.BOOLEAN, "count", Types.INTEGER));

    assertThat(
        process(grouped),
        isTuples(outputType,
            Arrays.asList("male", true, 2L),
            Arrays.asList("male", false, 1L),
            Arrays.asList("female", true, 2L))
    );
  }

  /**
   * Selecting a plain attribute ({@code type}) next to an aggregate, with no group-by, is expected to
   * fail realization with a property-outside-of-aggregation problem for that attribute.
   */
  @Test
  public void attemptingToSelectFromUngroupedAttributesGivesAnError() throws Exception {
    input.by = Optional.empty();
    input.select = expressionParser.parse("{sum(foo), type}");

    realize();

    PropertyAccess ungrouped = expressionParser.parse("type").isA(PropertyAccess.class).get();
    assertThat(
        result,
        failedResult(equalTo(
            ExpressionProblems.get().failedToRealize(input.select, inputType).withChildren(
                ExpressionProblems.get().propertyOutsideOfAggregationFunction(ungrouped)
            )
        ))
    );
  }

  /**
   * Grouping by {@code upper(address.suburb)} does not make the raw {@code address.suburb} property
   * selectable: realization is expected to fail with a property-outside-of-aggregation problem for
   * that property.
   */
  @Test
  public void attemptingToSelectArgumentOfAGroupByFunctionIsNotGoingToWork() throws Exception {
    inputTuples = peopleAndAddresses;
    inputType = personAndAddress;
    input.select = expressionParser.parse("{address.suburb, mean(person.age) as avg_age}");
    input.by = Optional.of(expressionParser.parse("upper(address.suburb)"));

    // only the recorded result matters here; no collector is produced when realization fails
    realize();

    assertThat(
      result,
      failedResult(
        equalTo(
          ExpressionProblems.get().failedToRealize(input.select, inputType).withChildren(
            ExpressionProblems.get()
              .propertyOutsideOfAggregationFunction(expressionParser.parse("address.suburb")
                  .isA(PropertyAccess.class).get()
              )
          )
        )
      )
    );
  }

  /**
   * Calling a function that is not registered ({@code sam}) is expected to fail realization with a
   * no-such-object problem naming that function.
   */
  @Test
  public void missingFunctionGivesAnError() throws Exception {
    input.by = Optional.empty();
    input.select = expressionParser.parse("{sam(foo)}");

    realize();

    assertThat(
        result,
        failedResult(equalTo(
            ExpressionProblems.get().failedToRealize(input.select, inputType).withChildren(
                GeneralProblems.get().noSuchObjectExists("sam", IdentifiedFunction.class)
            )
        ))
    );
  }

  /**
   * Adding an integer attribute to a text attribute inside an aggregate is expected to fail, with a
   * no-such-operator problem somewhere in the reported problem tree.
   */
  @Test
  public void badValueExpressionGivesAnError() throws Exception {
    input.by = Optional.empty();
    input.select = expressionParser.parse("{sum(foo + type)}");

    realize();

    assertThat(
        result,
        failedResult(hasAncestorProblem(equalTo(
            ExpressionProblems.get().noSuchOperatorFunction("+", Arrays.asList(Types.INTEGER, Types.TEXT))
        )))
    );
  }

  /**
   * Selecting two members that both end up named {@code type} is expected to fail, with a
   * duplicate-keys problem somewhere in the reported problem tree.
   */
  @Test
  public void duplicateAttributeNamesGivesAnError() throws Exception {
    input.by = Optional.of(expressionParser.parse("type"));
    input.select = expressionParser.parse("{type, sum(foo) as type}");

    realize();

    assertThat(
        result,
        failedResult(hasAncestorProblem(
            isProblem(TypeProblems.class, (r, p) -> p.duplicateKeys(r.any()))
        ))
    );
  }

  /**
   * Realizes the step against {@link #input}, after pointing it at an empty pipeline and an empty
   * upstream step of type {@link #inputType}. The outcome is stored in {@link #result}.
   *
   * @return the realized collector, or {@code null} if realization did not produce one
   */
  private Collector<?> realize() {
    RealizedPipeline emptyPipeline = RealizedPipeline.empty(executionContext, PipelineDeclaration.EMPTY);
    StepDefinition definition = new StepDefinition("foo");

    input.rInput = new RealizationInputImpl(emptyPipeline, definition, "foo", List.of(), Map.of());
    input.input = RealizedStep.emptyInput("null", inputType);

    result = step.realize(input);
    return (Collector<?>) result.orElse(null);
  }

  /**
   * Feeds every tuple in {@link #inputTuples} through the given collector using one new accumulator,
   * then gathers what the collector produces into {@link #outputTuples}.
   *
   * @param collector the collector to drive
   * @return the produced tuples (the same list that is stored in {@link #outputTuples})
   */
  private <T> List<Tuple> process(Collector<T> collector) {
    T state = collector.newAccumulator();
    inputTuples.forEach(row -> collector.accumulate(state, row));

    outputTuples = collector.process(state).collect(Collectors.toList());
    return outputTuples;
  }

  /**
   * Builds a matcher for a list of tuples that accepts the list when every tuple in it
   * <ul>
   * <li>has exactly the given struct type, and</li>
   * <li>has values equal to one of the expected rows.</li>
   * </ul>
   * The order of the tuples is not significant. The matcher does not check that every expected row
   * actually appears in the list.
   *
   * @param type the struct type every tuple must have
   * @param values the acceptable rows, each given as the list of a tuple's values in member order
   * @return a matcher that reports the first offending tuple on mismatch
   */
  public Matcher<List<Tuple>> isTuples(Struct type, List<Object>... values) {

    return new TypeSafeDiagnosingMatcher<List<Tuple>>() {

      @Override
      public void describeTo(Description description) {
        description.appendText("list of ").appendValue(type).appendText(" tuples ")
          .appendValueList("", ", ", "", values);
      }

      @Override
      protected boolean matchesSafely(List<Tuple> item, Description mismatchDescription) {
        for (Tuple tuple : item) {
          if (!tuple.getStruct().equals(type)) {
            mismatchDescription.appendText("struct was ").appendValue(tuple.getStruct());
            return false;
          }

          List<Object> actualValues = Arrays.asList(tuple.toArray());

          // Tracked per tuple: if this flag outlived the iteration, a single matching tuple would
          // let every later tuple through without being compared against the expected rows.
          boolean found = false;
          for (List<Object> expectedValues : values) {
            if (expectedValues.equals(actualValues)) {
              found = true;
              break;
            }
          }

          if (!found) {
            mismatchDescription.appendText("values ").appendValueList("", ", ", "", actualValues)
              .appendText(" not found");
            return false;
          }
        }
        return true;
      }
    };
  }

  /**
   * Builds a matcher for a single tuple that accepts it when its struct equals the given type and its
   * values, in order, equal the given values.
   *
   * @param type the struct type the tuple must have
   * @param values the expected values, in member order
   * @return a matcher that describes the actual struct or values on mismatch
   */
  public Matcher<Tuple> isTuple(Struct type, Object... values) {
    return new TypeSafeDiagnosingMatcher<Tuple>(Tuple.class) {

      @Override
      protected boolean matchesSafely(Tuple item, Description mismatchDescription) {
        Struct actualType = item.getStruct();
        if (!actualType.equals(type)) {
          mismatchDescription.appendText("struct was ").appendValue(actualType);
          return false;
        }

        Object[] actualValues = item.toArray();
        boolean sameValues = Arrays.asList(values).equals(Arrays.asList(actualValues));
        if (!sameValues) {
          mismatchDescription.appendText("values were ").appendValueList("", ", ", "", actualValues);
        }
        return sameValues;
      }

      @Override
      public void describeTo(Description description) {
        description.appendText("tuple of type ").appendValue(type);
        description.appendText(" and values ").appendValueList("", ", ", "", values);
      }
    };
  }

}
