/*
 * RiskScape™ Copyright New Zealand Institute for Earth Science Limited
 * (Earth Sciences New Zealand) is distributed for research purposes only
 * under the terms of AGPLv3.
 *
 * RiskScape™ Copyright 2025 New Zealand Institute for Earth Science
 * Limited (Earth Sciences New Zealand). All rights reserved. Source code
 * available under the AGPLv3.
 * 
 * This program is free software: you can redistribute it and/or modify it under
 *  the terms of the GNU Affero General Public License as published by the Free
 *  Software Foundation, either version 3 of the License, or (at your option) any
 *  later version.
 * 
 * This program is distributed for RESEARCH PURPOSES ONLY, in the hope that it will
 * be useful for research and education initiatives.
 * 
 * If you are not a researcher, or you are a researcher who wishes to use this
 * program on terms other than AGPLv3 (including those who wish to restrict the
 * distribution of any source code created using this program), please contact:
 * https://riskscape.org.nz
 * 
 * This program is distributed WITHOUT ANY WARRANTY; without even the implied
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Affero General Public License for more details.  You should have received a copy
 * of the GNU Affero General Public License along with this program.  If not, see
 * <http://www.gnu.org/licenses/>.
 * 
 * By way of summary only, under the AGPLv3:
 *     • Permissions of this strongest copyleft license are conditioned
 *       on making available complete source code of licensed works and
 *       modifications, which include larger works using a licensed work,
 *       under the same license.
 *     • Copyright and license notices must be preserved.
 *     • Contributors provide an express grant of patent rights.
 *     • When a modified version is used to provide a service over a
 *       network, the complete source code of the modified version must be made
 *       available.
 */
package nz.org.riskscape.engine.steps;


import java.util.ArrayList;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.Set;
import java.util.function.Supplier;
import java.util.stream.Collectors;

import lombok.Getter;
import lombok.RequiredArgsConstructor;
import nz.org.riskscape.dsl.Token;
import nz.org.riskscape.engine.Engine;
import nz.org.riskscape.engine.IdentifiedException;
import nz.org.riskscape.engine.Tuple;
import nz.org.riskscape.engine.bind.ParameterField;
import nz.org.riskscape.engine.function.IdentifiedFunction;
import nz.org.riskscape.engine.pipeline.Collector;
import nz.org.riskscape.engine.pipeline.RealizationInput;
import nz.org.riskscape.engine.pipeline.Realized;
import nz.org.riskscape.engine.pipeline.RealizedStep;
import nz.org.riskscape.engine.relation.TupleIterator;
import nz.org.riskscape.engine.rl.ExpressionRealizer;
import nz.org.riskscape.engine.rl.RealizationContext;
import nz.org.riskscape.engine.rl.RealizedExpression;
import nz.org.riskscape.engine.rl.agg.Accumulator;
import nz.org.riskscape.engine.rl.agg.AggregateExpressionRealizer;
import nz.org.riskscape.engine.rl.agg.RealizedAggregateExpression;
import nz.org.riskscape.engine.types.DuplicateKeysException;
import nz.org.riskscape.engine.types.Struct;
import nz.org.riskscape.engine.types.Struct.StructMember;
import nz.org.riskscape.engine.types.TypeProblems;
import nz.org.riskscape.engine.util.Pair;
import nz.org.riskscape.problem.Problem;
import nz.org.riskscape.problem.ProblemException;
import nz.org.riskscape.problem.Problems;
import nz.org.riskscape.problem.ResultOrProblems;
import nz.org.riskscape.rl.ExpressionParser;
import nz.org.riskscape.rl.ast.BinaryOperation;
import nz.org.riskscape.rl.ast.BracketedExpression;
import nz.org.riskscape.rl.ast.Expression;
import nz.org.riskscape.rl.ast.ExpressionConverter;
import nz.org.riskscape.rl.ast.ExpressionProblems;
import nz.org.riskscape.rl.ast.FunctionCall;
import nz.org.riskscape.rl.ast.Lambda;
import nz.org.riskscape.rl.ast.ListDeclaration;
import nz.org.riskscape.rl.ast.MinimalVisitor;
import nz.org.riskscape.rl.ast.PropertyAccess;
import nz.org.riskscape.rl.ast.SelectAllExpression;
import nz.org.riskscape.rl.ast.StructDeclaration;

public class GroupByStep extends BaseStep<GroupByStep.Params> {

  /**
   * @return the identifier of this step, {@code "group"}
   */
  @Override
  public String getId() {
    return "group";
  }

  /**
   * A struct declaration with no members, used by {@link #realize(Params)} as the group by expression when the
   * {@code by} parameter has not been given.
   */
  private static final StructDeclaration GROUP_BY_NOTHING =
      new StructDeclaration(Collections.emptyList(), Optional.empty());

  /**
   * The parameters that {@link #realize(Params)} works from.
   */
  public static class Params {
    // the expression that produces this step's output - aggregate function calls inside it are pulled out and
    // realized together, group by expressions inside it are replaced with references to the group by result
    @ParameterField
    public Expression select;

    // what to group by - when absent, an empty struct declaration is used in its place
    @ParameterField
    public Optional<Expression> by;

    // the step feeding this one - the type it produces is what select and by get realized against
    @Input
    public RealizedStep input;

    // realize reads the execution context (and from that, the realization context) from here
    // NOTE(review): carries no annotation in this class - presumably populated by BaseStep; confirm
    public RealizationInput rInput;
  }

  /**
   * @param engine handed straight to the {@link BaseStep} constructor
   */
  public GroupByStep(Engine engine) {
    super(engine);
  }

  // Realization works in four stages:
  //   1 - collect every aggregate function call that appears in the select expression
  //   2 - rewrite the select expression so that those calls become plain property references
  //   3 - realize the collected calls together, as the members of one struct
  //   4 - realize the rewritten expression against the group by + aggregate results
  //
  @Override
  public ResultOrProblems<? extends Realized> realize(Params parameters) {
    return ProblemException.catching(() -> {
      Struct sourceType = parameters.input.getProduces();
      RealizationContext realizationContext = parameters.rInput.getExecutionContext().getRealizationContext();
      ExpressionRealizer expressionRealizer = realizationContext.getExpressionRealizer();

      // without a 'by' parameter we fall back to grouping by an empty struct
      StructDeclaration groupByDeclaration = parameters.by
          .map(byExpression -> ExpressionParser.INSTANCE.toStruct(byExpression))
          .orElse(GROUP_BY_NOTHING);

      // the group by expression is realized against the incoming tuples
      RealizedExpression realizedGroupBy = expressionRealizer
          .realize(sourceType, groupByDeclaration)
          .getOrThrow(ExpressionProblems.get().failedToRealize(groupByDeclaration, sourceType));

      // both of these get filled in as a side effect of the aggregation rewrite below
      Map<FunctionCall, String> aggregateCalls = new LinkedHashMap<>();
      List<Problem> rewriteProblems = new ArrayList<>();

      // first swap group by expressions for references to the group by result, then swap aggregate calls for
      // references to their (yet to be realized) results
      Expression withGroupRefs = parameters.select.accept(new FindAndReplaceExpressions(realizedGroupBy), null);
      Expression withAggregateRefs = withGroupRefs.accept(
          new FindAndReplaceAggregations(realizationContext, aggregateCalls, rewriteProblems), null);

      if (!rewriteProblems.isEmpty()) {
        throw new ProblemException(
            ExpressionProblems.get().failedToRealize(parameters.select, sourceType).withChildren(rewriteProblems));
      }

      RealizedAggregateExpression realizedAggregates =
          expressionRealizer.realizeAggregate(sourceType, aggregationStruct(aggregateCalls)).getOrThrow();

      Struct aggregateType = realizedAggregates.getResultType().find(Struct.class).get();
      Struct groupType = realizedGroupBy.getResultType().find(Struct.class).get();
      Struct combinedType = combineTypes(realizationContext, groupType, aggregateType);

      // collect anything that reads from the input without being grouped or aggregated - reported further down
      List<PropertyAccess> strays = new ArrayList<>();
      withAggregateRefs.accept(new CheckForUngroupedPropertyAccess(sourceType, combinedType), strays);

      // rebuild the select struct with any wildcard swapped for the group by members - otherwise we end up double
      // selecting the accumulated results
      Expression finalSelect = withAggregateRefs.accept(new WildcardReplacer(groupType.getMembers()), null);

      if (!strays.isEmpty()) {
        throw new ProblemException(ExpressionProblems.get()
            .failedToRealize(parameters.select, sourceType)
            .withChildren(strays.stream()
                .map(stray -> ExpressionProblems.get().propertyOutsideOfAggregationFunction(stray))
                .collect(Collectors.toList())
            )
        );
      }

      RealizedExpression realizedSelect = expressionRealizer.asStruct(
          realizationContext,
          expressionRealizer.realize(combinedType, finalSelect).getOrThrow()
      );

      return new Instance(sourceType, realizedSelect, () -> new AccumInstance(realizedGroupBy, realizedAggregates));
    });
  }

  // builds a struct declaration with one member per collected aggregate call, named with the call's identifier
  private static Expression aggregationStruct(Map<FunctionCall, String> aggregateCalls) {
    List<StructDeclaration.Member> members = new ArrayList<>();
    for (Entry<FunctionCall, String> call : aggregateCalls.entrySet()) {
      members.add(StructDeclaration.jsonStyleMember(call.getValue(), call.getKey()));
    }
    return new StructDeclaration(members, Optional.empty());
  }

  // appends the aggregate result type to the group by type, reporting clashing member names as a problem
  private static Struct combineTypes(RealizationContext context, Struct groupType, Struct aggregateType)
      throws ProblemException {
    try {
      return context.normalizeStruct(groupType.and(aggregateType));
    } catch (DuplicateKeysException ex) {
      throw new ProblemException(TypeProblems.get().duplicateKeys(ex.getDuplicates()));
    }
  }

  /**
   * Accumulation state for the group step - holds one {@link Accumulator} for each distinct group by result that
   * has been seen.
   */
  @RequiredArgsConstructor
  private static class AccumInstance {

    private final RealizedExpression groupBy;
    private final RealizedAggregateExpression aggregateExpression;

    // keyed by the tuple that the group by expression evaluated to
    private Map<Tuple, Accumulator> groups = new HashMap<>();

    /**
     * Evaluates the group by expression against the tuple and feeds the tuple to that group's accumulator, creating
     * the accumulator first if this is a new group.
     */
    public void accumulate(Tuple tuple) {
      Tuple key = (Tuple) groupBy.evaluate(tuple);
      Accumulator forGroup = groups.computeIfAbsent(key, unused -> aggregateExpression.newAccumulator());
      forGroup.accumulate(tuple);
    }

    /**
     * Folds the groups from rhs in to this instance - groups present on both sides have their accumulators combined,
     * groups only present in rhs are taken as they are.
     *
     * @return this instance
     */
    public AccumInstance combine(AccumInstance rhs) {
      rhs.groups.forEach((key, theirs) -> groups.merge(key, theirs, (mine, other) -> mine.combine(other)));
      return this;
    }

  }

  /**
   * The {@link Collector} that the group step realizes to. Accumulation is delegated to {@link AccumInstance}; the
   * process expression is applied to each group once accumulation is done.
   */
  @RequiredArgsConstructor
  private static class Instance implements Collector<AccumInstance> {

    @Getter
    private final Class<AccumInstance> accumulatorClass = AccumInstance.class;

    @Getter
    private final Struct sourceType;

    // evaluated against a tuple made up of the group by values followed by the aggregate results
    @Getter
    private final RealizedExpression processExpression;

    private final Supplier<AccumInstance> newAccumulator;

    @Getter
    private final Set<Characteristic> characteristics = EnumSet.of(Characteristic.PARALLELIZABLE);

    @Override
    public Struct getProducedType() {
      return processExpression.getResultType().find(Struct.class).get();
    }

    @Override
    public AccumInstance newAccumulator() {
      return newAccumulator.get();
    }

    @Override
    public void accumulate(AccumInstance accumulator, Tuple tuple) {
      accumulator.accumulate(tuple);
    }

    @Override
    public AccumInstance combine(AccumInstance lhs, AccumInstance rhs) {
      return lhs.combine(rhs);
    }

    // one output tuple is produced per group, so the number of groups is the size
    @Override
    public Optional<Long> size(AccumInstance accumulator) {
      long groupCount = accumulator.groups.size();
      return Optional.of(groupCount);
    }

    @Override
    public TupleIterator process(AccumInstance accumulator) {
      Struct combinedType = processExpression.getInputType().find(Struct.class).get();
      Iterator<Entry<Tuple, Accumulator>> perGroup = accumulator.groups.entrySet().iterator();

      return TupleIterator.wrappedAndMapped(perGroup, entry -> emit(combinedType, entry));
    }

    // builds the process expression's input for a single group and evaluates the expression against it
    private Tuple emit(Struct combinedType, Entry<Tuple, Accumulator> entry) {
      Tuple groupValues = entry.getKey();
      Tuple aggregateValues = (Tuple) entry.getValue().process();

      Tuple combined = new Tuple(combinedType);
      // group by values go in first, the aggregate results follow on directly after them
      combined.setAll(groupValues);
      combined.setAll(groupValues.size(), aggregateValues);

      return (Tuple) processExpression.evaluate(combined);
    }
  }

  /**
   * Collects property accesses that name a member of the input type that isn't a member of the process type, i.e.
   * input attributes that are being read without having been grouped by or aggregated.
   */
  @RequiredArgsConstructor
  private static class CheckForUngroupedPropertyAccess extends MinimalVisitor<List<PropertyAccess>> {

    private final Struct inputType;
    private final Struct processType;

    @Override
    public List<PropertyAccess> visit(PropertyAccess expression, List<PropertyAccess> data) {
      // only an access without a receiver gets checked against the two types
      if (!expression.getReceiver().isPresent()) {
        String root = expression.getFirstIdentifier().getValue();
        boolean availableAfterGrouping = processType.hasMember(root);
        if (!availableAfterGrouping && inputType.hasMember(root)) {
          // comes from the input, but has been neither grouped nor aggregated
          data.add(expression);
        }
      }
      return super.visit(expression, data);
    }

    @Override
    public List<PropertyAccess> visit(Lambda expression, List<PropertyAccess> data) {
      // lambdas are left alone - there's no telling what one is going to do
      return data;
    }

  }

  // swaps any part of a select expression that is equal to one of the group by expressions for a property access
  // on the matching member of the group by type - this is what lets a user do something like
  // `group(foo.bar, by: foo.bar)`
  private static class FindAndReplaceExpressions extends ExpressionConverter<StructDeclaration> {

    // each group by expression (splatted ones excluded) paired with the key of the group by member it produces
    private final List<Pair<Expression, String>> references = new ArrayList<>();

    FindAndReplaceExpressions(RealizedExpression groupBy) {
      List<RealizedExpression> dependencies = groupBy.getDependencies();
      Iterator<StructMember> groupMembers = groupBy.getResultType().find(Struct.class).get().getMembers().iterator();

      for (RealizedExpression dependency : dependencies) {
        Expression source = dependency.getExpression();
        boolean splatted = source.isA(SelectAllExpression.class).isPresent()
            || source.isA(PropertyAccess.class).map(pa -> pa.isTrailingSelectAll()).orElse(false);

        if (splatted) {
          // every member of the struct has been splatted in to the group by type, so step over all of them
          int splatCount = dependency.getResultType().find(Struct.class).get().getMembers().size();
          for (int skipped = 0; skipped < splatCount; skipped++) {
            groupMembers.next();
          }
        } else {
          references.add(Pair.of(source, groupMembers.next().getKey()));
        }
      }
    }

    @Override
    public Expression visit(BinaryOperation expression, StructDeclaration data) {
      return groupReferenceOr(expression, () -> super.visit(expression, data));
    }

    @Override
    public Expression visit(BracketedExpression expression, StructDeclaration data) {
      return groupReferenceOr(expression, () -> super.visit(expression, data));
    }

    @Override
    public Expression visit(FunctionCall expression, StructDeclaration data) {
      return groupReferenceOr(expression, () -> super.visit(expression, data));
    }

    @Override
    public Expression visit(Lambda expression, StructDeclaration data) {
      return groupReferenceOr(expression, () -> super.visit(expression, data));
    }

    @Override
    public Expression visit(ListDeclaration expression, StructDeclaration data) {
      return groupReferenceOr(expression, () -> super.visit(expression, data));
    }

    @Override
    public Expression visit(PropertyAccess expression, StructDeclaration data) {
      return groupReferenceOr(expression, () -> super.visit(expression, data));
    }

    @Override
    public Expression visit(StructDeclaration expression, StructDeclaration data) {
      return groupReferenceOr(expression, () -> super.visit(expression, data));
    }

    // gives back the group by reference for the expression if it has one, otherwise whatever the normal conversion
    // of the expression (supplied by the caller) comes up with
    private Expression groupReferenceOr(Expression expression, Supplier<Expression> normalConversion) {
      PropertyAccess reference = findInGroup(expression);
      if (reference != null) {
        return reference;
      }
      return normalConversion.get();
    }

    // a property access on the group by member whose expression equals the given one, or null if none does
    private PropertyAccess findInGroup(Expression expression) {
      return references.stream()
          .filter(candidate -> expression.equals(candidate.getLeft()))
          .findFirst()
          .map(match -> PropertyAccess.of(match.getRight()))
          .orElse(null);
    }
  }

  /**
   * Rebuilds a struct declaration with every wildcard member (e.g. the * in {foo, *, bar}) swapped for one explicit
   * member per group by type member. Doing this up front keeps the logic for building the final select expression
   * simple, as that expression has the aggregate function results interpolated in to it (via another converted
   * expression).
   */
  @RequiredArgsConstructor
  private static class WildcardReplacer extends ExpressionConverter<Void> {

    final List<StructMember> members;

    @Override
    public Expression visit(StructDeclaration expression, Void data) {
      List<StructDeclaration.Member> expanded = new ArrayList<>();

      for (StructDeclaration.Member declared : expression.getMembers()) {
        if (!declared.isSelectAll()) {
          // nothing to replace, but it still gets rebuilt as a json style member so it is consistent with the
          // expanded ones - otherwise the toSource representation might not parse :/
          expanded.add(StructDeclaration.jsonStyleMember(declared.getIdentifier(), declared.getExpression()));
          continue;
        }

        // the wildcard becomes an explicit `key: key` member for each of the members we were given
        for (StructMember groupMember : members) {
          String key = groupMember.getKey();
          expanded.add(StructDeclaration.jsonStyleMember(key, PropertyAccess.of(key)));
        }
      }

      return new StructDeclaration(expanded, Optional.empty());
    }
  }

  // finds function calls that are on aggregate functions, collects the functions and replaces the expression with a
  // property access expression
  @RequiredArgsConstructor
  private static class FindAndReplaceAggregations extends ExpressionConverter<StructDeclaration> {

    private final RealizationContext context;
    private final Map<FunctionCall, String> collected;
    private final List<Problem> errors;

    @Override
    public Expression visit(FunctionCall expression, StructDeclaration parent) {

      IdentifiedFunction idf;
      try {
        idf = context.getProject().getFunctionSet().get(expression.getIdentifier().getValue(),
            context.getProblemSink());
      } catch (IdentifiedException ex) {
        errors.add(Problems.caught(ex));
        return expression;
      }

      boolean isAggFunction = idf.getAggregationFunction().isPresent();

      if (isAggFunction) {
        String ident = collected.get(expression);

        // this is the first time we've seen this function call, generate an identifier for it and add it to the list
        // for realization
        if (ident == null) {
          ident = AggregateExpressionRealizer.getImplicitName(expression, collected.values());
          if (parent != null) {
            // retain an attribute name that has already been set, otherwise use the generated one
            ident = parent.getMembers().stream()
                .filter(attr -> attr.getExpression() == expression)
                .findFirst()
                .flatMap(attr -> Optional.ofNullable(attr.getIdentifier()).map(Token::getValue))
                .orElse(ident);
          }

          // it must be made unique in the context of the entire expression, not just within the struct parent, so
          // another round of uniquification is required
          ident = ExpressionRealizer.makeUnique(ident, collected.values());
        }

        collected.put(expression, ident);
        return PropertyAccess.of(ident);
      } else {
        return super.visit(expression, parent);
      }
    }

    @Override
    public Expression visit(StructDeclaration expression, StructDeclaration data) {
      // pass struct in as data - this gives context to the function call method above to help with generating names
      return super.visit(expression, expression);
    }
  }
}
