/*
 * RiskScape™ Copyright New Zealand Institute for Earth Science Limited
 * (Earth Sciences New Zealand) is distributed for research purposes only
 * under the terms of AGPLv3.
 *
 * RiskScape™ Copyright 2025 New Zealand Institute for Earth Science
 * Limited (Earth Sciences New Zealand). All rights reserved. Source code
 * available under the AGPLv3.
 * 
 * This program is free software: you can redistribute it and/or modify it under
 *  the terms of the GNU Affero General Public License as published by the Free
 *  Software Foundation, either version 3 of the License, or (at your option) any
 *  later version.
 * 
 * This program is distributed for RESEARCH PURPOSES ONLY, in the hope that it will
 * be useful for research and education initiatives.
 * 
 * If you are not a researcher, or you are a researcher who wishes to use this
 * program on terms other than AGPLv3 (including those who wish to restrict the
 * distribution of any source code created using this program), please contact:
 * https://riskscape.org.nz
 * 
 * This program is distributed WITHOUT ANY WARRANTY; without even the implied
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Affero General Public License for more details.  You should have received a copy
 * of the GNU Affero General Public License along with this program.  If not, see
 * <http://www.gnu.org/licenses/>.
 * 
 * By way of summary only, under the AGPLv3:
 *     • Permissions of this strongest copyleft license are conditioned
 *       on making available complete source code of licensed works and
 *       modifications, which include larger works using a licensed work,
 *       under the same license.
 *     • Copyright and license notices must be preserved.
 *     • Contributors provide an express grant of patent rights.
 *     • When a modified version is used to provide a service over a
 *       network, the complete source code of the modified version must be made
 *       available.
 */
package nz.org.riskscape.cpython;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicBoolean;

import lombok.AccessLevel;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import nz.org.riskscape.cpython.CPythonSpawner.CPythonProcess;
import nz.org.riskscape.engine.RiskscapeException;
import nz.org.riskscape.engine.RiskscapeIOException;
import nz.org.riskscape.engine.Tuple;
import nz.org.riskscape.engine.function.FunctionCallException;
import nz.org.riskscape.engine.output.PipelineJobContext;
import nz.org.riskscape.engine.pipeline.Sink;
import nz.org.riskscape.engine.pipeline.SinkConstructor;
import nz.org.riskscape.engine.pipeline.TupleInput;
import nz.org.riskscape.engine.projection.AsyncProjector;
import nz.org.riskscape.engine.resource.Resource;
import nz.org.riskscape.engine.types.Struct;
import nz.org.riskscape.engine.types.Type;
import nz.org.riskscape.engine.types.Types;
import nz.org.riskscape.problem.Problem;
import nz.org.riskscape.problem.ProblemException;
import nz.org.riskscape.problem.Problems;
import nz.org.riskscape.problem.ResultOrProblems;

/**
 * Supports sending and receiving tuples from a python process via a buffer, rather than the 'immediate mode' that
 * normal CPython functions use.
 *
 * This requires that the function be called in its own pipeline step, but allows more flexibility in how the
 * tuples are processed (for instance, we can compile the entire dataset into a dataframe before emitting rows)
 *
 * The exchange with the child process, as implemented by this class, is:
 *
 * Java to python (child stdin): the serialized parameters tuple (only if parameters were given), then for each
 * input tuple a long row count of 1 followed by the serialized tuple, and finally a long row count of 0 once the
 * sink is finished.
 *
 * Python to java (child stdout): a long row count. 1 is followed by a single serialized tuple of the produced type,
 * -1 is followed by a text error message, and 0 marks the end of the output and is followed by an int count of
 * dynamically registered output files and that many UTF-encoded paths.
 */
@RequiredArgsConstructor
public class CPythonAsyncProjector implements AsyncProjector {

  // passed to the process spawner and attached to function-call problems so errors can be traced back to their origin
  private final Object errorContext;
  // type of tuples being collected in the sink
  private final Struct inputType;

  // type of tuples we emit via TupleInput
  @Getter
  private final Struct producedType;

  // optional tuple of parameters that is sent to the script once, before any input tuples
  private final Optional<Tuple> parameters;

  // the python script we will be evaluating
  private final Resource script;

  // RiskScape managed access to cpython
  private final CPythonSpawner processSpawner;

  // set when the sink is finished; note that nothing in this class currently reads it
  private final AtomicBoolean inputFinished = new AtomicBoolean(false);
  // set once python has sent a zero row count, which is what makes the TupleInput report itself as complete
  private final AtomicBoolean outputFinished = new AtomicBoolean(false);

  // set once started
  @Getter(AccessLevel.PACKAGE)
  private CPythonProcess process;

  // the job context given to the sink constructor, used for reporting problems and registering output files
  private PipelineJobContext context;

  // used for handling unexpected nulls in structs to avoid serialization errors.
  private Type nullSafeInputType;
  CPythonSerializer serializer = new CPythonSerializer();

  /**
   * The output side of this projector - yields the tuples that the python script sends back on its stdout.
   */
  @Getter
  private final TupleInput input = new TupleInput() {

    @Override
    public Struct getProducedType() {
      return producedType;
    }

    /**
     * Attempts to read a single tuple from the python process without blocking while no data is available.
     *
     * @return the next tuple, or null if nothing is available to read yet or python has signalled the end of output
     * @throws FunctionCallException if the python script reported an error, or the response could not be read
     * @throws IllegalStateException if python reports more than one row in a single response
     */
    @Override
    public Tuple poll() {
      try {

        // If there's nothing to read at the moment, don't block waiting for it.
        // This thread might be needed to process upstream steps - which would give us something to read!
        if (process.getChildStdOut().available() == 0) {
          return null;
        }

        long rowCount = readFunctionResponse();

        if (rowCount == 1) {
          return (Tuple) producedType.fromBytes(process.getChildStdOut());
        } else if (rowCount > 1) {
          // eventually we'll want to support batches
          throw new IllegalStateException(
              "cpython process returned a row count of " + rowCount + ", expected one or zero");
        }

        // zero rows - readFunctionResponse has already flagged the output as finished
        return null;
      } catch (IOException e) {
        // this probably means the python code threw an exception serializing the result
        Problem problem = CPythonProblems.get().failedToReadResult(script, process.getErrorText());
        throw new FunctionCallException(problem, e);
      }
    }


    @Override
    public boolean isComplete() {
      return outputFinished.get();
    }

    @Override
    public boolean isAsync() {
      return true;
    }

    @Override
    public Optional<Long> size() {
      // there is no way to know up front how many tuples the script is going to emit
      return Optional.empty();
    }
  };

  /**
   * The input side of this projector - constructs the sink that forwards collected tuples to the python process.
   * Constructing the sink is what starts the python process.
   */
  @Getter
  private final SinkConstructor output = new SinkConstructor() {

    @Override
    public ResultOrProblems<Sink> newInstance(PipelineJobContext newContext) {
      context = newContext;
      try {
        start();
      } catch (ProblemException e) {
        return e.toResult();
      }

      return ResultOrProblems.of(new Sink() {

        /**
         * Tells python that there are no more tuples to come. When the script isn't expected to produce any tuples,
         * this also reads python's final response, as nothing else is going to.
         */
        @Override
        public void finish() {
          inputFinished.set(true);
          try {
            // zero count tells python we're done
            process.getChildStdIn().writeLong(0);
            process.getChildStdIn().flush();

            // if there's no tuple input, the sink must read the response to close off the process
            if (!isExpectedToProduceOutput()) {
              readFunctionResponse();
            }
          } catch (IOException e) {
            throw new RiskscapeIOException("Failed to write tuple to cpython process:" + process.getErrorText(), e);
          }
        }

        /**
         * Sends a single tuple to the python process.
         *
         * @return always true
         * @throws RiskscapeIOException if the tuple could not be written to the process
         */
        @Override
        public boolean accept(Tuple tuple) {
          try {
            // row count - for now we support one or none, but we want to make this async if possible
            process.getChildStdIn().writeLong(1);
            // NOTE(review): python is told to expect nullSafeInputType (see start()) but the tuple is serialized
            // against inputType - presumably the serializer copes with that, confirm against CPythonSerializer
            serializer.serialize(process.getChildStdIn(), inputType, tuple);
            // a single flush covers both the row count and the tuple - python can't act on the count alone
            process.getChildStdIn().flush();
          } catch (IOException e) {
            // a failed write most likely means the script has died, so pass on whatever it had to say about that
            throw new RiskscapeIOException("Failed to write tuple to cpython process:" + process.getErrorText(), e);
          }
          return true;
        }
      });
    }
  };

  /**
   * Spawns the python process for the script and sends it the parameters tuple, if there is one.
   *
   * The script is given the serializer type of the (null-safe) input type, then the produced type and then, only
   * when parameters are present, the (null-safe) parameters type as its arguments.
   *
   * @throws ProblemException if the process could not be started or the parameters could not be written to it
   */
  private void start() throws ProblemException {
    nullSafeInputType = CPythonSerializer.nullSafeType(inputType);
    String outputTypeArg = CPythonSerializer.serializerType(getProducedType());
    String inputTypeArg = CPythonSerializer.serializerType(nullSafeInputType);

    List<String> pyArgs = new ArrayList<>();
    pyArgs.add(inputTypeArg);
    pyArgs.add(outputTypeArg);
    parameters.map(Tuple::getStruct)
        .map(s -> CPythonSerializer.serializerType(CPythonSerializer.nullSafeType(s)))
        .ifPresent(pyArgs::add);

    try {
      this.process = processSpawner.startExecScript(
          errorContext,
          script,
          pyArgs.toArray(new String[0]));

      if (parameters.isPresent()) {
        Tuple t = parameters.get();
        serializer.serialize(process.getChildStdIn(), t.getStruct(), t);
        process.getChildStdIn().flush();
      }
    } catch (IOException e) {
      throw new ProblemException(Problems.caught(e));
    } catch (RiskscapeException e) {
      // re-wrap as checked ProblemException so that it conforms to the SinkConstructor's API
      throw new ProblemException(e.getProblem());
    }
  }

  /**
   * Reads the row count that begins every response from the python process and deals with the two special values.
   * A count of zero means python is done - any dynamic outputs are collected and the output is flagged as finished.
   *
   * @return the number of rows that follow in the response - zero once python has finished
   * @throws IOException if the response could not be read from the process
   * @throws FunctionCallException if python responded with an error (a row count of -1)
   */
  private long readFunctionResponse() throws IOException {
    long rowCount = process.getChildStdOut().readLong();

    if (rowCount == -1) {
      // a count of -1 means the function failed. It is followed by a string containing the error message, which we
      // throw in an exception - this should help the user fix/debug their function
      String errorMessage = (String) Types.TEXT.fromBytes(process.getChildStdOut());
      throw new FunctionCallException(
        Problems.foundWith(errorContext, CPythonProblems.get().functionCallException(errorMessage)),
        null
      );
    }

    if (rowCount == 0) {
      checkForDynamicOutputs();

      outputFinished.set(true);
    }

    return rowCount;
  }

  /**
   * @return true if the script is meant to emit tuples, i.e. the produced type is anything but the empty struct
   */
  private boolean isExpectedToProduceOutput() {
    return !producedType.equals(Struct.EMPTY_STRUCT);
  }

  /**
   * Reads the list of output files that the script reported with its final response and registers each readable one
   * with the job's output container. Problems are sent to the project's problem sink rather than thrown.
   *
   * @throws IOException if the list of outputs could not be read from the process
   */
  private void checkForDynamicOutputs() throws IOException {
    int numOutputs = process.getChildStdOut().readInt();

    if (!isExpectedToProduceOutput() && numOutputs == 0) {
      // Technically nothing is actually wrong here, but probably what's
      // happened is that they've forgotten to call model_output()
      context
          .getExecutionContext()
          .getProject()
          .getProblemSink()
          .accept(
              CPythonProblems.get().noOutput().withSeverity(Problem.Severity.WARNING)
          );
    }

    for (int i = 0; i < numOutputs; i++) {
      Path outputPath = Paths.get(process.getChildStdOut().readUTF());

      if (Files.isReadable(outputPath)) {
        context.getOutputContainer().registerLocalFile(outputPath);
      } else {
        // I figure it will be really annoying if this brings the model down, might be best to throw up an error
        // message but let things otherwise continue
        context.getExecutionContext().getProject().getProblemSink().accept(
            CPythonProblems.get().missingOutput(script.getLocation(), outputPath));
      }
    }
  }
}
