/*
 * RiskScape™ Copyright New Zealand Institute for Earth Science Limited
 * (Earth Sciences New Zealand) is distributed for research purposes only
 * under the terms of AGPLv3.
 *
 * RiskScape™ Copyright 2025 New Zealand Institute for Earth Science
 * Limited (Earth Sciences New Zealand). All rights reserved. Source code
 * available under the AGPLv3.
 * 
 * This program is free software: you can redistribute it and/or modify it under
 *  the terms of the GNU Affero General Public License as published by the Free
 *  Software Foundation, either version 3 of the License, or (at your option) any
 *  later version.
 * 
 * This program is distributed for RESEARCH PURPOSES ONLY, in the hope that it will
 * be useful for research and education initiatives.
 * 
 * If you are not a researcher, or you are a researcher who wishes to use this
 * program on terms other than AGPLv3 (including those who wish to restrict the
 * distribution of any source code created using this program), please contact:
 * https://riskscape.org.nz
 * 
 * This program is distributed WITHOUT ANY WARRANTY; without even the implied
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Affero General Public License for more details.  You should have received a copy
 * of the GNU Affero General Public License along with this program.  If not, see
 * <http://www.gnu.org/licenses/>.
 * 
 * By way of summary only, under the AGPLv3:
 *     • Permissions of this strongest copyleft license are conditioned
 *       on making available complete source code of licensed works and
 *       modifications, which include larger works using a licensed work,
 *       under the same license.
 *     • Copyright and license notices must be preserved.
 *     • Contributors provide an express grant of patent rights.
 *     • When a modified version is used to provide a service over a
 *       network, the complete source code of the modified version must be made
 *       available.
 */

package nz.org.riskscape.cpython;

import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.net.URI;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import java.util.Optional;
import java.util.concurrent.TimeUnit;

import lombok.Getter;
import lombok.extern.slf4j.Slf4j;

import nz.org.riskscape.engine.OsUtils;
import nz.org.riskscape.engine.Project;
import nz.org.riskscape.engine.RiskscapeException;
import nz.org.riskscape.engine.resource.Resource;
import nz.org.riskscape.engine.resource.ResourceLoadingException;
import nz.org.riskscape.engine.resource.UriHelper;
import nz.org.riskscape.engine.types.Types;
import nz.org.riskscape.problem.ProblemSink;
import nz.org.riskscape.problem.Problems;

/**
 * Handles spawning child CPython processes.
 */
@Slf4j
public class CPythonSpawner {

  /**
   * Simple class to help track child CPython processes that we spawn.  Wraps the underlying {@link Process} together
   * with the script it is running and data streams connected to the child's stdin and stdout.
   */
  // TODO handle cleaning up these processes once we're done with them
  public class CPythonProcess {
    @Getter
    private final Process child;
    @Getter
    private final Resource userScript;

    /**
     * Writing to this stream will send data to the child process's stdin
     */
    @Getter
    private final DataOutputStream childStdIn;

    /**
     * Reading from this stream will receive data from the child process's stdout
     */
    @Getter
    private final DataInputStream childStdOut;

    // this is referred to when a function fails unexpectedly - we use whatever was printed last as part of our
    // error message
    volatile String lastErrorText;
    // set once destroy() has been called; stderr of a destroyed process is deliberately ignored from then on
    volatile boolean destroyed;

    /**
     * @param proc the already-started child process
     * @param script the user script the child process was started for
     */
    public CPythonProcess(Process proc, Resource script) {
      this.child = proc;
      this.userScript = script;
      this.childStdIn = new DataOutputStream(child.getOutputStream());
      this.childStdOut = new DataInputStream(child.getInputStream());
    }

    /**
     * @return any stderr output produced by this process without buffering - returns what is available at the time
     * without blocking (at least not on purpose).  Returns an empty string once the process has been destroyed.
     * @throws UncheckedIOException if stderr can not be read from a process that is still alive and has not been
     * destroyed
     */
    public String getErrorText() {
      if (destroyed) {
        return "";
      }

      // collect the raw bytes first and decode them in one go - decoding each chunk separately would mangle any
      // multi-byte character that happened to straddle a chunk boundary
      ByteArrayOutputStream collected = new ByteArrayOutputStream();
      InputStream is = child.getErrorStream();
      byte[] buf = new byte[1024];
      try {
        while (is.available() > 0) {
          int bytesRead = is.read(buf);
          if (bytesRead < 0) {
            // end of stream - nothing more will ever arrive
            break;
          }
          collected.write(buf, 0, bytesRead);
        }
      } catch (IOException e) {
        if (!destroyed && child.isAlive()) {
          throw new UncheckedIOException(e);
        }
        // same reasoning as hasErrorText - a dead (or concurrently destroyed) process shouldn't cause everything
        // to fall over, so we return whatever we managed to read
        log.warn("Failed to read stderr from dead process {} - we may have missed output", this);
      }

      // decoded with the platform default charset, exactly as before
      lastErrorText = collected.toString();
      return lastErrorText;
    }

    /**
     * @return true if there is stderr output waiting to be read via {@link #getErrorText()}.  Always false once the
     * process has been destroyed.
     * @throws UncheckedIOException if stderr can not be inspected for a process that is still alive and has not been
     * destroyed
     */
    public boolean hasErrorText() {
      if (destroyed) {
        return false;
      }

      try {
        return child.getErrorStream().available() > 0;
      } catch (IOException e) {
        // destroyed is re-checked here because destroy() can be called from another thread after the check above
        if (!destroyed && child.isAlive()) {
          throw new UncheckedIOException(e);
        } else {
          // not sure how likely this is - I expect we'll be able to get it even if the process *is* dead, at least our
          // testing seems to show that, but I guess it's possible we wont and in that case, we don't want a dead
          // process to cause everything to fall over
          log.warn("Failed to read stderr from dead process {} - we may have missed output", this);
          return false;
        }
      }
    }

    /**
     * Simple test for checking the process finished successfully.  Don't call it unless you expect (and want) the
     * process to have exited, or you'll end up inadvertently killing it by calling this function - it will forcibly
     * destroy the process if it hasn't already exited.
     * @return true if the process exited ok, waiting up to a minute for this to happen before killing it forcibly
     * and returning false.  If the calling thread is interrupted while waiting, the wait is cut short and the
     * thread's interrupt status is restored.
     */
    public boolean exittedOK() {
      try {
        child.waitFor(1, TimeUnit.MINUTES);
      } catch (InterruptedException e) {
        // don't lose the interrupt - callers further up the stack may need to see it
        Thread.currentThread().interrupt();
      }

      if (child.isAlive()) {
        child.destroyForcibly();
        return false;
      }

      return child.exitValue() == 0;
    }

    /**
     * Forcibly destroys the child process and marks this object as destroyed.
     */
    public void destroy() {
      // if destroy is called, we know we don't want to do anything to this process - avoids garbage being output from
      // a dead or dying process
      destroyed = true;
      child.destroyForcibly();
    }

    @Override
    public String toString() {
      return String.format("CPythonProcess(script=%s alive?=%s)", userScript.getLocation(), child.isAlive());
    }

    /**
     * @return true if the child process is still running and {@link #destroy()} has not been called
     */
    public boolean isAlive() {
      return !destroyed && child.isAlive();
    }

    /**
     * @return true if this process was destroyed by us (by calling destroy)
     */
    public boolean wasDestroyed() {
      return destroyed;
    }

    /**
     * Blocks until the child process has exited.
     * @return the exit value of the child process
     * @throws InterruptedException if the calling thread is interrupted while waiting
     */
    public int join() throws InterruptedException {
      return child.waitFor();
    }
  }

  /**
   * Settings used to build the python command line and to locate the adaptor scripts.
   */
  @Getter
  private final CPythonSettings settings;
  // every process that started successfully and hasn't been reaped yet.  All access visible in this class is done
  // while holding the list's own monitor (synchronized (processes))
  private final LinkedList<CPythonProcess> processes = new LinkedList<>();

  // stderr output captured from child processes is logged here (see checkProcesses)
  private final ProblemSink problemSink;

  // woken up by checkForOutput() and stopped by destroy()
  private final Reaper reaper;

  /**
   * @param settings settings used when building commands for child processes
   * @param problemSink where stderr output captured from child processes gets logged
   */
  public CPythonSpawner(CPythonSettings settings, ProblemSink problemSink) {
    this.settings = settings;
    // Seems a pretty safe assumption for now that we can have a single problem sink for our cpython functions.
    // if this was in a multi-tenant webapp, then we wouldn't be sharing spawners (at least in this form) across
    // tenants/users.
    this.problemSink = problemSink;
    // created last, as the reaper is handed a reference to this (by now otherwise fully initialised) spawner
    this.reaper = new Reaper(this);
  }

  /**
   * Starts cpython process that wraps a user's python script so that it can start accepting function calls.
   *
   * The command is made up of the python command from the settings, the adaptor script, a local copy of the user's
   * script, the project root path and finally any scriptArgs.  Once spawned, a boolean is read from the child's
   * stdout - anything other than true is treated as a failure to start.  A child that fails to start is destroyed
   * rather than being left running untracked.
   *
   * @param errorContext context for an error message.
   * @param userScript the user's code to execute
   * @param project used to work out the project root that is passed to the adaptor script
   * @param scriptArgs arguments that get given to the script - these will get picked up by the adaptor script to prime
   * the serialization
   * @return the started process, which is also tracked by this spawner from now on
   * @throws RiskscapeException if the script can not be made available locally, or the process fails to start
   */
  public CPythonProcess startWrapperScript(Object errorContext,
                                           Resource userScript,
                                           Project project,
                                           String... scriptArgs) {
    List<String> commandParts = settings.newPython3Command();

    commandParts.add(settings.getAdaptorPath().toString());
    try {
      Path localPath = userScript.ensureLocal(Resource.SECURE_OPTIONS, ".py")
          .orElseThrow(problems -> new RiskscapeException(Problems.toSingleProblem(problems)));

      commandParts.add(localPath.toFile().toString());
    } catch (RiskscapeException e) {
      throw new RiskscapeException(
          CPythonProblems.get().failedToLoadScript(errorContext)
              .withChildren(Problems.caught(e))
      );
    }

    URI relativeTo = project.getRelativeTo();
    try {
      // cpython needs a local path for inline functions to import other files.
      // so we use the resource loaders to get a local path if we can
      Optional<Path> localPath = project.getEngine().getResourceFactory()
          .load(relativeTo)
          .getLocal();
      if (localPath.isPresent()) {
        relativeTo = localPath.get().toUri();
      }
    } catch (ResourceLoadingException e) {
      // best effort only - carry on with the project's own location
      log.debug("Could not resolve {} to a local path, using it as-is", relativeTo, e);
    }
    String projectRoot = UriHelper.uriFromLocation(".", relativeTo).get().getPath();

    commandParts.add(projectRoot);

    commandParts.addAll(Arrays.asList(scriptArgs));

    CPythonProcess process = null;
    boolean started = false;
    try {
      process = spawn(userScript, commandParts);
      Object result = Types.BOOLEAN.fromBytes(process.childStdOut);

      if (!Boolean.TRUE.equals(result)) {
        throw new RiskscapeException(CPythonProblems.get().failedToStartScript(
            errorContext,
            process.getErrorText()));
      }
      started = true;
    } catch (IOException e) {
      String errorText;
      if (process == null) {
        errorText = e.getMessage();
      } else {
        errorText = process.getErrorText();
      }
      throw new RiskscapeException(CPythonProblems.get().failedToStartScript(
          errorContext,
          errorText), e);
    } finally {
      // a child that didn't start properly never makes it in to the process list, so nothing else would ever clean it
      // up.  NB this runs after the error text has been collected above - a destroyed process reports no error text
      if (!started && process != null) {
        process.destroy();
      }
    }

    synchronized (processes) {
      processes.add(process);
    }

    return process;
  }


  /**
   * Starts a cpython process that runs a user's python script via the exec adaptor script.
   *
   * The command is made up of the python command from the settings, the exec adaptor script, a local copy of the
   * user's script and finally any scriptArgs.  Once spawned, a boolean is read from the child's stdout - anything
   * other than true is treated as a failure to start.  A child that fails to start is destroyed rather than being
   * left running untracked.
   *
   * @param errorContext context for an error message.
   * @param userScript the user's code to execute
   * @param scriptArgs arguments that are appended to the command after the user script's path
   * @return the started process, which is also tracked by this spawner from now on
   * @throws RiskscapeException if the script can not be made available locally, or the process fails to start
   */
  public CPythonProcess startExecScript(Object errorContext, Resource userScript, String... scriptArgs) {
    List<String> commandParts = settings.newPython3Command();

    commandParts.add(settings.getExecAdaptorPath().toString());
    try {
      Path localPath = userScript.ensureLocal(Resource.SECURE_OPTIONS)
          .orElseThrow(problems -> new RiskscapeException(Problems.toSingleProblem(problems)));

      commandParts.add(localPath.toFile().toString());
    } catch (RiskscapeException e) {
      throw new RiskscapeException(
          CPythonProblems.get().failedToLoadScript(errorContext)
              .withChildren(Problems.caught(e))
      );
    }

    commandParts.addAll(Arrays.asList(scriptArgs));

    CPythonProcess process = null;
    boolean started = false;
    try {
      process = spawn(userScript, commandParts);
      Object result = Types.BOOLEAN.fromBytes(process.childStdOut);

      if (!Boolean.TRUE.equals(result)) {
        throw new RiskscapeException(CPythonProblems.get().failedToStartScript(
            errorContext,
            process.getErrorText()));
      }
      started = true;
    } catch (IOException e) {
      String errorText;
      if (process == null) {
        errorText = e.getMessage();
      } else {
        errorText = process.getErrorText();
      }
      throw new RiskscapeException(CPythonProblems.get().failedToStartScript(
          errorContext,
          errorText), e);
    } finally {
      // a child that didn't start properly never makes it in to the process list, so nothing else would ever clean it
      // up.  NB this runs after the error text has been collected above - a destroyed process reports no error text
      if (!started && process != null) {
        process.destroy();
      }
    }

    synchronized (processes) {
      processes.add(process);
    }

    return process;
  }

  /**
   * Launches the given command as a child process and wraps it up as a {@link CPythonProcess}.  The new process is
   * not added to the list of tracked processes here.
   *
   * @param scriptDescription the script that the new process is associated with
   * @param commandParts the executable followed by its arguments
   * @throws IOException if the process could not be started
   */
  CPythonProcess spawn(Resource scriptDescription, List<String> commandParts) throws IOException {
    log.info("Launching command {}", commandParts);

    String[] argv = commandParts.toArray(new String[0]);
    return new CPythonProcess(Runtime.getRuntime().exec(argv), scriptDescription);
  }

  /**
   * Does a process check, removing dead processes and emptying out stderr.  Each line of captured stderr is logged
   * to the problem sink against the script that produced it.
   *
   * Package scoped so that the reaper can call this method
   *
   * @return true if no output was captured
   */
  synchronized boolean checkProcesses() {
    // avoid holding the process list's lock while doing the check - we might get new processes being added after this
    // lock is released, but this is the only code that removes individual processes (and is meant to be called from
    // within the mutex)
    List<CPythonProcess> snapshot;
    synchronized (processes) {
      snapshot = new LinkedList<>(processes);
    }

    boolean noOutputCaptured = true;
    for (CPythonProcess toCheck : snapshot) {
      // liveness has to be sampled *before* stderr is drained.  Doing it the other way around leaves a window where
      // the process can write its last words and exit in between the two checks, in which case it would get removed
      // below without that output ever being reported
      boolean dead = !toCheck.isAlive();

      if (toCheck.hasErrorText()) {
        // NB we used to read from stderr only once stdout had been read "to completion", which had
        // the potential to deadlock if the python script fill's stderr's buffer (36kb according to a quick test
        // on my dev machine, more like 1kb on windows)
        String errorText = toCheck.getErrorText();
        for (String line : errorText.split(OsUtils.LINE_SEPARATOR)) {
          problemSink.log(CPythonProblems.get().functionOutput(toCheck.getUserScript(), line.trim()));
        }
        noOutputCaptured = false;
      }

      if (dead) {
        // avoid showing a warning if we destroyed the process forcibly - it'll have a non-zero exit value in that case
        // (and it might not have finished dying yet, so asking for an exit value isn't safe either)
        if (!toCheck.wasDestroyed()) {
          int exitValue = toCheck.child.exitValue();
          if (exitValue != 0) {
            log.warn("Non-zero exit status ({}) from Cpython process {}", exitValue, toCheck);
          }
        }
        synchronized (processes) {
          log.info("Removing dead cpython process {}", toCheck);
          processes.remove(toCheck);
        }
      }
    }

    return noOutputCaptured;
  }

  /**
   * Runs process checks on the calling thread, over and over, until a check comes back having captured no stderr
   * output.  This is a good way to ensure that any stderr output or processes have been reaped before doing
   * something.
   */
  public void checkForOutputAndWait() {
    boolean nothingCaptured;
    do {
      // a check that captured output might have more to follow, so go around again
      nothingCaptured = checkProcesses();
    } while (!nothingCaptured);
  }

  /**
   * A non-blocking alternative to {@link #checkForOutputAndWait()}.  Wakes up the reaper thread, but doesn't wait for
   * a check before returning.
   */
  public void checkForOutput() {
    // NOTE(review): presumably the woken reaper goes on to call checkProcesses() - Reaper is defined elsewhere
    reaper.wakeup();
  }

  /**
   * Forcibly destroys every process that is currently being tracked, then forgets about all of them.  The process
   * list's lock is held for the whole operation.
   */
  public void killAll() {
    synchronized (processes) {
      processes.forEach(doomed -> {
        log.info("forcibly destroying {}", doomed);
        doomed.destroy();
      });
      // nothing left to track after that
      processes.clear();
    }
  }

  /**
   * Converts a URI in to a file path string, by way of {@link Paths#get(URI)} and {@link java.io.File#toString()}.
   *
   * NB nothing within this class calls this method at present.
   *
   * @param uri the location to convert - must be something that {@link Paths#get(URI)} accepts
   * @return a filepath String that's safe to use in commands.
   */
  private String pathToExecute(URI uri) {
    return Paths.get(uri).toFile().toString();
  }

  /**
   * Destroys this spawner by telling the reaper to stop.  You can't use the spawner again.
   *
   * NOTE(review): the only thing done here is the call to reaper.stopReaping() - killing all of the child processes
   * and waiting for the reaper thread to exit are presumably taken care of by the Reaper itself; confirm against
   * Reaper.
   */
  public void destroy() {
    reaper.stopReaping();
  }
}
