/*
 * RiskScape™ Copyright New Zealand Institute for Earth Science Limited
 * (Earth Sciences New Zealand) is distributed for research purposes only
 * under the terms of AGPLv3.
 *
 * RiskScape™ Copyright 2025 New Zealand Institute for Earth Science
 * Limited (Earth Sciences New Zealand). All rights reserved. Source code
 * available under the AGPLv3.
 * 
 * This program is free software: you can redistribute it and/or modify it under
 *  the terms of the GNU Affero General Public License as published by the Free
 *  Software Foundation, either version 3 of the License, or (at your option) any
 *  later version.
 * 
 * This program is distributed for RESEARCH PURPOSES ONLY, in the hope that it will
 * be useful for research and education initiatives.
 * 
 * If you are not a researcher, or you are a researcher who wishes to use this
 * program on terms other than AGPLv3 (including those who wish to restrict the
 * distribution of any source code created using this program), please contact:
 * https://riskscape.org.nz
 * 
 * This program is distributed WITHOUT ANY WARRANTY; without even the implied
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Affero General Public License for more details.  You should have received a copy
 * of the GNU Affero General Public License along with this program.  If not, see
 * <http://www.gnu.org/licenses/>.
 * 
 * By way of summary only, under the AGPLv3:
 *     • Permissions of this strongest copyleft license are conditioned
 *       on making available complete source code of licensed works and
 *       modifications, which include larger works using a licensed work,
 *       under the same license.
 *     • Copyright and license notices must be preserved.
 *     • Contributors provide an express grant of patent rights.
 *     • When a modified version is used to provide a service over a
 *       network, the complete source code of the modified version must be made
 *       available.
 */
package nz.org.riskscape.engine.output;


import static org.geotools.data.shapefile.files.ShpFileType.*;
import static org.geotools.referencing.wkt.Formattable.SINGLE_LINE;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.Charset;
import java.util.LinkedList;
import java.util.List;
import java.util.Optional;
import java.util.TimeZone;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.geotools.data.ShapefileWriting;
import org.geotools.data.shapefile.dbf.DbaseFileException;
import org.geotools.data.shapefile.dbf.DbaseFileHeader;
import org.geotools.data.shapefile.dbf.DbaseFileWriter;
import org.geotools.data.shapefile.files.ShpFiles;
import org.geotools.data.shapefile.files.StorageFile;
import org.geotools.data.shapefile.shp.JTSUtilities;
import org.geotools.data.shapefile.shp.ShapeHandler;
import org.geotools.data.shapefile.shp.ShapeType;
import org.geotools.data.shapefile.shp.ShapefileException;
import org.geotools.data.shapefile.shp.ShapefileWriter;
import org.geotools.referencing.wkt.Formattable;
import org.locationtech.jts.geom.Envelope;
import org.locationtech.jts.geom.Geometry;
import org.locationtech.jts.geom.GeometryFactory;
import org.locationtech.jts.geom.LineString;
import org.locationtech.jts.geom.Polygon;
import org.locationtech.jts.io.WKTWriter;
import org.geotools.api.referencing.crs.CoordinateReferenceSystem;

import lombok.Getter;
import lombok.Value;
import lombok.extern.slf4j.Slf4j;
import nz.org.riskscape.engine.GeometryProblems;
import nz.org.riskscape.engine.RiskscapeException;
import nz.org.riskscape.engine.RiskscapeIOException;
import nz.org.riskscape.engine.SRIDSet;
import nz.org.riskscape.engine.Tuple;
import nz.org.riskscape.engine.types.Geom;
import nz.org.riskscape.engine.types.GeomType;
import nz.org.riskscape.engine.types.MultiGeom;
import nz.org.riskscape.engine.types.Nullable;
import nz.org.riskscape.engine.types.Struct;
import nz.org.riskscape.engine.types.Struct.StructMember;
import nz.org.riskscape.engine.types.Type;
import nz.org.riskscape.problem.Problem;
import nz.org.riskscape.problem.ProblemSink;
import nz.org.riskscape.problem.StandardCodes;

/**
 * A {@link RiskscapeWriter} that talks to geotools's codecs to avoid the performance impact of using all the feature
 * source diffing code and index generation.
 *
 * Writing out > 300k rows with the ShapefileWriter began to slow things down, eventually grinding to
 * a halt at about 550k rows due to a slight mismatch and possible misuse of the
 * Geotools ContentStore API.
 *
 * See {@link org.geotools.data.shapefile.ShapefileFeatureWriter} for more details/reference, as a lot of
 * the behaviour / details of how the codecs work were figured out from there.
 *
 * Varies from the previous writer by dropping column prefixes from parent struct.
 *
 * TODO
 *  * add in file size checks from other code
 */
@Slf4j
public class ShapefileWriter2 extends RiskscapeWriter {

  /**
   * Bookmark recording how far we got through a {@link Struct} before descending into one of its nested
   * structs. One of these is pushed each time the writer recurses, and popped once the nested struct has been
   * fully processed, so that writing can resume in the parent.
   */
  @Value
  private static class WritePosition {
    // index of the next member to process in the parent tuple once we resume
    private final int rowIndex;
    // the parent tuple that was being written when we descended
    private final Tuple value;
    // the struct type that describes the parent tuple's members
    private final Struct structType;
  }

  // used for extracting _1 suffixes from column names: group 1 is the stem, group 2 is the numeric ordinal
  private static final Pattern ORDINAL_PATTERN = Pattern.compile("(.+)_([0-9]+)");

  // converts any geometry attribute that is not the designated shapefile geometry in to WKT text for the dbf file
  private final WKTWriter writer = new WKTWriter();

  /**
   * The number of bytes added to the running shp length for every record written, on top of the length of the
   * geometry itself. NOTE(review): presumably this is the fixed-size shp record header - confirm against the
   * shapefile spec.
   */
  private static final short SHP_RECORD_SEPARATOR_BYTES = 8;

  /**
   * The number of content bytes counted towards the running shp length when a record has a null geometry.
   */
  private static final short SHP_NULL_GEOM_BYTES = 4;

  // shapefile dbf column name limit is 10
  private static final int MAX_COLUMN_NAME_LENGTH = 10;

  // used to look up CRSs by srid (and vice versa) and to reproject geometry
  private final SRIDSet sridSet;

  /**
   * When true the .prj file's WKT is written on a single line, otherwise it is indented by 2.
   * See https://gitlab.catalyst.net.nz/riskscape/riskscape/-/issues/1346
   */
  private final boolean prjSingleLine;

  final ShpFiles shpFiles;
  final DbaseFileWriter dbfWriter;
  final DbaseFileHeader dbfHeader;
  final org.geotools.data.shapefile.shp.ShapefileWriter shpWriter;
  // handed to the DbaseFileWriter when it is created
  protected TimeZone timeZone = TimeZone.getDefault();
  // handed to the DbaseFileWriter when it is created, and its name is what gets written to the .cpg file
  protected Charset charset = Charset.defaultCharset();

  // updated as we go for writing out a header on close
  private int recordsWritten = 0;
  // the number of attributes that get written to the dbase file
  final int dbfRowLength;

  // memoized to avoid allocation per call to write()
  private final Object[] dbfRow;

  // a stack to use when traversing a nested struct
  private final LinkedList<WritePosition> arrayStack = new LinkedList<>();

  // the traversal order index of the geometry column that is written via the shpWriter (and not to the dbf).
  // Stays at -1 if none is found
  int geomIndex = -1;

  // an array of booleans that indicate whether we need to toString these attribute values in order for the dbf file
  // to represent them
  private final Boolean[] toStringThese;

  // the bounds of the dataset we've written - is expanded as we write
  private Envelope bounds = new Envelope();

  // shapefile specific geometry type. Stays null until the first geometry (or null geometry) has been written
  private ShapeType shapeType;

  // counts the size in bytes of the records written to the shp file - used for the file length in the header
  private int shapefileLength;

  // where dbf gets written to - we keep a reference to it to support rewinding and then writing header at the end
  final FileChannel dbfChannel;

  // needed for getting a shape type handler
  private GeometryFactory geometryFactory = new GeometryFactory();
  // used to measure the encoded length of each geometry so we can tally shapefileLength
  private ShapeHandler handler;

  // the storage files that output is written to first - they get swapped over the real files on close
  private StorageFile shpStorageFile;
  private StorageFile dbfStorageFile;
  private StorageFile shxStorageFile;
  private StorageFile prjStorageFile;
  private StorageFile cpgStorageFile;

  // the srid of the first non-null geometry written. Stays at -1 until one has been seen
  private int srid = -1;
  // a utility for swapping the xy axis in geometries, should that be required.
  private AxisSwapper axisSwapper = null;

  // The type that the geometry attribute is coming from. We may need this to infer the actual
  // geometry type from if the first tuple written has a null geometry
  private Type geomType;

  @Getter
  private final URI storedAt;

  // where user-facing warnings (e.g. about reprojection) are sent
  private final ProblemSink problemSink;

  // ensures the mixed-crs reprojection warning is only logged once per file
  private boolean loggedReprojection = false;

  // applied to every attribute value bound for the dbf file before it is written
  ShapeFileNullMapper nullMapper = new ShapeFileNullMapper();

  /**
   * Convenience constructor that delegates to the five argument constructor with {@code prjSingleLine} set to
   * {@code false}.
   */
  // Left this way to minimise churn in the tests
  public ShapefileWriter2(File shpPath, Struct type, SRIDSet sridSet, ProblemSink problemSink) throws IOException {
    this(shpPath, type, sridSet, problemSink, false);
  }

  /**
   * Creates a writer that will produce a shapefile (and its dbf, shx, prj and cpg companions) at the given path.
   *
   * The struct is validated, and the dbf header built, before any storage files or channels are opened. That way
   * a type that can not be written (e.g. it has no geometry member) fails without leaving temporary files and
   * open channels behind.
   *
   * @param shpPath where the .shp file should end up
   * @param type the type of the tuples that will be given to {@link #write(Tuple)}. Nested structs are flattened.
   * @param sridSet used for CRS lookups and reprojection
   * @param problemSink receives any warnings produced while writing
   * @param prjSingleLine true to write the .prj file's WKT on a single line
   * @throws IOException if the output files could not be opened for writing
   * @throws RiskscapeException if the type (including any nested structs) contains no geometry member
   */
  public ShapefileWriter2(File shpPath,
                          Struct type,
                          SRIDSet sridSet,
                          ProblemSink problemSink,
                          boolean prjSingleLine) throws IOException {
    this.sridSet = sridSet;
    this.problemSink = problemSink;
    this.storedAt = shpPath.toURI();
    this.shpFiles = new ShpFiles(shpPath);
    this.prjSingleLine = prjSingleLine;

    // work out the dbf layout first - this doesn't touch the file system, so nothing needs cleaning up if the
    // type turns out to be unusable
    dbfHeader = createDbaseHeader();
    List<Boolean> toStringList = new LinkedList<>();
    List<String> colsSoFar = new LinkedList<>();
    dbfRowLength = calculateDbfRowLength(0, type, toStringList, colsSoFar);

    if (geomIndex == -1) {
      // There is no geometry member. You can't have a spatial file with no geometry
      // Note we don't use TupleUtils.findGeometry for this as that will not look into nested structs.
      // Which is what happens in this writer.
      throw new RiskscapeException(Problem.error(StandardCodes.GEOMETRY_REQUIRED, type));
    }

    dbfRow = new Object[dbfRowLength];
    toStringThese = toStringList.toArray(new Boolean[toStringList.size()]);

    // the type is good, now we can start creating files
    shpStorageFile = shpFiles.getStorageFile(SHP);
    dbfStorageFile = shpFiles.getStorageFile(DBF);
    shxStorageFile = shpFiles.getStorageFile(SHX);
    prjStorageFile = shpFiles.getStorageFile(PRJ);
    cpgStorageFile = shpFiles.getStorageFile(CPG);

    dbfChannel = dbfStorageFile.getWriteChannel();
    shpWriter = createShpWriter();
    dbfWriter = createDbaseWriter();
  }

  /**
   * Factory for the dbf header that columns get added to. Protected so that a subclass can supply its own.
   *
   * @return a new, empty header
   */
  protected DbaseFileHeader createDbaseHeader() {
    DbaseFileHeader emptyHeader = new DbaseFileHeader();
    return emptyHeader;
  }

  /**
   * Factory for the geotools writer that produces the shp and shx files. Protected so that a subclass can supply
   * its own.
   *
   * @return a writer backed by the write channels of the shp and shx storage files
   * @throws IOException if either write channel could not be opened
   */
  protected ShapefileWriter createShpWriter() throws IOException {
    FileChannel shpChannel = shpStorageFile.getWriteChannel();
    FileChannel shxChannel = shxStorageFile.getWriteChannel();
    return new ShapefileWriter(shpChannel, shxChannel);
  }

  /**
   * Factory for the geotools writer that produces the dbf file. Protected so that a subclass can supply its own.
   *
   * @return a writer built from this writer's dbf header, dbf channel, charset and time zone
   * @throws IOException if the dbf writer could not be created
   */
  protected DbaseFileWriter createDbaseWriter() throws IOException {
    DbaseFileWriter attributeWriter = new DbaseFileWriter(dbfHeader, dbfChannel, charset, timeZone);
    return attributeWriter;
  }

  /**
   * Walks the given struct (descending in to nested structs) and registers a dbf column for every leaf member
   * other than the designated geometry. As a side effect this sets {@link #geomIndex} and {@link #geomType} when
   * the first geometry member is encountered.
   *
   * @param startAt the number of leaf members already visited before this struct, so that the geometry's traversal
   * index can be computed across recursive calls
   * @param type the struct to walk
   * @param toStringList appended to for each dbf column added, with the flag returned from adding that column
   * @param namesSoFar the dbf column names that have been used so far, appended to as columns are added
   * @return the number of members in the type (and its nested structs) that were added to the dbf file
   */
  private int calculateDbfRowLength(int startAt, Struct type, List<Boolean> toStringList, List<String> namesSoFar) {
    int added = 0;

    for (StructMember member : type.getMembers()) {
      Type memberType = member.getType();

      Optional<Struct> nested = memberType.findAllowNull(Struct.class);
      if (nested.isPresent()) {
        // flatten the nested struct's members in to this row
        added += calculateDbfRowLength(startAt + added, nested.get(), toStringList, namesSoFar);
        continue;
      }

      // we only designate one geometry member to go in to the shapefile - the first one we come across
      boolean designatedGeometry = geomIndex == -1 && memberType.findAllowNull(Geom.class).isPresent();
      if (designatedGeometry) {
        geomIndex = startAt + added;
        geomType = Nullable.strip(memberType);
        continue;
      }

      try {
        toStringList.add(addColumn(member, namesSoFar));
      } catch (DbaseFileException e) {
        throw new RuntimeException(e);
      }
      added++;
    }

    return added;
  }

  /**
   * Registers a dbf column for the given member, using a name that has been made safe and unique via
   * {@link #adjust(String, List)}. The chosen name is appended to namesSoFar.
   *
   * @return the flag reported by {@link ShapefileWriting#addDbfHeader}. Callers store this flag and
   * {@link #write(Tuple)} toStrings the attribute's values whenever it is true. NOTE(review): confirm against
   * ShapefileWriting that true really means "needs to be toStringed".
   */
  private boolean addColumn(StructMember member, List<String> namesSoFar) throws DbaseFileException {
    Class<?> javaType = Nullable.strip(member.getType()).internalType();

    String safeName = adjust(member.getKey(), namesSoFar);
    namesSoFar.add(safeName);

    return ShapefileWriting.addDbfHeader(javaType, safeName, dbfHeader);
  }

  /**
   * Produces a dbf-friendly version of the given column name: disallowed characters become underscores, the name
   * is cut down to the dbf length limit and, if it is already taken, a numeric suffix is appended/incremented
   * (trimming the stem to make room) until an unused name is found.
   *
   * @param colName the name of the member as it appears in the struct
   * @param namesSoFar the column names that have already been nominated/used
   * @return a name that is not in namesSoFar (this method does not add it)
   */
  String adjust(String colName, List<String> namesSoFar) {
    String candidate = colName;

    // magic column - geotools won't like it
    if (candidate.equals("the_geom")) {
      candidate = "the_geom_1";
    }

    // get rid of any potentially disallowed chars
    candidate = candidate.replaceAll("[^a-zA-Z0-9_]", "_");

    if (candidate.length() > MAX_COLUMN_NAME_LENGTH) {
      candidate = candidate.substring(0, MAX_COLUMN_NAME_LENGTH);
    }

    // keep bumping an ordinal until we hit a free spot
    while (namesSoFar.contains(candidate)) {
      String stem = candidate;
      int ordinal = 1;

      Matcher existingOrdinal = ORDINAL_PATTERN.matcher(candidate);
      if (existingOrdinal.find()) {
        // already has a suffix - carry on counting from there
        stem = existingOrdinal.group(1);
        ordinal = Integer.parseInt(existingOrdinal.group(2)) + 1;
      }

      String suffix = "_" + ordinal;

      // chop the end off the stem if the suffix would push us over the limit
      int overflow = stem.length() + suffix.length() - MAX_COLUMN_NAME_LENGTH;
      if (overflow > 0) {
        stem = stem.substring(0, stem.length() - overflow);
      }

      candidate = stem + suffix;
    }

    return candidate;
  }

  /**
   * Finishes off the shapefile: rewrites the shp/shx and dbf headers now that the totals are known, closes the
   * writers, swaps the temporary storage files over to their final location and then writes the cpg and prj files.
   *
   * The shp and dbf writers are always closed, even if rewriting the headers fails.
   *
   * @throws IOException if the headers could not be written or the files could not be moved in to place. Failure
   * to write the cpg or prj file is logged rather than thrown.
   */
  @Override
  public void close() throws IOException {
    // the fixed size of the header at the start of a shp file
    final int shpHeaderBytes = 100;

    try {
      // re-write shp header and dbf header with totals
      if (shapeType == null) {
        // shapeType (the kind of geometry being written) is not known until the first record is written
        // so if no records have been written, try to infer the shapeType from the given geometry type
        // Just fall back to any arbitrary type, as it's better than using UNDEFINED and blowing up later
        shapeType = shapeTypeFrom(geomType, ShapeType.POINT);
      }
      // the file length in the header has to cover the whole file, i.e. the header itself as well as the records
      // we've been tallying up
      shpWriter.writeHeaders(bounds, shapeType, recordsWritten, shpHeaderBytes + shapefileLength);

      dbfHeader.setNumRecords(recordsWritten);
      dbfChannel.position(0);
      dbfHeader.writeHeader(dbfChannel);
    } finally {
      // make sure we let go of the underlying channels, whether or not the headers got written
      try {
        shpWriter.close();
      } finally {
        dbfWriter.close();
      }
    }

    // they are written out to tmp files first, this swaps them over
    shpStorageFile.replaceOriginal();
    shxStorageFile.replaceOriginal();
    dbfStorageFile.replaceOriginal();

    writeCpg();
    try {
      writePrj();
    } catch (Exception e) {
      log.error("Failed to write prj file {}", prjStorageFile, e);
    }
  }

  /**
   * Writes the .prj file containing the WKT of the CRS that the geometry was written in. Nothing is written if
   * no usable srid could be determined.
   *
   * @throws IOException if the prj file could not be written or moved in to place
   */
  private void writePrj() throws IOException {
    // if we reprojected, write that crs, not the original
    int sridToUse = axisSwapper == null ? srid : sridSet.get(axisSwapper.getNewCrs());

    if (sridToUse == -1) {
      // no srid, so no valid geometry. Fallback to WGS84 - it shouldn't matter because no records
      // were likely written, but this stops RiskScape from complaining when we read the file back later
      sridToUse = sridSet.get(SRIDSet.EPSG4326_LONLAT);
    }

    if (sridToUse <= 0) {
      return;
    }

    CoordinateReferenceSystem crs = sridSet.get(sridToUse);

    // All CRS implementations extend from Formattable. And Formattable.toWKT() will check the prefs for
    // a desired indent. But checking the prefs can cause a spurious warning about bad prefs nodes for
    // some users. So we cast to Formattable and specify the default indent of 2 to avoid that warning.
    // Refer to geotools ShapefileDataStore#toSingleLineWKT(CoordinateReferenceSystem crs) for their
    // implementation of this. (GL373)
    Formattable formattableCRS = (Formattable) crs;

    int indentation = prjSingleLine ? SINGLE_LINE : 2;

    // build the content before opening the file, so a formatting failure leaves no open channel behind
    String crsWKT = formattableCRS.toWKT(indentation);
    ByteBuffer bytes = ByteBuffer.wrap(crsWKT.getBytes());

    // try-with-resources so the channel is closed even if the write fails
    try (FileChannel channel = prjStorageFile.getWriteChannel()) {
      while (bytes.hasRemaining()) {
        channel.write(bytes);
      }
    }

    // only swap the file over once the channel has been closed
    prjStorageFile.replaceOriginal();
  }

  /**
   * Writes the .cpg file, which contains the name of the charset this writer was configured with. This is best
   * effort - any failure is logged and otherwise ignored.
   */
  private void writeCpg() {
    try {
      ByteBuffer bytes = ByteBuffer.wrap(charset.name().getBytes());

      try (FileChannel channel = cpgStorageFile.getWriteChannel()) {
        while (bytes.hasRemaining()) {
          channel.write(bytes);
        }
      }

      // swap the file over only after the channel is closed (same as for the prj file) - moving or deleting a
      // file that is still open is not reliable on all platforms
      cpgStorageFile.replaceOriginal();
    } catch (Exception e) {
      log.error("Failed to write cpg file {}", cpgStorageFile, e);
    }
  }

  /**
   * Writes a single tuple as one shapefile record. The tuple (and any structs nested within it) is walked
   * depth-first without recursion, using {@link #arrayStack} to remember where to resume in a parent once a
   * nested struct is finished. The member at traversal position {@link #geomIndex} is written as the record's
   * geometry; every other leaf member is placed in to {@link #dbfRow}, which is then written to the dbf file.
   *
   * @param value the tuple to write
   */
  @Override
  public void write(Tuple value) {

    // position within the tuple currently being walked
    int rowIndex = 0;
    // counts leaf (non-struct) members visited so far, across all nesting levels. Compared against geomIndex
    int iterIndex = -1;
    // the next free slot in dbfRow
    int dbfIndex = 0;
    // The struct whose attributes are currently being written. If we descend into a nested struct
    // then this field should be updated.
    Struct structType = value.getStruct();

    outerloop:
    while (true) {
      // we've run off the end of the current tuple - resume the parent(s), or finish if there are none left.
      // This is a loop as the parent may itself have been exhausted.
      while (rowIndex >= value.size()) {

        if (arrayStack.isEmpty()) {
          // we are done
          break outerloop;
        } else {
          // pop the stack, son
          WritePosition popped = arrayStack.removeLast();
          rowIndex = popped.rowIndex;
          value = popped.value;
          structType = popped.structType;
        }
      }


      Object attrValue = value.fetch(rowIndex);
      // TODO(perf): the member list is looked up on every iteration - it could be fetched once per struct
      Type valueType = structType.getMembers().get(rowIndex).getType();
      rowIndex++;

      // If this item is a struct type we need to push our current position onto the stack and descend
      // into the nested struct to populate all of its attributes. This includes when it is null, in which
      //  case we need to process an array of null values to pad out the Struct length.
      Optional<Struct> nestedStruct = valueType.findAllowNull(Struct.class);
      if (nestedStruct.isPresent()) {
        Tuple child = (Tuple) attrValue;
        // rowIndex has already been advanced, so we resume at the member after the nested struct
        arrayStack.addLast(new WritePosition(rowIndex, value, structType));
        structType = nestedStruct.get();
        rowIndex = 0;
        value = child == null ? new Tuple(structType) : child;
        continue;
      }

      // Now we know that we will write something, we advance iterIndex to the correct place
      iterIndex++;

      if (iterIndex == geomIndex) {
        // the designated geometry is written via the shp writer and takes no slot in the dbf row
        writeGeom((Geometry) attrValue);
        continue;
      }

      // we apply null value mapping to all members, except the geom member for now.
      // we are skipping geom null mapping in case that messes up the shapeType creation.
      attrValue = nullMapper.mapValueIfNecessary(attrValue, valueType);

      Object toSet;
      if (attrValue instanceof Geometry) {
        // any geometry other than the designated one is stored as WKT text
        toSet = writer.write((Geometry) attrValue);
      } else if (toStringThese[dbfIndex]) {
        toSet = attrValue == null ? null : attrValue.toString();
      } else {
        toSet = attrValue;
      }
      dbfRow[dbfIndex++] = toSet;
    }

    writeDbf();
    recordsWritten++;
  }

  /**
   * Flushes the current contents of {@link #dbfRow} to the dbf file as one record.
   *
   * @throws RiskscapeIOException if the underlying write fails
   */
  private void writeDbf() {
    try {
      dbfWriter.write(dbfRow);
    } catch (IOException writeFailure) {
      throw new RiskscapeIOException("Could not write to dbf file", writeFailure);
    }
  }

  /**
   * Ensures the given geometry is in the CRS that this file is being written in (i.e. the one that ends up in
   * the .prj file), reprojecting it if it is not.
   *
   * @return the geometry as given if no srid has been established yet or it already matches, otherwise a
   * reprojected geometry
   */
  private Geometry reprojectToConsistentCrs(Geometry geom) {
    if (srid == -1) {
      return geom;
    }

    int geomSrid = geom.getSRID();
    if (geomSrid == srid) {
      return geom;
    }

    // it's a little unusual that reprojection is needed at all, but may happen in
    // some corner-cases, e.g. if union steps are used. Warn the user (once only), as there are
    // some CRS corner-cases where the geometry may not be reprojected safely/accurately
    if (!loggedReprojection) {
      boolean identity = sridSet.getReprojectionTransform(geomSrid, srid).isIdentity();
      if (!identity) {
        problemSink.log(GeometryProblems.get().crsMixtureReprojected(storedAt, sridSet.get(srid)));
        loggedReprojection = true;
      }
    }

    return sridSet.reproject(geom, srid);
  }

  /**
   * Writes the geometry part of a record to the shp (and shx) file, while keeping track of the things that are
   * needed for the header that gets written on close (shape type, bounds and total length).
   *
   * The first call also writes a placeholder header. That header must be written with the same shape type as
   * {@link #handler} was created for, as the geotools writer uses the header's shape type to decide how to encode
   * every geometry that follows.
   *
   * @param geom the geometry to write, or null to write a null shape
   * @throws RiskscapeIOException if the geometry could not be written
   */
  private void writeGeom(Geometry geom) {

    if (geom == null) {
      try {
        if (recordsWritten == 0) {
          // The first geometry is null. We have to try to guess the shape type from the type system.
          // This will cause problems if we don't know the actual geometry sub-type.
          shapeType = shapeTypeFrom(geomType);
          handler = shapeType.getShapeHandler(geometryFactory);
          // placeholder header, rewritten on close. Use the inferred shape type (not an arbitrary one) so that the
          // writer encodes any later non-null geometries with the right handler
          shpWriter.writeHeaders(new Envelope(), shapeType, 0, 0);
        }
        // magic number copied from ShapefileFeatureWriter
        shapefileLength += (SHP_NULL_GEOM_BYTES + SHP_RECORD_SEPARATOR_BYTES);
        shpWriter.writeGeometry(null);
      } catch (IOException e) {
        throw new RiskscapeIOException("Could not write geometry to shp file", e);
      }
      return;
    }

    // the first time we see the geometry, we have to set up some writing code specific to the type of geometry in
    // the output.  If it ends up being different types of geometry, it's going to choke - shapefile doesn't like it
    if (shapeType == null) {
      int dims = JTSUtilities.guessCoorinateDims(geom.getCoordinates());
      try {
        shapeType = JTSUtilities.getShapeType(geom, dims);
        handler = shapeType.getShapeHandler(geometryFactory);
      } catch (ShapefileException ex) {
        // who knows...
        throw new RuntimeException(ex);
      }

      try {
        // we come back and do this properly at the end - despite the presence of a skipHeaders method, it's no good
        shpWriter.writeHeaders(new Envelope(), shapeType, 0, 0);
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }

    if (srid == -1) {
      // this is the first geom we have come across. We need to set up the SRID.
      srid = geom.getSRID();

      // shapefiles are expected to be XY (long lat), despite what the crs says.
      if (srid > 0) {
        axisSwapper = AxisSwapper.getForceXY(sridSet.get(srid), getStoredAt(), problemSink).orElse(null);
      }
    }

    // sanity-check everything is in a consistent CRS
    geom = reprojectToConsistentCrs(geom);

    if (axisSwapper != null) {
      geom = axisSwapper.swapAxis(geom);
    }

    // bounds calculations (needed for the header) - done after any reprojection/axis swapping so that the bounds
    // are in the same CRS as the geometry that is written
    Envelope env = geom.getEnvelopeInternal();
    if (!env.isNull()) {
      bounds.expandToInclude(env);
    }

    // shapefile puts line strings and polygons in a geometry collection (but not points)
    if (geom instanceof LineString) {
      geom = geom.getFactory().createMultiLineString(new LineString[] {(LineString) geom});
    } else if (geom instanceof Polygon) {
      geom = geom.getFactory().createMultiPolygon(new Polygon[] {(Polygon) geom});
    }

    // tally length (also needed for the header)
    shapefileLength += (handler.getLength(geom) + SHP_RECORD_SEPARATOR_BYTES);

    try {
      shpWriter.writeGeometry(geom);
    } catch (IOException e) {
      throw new RiskscapeIOException("Could not write geometry to shp file", e);
    }
  }

  /**
   * Derives the {@link ShapeType} from the type system alone, giving {@link ShapeType#UNDEFINED} when the type
   * doesn't say what kind of geometry it holds (which may be the case if some function has widened it to
   * Types.GEOMETRY etc).
   *
   * Only use this when there is no actual geometry available to inspect.
   *
   * @param type the riskscape type of the geometry member
   * @return the matching shape type, or {@link ShapeType#UNDEFINED} if it can't be determined from the type
   * @throws ShapefileException if geotools can not map the geometry class to a shape type
   */
  private ShapeType shapeTypeFrom(Type type) throws ShapefileException {
    // we really don't know. This is probably going to blow up later.
    return shapeTypeFrom(type, ShapeType.UNDEFINED);
  }

  /**
   * Derives the {@link ShapeType} from the type system alone. A {@link MultiGeom} type is looked for first, then
   * a {@link GeomType}, and the internal (JTS) class of whichever is found is mapped to a shape type.
   *
   * @param type the riskscape type of the geometry member
   * @param orElse what to return when the type contains neither a {@link MultiGeom} nor a {@link GeomType}
   * @return the matching shape type, or orElse
   * @throws ShapefileException if geotools can not map the geometry class to a shape type
   */
  @SuppressWarnings({ "rawtypes", "unchecked" })
  private ShapeType shapeTypeFrom(Type type, ShapeType orElse) throws ShapefileException {
    Optional<Class> fromMulti = type.find(MultiGeom.class)
        .map(MultiGeom::internalType);

    // only bother looking for a single geometry type if there was no multi geometry type
    Optional<Class> geometryClass = fromMulti;
    if (!geometryClass.isPresent()) {
      geometryClass = type.find(GeomType.class)
          .map(GeomType::internalType);
    }

    if (!geometryClass.isPresent()) {
      return orElse;
    }
    return JTSUtilities.getShapeType(geometryClass.get());
  }
}
