/*
 * RiskScape™ Copyright New Zealand Institute for Earth Science Limited
 * (Earth Sciences New Zealand) is distributed for research purposes only
 * under the terms of AGPLv3.
 *
 * RiskScape™ Copyright 2025 New Zealand Institute for Earth Science
 * Limited (Earth Sciences New Zealand). All rights reserved. Source code
 * available under the AGPLv3.
 * 
 * This program is free software: you can redistribute it and/or modify it under
 *  the terms of the GNU Affero General Public License as published by the Free
 *  Software Foundation, either version 3 of the License, or (at your option) any
 *  later version.
 * 
 * This program is distributed for RESEARCH PURPOSES ONLY, in the hope that it will
 * be useful for research and education initiatives.
 * 
 * If you are not a researcher, or you are a researcher who wishes to use this
 * program on terms other than AGPLv3 (including those who wish to restrict the
 * distribution of any source code created using this program), please contact:
 * https://riskscape.org.nz
 * 
 * This program is distributed WITHOUT ANY WARRANTY; without even the implied
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Affero General Public License for more details.  You should have received a copy
 * of the GNU Affero General Public License along with this program.  If not, see
 * <http://www.gnu.org/licenses/>.
 * 
 * By way of summary only, under the AGPLv3:
 *     • Permissions of this strongest copyleft license are conditioned
 *       on making available complete source code of licensed works and
 *       modifications, which include larger works using a licensed work,
 *       under the same license.
 *     • Copyright and license notices must be preserved.
 *     • Contributors provide an express grant of patent rights.
 *     • When a modified version is used to provide a service over a
 *       network, the complete source code of the modified version must be made
 *       available.
 */
package nz.org.riskscape.engine.relation;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URL;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.function.Supplier;
import java.util.stream.Collectors;

import org.apache.commons.io.ByteOrderMark;
import org.apache.commons.io.input.BOMInputStream;

import com.csvreader.CsvReader;

import lombok.Getter;
import lombok.NonNull;
import nz.org.riskscape.engine.RiskscapeIOException;
import nz.org.riskscape.engine.Tuple;
import nz.org.riskscape.engine.types.Struct;
import nz.org.riskscape.engine.types.Struct.StructBuilder;
import nz.org.riskscape.engine.types.Types;

/**
 * Native CSV support for a relation.  Mostly here to allow CSV files without geometry to be used in queries in support
 * of the table model template - https://bugs.riskscape.org.nz/issues/192
 *
 * <p>Every column is exposed as {@link Types#TEXT}; member names are taken from the CSV header row. The data source is
 * re-opened (via a {@link Supplier}) each time the relation is iterated or sized, so a relation can be read more than
 * once.
 */
public class CsvRelation extends BaseRelation {

  /**
   * Name of magic line number struct member that is included in {@link Tuple}s when a {@link CsvRelation} is
   * constructed with includeLineNumbers.
   */
  public static final String LINE_NUM_KEY = "__linenum";

  /**
   * Byte order marks that are looked for (and stripped) at the start of the input. When one is found, its charset is
   * used to decode the input.
   */
  public static final ByteOrderMark[] ACCEPTED_BOMS = {
    ByteOrderMark.UTF_8,
    ByteOrderMark.UTF_16LE,
    ByteOrderMark.UTF_16BE,
    ByteOrderMark.UTF_32LE,
    ByteOrderMark.UTF_32BE
  };

  /**
   * Construct a new {@link CsvRelation} from the given url, without line numbers.
   */
  public static CsvRelation fromUrl(URL url) {
    return fromUrl(url, false);
  }

  /**
   * Construct a new {@link CsvRelation} from the given url
   *
   * @param url location of the CSV data; it is opened once here to read the header and again for each iteration
   * @param includeLineNumbers whether to append a {@link #LINE_NUM_KEY} member to each tuple
   * @throws RiskscapeIOException if the url can not be opened or the header can not be read
   */
  public static CsvRelation fromUrl(URL url, boolean includeLineNumbers) {
    Supplier<Reader> supplier = () -> {
      try {
        return createInputStreamReader(url.openStream());
      } catch (IOException e) {
        throw new RiskscapeIOException("Failed to open stream from url " + url, e);
      }
    };

    String description = url.toString();
    return new CsvRelation(description, inferTypeAndClose(supplier, description), includeLineNumbers, supplier);
  }

  /**
   * Construct a new {@link CsvRelation} from the given bytes, without line numbers.
   */
  public static CsvRelation fromBytes(byte[] bytes) {
    return fromBytes(bytes, false);
  }

  /**
   * Construct a new {@link CsvRelation} from the given bytes. If the bytes start with one of the
   * {@link #ACCEPTED_BOMS} then the matching character set is used, otherwise they are assumed to be in the platform
   * default character set.
   *
   * @param bytes the raw CSV content, including the header row
   * @param includeLineNumbers whether to append a {@link #LINE_NUM_KEY} member to each tuple
   * @throws RiskscapeIOException if the header can not be read
   */
  public static CsvRelation fromBytes(byte[] bytes, boolean includeLineNumbers) {
    Supplier<Reader> supplier = () -> createInputStreamReader(new ByteArrayInputStream(bytes));
    String description = "<anonymous>";
    return new CsvRelation(description, inferTypeAndClose(supplier, description), includeLineNumbers, supplier);
  }

  /**
   * Build a {@link Struct} with one {@link Types#TEXT} member per CSV header, in header order.
   *
   * <p>The given reader is consumed but NOT closed - that remains the caller's responsibility.
   *
   * @param in reader positioned at the start of the CSV content
   * @throws RiskscapeIOException if the headers can not be read
   */
  public static Struct inferType(Reader in) {

    CsvReader reader = createReader(in);
    try {
      List<String> keys = Arrays.stream(reader.getHeaders()).collect(Collectors.toList());

      StructBuilder builder = Struct.builder();
      for (String key : keys) {
        builder.add(key, Types.TEXT);
      }

      return builder.build();
    } catch (IOException e) {
      throw new RiskscapeIOException("Failed to read values from csv input", e);
    }
  }

  /**
   * Opens a fresh reader from the supplier, infers the type from its header row and then closes the reader again, so
   * that constructing a relation does not leave a stream open.
   */
  private static Struct inferTypeAndClose(Supplier<Reader> supplier, String sourceDescription) {
    try (Reader in = supplier.get()) {
      return inferType(in);
    } catch (IOException e) {
      // only close() can raise a checked IOException here - inferType wraps its own failures
      throw new RiskscapeIOException("Failed to close csv input " + sourceDescription, e);
    }
  }

  /**
   * Wraps the given stream in a reader, using the charset indicated by a leading byte order mark if there is one, or
   * the platform default charset if there isn't. The stream is closed if the encoding can not be determined.
   */
  private static InputStreamReader createInputStreamReader(InputStream in) {
    BOMInputStream bomIn = new BOMInputStream(in, ACCEPTED_BOMS);
    try {
      String charset = bomIn.getBOMCharsetName();
      if (charset != null) {
        return new InputStreamReader(bomIn, charset);
      } else {
        return new InputStreamReader(bomIn);
      }
    } catch (IOException e) {
      // nothing is going to be returned that the caller could close, so release the stream here
      try {
        bomIn.close();
      } catch (IOException closeFailure) {
        e.addSuppressed(closeFailure);
      }
      throw new RiskscapeIOException("Failed to parse csv input file encoding", e);
    }
  }

  /**
   * Creates a {@link CsvReader} over the given reader and consumes the header row, leaving the reader positioned at
   * the first data record.
   */
  private static CsvReader createReader(Reader in) {
    CsvReader reader = new CsvReader(in);
    try {
      reader.readHeaders();
    } catch (IOException e) {
      throw new RiskscapeIOException("Failed to read headers from csv input", e);
    }

    reader.setSkipEmptyRecords(false); // otherwise our row count gets messed up
    reader.setCaptureRawRecord(true); // the raw record is used to spot blank lines

    return reader;
  }

  /**
   * Appends the {@link #LINE_NUM_KEY} member to the given type when line numbers have been asked for.
   */
  private static Struct addLineNumbers(Struct type, boolean includeLineNumbers) {
    return includeLineNumbers ? type.add(LINE_NUM_KEY, Types.INTEGER) : type;
  }

  /** Produces a new reader, positioned at the start of the CSV content, each time it is called. */
  private final Supplier<Reader> dataSource;

  @Getter
  private final String sourceDescription;

  private final boolean includeLineNumbers;

  /** Lazily computed result of {@link #size()}, null until first asked for. */
  private Long totalRows = null;

  private CsvRelation(
      @NonNull String sourceDescription,
      @NonNull Struct type,
      boolean includeLineNumbers,
      @NonNull Supplier<Reader> dataSource) {

    super(addLineNumbers(type, includeLineNumbers));
    this.includeLineNumbers = includeLineNumbers;
    this.sourceDescription = sourceDescription;
    this.dataSource = dataSource;
  }


  /**
   * Constructor used by {@link #clone(BaseRelation.Fields)} - the type comes from the cloned fields, so no line
   * number member is added here.
   */
  private CsvRelation(
      @NonNull BaseRelation.Fields clonedFields,
      @NonNull String sourceDescription,
      boolean includeLineNumbers,
      @NonNull Supplier<Reader> dataSource) {
    super(clonedFields);

    this.includeLineNumbers = includeLineNumbers;
    this.sourceDescription = sourceDescription;
    this.dataSource = dataSource;
  }

  /**
   * Reads tuples from a freshly opened reader, one CSV record at a time.
   */
  private class Iterator extends PeekingTupleIterator {

    private final Struct type = getRawType();
    // the line number member is part of the type, but is not a column in the csv
    private final int numMembers = includeLineNumbers ? type.getMembers().size() - 1 : type.getMembers().size();
    private final CsvReader csvReader;
    private final Reader csvIn;
    private long lineIndex = 1; // first row is the header, which is read on construction

    Iterator() {
      Reader in = dataSource.get();
      CsvReader reader;
      try {
        reader = createReader(in);
      } catch (RuntimeException e) {
        // the iterator never gets handed out if this fails, so nobody else can close the reader
        try {
          in.close();
        } catch (IOException closeFailure) {
          e.addSuppressed(closeFailure);
        }
        throw e;
      }
      csvIn = in;
      csvReader = reader;
    }

    @Override
    protected String getSource() {
      return getSourceDescription();
    }

    /**
     * @return the next tuple, or null once the input is exhausted. Records whose raw text is blank are skipped.
     * @throws InvalidTupleException if a record does not have the same number of values as the header
     * @throws RelationIOException if the underlying input can not be read
     */
    @Override
    public Tuple get() {
      while (true) {
        try {
          boolean exists = csvReader.readRecord();
          lineIndex++;

          if (!exists) {
            return null;
          }

          // empty records are not skipped by the reader (see createReader) so they have to be dropped here
          String rawRecord = csvReader.getRawRecord();
          if (rawRecord.trim().length() == 0) {
            continue;
          }

          String[] values = csvReader.getValues();

          if (values.length != numMembers) {
            throw new InvalidTupleException(Tuple.ofValues(type), String.format(
                "%s:%d contained wrong number of columns - was %d, expected %d",
                sourceDescription,
                lineIndex,
                values.length,
                numMembers
              ));
          }

          if (includeLineNumbers) {
            // line number goes in the last slot, matching where addLineNumbers put the member
            Object[] valuesWithLineNum = new Object[values.length + 1];
            valuesWithLineNum[values.length] = lineIndex;
            System.arraycopy(values, 0, valuesWithLineNum, 0, values.length);
            return Tuple.ofValues(type, valuesWithLineNum);
          } else {
            return Tuple.ofValues(type, (Object[]) values);
          }

        } catch (IOException e) {
          throw new RelationIOException(CsvRelation.this, "error reading row from underlying input stream", e);
        }
      }
    }

    @Override
    public void close() {
      try {
        csvIn.close();
      } catch (IOException e) {
        throw new RelationIOException(CsvRelation.this, "error closing underlying input stream", e);
      }
    }

  }

  @Override
  public TupleIterator rawIterator() {
    return new Iterator();

  }

  @Override
  public String getSourceInformation() {
    return sourceDescription;
  }

  @Override
  protected CsvRelation clone(BaseRelation.Fields baseFields) {
    return new CsvRelation(baseFields, sourceDescription, includeLineNumbers, dataSource);
  }

  /**
   * Counts the rows by reading through the whole data source once; the result is remembered for later calls.
   *
   * @return the number of non-blank lines in the source, minus one for the header (never less than zero)
   * @throws RelationIOException if the reader used for counting can not be closed
   */
  @Override
  public Optional<Long> size() {
    if (totalRows == null) {
      // base the tuple count on the number of non-blank lines, minus the header. Whitespace-only lines are
      // ignored as well as empty ones, because the iterator does not produce tuples for those either
      try (BufferedReader reader = new BufferedReader(dataSource.get())) {
        long numLines = reader.lines().filter(line -> !line.trim().isEmpty()).count();
        totalRows = Math.max(0, numLines - 1);
      } catch (IOException e) {
        throw new RelationIOException(this, "error closing underlying input stream", e);
      }
    }
    return Optional.of(totalRows);
  }
}
