/*
 * RiskScape™ Copyright New Zealand Institute for Earth Science Limited
 * (Earth Sciences New Zealand) is distributed for research purposes only
 * under the terms of AGPLv3.
 *
 * RiskScape™ Copyright 2025 New Zealand Institute for Earth Science
 * Limited (Earth Sciences New Zealand). All rights reserved. Source code
 * available under the AGPLv3.
 * 
 * This program is free software: you can redistribute it and/or modify it under
 *  the terms of the GNU Affero General Public License as published by the Free
 *  Software Foundation, either version 3 of the License, or (at your option) any
 *  later version.
 * 
 * This program is distributed for RESEARCH PURPOSES ONLY, in the hope that it will
 * be useful for research and education initiatives.
 * 
 * If you are not a researcher, or you are a researcher who wishes to use this
 * program on terms other than AGPLv3 (including those who wish to restrict the
 * distribution of any source code created using this program), please contact:
 * https://riskscape.org.nz
 * 
 * This program is distributed WITHOUT ANY WARRANTY; without even the implied
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Affero General Public License for more details.  You should have received a copy
 * of the GNU Affero General Public License along with this program.  If not, see
 * <http://www.gnu.org/licenses/>.
 * 
 * By way of summary only, under the AGPLv3:
 *     • Permissions of this strongest copyleft license are conditioned
 *       on making available complete source code of licensed works and
 *       modifications, which include larger works using a licensed work,
 *       under the same license.
 *     • Copyright and license notices must be preserved.
 *     • Contributors provide an express grant of patent rights.
 *     • When a modified version is used to provide a service over a
 *       network, the complete source code of the modified version must be made
 *       available.
 */
package nz.org.riskscape.hdf5;

import nz.org.riskscape.hdf5.cursor.H5FixedSizeCursor;
import nz.org.riskscape.hdf5.cursor.H5DatasetCursor;
import nz.org.riskscape.hdf5.types.H5Type;

import hdf.hdf5lib.H5;
import hdf.hdf5lib.HDF5Constants;
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;

/**
 * Supports reading a one-dimensional dataset from an h5 file.  Datasets should be created via the {@link H5File} or
 * {@link H5Group} classes.
 *
 * <p>A dataset owns three native handles: the dataset itself (the inherited {@code ptr}), its {@link H5Type} and its
 * default {@link H5DataSpace}.  All three are released by {@link #close()}.
 */
@Slf4j
public class H5Dataset extends H5Object {

  /*
   * The HDF5 dataset could potentially contain GBs of data, so we need to
   * limit the size of the memory buffer that we read elements into. Defaults
   * to 64 * 1024 * 1024 (nominally 64MB, arbitrarily chosen), but this default
   * can be overridden in openCursor(int) for performance fine-tuning (or unit
   * testing).
   */
  private static final int DEFAULT_CURSOR_SIZE = 64 * 1024 * 1024;

  @Getter
  private final H5Type dataType;
  @Getter
  private final H5DataSpace dataSpace;

  @Getter
  private final String datasetName;

  /**
   * Wraps an already-opened dataset handle and looks up its data type and data space.
   *
   * @param objectId the native identifier of the opened dataset
   * @param datasetName the name of the dataset, used for logging and {@link #toString()}
   */
  H5Dataset(long objectId, String datasetName) {
    super(objectId);
    this.datasetName = datasetName;
    this.dataType = H5Type.build(H5.H5Dget_type(ptr));
    this.dataSpace = new H5DataSpace(H5.H5Dget_space(ptr), this);
    log.debug("Opened new H5Dataset {}", datasetName);
  }

  /**
   * Opens a new cursor/iterator over this data.
   * There is as yet no support for reading from chunks stored in different data
   * spaces.
   *
   * @param bufferSize the size of the memory buffer that elements are read into
   * @return a new cursor over the elements of this dataset
   * @throws UnsupportedOperationException if this dataset has a variable length data type
   */
  public H5DatasetCursor openCursor(int bufferSize) {
    if (dataType.isVariableLength()) {
      // it looks like we should be able to support this, and it is required for reading the oqparams dataset, but
      // couldn't find the right examples to show how to do this
      throw new UnsupportedOperationException("Variable length data types not supported");
    }
    return new H5FixedSizeCursor(this, dataSpace, dataType, bufferSize);
  }

  /**
   * Opens a new cursor over this data using the default buffer size.
   *
   * @return a new cursor over the elements of this dataset
   * @throws UnsupportedOperationException if this dataset has a variable length data type
   */
  public H5DatasetCursor openCursor() {
    return openCursor(DEFAULT_CURSOR_SIZE);
  }

  /**
   * @return a new dataspace that can be used to select elements from the dataset independently from the default
   * dataspace that comes from getDataSpace().  Be aware that you'll need to close this dataspace when you're finished
   * with it, otherwise you'll end up with resource leaks.
   */
  public H5DataSpace newDataSpace() {
    return new H5DataSpace(H5.H5Dget_space(ptr), this, dataSpace.getExtent().clone());
  }

  /**
   * Releases the dataset handle along with the data space and data type that were opened with it.  The data space and
   * data type are closed even if closing the dataset handle fails, so that one failure does not leak the others.
   */
  @Override
  public void close() {
    try {
      H5.H5Dclose(ptr);
    } finally {
      try {
        dataSpace.close();
      } finally {
        dataType.close();
      }
    }
    log.debug("Closed H5Dataset {}", this.datasetName);
  }

  /**
   * Reads every element of the dataset in to memory.  Any element that is a {@link Number} is converted with
   * {@link Number#doubleValue()}, so elements other than boxed doubles (e.g. floats or integers) are widened rather
   * than rejected.
   *
   * Warning: this will only work for datasets small enough to fit in an array
   *
   * @return the dataset elements as an array of doubles
   * @throws ClassCastException if the cursor yields an element that is not a {@link Number}
   */
  public double[] getElementsAsDouble() {
    // size the result first so an oversized dataset fails before a cursor is opened
    double[] values = new double[getDataSpace().numElementsAsInt()];
    // NOTE(review): the cursor is not closed here - confirm whether H5DatasetCursor holds resources needing release
    H5DatasetCursor cursor = openCursor();
    int i = 0;
    while (cursor.hasNext()) {
      values[i++] = ((Number) cursor.next()).doubleValue();
    }
    return values;
  }

  /**
   * @return the total number of elements in the dataset, as per {@link H5DataSpace#numElements()}
   */
  public long numElements() {
    return dataSpace.numElements();
  }

  @Override
  public String toString() {
    return String.format("H5Dataset(name=%s type=%s size=%d)", datasetName, dataType, numElements());
  }

  /**
   * Read n variable length (vlen) elements and return them as a 2d array of bytes, where the 1st dimension contains
   * the bytes of each element.  You can then pass each byte array to a vlen type's member type to deserialize the
   * bytes in to the java representation of the data.
   *
   * This is done as a special case because H5 vlen types are stored "off to the side" and the data is not kept in the
   * same contiguous block as a non vlen type.  This makes reading the data in to java difficult/impossible, and in fact
   * required patching of the h5 library's jni bindings to even make possible in this way.
   *
   * @param memspace a dataspace that data is being read in to.  This must be big enough to hold numElements
   * @param filespace the dataspace that data is being read from.  Its selection must yield the same number of elements
   * as numElements, i.e. by calling {@link H5DataSpace#selectElements(long[], long[])}
   * @param numElements the number of elements to select.  Must agree with the sizes of memspace and filespace
   * @return an array of byte arrays you can read the elements from
   */
  public byte[][] readVlenElements(H5DataSpace memspace, H5DataSpace filespace, int numElements) {
    // we just pass in an array big enough to hold the elements we want to read.
    // The byte array for each element will be replaced in the library call with the
    // raw vlen data that's stored on file for the selected element.
    byte[][] buffer = new byte[numElements][0];
    H5.H5DreadVL(ptr, dataType.ptr, memspace.ptr, filespace.ptr,
        HDF5Constants.H5P_DEFAULT, buffer);

    return buffer;
  }
}
