/*
 * RiskScape™ Copyright New Zealand Institute for Earth Science Limited
 * (Earth Sciences New Zealand) is distributed for research purposes only
 * under the terms of AGPLv3.
 *
 * RiskScape™ Copyright 2025 New Zealand Institute for Earth Science
 * Limited (Earth Sciences New Zealand). All rights reserved. Source code
 * available under the AGPLv3.
 * 
 * This program is free software: you can redistribute it and/or modify it under
 *  the terms of the GNU Affero General Public License as published by the Free
 *  Software Foundation, either version 3 of the License, or (at your option) any
 *  later version.
 * 
 * This program is distributed for RESEARCH PURPOSES ONLY, in the hope that it will
 * be useful for research and education initiatives.
 * 
 * If you are not a researcher, or you are a researcher who wishes to use this
 * program on terms other than AGPLv3 (including those who wish to restrict the
 * distribution of any source code created using this program), please contact:
 * https://riskscape.org.nz
 * 
 * This program is distributed WITHOUT ANY WARRANTY; without even the implied
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Affero General Public License for more details.  You should have received a copy
 * of the GNU Affero General Public License along with this program.  If not, see
 * <http://www.gnu.org/licenses/>.
 * 
 * By way of summary only, under the AGPLv3:
 *     • Permissions of this strongest copyleft license are conditioned
 *       on making available complete source code of licensed works and
 *       modifications, which include larger works using a licensed work,
 *       under the same license.
 *     • Copyright and license notices must be preserved.
 *     • Contributors provide an express grant of patent rights.
 *     • When a modified version is used to provide a service over a
 *       network, the complete source code of the modified version must be made
 *       available.
 */
package nz.org.riskscape.engine.util;

import java.lang.reflect.Array;
import java.util.AbstractList;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.NoSuchElementException;

import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;
import nz.org.riskscape.engine.Tuple;
import nz.org.riskscape.engine.types.Nullable;
import nz.org.riskscape.engine.types.Struct;
import nz.org.riskscape.engine.types.Type;

/**
 * Add-only list implementation that is well suited for use in aggregate functions where we may need to grow to a very
 * large size without knowing this up-front.
 *
 * List elements are stored in a linked list of exponentially increasing sized arrays, as a compromise on memory
 * efficiency and dynamic
 * growth.  Has the unbounded growth properties of a linked list without such a memory penalty that occurs with larger
 * lists (space efficiency of a linked list in java is not great compared to an array list).  Has a reasonable
 * performance benefit over using an ArrayList, too, as growth is cheaper and results in less GC churn.  Some fairly
 * artificial benchmarks showed a 37% increase in throughput when adding 1000 integers to a list when using the
 * SegmentedList over an {@link ArrayList}, where throughput was measured as the number of times we could add 1000
 * integers to a list in a second.
 *
 * Also uses primitive arrays where it can to increase memory efficiency.
 *
 * # Performance comparison to an ArrayList
 *
 * For storing objects, a segmented list will use more memory to store the same number of results, but as the list gets
 * bigger, its efficiency increases to the point where it can begin to overtake an array list due to the way an array
 * list's backing array length increases (exponentially).  A similar pattern exists for CPU performance for add - a
 * small list from either has about the same performance when adding, but as the backing array grows, an array list's
 * performance worsens as it has to copy all the original elements in to a new array.
 *
 * A SegmentedList has terrible random access performance, similar to a linked list, but has good iteration performance,
 * similar to an array list's, at least in theoretical terms.
 *
 * One advantage the segmented list does have over an array list is that it can be backed by native arrays for longs and
 * doubles (for riskscape integer and floating types), which are stored more compactly than object arrays, and means no
 * boxed values need to be stored - they are only used 'in flight' and as short-lived objects should be quickly cleaned
 * up by generational GC.  For a list of 10,000 longs, a segmented list can store these in 80kb + a few kb of array and
 * linked list overhead, compared to 64kb for the array list's array (assuming exponential growth) and then 320kb
 * worth of Longs.
 *
 * In a simple test, a segmented list was able to store 621,412,437 longs vs 150,242,292 in an
 * array list.  Throughput of adding longs in a single thread with the segmented list was about 2-3 times the rate of
 * the array list, with that gap increasing as the lists get bigger.  Performance stayed constant the whole way through
 * and the test eventually died with an OOM error.
 * The array list version's performance started tanking at about 80mil longs, throughput starts dropping and CPU usage
 * saturates all cores with GC. Half the time it died with a thread dump.
 *
 * # Notes on subclassing
 *
 * * Each subclass allows us to operate generically on native array types without a huge performance penalty of using
 * the reflective array methods
 * * We probably could have done this with composition, but sub-classing was easy, too
 * * Make sure sub classes are marked final as this improves performance (or at least it seemed to in my testing).
 * * We used to use `if (componentType == ...)` checks in place of what are now the subclass methods; it turns out
 * that sub-classing runs quicker, albeit only a little
 */
@RequiredArgsConstructor(access = AccessLevel.PACKAGE)
public abstract class SegmentedList<T> extends AbstractList<T> {

  // current size of the list
  private int size;

  // linked list of arrays that holds our data.  All segments except the last must be completely filled, even if it's
  // filled with null values.  Zero-length arrays are never stored in here.
  private final LinkedList<Object> segments = new LinkedList<>();

  // the number of elements being stored in the last segment
  private int lastSegmentSize = 0;

  // the segment that we're filling with elements.  Starts off as an empty array to avoid a special case in the add
  // method.  This empty array is not added to our segments list - it's just to avoid allocating a large array for a
  // list which might never be used. Note that it doesn't matter that this isn't a type-correct array, as we never do
  // any array copy operations on an empty segment
  private Object lastSegment = new Object[0];

  // the (boxed) class of the elements held in this list.  Used to check that two segmented lists are compatible
  // before their backing arrays are combined in addAll
  private final Class<? super T> componentType;

  /**
   * Construct a SegmentedList capable of storing the given type of objects.
   *
   * For certain primitive wrapping types, the  backing arrays used will be of the primitive type,
   * saving on memory, especially for very large lists.
   *
   * If the componentType may come from a nullable type then {@link #forType(nz.org.riskscape.engine.types.Type) }
   * should be used.
   *
   * @param componentType the class of the elements that the list is going to store
   * @return a new, empty list
   */
  @SuppressWarnings({ "rawtypes", "unchecked" })
  public static <T> List<T> forClass(Class<T> componentType) {

    // it "works" if you give us a Tuple, but it's not going to benefit from efficient storage.  This will trip in
    // tests and so point us to using the correct API
    assert componentType != Tuple.class : "Use forType instead";

    SegmentedList newList;

    // Remember to 'final' each implementation, as it seems to have a positive impact on performance.

    if (componentType == Double.class) {
      newList = new SegmentedDoubleList();
    } else if (componentType == Float.class) {
      newList = new SegmentedFloatList();
    } else if (componentType == Long.class) {
      newList = new SegmentedLongList();
    } else {
      newList = new SegmentedObjectList();
    }
    return newList;
  }

  /**
   * Construct a SegmentedList capable of storing the given type.
   *
   * Similar to {@link #forClass(java.lang.Class) } except primitive types are not used if the given type is
   * numeric and nullable (primitives cannot represent null values).
   *
   * @param type the riskscape type of the elements that the list is going to store
   * @return a new, empty list
   */
  @SuppressWarnings({"rawtypes", "unchecked"})
  public static <T> List<T> forType(Type type) {
    if (Nullable.is(type)) {
      // can't use optimizations when types might be null - a future improvement might allow this using a bit field,
      // but let's do this later.  We could also look at commuting nullability to all members of a struct, which might
      // also be better than this.
      return (SegmentedList) new SegmentedObjectList();
    } else if (type instanceof Struct struct) {
      return (List) new SegmentedTupleList(struct);
    }

    Class componentType = type.internalType();
    return forClass(componentType);
  }

  /**
   * Random access in to the list.  This has to walk the linked list of segments to find the one holding the index, so
   * it costs O(number of segments) unless the list has a single segment (e.g. after a {@link #sort(Comparator)}),
   * in which case it is a direct array lookup.
   *
   * @throws IndexOutOfBoundsException if index is negative or not less than {@link #size()}
   */
  @Override
  public T get(int index) {
    if (index < 0 || index >= size) {
      throw new IndexOutOfBoundsException("" + index);
    }

    // this helps for various statistical functions - once sorted, we often use random access to select from a place in
    // a list
    if (segments.size() == 1) {
      return arrayGet(lastSegment, index);
    }

    // every segment before the last is completely full, so we can skip over whole segments by their length rather
    // than visiting (and boxing) each element on the way to the one we want
    int offset = index;
    for (Object segment : segments) {
      int segmentLength = Array.getLength(segment);
      if (offset < segmentLength) {
        return arrayGet(segment, offset);
      }
      offset -= segmentLength;
    }

    // can't happen while size agrees with the contents of segments
    throw new IndexOutOfBoundsException("" + index);
  }

  @Override
  public int size() {
    return size;
  }

  /**
   * Not supported - this is an append-only list
   */
  @Override
  public void add(int index, T element) {
    throw new UnsupportedOperationException();
  }

  /**
   * Appends the element to the end of the list, allocating a new segment if the current one is full.
   *
   * @return true, always
   */
  @Override
  public boolean add(T element) {
    if (lastSegmentSize == Array.getLength(lastSegment)) {
      Object newSegment = newArray(nextSegmentSize());
      segments.add(newSegment);
      lastSegment = newSegment;
      lastSegmentSize = 0;
    }

    // counters are only bumped once the element has been stored, so that a failed allocation or a failed store (bad
    // element type, null in to a primitive array) doesn't leave the list claiming an element it doesn't have
    arraySet(lastSegment, lastSegmentSize, element);
    lastSegmentSize++;
    size++;

    return true;
  }

  // package scoped for testing
  int nextSegmentSize() {
    // uses powers of 2 up from 16 to 64k
    return 1 << Math.min(16, 4 + segments.size());
  }

  /**
   * Appends all the elements of the given collection.  When the collection is also a SegmentedList, the backing arrays
   * of the given list are appended by reference instead of copying each element.
   *
   * @return true if this list changed, i.e. the given collection wasn't empty
   * @throws ClassCastException if c is a SegmentedList with a different component type to this list
   */
  @Override
  public boolean addAll(Collection<? extends T> c) {
    if (c.isEmpty()) {
      return false;
    }

    if (!(c instanceof SegmentedList)) {
      return super.addAll(c);
    }

    /*
     * Optimised routine for adding together two segmented lists that takes advantage of the fact that they're
     * grow-only lists.  We can combine together all the completed array segments, as they won't change.  There is
     * some copying required to maintain the order of the two lists wrt to each other, but we could possibly have a
     * version of this method that has only collection add all semantics and save on a few array copying operations
     */

    SegmentedList<?> rhsList = (SegmentedList<?>) c;
    if (rhsList.componentType != this.componentType) {
      throw new ClassCastException("Can not types " + rhsList.componentType + " and " + this.componentType);
    }

    // first we resize both lists so that their last segments fit exactly - this effectively freezes those arrays, as
    // we are a grow-only-list.  For `this`, it means we can append all the rhs's arrays to our linked list without
    // having gaps in the arrays.  But why do it to the rhs?
    //
    // The last segment needs special handling as we don't want writes to one of the lists to affect the other. We
    // either have to copy the rhs's last segment or guarantee that no further writes will happen to either list.
    // It's simplest to just copy them, so that just leaves the matter of the size of the new list.
    //
    // In most cases, it's assumed addAll is happening when reducing two
    // lists in to one, so it's likely that no new add operations are going to take place - both lists are now at
    // their final size w.r.t to collection, and only folds/reductions (which involve calls to addAll) are going to
    // take place.  Assuming this list is going to be used as the destination list for all further folds, we may as
    // well avoid the initial copy on the next call to fitLastSegmentToSize by getting it right this size.  For the
    // rhs, it's more than likely this list won't be used again anyway, so who cares if we make future adds slightly
    // less efficient - they're not likely to happen
    fitLastSegmentToSize();
    rhsList.fitLastSegmentToSize();

    // adding a list to itself would otherwise have us appending to the linked list we are iterating over, so work
    // from a snapshot of the segment references in that case
    List<Object> rhsSegments = rhsList.segments;
    if (rhsList == this) {
      rhsSegments = new ArrayList<>(rhsSegments);
    }

    // now we can safely copy reference's to the rhs's list - they won't be changing as they are all complete
    // why not use addAll? LinkedList's addAll implementation converts the rhs to an array before adding the elements
    // which seems pretty pointless when we know the rhs is a linked list and we can add elements more efficiently (
    // I suspect) with calls to addLast
    for (Object segment : rhsSegments) {
      this.segments.addLast(segment);
    }
    lastSegment = this.segments.getLast();
    lastSegmentSize = Array.getLength(lastSegment);
    size += rhsList.size;

    return true;
  }

  /**
   * Functional version of addAll that makes lambda expressions more succinct
   *
   * @return this list, now also containing the elements of rhs
   */
  public SegmentedList<T> fold(SegmentedList<T> rhs) {
    this.addAll(rhs);
    return this;
  }

  /**
   * @return the number of segments we've allocated - here for test assertions
   */
  int getNumSegments() {
    return segments.size();
  }

  @Override
  public Iterator<T> iterator() {
    return new Iter();
  }

  /**
   * Sorts this list by compacting the list down to a single array, which we can then sort.
   *
   * Note that the compaction always copies in to a fresh array, even when the list is already a single segment.  This
   * matters because {@link #addAll(Collection)} shares segments between lists by reference, and sorting a shared
   * array in place would reorder the other list as well.
   *
   * Possible Improvements:
   *  * Do a parallel sort of each chunks, then merge sort those in to a final array
   *  * Have primitive sort versions that allow use of quicker array sorting methods
   */
  @Override
  public void sort(Comparator<? super T> c) {
    compactArrays();

    arraySort(lastSegment, c);
  }

  /**
   * Iterates over the elements that were in the list at the time the iterator was created.
   */
  class Iter implements Iterator<T> {

    Iter() {
      this.segmentIter = SegmentedList.this.segments.iterator();
      this.size = SegmentedList.this.size;
    }

    // track size independently - this will actually allow concurrent modification, which seems simpler than detecting
    // concurrent modification and throwing an exception
    private final int size;

    // iterator through linked list of segments
    private Iterator<Object> segmentIter;

    // the segment we're currently visiting - this is an empty array to avoid a special case in next with empty lists
    private Object segment = new Object[0];
    // progress through entire list
    private int counter = 0;
    // progress through current segment
    private int segmentCounter = 0;

    @Override
    public boolean hasNext() {
      return counter < size;
    }

    @Override
    public T next() {
      if (counter >= size) {
        throw new NoSuchElementException();
      }

      // detect whether we've exhausted this segment.  This is a loop so that a zero-length segment gets stepped over
      // rather than indexed in to - there are still elements to come (counter < size), so a non-empty segment must
      // follow
      while (segmentCounter >= Array.getLength(segment)) {
        segment = segmentIter.next();
        segmentCounter = 0;
      }

      counter++;
      return arrayGet(segment, segmentCounter++);
    }
  }

  /**
   * Resizes the last segment so that it fits the number of elements in it exactly.  Simplifies addAll logic.
   */
  private void fitLastSegmentToSize() {
    // already fits - job done.  This also covers the empty list, where the last segment is a zero-length placeholder
    // that isn't in the segments list
    if (Array.getLength(lastSegment) == lastSegmentSize) {
      return;
    }
    Object resizedLastSegment = newArray(lastSegmentSize);
    System.arraycopy(lastSegment, 0, resizedLastSegment, 0, lastSegmentSize);
    segments.removeLast();
    segments.addLast(resizedLastSegment);
    lastSegment = resizedLastSegment;
  }

  /**
   * Create a new array with the correct component type.  'untyped' so native arrays can be used.
   * @return an array (possibly native, e.g. double[]) that can store values for the type of this list.  Values returned
   * from this method are 'fed' in to arraySet, arrayGet and arraySort.
   */
  protected abstract Object newArray(int arraySize);

  /**
   * Set the given element in to the given array at the given index
   */
  protected abstract void arraySet(Object array, int i, T element);

  /**
   * Get the given element at the given index
   */
  protected abstract T arrayGet(Object array, int i);

  /**
   * Sort the given array with the given comparator
   */
  protected abstract void arraySort(Object array, Comparator<? super T> c);

  /**
   * Reduce our linked list of arrays down to a single array.  Used for in-place sorting.
   */
  private void compactArrays() {

    // TODO we could try and catch out of memory here and try and do a more incrememental build, which would do more
    // operations overall but would allow as to keep memory use at any one time down
    Object compacted = newArray(size);
    int copied = 0;
    Iterator<Object> iter = segments.iterator();
    while (iter.hasNext()) {
      Object segment = iter.next();
      // remove list elements to reclaim memory as we go
      iter.remove();
      // we take the smallest of how ever much is remaining, or the segment length, so we avoid an oob exception on
      // the last segment, which might only be partially filled
      int toCopy = Math.min(Array.getLength(segment), size - copied);
      System.arraycopy(segment, 0, compacted, copied, toCopy);
      copied += toCopy;
    }

    lastSegment = compacted;
    lastSegmentSize = size;
    // an empty list keeps the (now correctly typed) empty array as a placeholder only - putting a zero-length array in
    // to the segments list would have the iterator index in to it once elements are added afterwards
    if (size > 0) {
      segments.add(compacted);
    }
  }

}
