/*
 * RiskScape™ Copyright New Zealand Institute for Earth Science Limited
 * (Earth Sciences New Zealand) is distributed for research purposes only
 * under the terms of AGPLv3.
 *
 * RiskScape™ Copyright 2025 New Zealand Institute for Earth Science
 * Limited (Earth Sciences New Zealand). All rights reserved. Source code
 * available under the AGPLv3.
 * 
 * This program is free software: you can redistribute it and/or modify it under
 *  the terms of the GNU Affero General Public License as published by the Free
 *  Software Foundation, either version 3 of the License, or (at your option) any
 *  later version.
 * 
 * This program is distributed for RESEARCH PURPOSES ONLY, in the hope that it will
 * be useful for research and education initiatives.
 * 
 * If you are not a researcher, or you are a researcher who wishes to use this
 * program on terms other than AGPLv3 (including those who wish to restrict the
 * distribution of any source code created using this program), please contact:
 * https://riskscape.org.nz
 * 
 * This program is distributed WITHOUT ANY WARRANTY; without even the implied
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Affero General Public License for more details.  You should have received a copy
 * of the GNU Affero General Public License along with this program.  If not, see
 * <http://www.gnu.org/licenses/>.
 * 
 * By way of summary only, under the AGPLv3:
 *     • Permissions of this strongest copyleft license are conditioned
 *       on making available complete source code of licensed works and
 *       modifications, which include larger works using a licensed work,
 *       under the same license.
 *     • Copyright and license notices must be preserved.
 *     • Contributors provide an express grant of patent rights.
 *     • When a modified version is used to provide a service over a
 *       network, the complete source code of the modified version must be made
 *       available.
 */
package nz.org.riskscape.dsl;

import java.util.regex.Pattern;

import lombok.Getter;
import lombok.RequiredArgsConstructor;

/**
 * Provides an API for the Lexer to consume characters.  Tracks the position within the source code (index, line and
 * column) as well as having some convenience functions that support token matching.
 *
 * Instances hold a mutable cursor over an immutable source string; every consuming method advances that cursor.
 */
@RequiredArgsConstructor
public class LexingStream {

  /**
   * The string being parsed
   *
   * Implementation note - we could probably avoid holding the whole string in memory and instead use a buffer, but it's
   * probably not worth it (unless we start dealing with massive projects)
   */
  @Getter
  private final String source;

  /*
   * We keep these three as fields rather than using a source location to cut down on allocating a source location as
   * we lex.  We only need the SourceLocation to exist between tokens, not at each position in the source.
   */
  private int index = 0;
  private int line = 1;
  private int col = 1;

  /**
   * Memoized location for the current position; cleared whenever a character is consumed.
   */
  private SourceLocation location;

  /**
   * The character immediately before the current position, or '\0' when nothing has been consumed yet.  Used to
   * recognise the '\n' of a "\r\n" pair so that it is not counted as a second line break.
   */
  @Getter
  private char lastChar = '\0';

  /**
   * A window over the source string, starting at {@code start} and ending {@code -shrink} characters before the end
   * of the source.  No characters are copied until {@link #toString()} is called.
   */
  @RequiredArgsConstructor
  private static class MovingSequence implements CharSequence {

    private final String source;
    private final int start;

    /**
     * Negative index of the string, e.g. -3 would mean we are 3 characters short of the end of the source string.
     */
    private final int shrink;

    @Override
    public char charAt(int index) {
      return source.charAt(index + start);
    }

    @Override
    public int length() {
      // shrink is zero or negative, so it has to be added - this keeps length() in step with toString()
      return source.length() - start + shrink;
    }

    @Override
    public String toString() {
      return source.substring(start, source.length() + shrink);
    }

    @Override
    public CharSequence subSequence(int newStart, int end) {
      // how many characters of this view fall after 'end' - the new view must stop short by that many more
      int shrinkBy = length() - end;
      return new MovingSequence(source, start + newStart, this.shrink - shrinkBy);
    }
  }

  /**
   * Offers a view over the source string without having to copy the string.  This is important for lexing using the
   * {@link Pattern} class, which wants a substring to match against.  If we use {@link String#substring(int)} or
   * {@link String#subSequence(int, int)}, it clones the string, which for our source code can be quite a lot of bytes,
   * and will end up being done numerous times as it attempts to match a token.  This approach is much cheaper.
   * @return a view of the source from the current index to the end of the source
   */
  public CharSequence asCharSequence() {
    return new MovingSequence(source, index, 0);
  }

  /**
   * Look at the next character without consuming it, returning 0 if it's EOF.
   */
  public char peek() {
    return index < source.length() ? source.charAt(index) : '\0';
  }

  /**
   * Consume the next character from the string, advancing the position as we go.
   * @return the consumed character, or '\0' if we are at EOF (in which case nothing is consumed and the position is
   * left untouched)
   */
  public char next() {
    if (index >= source.length()) {
      return '\0';
    }

    char ch = source.charAt(index++);

    // invalidate memoized location
    location = null;
    col++;

    if (ch == '\n') {
      if (lastChar != '\r') {
        line++;
        col = 1;
      } else {
        // the preceding '\r' already started the new line - don't count the trailing '\n'
        col--;
      }
    } else if (ch == '\r') {
      line++;
      col = 1;
    }

    lastChar = ch;

    return ch;
  }

  /**
   * Consume the next character if it matches charAt
   * @return true if it matched.
   */
  public boolean nextIf(char charAt) {
    if (peek() == charAt) {
      next();
      return true;
    }

    return false;
  }

  /**
   * Consume the next character if it matches any of the given chars
   * @return the matching char, or '\0' if none matched.
   */
  public char nextIfOneOf(char[] chars) {
    char ch = peek();
    for (char c : chars) {
      if (ch == c) {
        return next();
      }
    }

    return '\0';
  }

  /**
   * Consume characters from the stream while the next one matches any of the given array of chars.  Always stops at
   * EOF, even if chars contains '\0' (which is what {@link #peek()} reports at EOF).
   */
  public void skipWhile(char[] chars) {
    nextChar:
    while (!isEof()) {
      char ch = peek();
      for (char c : chars) {
        if (ch == c) {
          next();
          continue nextChar;
        }
      }

      return;
    }
  }

  /**
   * Advance the source if the next characters match the given string.  This method is useful to call at the beginning
   * of pattern matching to only bother returning a {@link SourceLocation} if it looks like the token is going to match
   * (most/many tokens will have specific characters they start with.)  If the match fails part way through, the
   * stream is rewound to where it started.
   * @return the SourceLocation before the match occurred, or null if the string did not match.  Use the location with
   * {@link #newToken(TokenType, SourceLocation, String)}
   */
  public SourceLocation nextIf(String string) {
    int matchIdx = 0;
    int len = string.length();

    // optimization: could store this as three locals?  don't think we need to/not worth it.  We could peek the first
    // char to avoid it in the first instance?
    SourceLocation startLocation = getLocation();

    while (matchIdx < len) {
      if (!nextIf(string.charAt(matchIdx++))) {
        rewind(startLocation);
        return null;
      }
    }

    return startLocation;
  }

  /**
   * @return the current zero-based index in to the source string.
   */
  public int getIndex() {
    return index;
  }

  /**
   * @return true if the entire string has been read
   */
  public boolean isEof() {
    return index >= source.length();
  }

  /**
   * Move the parse source back to the given location.  Does nothing if we are already at that index.
   * @param moveTo a location at or before the current position
   * @throws IllegalArgumentException if moveTo is ahead of the current position
   */
  public void rewind(SourceLocation moveTo) {
    int target = moveTo.getIndex();
    if (this.index == target) {
      return;
    }

    if (target > this.index) {
      throw new IllegalArgumentException("Can not move forward using this method");
    }

    this.index = target;
    this.line = moveTo.getLine();
    this.col = moveTo.getColumn();
    this.location = moveTo;
    // lastChar drives the "\r\n" handling in next(), so it has to describe the character before the rewound position
    // rather than whatever was consumed last before rewinding
    this.lastChar = target > 0 ? source.charAt(target - 1) : '\0';
  }

  /**
   * @return the unparsed substring of the source
   */
  public String getRemaining() {
    return source.substring(index);
  }

  /**
   * @return the current location within the source
   */
  public SourceLocation getLocation() {
    if (location == null) {
      // I figure a fair bit of lexing code is going to end up building a source location, we may as well end up
      // allocating one per token (which this effectively should do - can't imagine we'd be querying the location much
      // in between token start locations.
      location = new SourceLocation(index, line, col);
    }

    return location;
  }

  /**
   * Construct a new token from this parse source assuming the characters have not yet been consumed (you would have
   * used asCharSequence) to read the next available characters.  The characters are consumed as part of this call.
   * @param tokenType the type of the token
   * @param length how many characters forward of the current location to consume as part of this token
   * @param tokenValue the value of the token - might not be a substring, could be somewhat processed.
   */
  public Token newToken(TokenType tokenType, int length, String tokenValue) {
    Token token = new Token(tokenType, source, index, index + length, tokenValue);
    token.setLocation(getLocation());

    move(length);

    return token;
  }

  /**
   * Construct a new token from this parse source, assuming the characters were already consumed using `next()`.
   * The token spans from startLocation up to the current index.
   * @param tokenType the type of the token
   * @param startLocation the location where this token starts
   * @param tokenValue the value of the token - might not be a substring, could be somewhat processed.
   */
  public Token newToken(TokenType tokenType, SourceLocation startLocation, String tokenValue) {
    Token token = new Token(tokenType, source, startLocation.getIndex(), this.index, tokenValue);
    token.setLocation(startLocation);
    return token;
  }

  /**
   * Advance the location by length characters, going through {@link #next()} so line and column stay correct.
   */
  private void move(int length) {
    while (length-- > 0) {
      next();
    }
  }
}
