/*
 * RiskScape™ Copyright New Zealand Institute for Earth Science Limited
 * (Earth Sciences New Zealand) is distributed for research purposes only
 * under the terms of AGPLv3.
 *
 * RiskScape™ Copyright 2025 New Zealand Institute for Earth Science
 * Limited (Earth Sciences New Zealand). All rights reserved. Source code
 * available under the AGPLv3.
 * 
 * This program is free software: you can redistribute it and/or modify it under
 *  the terms of the GNU Affero General Public License as published by the Free
 *  Software Foundation, either version 3 of the License, or (at your option) any
 *  later version.
 * 
 * This program is distributed for RESEARCH PURPOSES ONLY, in the hope that it will
 * be useful for research and education initiatives.
 * 
 * If you are not a researcher, or you are a researcher who wishes to use this
 * program on terms other than AGPLv3 (including those who wish to restrict the
 * distribution of any source code created using this program), please contact:
 * https://riskscape.org.nz
 * 
 * This program is distributed WITHOUT ANY WARRANTY; without even the implied
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Affero General Public License for more details.  You should have received a copy
 * of the GNU Affero General Public License along with this program.  If not, see
 * <http://www.gnu.org/licenses/>.
 * 
 * By way of summary only, under the AGPLv3:
 *     • Permissions of this strongest copyleft license are conditioned
 *       on making available complete source code of licensed works and
 *       modifications, which include larger works using a licensed work,
 *       under the same license.
 *     • Copyright and license notices must be preserved.
 *     • Contributors provide an express grant of patent rights.
 *     • When a modified version is used to provide a service over a
 *       network, the complete source code of the modified version must be made
 *       available.
 */
package nz.org.riskscape.dsl;

import java.util.Locale;
import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Used by {@link TokenType}s to build tokens from a {@link LexingStream}.  This interface is to replace
 * {@link TokenMatcher} as it accepts the more useful {@link LexingStream} object instead of a string and a position.
 */
@FunctionalInterface
public interface LexingMatcher {

  /**
   * Adapts a legacy {@link TokenMatcher} to the {@link LexingMatcher} interface.  The old matcher is handed the
   * stream's source and current index; if it produces a token, the token's location is set to the stream's location
   * at the time of the call and the stream is advanced by the token's length ({@code end - begin}).
   *
   * @return a {@link LexingMatcher} that wraps a {@link TokenMatcher} for compatibility
   */
  static LexingMatcher wrap(TokenMatcher oldMatcher) {
    return (type, stream) -> {
      Token match = oldMatcher.match(type, stream.getSource(), stream.getIndex());
      if (match == null) {
        return null;
      }

      match.setLocation(stream.getLocation());

      // advance the stream past the matched text.  Counting down with `> 0` (rather than `!= 0`) means a malformed
      // token whose end precedes its begin consumes nothing instead of looping until the counter wraps around
      for (int remaining = match.end - match.begin; remaining > 0; remaining--) {
        stream.next();
      }

      return match;
    };
  }

  /**
   * Creates a {@link LexingMatcher} that matches a single character.  The token's value is the character as a
   * one-character string.
   */
  static LexingMatcher forChar(char character) {
    // let's have only one copy of this string ever.
    String value = Character.toString(character).intern();
    return (tokenType, stream) -> {
      if (stream.peek() != character) {
        return null;
      }
      return stream.newToken(tokenType, 1, value);
    };
  }


  /**
   * Creates a {@link LexingMatcher} that consumes consecutive whitespace characters.  Created tokens are empty, e.g.
   * they have no value.
   *
   * @param whitespaceChars the set of characters that are to be treated as whitespace
   */
  static LexingMatcher forWhitespace(String whitespaceChars) {
    char[] ws = whitespaceChars.toCharArray();
    return (tokenType, stream) -> {
      SourceLocation startsAt = stream.getLocation();

      stream.skipWhile(ws);

      // if the stream hasn't moved then there was no whitespace here to match
      if (startsAt.getIndex() == stream.getIndex()) {
        return null;
      }
      return stream.newToken(tokenType, startsAt, "");
    };
  }

  /**
   * Creates a {@link LexingMatcher} for the given fixed string.  Matching is exact (case sensitive) and the token's
   * value is the given string.
   */
  static LexingMatcher forString(String string) {
    return (tokenType, stream) -> {
      SourceLocation startLocation = stream.getLocation();

      // consume the stream one character at a time, bailing as soon as something differs.  The stream may be left
      // part way through the string on failure - see match() for why that's OK
      for (int i = 0; i < string.length(); i++) {
        if (!stream.nextIf(string.charAt(i))) {
          return null;
        }
      }

      return stream.newToken(tokenType, startLocation, string);
    };
  }

  /**
   * Creates a token matcher for a regular expression, see {@link #forPattern(Pattern)}
   *
   * @param regex the regular expression to compile and match with
   */
  static LexingMatcher forPattern(String regex) {
    return forPattern(Pattern.compile(regex));
  }

  /**
   * Creates a token matcher for a {@link Pattern}.  The pattern must match at the very start of the remaining input
   * (as given by {@link LexingStream#asCharSequence()}).  If the pattern has exactly one capturing group, then that
   * group becomes the token's value, otherwise the entire match is the value.
   */
  static LexingMatcher forPattern(Pattern pattern) {
    return (tokenType, stream) -> {
      Matcher match = pattern.matcher(stream.asCharSequence());

      // lookingAt() anchors the match to the start of the input.  find() would also accept a match further along,
      // which would produce a token that silently swallows whatever text preceded the match (and costs a scan of
      // the whole remaining input each time the pattern fails to match)
      if (!match.lookingAt()) {
        return null;
      }

      String tokenValue = match.groupCount() == 1 ? match.group(1) : match.group();

      // for pattern based token matches, the source doesn't match the value - the regex can be used to 'pluck'
      // the interesting part out.  This is mostly going to be the case for things like quoted strings, although
      // we now have a purpose built matcher for that to avoid having to deal with horrific regular expressions
      return stream.newToken(tokenType, match.end(), tokenValue);
    };
  }

  /**
   * The quote characters used by {@link #forQuotedString()} - single and double quotes.  NB this is a shared, mutable
   * array - it should not be modified.
   */
  char[] DEFAULT_QUOTES = new char[] {'\'', '"'};

  /**
   * Creates a token matcher for a quoted string.  Strings can be quoted with either single or double quotes and
   * escaping is done by adding a backslash in front of the used quote (aka the mode) character
   */
  static LexingMatcher forQuotedString() {
    return forQuotedString(DEFAULT_QUOTES);
  }

  /**
   * Creates a token matcher for a string quoted with the given character.  Escaping is done by adding a backslash in
   * front of the quote character
   */
  static LexingMatcher forQuotedString(char quoteChar) {
    return forQuotedString(new char[] {quoteChar});
  }

  /**
   * Creates a token matcher for a string quoted with any of the characters in the given set.  The quote characters
   * operate in a 'mode' - that is if one of these characters is seen in the stream, that triggers that 'mode' and the
   * quoting is terminated by seeing that character again.
   *
   * Escaping is always done with a backslash.  A backslash followed by the mode character yields just the mode
   * character, two backslashes yield a single backslash, and a backslash followed by anything else is kept as is
   * (both the backslash and the following character end up in the token's value).
   *
   * The returned matcher throws a {@link LexerException} if the input ends before the closing quote is seen.
   */
  static LexingMatcher forQuotedString(char[] quoteWith) {
    // take a copy so that later changes to the caller's array (or the shared DEFAULT_QUOTES) can't affect us
    char[] quotes = quoteWith.clone();
    return (tokenType, stream) -> {
      char opening = stream.peek();

      // '\0' doubles as the 'not a quote' marker
      char mode = '\0';
      for (char candidate : quotes) {
        if (opening == candidate) {
          mode = opening;
          break;
        }
      }

      if (mode == '\0') {
        return null;
      }

      SourceLocation startLocation = stream.getLocation();
      stream.next(); // consume the first quote
      final char escapeChar = '\\';
      StringBuilder builder = new StringBuilder();

      // true when the previous character was a backslash that hasn't been dealt with yet
      boolean lastWasEscape = false;

      while (!stream.isEof()) {
        char curChar = stream.next();

        if (curChar == escapeChar) {
          if (lastWasEscape) {
            // escaped escape!
            builder.append(escapeChar);
            lastWasEscape = false;
          } else {
            lastWasEscape = true;
          }
          continue;
        }

        if (lastWasEscape) {
          // only the quote character can be escaped - for anything else the backslash is kept in the value
          if (curChar != mode) {
            builder.append(escapeChar);
          }
          builder.append(curChar);
        } else if (curChar == mode) {
          // we've hit the end of the string - we're done
          return stream.newToken(tokenType, startLocation, builder.toString());
        } else {
          builder.append(curChar);
        }

        lastWasEscape = false;
      }

      // ran out of input without seeing the closing quote
      throw new LexerException(LexerProblems.get().eofInString(stream.getIndex()));
    };
  }

  /**
   * Line ending sequences.  NB this is not referenced from within this interface, and is a shared, mutable array - it
   * should not be modified.
   */
  String[] LINE_ENDINGS = new String[] {"\r\n", "\r", "\n"};

  /**
   * Creates a {@link LexingMatcher} for a comment that begins with the given string and runs up to (but not
   * including) the next carriage return or line feed, or to the end of the input.  Created tokens are empty, e.g.
   * the comment's text is not kept.
   *
   * @param startsWith the text that begins a comment
   */
  static LexingMatcher forLineComment(String startsWith) {
    return (TokenType type, LexingStream stream) -> {
      SourceLocation startsAt = stream.nextIf(startsWith);

      if (startsAt == null) {
        return null;
      }

      // chew through everything up to the end of the line, leaving the line ending itself in the stream
      while (!stream.isEof()) {
        char ch = stream.peek();

        if (ch == '\r' || ch == '\n') {
          break;
        }

        stream.next();
      }

      // NB not bothering to store the comment as a value here - it'll just take up space and we never use them
      // (to date)
      return stream.newToken(type, startsAt, "");
    };
  }

  /**
   * @param ch the character to test
   * @param includeNumbers whether to consider 0-9 an identifier character.
   * @return true if the given character is something you'd find in an identifier, i.e. a-z, A-Z, underscore and
   * (optionally) 0-9.
   */
  static boolean isIdentifierCharacter(char ch, boolean includeNumbers) {
    // standard identifier friendly characters - might be nice to DRY this up with an identifier matcher? (instead
    // of using a regex)  Note I'm making use of the character's ordinals to quickly test
    boolean lowerCase = ch >= 'a' && ch <= 'z';
    boolean upperCase = ch >= 'A' && ch <= 'Z';
    boolean digit = ch >= '0' && ch <= '9';

    return lowerCase || upperCase || ch == '_' || (includeNumbers && digit);
  }

  /**
   * @return a {@link LexingMatcher} that matches the given keyword (in any case).  More complex than matching a string,
   * as it checks that the keyword isn't followed by any further identifier characters (letters, digits or
   * underscore).  The token's value is always the lower-cased keyword, regardless of the case used in the source.
   */
  static LexingMatcher forKeyword(String keywordMixedCase) {
    // Locale.ROOT so that the result doesn't depend on the JVM's default locale, e.g. under a Turkish locale an `I`
    // lower-cases to a dotless `ı`, which would stop the keyword from ever matching
    String keyword = keywordMixedCase.toLowerCase(Locale.ROOT);
    return new LexingMatcher() {

      @Override
      public Token match(TokenType type, LexingStream stream) {
        SourceLocation startLocation = stream.getLocation();

        for (int i = 0; i < keyword.length(); i++) {
          char expected = keyword.charAt(i);
          char ch = stream.peek();

          // accept the character as is, or its lower-cased form, to get case insensitive matching
          if (ch != expected && Character.toLowerCase(ch) != expected) {
            return null;
          }
          stream.next();
        }

        // keywords must not match if they are partial words, e.g. we can't have `andrew` matching `and`
        if (isIdentifierCharacter(stream.peek(), true)) {
          return null;
        }

        return stream.newToken(type, startLocation, keyword);
      }

      @Override
      public Optional<String> getKeyword() {
        return Optional.of(keyword);
      }
    };
  }

  /**
   * Match a token from the stream, returning null if it doesn't match.  Implementations are free to advance the stream
   * and do not need to worry about resetting it.
   *
   * @param type the type of token to create on a successful match
   * @param stream the stream to read characters from
   * @return the matched token, or null if the stream's content did not match
   */
  Token match(TokenType type, LexingStream stream);

  /**
   * @return the keyword that this matcher matches, if there is one.  Empty by default - matchers created by
   * {@link #forKeyword(String)} return their (lower-cased) keyword.
   */
  default Optional<String> getKeyword() {
    return Optional.empty();
  }
}
