/*
 * RiskScape™ Copyright New Zealand Institute for Earth Science Limited
 * (Earth Sciences New Zealand) is distributed for research purposes only
 * under the terms of AGPLv3.
 *
 * RiskScape™ Copyright 2025 New Zealand Institute for Earth Science
 * Limited (Earth Sciences New Zealand). All rights reserved. Source code
 * available under the AGPLv3.
 * 
 * This program is free software: you can redistribute it and/or modify it under
 *  the terms of the GNU Affero General Public License as published by the Free
 *  Software Foundation, either version 3 of the License, or (at your option) any
 *  later version.
 * 
 * This program is distributed for RESEARCH PURPOSES ONLY, in the hope that it will
 * be useful for research and education initiatives.
 * 
 * If you are not a researcher, or you are a researcher who wishes to use this
 * program on terms other than AGPLv3 (including those who wish to restrict the
 * distribution of any source code created using this program), please contact:
 * https://riskscape.org.nz
 * 
 * This program is distributed WITHOUT ANY WARRANTY; without even the implied
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Affero General Public License for more details.  You should have received a copy
 * of the GNU Affero General Public License along with this program.  If not, see
 * <http://www.gnu.org/licenses/>.
 * 
 * By way of summary only, under the AGPLv3:
 *     • Permissions of this strongest copyleft license are conditioned
 *       on making available complete source code of licensed works and
 *       modifications, which include larger works using a licensed work,
 *       under the same license.
 *     • Copyright and license notices must be preserved.
 *     • Contributors provide an express grant of patent rights.
 *     • When a modified version is used to provide a service over a
 *       network, the complete source code of the modified version must be made
 *       available.
 */
package nz.org.riskscape.rl;

import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.Optional;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import com.google.common.collect.Sets;

import nz.org.riskscape.dsl.Lexer;
import nz.org.riskscape.dsl.Lexer.Tokens;
import nz.org.riskscape.dsl.LexerException;
import nz.org.riskscape.dsl.LexingMatcher;
import nz.org.riskscape.dsl.Token;
import nz.org.riskscape.dsl.TokenMatcher;
import nz.org.riskscape.dsl.TokenType;

/**
 * Tokens used by {@link ExpressionParser}
 */
public enum TokenTypes implements TokenType {

  KEYWORD_OR(LexingMatcher.forKeyword("or")),
  KEYWORD_AND(LexingMatcher.forKeyword("and")),
  KEYWORD_TRUE(LexingMatcher.forKeyword("true")),
  KEYWORD_FALSE(LexingMatcher.forKeyword("false")),
  KEYWORD_NULL(LexingMatcher.forKeyword("null")),
  KEYWORD_AS(LexingMatcher.forKeyword("as")),
  PARAMETER_IDENTIFIER(LexingMatcher.forPattern("^\\$([a-zA-Z]+[a-zA-Z_0-9]*)")),
  //identifiers can have hyphens when they are for a key - we do a positive look ahead for the colon with out consuming
  // it - this means we don't have to have a different parsing sequence for key identifiers
  KEY_IDENTIFIER(LexingMatcher.forPattern("^[a-zA-Z]+[a-zA-Z_\\-0-9]*(?= *\\:)")),
  //identifiers must start with alpha, then may have alpha, numeric and underscores
  // NB keep in sync with IDENTIFIER_PATTERN below
  IDENTIFIER(LexingMatcher.forPattern("^[a-zA-Z]+[a-zA-Z_0-9]*")),
  QUOTED_IDENTIFIER(LexingMatcher.forQuotedString('"')),
  STRING(LexingMatcher.forQuotedString('\'')),
  SCIENTIFIC_NOTATION(LexingMatcher.forPattern("^-?[0-9]+(?:\\.[0-9]+)?[Ee][-+]?[0-9]+")),
  DECIMAL(LexingMatcher.forPattern("^-?[0-9]*\\.[0-9]+")),
  INTEGER(LexingMatcher.forPattern("^-?[0-9]+")),
  INDEX('.'),
  LPAREN('('),
  RPAREN(')'),
  LBRACK('['),
  RBRACK(']'),
  LBRACE('{'),
  RBRACE('}'),
  COMMA(','),
  DOUBLE_COLON("::"),
  COLON(':'),
  CHAIN("->"),
  WHITESPACE(LexingMatcher.forWhitespace(" \t\r\n"), true),
  NOT_EQUALS(LexingMatcher.forPattern("^(<>|!=)")),
  COMMENT_INLINE(LexingMatcher.forPattern("(?s)^\\/\\*.*?\\*\\/"), true),  // needs to precede DIVIDE
  EQUALS('='),
  PLUS('+'),
  MINUS('-'),
  POW("**"),
  MULTIPLY('*'),
  DIVIDE('/'),
  OR("||"),
  AND("&&"),
  LESS_THAN_EQUAL("<="),
  GREATER_THAN_EQUAL(">="),
  LESS_THAN('<'),
  GREATER_THAN('>'),
  COMMENT(LexingMatcher.forLineComment("#"), true),
  EOF(LexingMatcher.forPattern("^$"));

  public static Tokens<TokenTypes> tokens() {
    return new Tokens<>(
        TokenTypes.class,
        TokenTypes.EOF,
        List.of()
    );
  }

  public static final Set<String> KEYWORDS =
      Arrays.stream(TokenTypes.values())
        .map(tt -> tt.matcher().getKeyword())
        .filter(Optional::isPresent)
        .map(Optional::get)
        .collect(Collectors.toSet());

  /**
   * The regex used for matching {@link TokenTypes#IDENTIFIER} tokens - used for quoting methods.
   * NB has to be reiterated because it can't precede the emum members
   */
  public static final Pattern IDENTIFIER_PATTERN =
      Pattern.compile("^[a-zA-Z]+[a-zA-Z_0-9]*");

  /**
   * Get a {@link TokenMatcher} for the keyword. The returned matcher will match the keyword (case insensitive),
   * but not if it is immediately followed by a valid {@link #IDENTIFIER} character.
   *
   * E.g for keyword('and') strings link 'andrew' would not match but 'and ' or 'and,' would
   * @param keyword to match
   * @return token matcher
   */
  static TokenMatcher forKeyword(String keyword) {
    TokenMatcher patternMatcher = TokenMatcher.forPattern("^(?i)" + keyword + "(?![a-zA-Z_0-9])");

    return new TokenMatcher() {

      @Override
      public Token match(TokenType type, String source, int position) {
        return patternMatcher.match(type, source, position);
      }

      @Override
      public Optional<String> getKeyword() {
        return Optional.of(keyword);
      }
    };
  }

  /**
   * @return a source representation of the given text that can be safely lexed as {@link TokenTypes#STRING}
   */
  public static String quoteText(String text) {
    if (text.contains("'")) {
      text = text.replace("'", "\\'");
    }

    return "'" + text + "'";
  }

  /**
   * Will wrap text in single quotes if it is not already so that it could be
   * parsed as a Riskscape string literal expression. Useful for cases where the user
   * isn't sure if they need to enter quotes or not, and we don't want to double-quote
   */
  public static String quoteTextIfNeeded(String text) {
    if (text.startsWith("'") && text.endsWith("'")) {
      return text; // already quoted
    }
    return quoteText(text);
  }

  /**
   * @return a source representation of the given identifier that can be safely lexed as either an
   * {@link TokenTypes#IDENTIFIER} or as a {@link TokenTypes#QUOTED_IDENTIFIER}
   */
  public static String quoteIdent(String token) {
    Matcher matched = IDENTIFIER_PATTERN.matcher(token);
    if (!matched.matches() || KEYWORDS.contains(token.toLowerCase())) {
      return '"' + token.replace("\"", "\\\"") + '"';
    } else {
      return token;
    }
  }

  /**
   * @return id with any wrapping quotes (either single or double) removed
   */
  public static String stripQuotes(String id) {
    Lexer<TokenTypes> lexer = new Lexer<>(TokenTypes.tokens(), id);

    Optional<String> unquoted;
    try {
      unquoted = lexer
              .consumeIf(Sets.newHashSet(TokenTypes.STRING, TokenTypes.QUOTED_IDENTIFIER))
              .map(Token::getValue);
    } catch (LexerException e) {
      // We've probably tried to strip the quotes off a string that begins with a quote - such as 'foo
      // Just return the original string - this is consistent with behaviour for strings like f'oo and foo'
      return id;

    }

    if (lexer.isEOF() && unquoted.isPresent()) {
      // return unquoted if it is present, and the only thing token contained id
      return unquoted.get();
    }
    return id;
  }

  /**
   * @return an identifier token, one of {@link TokenTypes#QUOTED_IDENTIFIER} or {@link TokenTypes#IDENTIFIER},
   * depending on whether the given identifier needs quoting or not.  Takes care to construct the token with the correct
   * source representation (quoted) as well as the correct unquoted value.
   */
  public static Token identToken(String identifier) {
    String quoted = quoteIdent(identifier);

    if (quoted.equals(identifier)) {
      return Token.token(TokenTypes.IDENTIFIER, identifier);
    } else {
      return new Token(TokenTypes.QUOTED_IDENTIFIER, quoted, 0, quoted.length(), identifier);
    }
  }
  TokenTypes(LexingMatcher matcher, boolean whitespace) {
    this.matcher = matcher;
    this.whitespace = whitespace;
  }

  TokenTypes(LexingMatcher matcher) {
    this.matcher = matcher;
    this.whitespace = false;
  }

  TokenTypes(char character) {
    this(LexingMatcher.forChar(character));
  }

  TokenTypes(String string) {
    // we used to use a case insensitive match here, but anything that's case sensitive should be using keyword
    // matching
    this(LexingMatcher.forString(string));
  }

  LexingMatcher matcher;
  boolean whitespace;

  @Override
  public LexingMatcher matcher() {
    return matcher;
  }

  @Override
  public boolean isWhitespace() {
    return whitespace;
  }
}
