/*
 * RiskScape™ Copyright New Zealand Institute for Earth Science Limited
 * (Earth Sciences New Zealand) is distributed for research purposes only
 * under the terms of AGPLv3.
 *
 * RiskScape™ Copyright 2025 New Zealand Institute for Earth Science
 * Limited (Earth Sciences New Zealand). All rights reserved. Source code
 * available under the AGPLv3.
 * 
 * This program is free software: you can redistribute it and/or modify it under
 *  the terms of the GNU Affero General Public License as published by the Free
 *  Software Foundation, either version 3 of the License, or (at your option) any
 *  later version.
 * 
 * This program is distributed for RESEARCH PURPOSES ONLY, in the hope that it will
 * be useful for research and education initiatives.
 * 
 * If you are not a researcher, or you are a researcher who wishes to use this
 * program on terms other than AGPLv3 (including those who wish to restrict the
 * distribution of any source code created using this program), please contact:
 * https://riskscape.org.nz
 * 
 * This program is distributed WITHOUT ANY WARRANTY; without even the implied
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Affero General Public License for more details.  You should have received a copy
 * of the GNU Affero General Public License along with this program.  If not, see
 * <http://www.gnu.org/licenses/>.
 * 
 * By way of summary only, under the AGPLv3:
 *     • Permissions of this strongest copyleft license are conditioned
 *       on making available complete source code of licensed works and
 *       modifications, which include larger works using a licensed work,
 *       under the same license.
 *     • Copyright and license notices must be preserved.
 *     • Contributors provide an express grant of patent rights.
 *     • When a modified version is used to provide a service over a
 *       network, the complete source code of the modified version must be made
 *       available.
 */
package nz.org.riskscape.dsl;

import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;

import com.google.common.collect.Sets;

import lombok.RequiredArgsConstructor;
import nz.org.riskscape.engine.RiskscapeException;
import nz.org.riskscape.engine.typexp.AST;

/**
 * Converts a {@link String} into a stream of {@link Token}s for parsing into an {@link AST} by the Parser.
 */
public class Lexer<T extends TokenType> {

  // cursor over the source text being lexed - built from the source string in the constructor
  private final LexingStream stream;
  // description of the token type enumeration this lexer matches against
  private final Tokens<T> tokens;

  // the token found by the last peek() that has not been consumed yet, or null when nothing is buffered - it is
  // cleared by next() and by rewind(). Note that the eof token is never buffered here.
  private Token peeked;

  // lexing keeps a track of the last token scanned in the current line to allow per-line indent tracking that
  // survives arbitrary rewinds through the token stream - a more efficient implementation would potentially be
  // possible by instead keeping a list of indent counts per line, but would be a little more complex to maintain
  // state in some situations (e.g. a rewind to an indent at the start of a line)
  private Token prevTokenThisLine = null;

  /**
   * Meta-class object for token type enumeration.  Works around some limitations with enums, reflection and generics
   * by bundling the enum class together with the token types that the {@link Lexer} has to treat specially.
   */
  @RequiredArgsConstructor
  public static class Tokens<T extends TokenType>  {
    // class whose enum constants are the candidate token types the lexer tries to match
    public final Class<T> enumClass;
    // token type that signals the end of the source - the only type allowed to be zero length
    public final T eofToken;
    // token types treated as line breaks - matching one resets the lexer's per-line chain of previous tokens
    public final List<T> newLines;

    /**
     * @return the enum constants of {@link #enumClass}, as reported by {@link Class#getEnumConstants()}
     */
    public T[] getConstants() {
      return enumClass.getEnumConstants();
    }
  }

  /**
   * Creates a {@link Lexer} that reads tokens from the given source text.
   *
   * @param tokens describes the token types that can be matched
   * @param source the text to be split in to tokens
   */
  public Lexer(Tokens<T> tokens, String source) {
    this.stream = new LexingStream(source);
    this.tokens = tokens;
  }

  /**
   * Consumes and returns the upcoming {@link Token}.
   *
   * @return the next {@link Token}, or a token of type {@link Tokens#eofToken} if lexing has reached the end of the
   * source.
   * @throws LexerException if the upcoming text can not be matched to a token
   */
  public Token next() throws LexerException {
    final Token upcoming = peek();

    // forget the buffered token so that the following peek() lexes a fresh one
    this.peeked = null;
    return upcoming;
  }

  /**
   * Gives the part of the source that has not been consumed yet. Handy for error reporting, as it shows the user
   * the untouched text that was about to be parsed. When parsing a large file you will probably want to truncate
   * this before putting it in an error message.
   *
   * @return the source text from the start of the peeked token (if there is one) or otherwise from the current
   * position of the stream, through to the end of the source.
   */
  public String remaining() {
    final int offset;
    if (peeked != null) {
      // a peeked token has already moved the stream along, so go back to where that token starts
      offset = peeked.begin;
    } else {
      offset = stream.getIndex();
    }

    return stream.getSource().substring(offset);
  }

  /**
   * Looks ahead at the {@link Token} that {@link #next()} is going to return, without consuming it. Whitespace
   * tokens are skipped over. Once the end of the source is reached a new eof token is built on every call - it is
   * never buffered.
   *
   * @return the upcoming non-whitespace {@link Token}, or an eof token if the source is exhausted
   * @throws LexerException if none of the token types match the upcoming text, or if an eof typed token is matched
   * before the end of the source
   */
  public Token peek() throws LexerException {
    if (peeked != null) {
      // already looked ahead and nothing has consumed it since
      return peeked;
    }

    // each pass lexes one token - we only go around again after skipping whitespace
    while (true) {
      if (stream.isEof()) {
        Token endOfSource = Token.eof(tokens, stream.getSource());
        endOfSource.setLocation(stream.getLocation());
        return endOfSource;
      }

      // try each token type in declaration order, first one to match wins
      final SourceLocation startedAt = stream.getLocation();
      Token found = null;
      for (T tokenType : tokens.getConstants()) {
        found = tokenType.matcher().match(tokenType, stream);
        if (found != null) {
          break;
        }
        // a failed matcher may have moved the stream, so put it back for the next candidate
        stream.rewind(startedAt);
      }

      if (found == null) {
        throw new LexerException(LexerProblems.get().unexpectedCharacter(stream.getLocation(), stream.next()));
      }

      final boolean eofTyped = found.type == tokens.eofToken;
      if (found.begin == found.end && !eofTyped) {
        // coding error in a matcher
        throw new AssertionError("Zero length token - only eof can be zero length " + found);
      }

      // maintain the per-line linked list of tokens - a new line starts a fresh chain
      if (tokens.newLines.contains(found.type)) {
        prevTokenThisLine = null;
      } else {
        found.setPrevious(prevTokenThisLine);
        prevTokenThisLine = found;
      }

      if (found.type.isWhitespace()) {
        // not interesting to the parser, look for the token that follows it
        continue;
      }

      if (eofTyped) {
        // this is only possible if you've got a badly configured set of tokens (i think)
        throw new LexerException(LexerProblems.get().unexpectedEof(stream.getLocation()));
      }

      peeked = found;
      return peeked;
    }
  }

  /**
   * @return true if the upcoming token is the eof token, i.e. there is nothing but whitespace left in the source
   * @throws LexerException if the upcoming text can not be matched to a token
   */
  public boolean isEOF() throws LexerException {
    final Token upcoming = peek();
    return upcoming.type == tokens.eofToken;
  }

  /**
   * @return the type of the upcoming token - a shortcut for {@code peek().type}
   */
  @SuppressWarnings("unchecked")
  public T peekType() {
    final Token upcoming = peek();
    return (T) upcoming.type;
  }

  /**
   * Moves the lexer back so that the given {@link Token} is the one returned by the following call to
   * {@link #next()}.
   *
   * @param backTo the token to go back to
   */
  public void rewind(Token backTo) {
    // whatever was buffered by peek() is no longer the upcoming token
    peeked = null;

    // restore the per-line chain to how it was just before backTo was lexed
    prevTokenThisLine = backTo.getPrevious();
    stream.rewind(backTo.getLocation());
  }

  /**
   * Varargs flavour of {@link #expect(Set)}.
   *
   * @param expected the token types that are acceptable for the next token
   * @return the consumed token, if it was one of the expected types
   * @throws UnexpectedTokenException if the next token is not one of the expected types
   */
  @SafeVarargs
  public final Token expect(TokenType... expected) {
    return expect(Sets.newHashSet(expected));
  }


  /**
   * Consumes the next token, insisting that it is one of the given types. On failure the lexer is rewound so the
   * offending token is still the next one to be read.
   *
   * @param expected the token types that are acceptable for the next token
   * @return the consumed token, if it was one of the expected types
   * @throws UnexpectedTokenException if the next token is not one of the expected types
   */
  public final Token expect(Set<? extends TokenType> expected) {
    final Token actual = next();
    if (expected.contains(actual.type)) {
      return actual;
    }

    rewind(actual);
    throw new UnexpectedTokenException(expected, actual);
  }

  /**
   * Picks one of several alternative parses by looking ahead at the upcoming tokens.
   *
   * Each {@link ConditionalParse} supplies a sequence of token type sets. The attempts are tested in the order
   * given, and the first one whose whole sequence matches the upcoming tokens wins - the lexer is rewound to where
   * it started and that attempt's parse is invoked.
   *
   * If nothing matches, the upcoming tokens are walked again to find the first position at which none of the still
   * viable attempts accept the token, so that the error can report what was expected there. The lexer is rewound to
   * where it started before that error is thrown.
   *
   * @param attemptsArray the alternatives to try, in priority order
   * @return the result of the parse from the first attempt that matched
   * @throws UnexpectedTokenException if none of the attempts match the upcoming tokens
   * @throws IllegalArgumentException if no attempts were given
   * @throws RiskscapeException if no attempt matched but no mismatching position could be found either
   */
  @SafeVarargs
  public final <U> U tryThese(ConditionalParse<U>... attemptsArray) {
    final Token startToken = peek();

    // search through the conditions in priority order - that is first completely matching sequence wins
    for (ConditionalParse<U> cp : attemptsArray) {
      rewind(startToken);

      boolean matched = true;
      for (Set<? extends TokenType> allowed : cp.getNextTokens()) {
        if (!allowed.contains(next().type)) {
          matched = false;
          // stop at the first mismatch - lexing further ahead can't rescue this attempt, and could throw on
          // text that a later attempt would never have needed to look at
          break;
        }
      }

      if (matched) {
        rewind(startToken);
        return cp.getParse().get();
      }
    }

    rewind(startToken);

    // didn't match - try and find where it went wrong
    final int maxLen = Arrays.stream(attemptsArray)
        .map(cp -> cp.getNextTokens().size())
        .max(Integer::compareTo)
        .orElseThrow(() -> new IllegalArgumentException("conditions had empty tokens"));

    // the attempts that have accepted every token seen so far. This is only ever replaced, never modified in
    // place, as Arrays.asList gives back a fixed size list
    List<ConditionalParse<U>> attempts = Arrays.asList(attemptsArray);

    for (int counter = 0; counter < maxLen; counter++) {
      final Token currentToken = next();
      final int position = counter;

      // attempts with a sequence too short to reach this position have nothing to say about this token
      final List<ConditionalParse<U>> candidates = attempts
          .stream()
          .filter(cp -> cp.getNextTokens().size() > position)
          .collect(Collectors.toList());

      final List<ConditionalParse<U>> matching = candidates
          .stream()
          .filter(cp -> cp.getNextTokens().get(position).contains(currentToken.type))
          .collect(Collectors.toList());

      if (matching.isEmpty()) {
        // this is where it went wrong - report everything that would have been acceptable at this position
        Set<TokenType> expected = Sets.newHashSet();
        candidates.forEach(cp -> expected.addAll(cp.getNextTokens().get(position)));

        rewind(startToken);
        throw new UnexpectedTokenException(expected, currentToken);
      }

      attempts = matching;
    }

    // hmm, we've somehow ended up with an ambiguous expression - this is a parser error, not a user error,
    // and should probably never happen as the first one in the attempt list will win
    throw new RiskscapeException("Ambiguous ast - " + attempts);
  }

  /**
   * Single token type flavour of {@link #consumeIf(Set)}.
   *
   * @param conditionalToken the token type to look for
   * @return the consumed token if the upcoming token was of the given type, otherwise empty
   */
  public Optional<Token> consumeIf(TokenType conditionalToken) {
    final Set<TokenType> justThisOne = Sets.newHashSet(conditionalToken);
    return consumeIf(justThisOne);
  }

  /**
   * Consumes the upcoming token, but only if its type is one of those given. Otherwise the token is left in place
   * to be read by a later call.
   *
   * @param oneOf the token types to look for
   * @return the consumed token if the upcoming token was one of the given types, otherwise empty
   */
  public Optional<Token> consumeIf(Set<? extends TokenType> oneOf) {
    if (!oneOf.contains(peekType())) {
      return Optional.empty();
    }

    return Optional.of(next());
  }

}
