/*
 * RiskScape™ Copyright New Zealand Institute for Earth Science Limited
 * (Earth Sciences New Zealand) is distributed for research purposes only
 * under the terms of AGPLv3.
 *
 * RiskScape™ Copyright 2025 New Zealand Institute for Earth Science
 * Limited (Earth Sciences New Zealand). All rights reserved. Source code
 * available under the AGPLv3.
 * 
 * This program is free software: you can redistribute it and/or modify it under
 *  the terms of the GNU Affero General Public License as published by the Free
 *  Software Foundation, either version 3 of the License, or (at your option) any
 *  later version.
 * 
 * This program is distributed for RESEARCH PURPOSES ONLY, in the hope that it will
 * be useful for research and education initiatives.
 * 
 * If you are not a researcher, or you are a researcher who wishes to use this
 * program on terms other than AGPLv3 (including those who wish to restrict the
 * distribution of any source code created using this program), please contact:
 * https://riskscape.org.nz
 * 
 * This program is distributed WITHOUT ANY WARRANTY; without even the implied
 * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Affero General Public License for more details.  You should have received a copy
 * of the GNU Affero General Public License along with this program.  If not, see
 * <http://www.gnu.org/licenses/>.
 * 
 * By way of summary only, under the AGPLv3:
 *     • Permissions of this strongest copyleft license are conditioned
 *       on making available complete source code of licensed works and
 *       modifications, which include larger works using a licensed work,
 *       under the same license.
 *     • Copyright and license notices must be preserved.
 *     • Contributors provide an express grant of patent rights.
 *     • When a modified version is used to provide a service over a
 *       network, the complete source code of the modified version must be made
 *       available.
 */
package nz.org.riskscape.dsl;

import static org.hamcrest.Matchers.*;
import static org.junit.Assert.*;

import java.util.Arrays;
import java.util.EnumSet;

import org.hamcrest.Description;
import org.hamcrest.Matcher;
import org.hamcrest.TypeSafeMatcher;
import org.junit.Test;

import com.google.common.collect.Sets;

import nz.org.riskscape.rl.TokenTypes;

public class LexerTest {

  /** Lexer under test; replaced every time {@link #lex(String)} is called. */
  Lexer<TokenTypes> lexer;

  /**
   * Replaces {@link #lexer} with a new lexer over the given source text.
   *
   * <p>The rl token set is borrowed here only so that these tests do not have to define a set of
   * tokens of their own.
   *
   * @param source the text the new lexer should tokenize
   */
  public void lex(String source) {
    this.lexer = new Lexer<>(TokenTypes.tokens(), source);
  }

  /**
   * A rule keyed on a single token type is picked when the next token has that type.
   */
  @Test
  public void conditionalParsingCanMatchASingleTokenType() throws Exception {
    lex("foo");

    // the first rule is expected to win; the second rule's result must not be returned
    Object parsed = lexer.tryThese(
        ConditionalParse.parseIfIs("foo", TokenTypes.IDENTIFIER, () -> "foo!"),
        ConditionalParse.parseIfIs("bar", TokenTypes.QUOTED_IDENTIFIER, () -> "not foo!"));

    assertEquals("foo!", parsed);
  }

  /**
   * Each of a handful of characters the lexer does not recognise must produce a
   * {@link LexerException} carrying an "unexpected character" problem.
   */
  @Test
  public void lexerExceptionUnknownCharacters() throws Exception {
    String[] unsupported = {"^", "|", "&"};
    for (int i = 0; i < unsupported.length; i++) {
      String unexpected = unsupported[i];

      LexerException thrown = null;
      try {
        lex(unexpected);
        lexer.next();
      } catch (LexerException e) {
        thrown = e;
      }

      // reaching this point without an exception means the lexer accepted the character
      if (thrown == null) {
        fail();
      }

      // NOTE(review): SourceLocation(0, 1, 1) presumably means offset 0, line 1, column 1 - confirm
      assertThat(
          thrown.getProblem(),
          equalTo(LexerProblems.get().unexpectedCharacter(new SourceLocation(0, 1, 1), unexpected.charAt(0)))
      );
    }
  }

  /**
   * When the offending character is not on the first line, the reported problem must carry the
   * location of that character rather than the start of the source.
   */
  @Test
  public void lexerExceptionUnknownCharactersShowsLine() throws Exception {
    // the caret sits on the second line of this text block, after some leading whitespace
    String unexpected = """

              ^
        """;

    LexerException thrown = null;
    try {
      lex(unexpected);
      lexer.next();
    } catch (LexerException e) {
      thrown = e;
    }

    // no exception at all is a failure in its own right
    if (thrown == null) {
      fail();
    }

    assertThat(
        thrown.getProblem(),
        equalTo(LexerProblems.get().unexpectedCharacter(new SourceLocation(7, 2, 7), '^'))
    );
  }

  /**
   * A rule keyed on a set of token types is picked when the next token has any one of those types.
   */
  @Test
  public void conditionalParsingCanMatchASetOfTokenTypes() throws Exception {
    lex("&&");

    EnumSet<TokenTypes> identifierOrAnd = EnumSet.of(TokenTypes.IDENTIFIER, TokenTypes.AND);

    Object parsed = lexer.tryThese(
        ConditionalParse.parseIfOneOf("foo", identifierOrAnd, () -> "foo!"),
        ConditionalParse.parseIfIs("bar", TokenTypes.QUOTED_IDENTIFIER, () -> "not foo!"));

    assertEquals("foo!", parsed);
  }

  /**
   * A rule may be keyed on a sequence of token-type sets, one set per upcoming token.
   */
  @Test
  public void conditionalParsingCanMatchASequenceOfTokenTypes() throws Exception {
    lex("and || +");

    // one set of acceptable types for each of the three tokens in the source, in order
    EnumSet<TokenTypes> anyAnd = EnumSet.of(TokenTypes.AND, TokenTypes.KEYWORD_AND);
    EnumSet<TokenTypes> anyOr = EnumSet.of(TokenTypes.OR, TokenTypes.KEYWORD_OR);
    EnumSet<TokenTypes> plusOrMinus = EnumSet.of(TokenTypes.PLUS, TokenTypes.MINUS);

    Object parsed = lexer.tryThese(
        ConditionalParse.parseIf("foo", Arrays.asList(anyAnd, anyOr, plusOrMinus), () -> "foo!"),
        ConditionalParse.parseIfIs("bar", TokenTypes.QUOTED_IDENTIFIER, () -> "not foo!"));

    assertEquals("foo!", parsed);
  }

  /**
   * Demonstrates why longer (greedier) rules need to be listed first: here the shorter rule is
   * listed first and its result is the one returned, even though the longer rule also fits.
   */
  @Test
  public void conditionalParsingWantsTheMostGreedyRulesFirst() throws Exception {
    lex("&& ||");

    // "foo" is listed ahead of the longer "bar" rule, and "foo!" is what comes back
    Object parsed = lexer.tryThese(
        ConditionalParse.parseIfIs("foo", Arrays.asList(TokenTypes.AND), () -> "foo!"),
        ConditionalParse.parseIfIs("bar", Arrays.asList(TokenTypes.AND, TokenTypes.OR), () -> "bar!"));

    assertEquals("foo!", parsed);
  }

  /**
   * When none of the offered rules match, an {@link UnexpectedTokenException} is raised that
   * reports every token type the rules were looking for, plus the token that was actually found.
   */
  @Test
  public void conditionalParsingFailsIfNoConditionsMatch() {
    lex("or");

    UnexpectedTokenException thrown = null;
    try {
      lexer.tryThese(
          ConditionalParse.parseIfIs("foo", Arrays.asList(TokenTypes.AND), () -> "foo!"),
          ConditionalParse.parseIfIs("bar", Arrays.asList(TokenTypes.OR), () -> "bar!"));
    } catch (UnexpectedTokenException e) {
      thrown = e;
    }

    if (thrown == null) {
      fail("should have thrown");
    }

    // the source lexes as the keyword form of 'or', which neither rule accepts
    assertEquals(Sets.newHashSet(TokenTypes.AND, TokenTypes.OR), thrown.getExpected());
    assertEquals(TokenTypes.KEYWORD_OR, thrown.getGot().type);
  }

  /**
   * Every token carries the (1-based, per the expectations below) line number it was read from.
   */
  @Test
  public void tokensRecordTheLineTheyWereOn() throws Exception {
    lex("foo and\nbar\n\nbaz");

    // expected line of each successive token; line 3 of the source is empty
    int[] expectedLines = {1, 1, 2, 4};
    for (int expectedLine : expectedLines) {
      assertEquals(expectedLine, lexer.next().getLocation().getLine());
    }
  }

  /**
   * Rewinding to an earlier token and lexing again must report the same line numbers as the
   * first pass did.
   */
  @Test
  public void rewindingDoesNotAffectTheLineTracking() throws Exception {
    lex("foo and\nbar\n\nbaz \npeanut butter jelly\ntime");

    // tokens ahead of 'peanut'
    for (int expectedLine : new int[] {1, 1, 2, 4}) {
      assertEquals(expectedLine, lexer.next().getLocation().getLine());
    }

    // keep hold of this token so we can rewind to it later
    Token peanut = lexer.next();
    assertEquals("peanut", peanut.value);
    assertEquals(5, peanut.getLocation().getLine());

    // the rest of the source, first time through
    for (int expectedLine : new int[] {5, 5, 6}) {
      assertEquals(expectedLine, lexer.next().getLocation().getLine());
    }

    lexer.rewind(peanut);

    // second time through, starting again from 'peanut' itself
    for (int expectedLine : new int[] {5, 5, 5, 6}) {
      assertEquals(expectedLine, lexer.next().getLocation().getLine());
    }
  }

  /**
   * Both an empty source and a whitespace-only source yield an EOF token straight away.
   */
  @Test
  public void emptySourceIsFine() throws Exception {
    String[] blankSources = {"", "    "};
    for (String blank : blankSources) {
      lex(blank);

      Token first = lexer.next();
      assertEquals(TokenTypes.EOF, first.type);
      assertTrue(lexer.isEOF());
    }
  }

  /**
   * The comparison operators are each lexed to their own token type; note that both
   * {@code !=} and {@code <>} are expected to come out as NOT_EQUALS.
   */
  @Test
  public void findsEquivalenceTokens() {
    lex("= <= >= != < > <>");

    // expected types, in the same order as the operators appear in the source
    TokenTypes[] expectedTypes = {
        TokenTypes.EQUALS,
        TokenTypes.LESS_THAN_EQUAL,
        TokenTypes.GREATER_THAN_EQUAL,
        TokenTypes.NOT_EQUALS,
        TokenTypes.LESS_THAN,
        TokenTypes.GREATER_THAN,
        TokenTypes.NOT_EQUALS
    };
    for (TokenTypes expectedType : expectedTypes) {
      assertThat(lexer.next(), hasType(expectedType));
    }
  }

  /**
   * Lower-case keywords are recognised, including when they are butted up against punctuation.
   */
  @Test
  public void findsKeywords_LowerCase() {
    lex("and, or(true)false");

    TokenTypes[] keywords = {
        TokenTypes.KEYWORD_AND, TokenTypes.KEYWORD_OR, TokenTypes.KEYWORD_TRUE, TokenTypes.KEYWORD_FALSE
    };
    for (int i = 0; i < keywords.length; i++) {
      if (i > 0) {
        // discard the one token that sits between this keyword and the previous one
        lexer.next();
      }
      assertThat(lexer.next(), hasType(keywords[i]));
    }
  }

  /**
   * Upper-case keywords are recognised, including when they are butted up against punctuation.
   */
  @Test
  public void findsKeywords_UpperCase() {
    lex("AND, OR(TRUE)FALSE");

    TokenTypes[] keywords = {
        TokenTypes.KEYWORD_AND, TokenTypes.KEYWORD_OR, TokenTypes.KEYWORD_TRUE, TokenTypes.KEYWORD_FALSE
    };
    for (int i = 0; i < keywords.length; i++) {
      if (i > 0) {
        // discard the one token that sits between this keyword and the previous one
        lexer.next();
      }
      assertThat(lexer.next(), hasType(keywords[i]));
    }
  }

  /**
   * Keywords are recognised regardless of letter case, even when the case is mixed within a
   * single keyword (unlikely in practice, but it should still work).
   */
  @Test
  public void findsKeywords_MixedCase() {
    lex("anD, OR(TruE)FalsE");

    TokenTypes[] keywords = {
        TokenTypes.KEYWORD_AND, TokenTypes.KEYWORD_OR, TokenTypes.KEYWORD_TRUE, TokenTypes.KEYWORD_FALSE
    };
    for (int i = 0; i < keywords.length; i++) {
      if (i > 0) {
        // discard the one token that sits between this keyword and the previous one
        lexer.next();
      }
      assertThat(lexer.next(), hasType(keywords[i]));
    }
  }

  /**
   * Numbers in scientific notation are lexed as a single SCIENTIFIC_NOTATION token, covering
   * upper and lower case exponent markers, negative mantissas/exponents and decimal mantissas.
   */
  @Test
  public void findScientificNumbers() {
    lex("1E3 10e3 1E10 33E10 -33E-10 1.23e-10");

    // the token values expected back, in source order
    String[] expectedValues = {"1E3", "10e3", "1E10", "33E10", "-33E-10", "1.23e-10"};
    for (String expectedValue : expectedValues) {
      Token number = lexer.next();
      assertThat(number, allOf(hasValue(expectedValue), hasType(TokenTypes.SCIENTIFIC_NOTATION)));
    }
  }

  /**
   * Words that merely begin with the letters of a keyword must still lex as plain identifiers,
   * otherwise users would be forced to quote them.
   */
  @Test
  public void identifiersCanStartWithKeywords() {
    // each source holds two space-separated words sharing a prefix with one keyword
    String[] sources = {
        "original origin",
        "andrew andante",
        "trueblues truepenny",
        "falsehoods falsetto",
        "nullipores nullifiers"
    };

    for (String source : sources) {
      lex(source);
      // the words are separated by exactly one space, so splitting recovers the expected values
      for (String word : source.split(" ")) {
        assertThat(lexer.next(), allOf(hasValue(word), hasType(TokenTypes.IDENTIFIER)));
      }
    }
  }

  /**
   * {@code as} lexes as a keyword; it can only be used as an identifier when quoted, and words
   * that merely contain "as" remain ordinary identifiers.
   */
  @Test
  public void asIsAKeyword() {
    // with and without surrounding whitespace
    for (String source : new String[] {"as", " as "}) {
      lex(source);
      assertThat(lexer.next(), allOf(hasValue("as"), hasType(TokenTypes.KEYWORD_AS)));
    }

    // an unquoted 'as' between two identifiers is still the keyword
    lex("property as thing");
    Token property = lexer.next();
    assertThat(property, allOf(hasValue("property"), hasType(TokenTypes.IDENTIFIER)));
    Token keyword = lexer.next();
    assertThat(keyword, allOf(hasType(TokenTypes.KEYWORD_AS)));
    Token thing = lexer.next();
    assertThat(thing, allOf(hasValue("thing"), hasType(TokenTypes.IDENTIFIER)));

    // keyword, quoted form, then two identifiers that only contain the letters 'as'
    lex("as \"as\" aston has");
    TokenTypes[] expectedTypes = {
        TokenTypes.KEYWORD_AS, TokenTypes.QUOTED_IDENTIFIER, TokenTypes.IDENTIFIER, TokenTypes.IDENTIFIER
    };
    for (TokenTypes expectedType : expectedTypes) {
      assertThat(lexer.next(), hasType(expectedType));
    }
  }

  /**
   * The {@code ->} operator lexes as a CHAIN token, both alone and when wedged directly
   * between two identifiers.
   */
  @Test
  public void findsChainTokens() {
    lex("->");
    Token lone = lexer.next();
    assertThat(lone, allOf(hasType(TokenTypes.CHAIN)));

    // no whitespace around the operator here
    lex("this->that");
    Token left = lexer.next();
    assertThat(left, allOf(hasValue("this"), hasType(TokenTypes.IDENTIFIER)));
    Token arrow = lexer.next();
    assertThat(arrow, allOf(hasType(TokenTypes.CHAIN)));
    Token right = lexer.next();
    assertThat(right, allOf(hasValue("that"), hasType(TokenTypes.IDENTIFIER)));
  }

  /**
   * {@code consumeIf} hands back the next token only when its type is (one of) the requested
   * type(s); otherwise it returns an empty result and, as the final assertion shows, the token
   * is still there to be consumed afterwards.
   */
  @Test
  public void consumeIf() {
    lex("id \"quoted\" 23");

    // single-type form
    Token identifier = lexer.consumeIf(TokenTypes.IDENTIFIER).get();
    assertThat(identifier, allOf(hasValue("id"), hasType(TokenTypes.IDENTIFIER)));

    // set-of-types form
    Token quoted = lexer.consumeIf(Sets.newHashSet(TokenTypes.IDENTIFIER, TokenTypes.QUOTED_IDENTIFIER)).get();
    assertThat(quoted, allOf(hasValue("quoted"), hasType(TokenTypes.QUOTED_IDENTIFIER)));

    // the upcoming token is a number, so neither identifier-based request should consume it
    boolean consumedAsIdentifier = lexer.consumeIf(TokenTypes.IDENTIFIER).isPresent();
    assertThat(consumedAsIdentifier, is(false));
    boolean consumedAsEitherIdentifier =
        lexer.consumeIf(Sets.newHashSet(TokenTypes.IDENTIFIER, TokenTypes.QUOTED_IDENTIFIER)).isPresent();
    assertThat(consumedAsEitherIdentifier, is(false));

    Token number = lexer.consumeIf(TokenTypes.INTEGER).get();
    assertThat(number, allOf(hasValue("23"), hasType(TokenTypes.INTEGER)));
  }

  /**
   * {@code #} line comments produce no tokens; only the identifiers between them come back.
   */
  @Test
  public void commentsAreIgnored() {
    lex("#comment1 blah blah\n"
        + "token1\n"
        + "#comment2\n"
        + "token2");

    // the lexer treats comments like whitespace, so only these two identifiers are expected
    for (String expectedValue : new String[] {"token1", "token2"}) {
      assertThat(lexer.next(), allOf(hasValue(expectedValue), hasType(TokenTypes.IDENTIFIER)));
    }
  }

  /**
   * Inline {@code /* ... *}{@code /} comments produce no tokens; only the identifiers around
   * them come back.
   */
  @Test
  public void inlineCommentsAreIgnored() {
    lex("/* comment1 blah blah */ token1 /* comment2 */ token2");

    // the lexer treats comments like whitespace, so only these two identifiers are expected
    for (String expectedValue : new String[] {"token1", "token2"}) {
      assertThat(lexer.next(), allOf(hasValue(expectedValue), hasType(TokenTypes.IDENTIFIER)));
    }
  }

  /**
   * An inline comment that runs over several lines is skipped just like a single-line one.
   */
  @Test
  public void inlineCommentsAreIgnoredWhenSpanningLines() {
    lex("/* comment1\n blah\n blah */ token1 /* comment2 */ token2");

    // the lexer treats comments like whitespace, so only these two identifiers are expected
    for (String expectedValue : new String[] {"token1", "token2"}) {
      assertThat(lexer.next(), allOf(hasValue(expectedValue), hasType(TokenTypes.IDENTIFIER)));
    }
  }

  /**
   * Tokens can say whether the source they came from was a single line. Going by the cases
   * below, a newline only at the very start and/or very end (LF or CRLF) still counts as a
   * single-line source, while a newline between tokens does not.
   */
  @Test
  public void aTokenCanDetermineIfItIsFromSingleOrMultiLineSource() throws Exception {
    // sources[i] is paired with expectSingleLine[i]
    String[] sources = {
        "foo bar baz",
        "foo bar baz\n",
        "\nfoo bar baz\n",
        "foo \nbar baz\n",
        "foo bar baz\r\n",
        "\nfoo bar baz\r\n",
        "foo \r\nbar baz\n"
    };
    boolean[] expectSingleLine = {true, true, true, false, true, true, false};

    for (int i = 0; i < sources.length; i++) {
      lex(sources[i]);

      // every token produced from the source should agree on the answer
      while (!lexer.isEOF()) {
        Token token = lexer.next();
        if (expectSingleLine[i]) {
          assertTrue(token.isSourceSingleLine());
        } else {
          assertFalse(token.isSourceSingleLine());
        }
      }
    }
  }

  /**
   * Creates a matcher that accepts a {@link Token} whose {@code value} equals the given value.
   *
   * @param value the value the token is expected to hold
   * @return a matcher suitable for use with {@code assertThat}
   */
  private Matcher<Token> hasValue(String value) {
    return new TypeSafeMatcher<Token>(Token.class) {

      @Override
      protected boolean matchesSafely(Token candidate) {
        return candidate.value.equals(value);
      }

      @Override
      public void describeTo(Description expectation) {
        expectation.appendText("token with value ").appendValue(value);
      }

      @Override
      protected void describeMismatchSafely(Token candidate, Description mismatch) {
        // show the whole token, not just its value, to make failures easier to diagnose
        mismatch.appendText("was ").appendValue(candidate);
      }
    };
  }

  /**
   * Creates a matcher that accepts a {@link Token} whose {@code type} equals the given type.
   *
   * @param type the token type the token is expected to have
   * @return a matcher suitable for use with {@code assertThat}
   */
  private Matcher<Token> hasType(TokenType type) {
    return new TypeSafeMatcher<Token>(Token.class) {

      @Override
      protected boolean matchesSafely(Token candidate) {
        return candidate.type.equals(type);
      }

      @Override
      public void describeTo(Description expectation) {
        expectation.appendText("token with type ").appendValue(type);
      }

      @Override
      protected void describeMismatchSafely(Token candidate, Description mismatch) {
        // show the whole token, not just its type, to make failures easier to diagnose
        mismatch.appendText("was ").appendValue(candidate);
      }
    };
  }

  /**
   * {@code remaining()} returns the source text that has not been consumed yet, including text
   * containing a character ({@code ^}) that other tests in this class show the lexer rejects.
   */
  @Test
  public void canPeekAtRemainingUnparsedSource() throws Exception {
    lex("okay sofar but^then bad things happen");

    // consume the first two tokens only
    for (int consumed = 0; consumed < 2; consumed++) {
      lexer.next();
    }

    String rest = lexer.remaining();
    assertThat(rest, is(" but^then bad things happen"));
  }
}
