// Copyright (c) 2019, the Dart project authors. All rights reserved.
// Copyright 2014 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// ES6 extends the \uxxxx escape and also allows \u{xxxxx}.

// @dart = 2.9

import 'package:expect/expect.dart';

import 'v8_regexp_utils.dart';

/// Asserts that [r] matches "foo" and "boo" but does not match "moo".
void testRegExpHelper(RegExp r) {
  for (final accepted in const ["foo", "boo"]) {
    assertTrue(r.hasMatch(accepted));
  }
  assertFalse(r.hasMatch("moo"));
}

/// Checks `\uXXXX` and `\u{...}` escapes for "f" and "b" used as alternatives
/// inside a group, written in three different string spellings.
void TestUnicodeEscapes() {
  // Every row spells the same three patterns: the four-digit escape, the
  // braced escape, and the braced escape with differing digit counts.
  final spellings = [
    // Raw strings: the backslashes reach the regexp parser untouched.
    [
      r"(\u0066|\u0062)oo",
      r"(\u{0066}|\u{0062})oo",
      r"(\u{66}|\u{000062})oo",
    ],
    // Note that we need \\ inside a non-raw string, otherwise the sequence is
    // interpreted as a unicode escape by the string literal itself.
    [
      "(\\u0066|\\u0062)oo",
      "(\\u{0066}|\\u{0062})oo",
      "(\\u{66}|\\u{000062})oo",
    ],
    // Though, unicode escapes resolved by the string literal should work too.
    [
      "(\u0066|\u0062)oo",
      "(\u{0066}|\u{0062})oo",
      "(\u{66}|\u{000062})oo",
    ],
  ];

  for (final row in spellings) {
    // Only the four-digit form is additionally tried without the unicode flag.
    testRegExpHelper(RegExp(row.first));
    for (final pattern in row) {
      testRegExpHelper(RegExp(pattern, unicode: true));
    }
  }
}

/// Checks `\uXXXX` and `\u{...}` escapes used as the end points of a
/// character-class range covering "b" through "f".
void TestUnicodeEscapesInCharacterClasses() {
  // Every row spells the same three patterns: the four-digit escape, the
  // braced escape, and the braced escape with differing digit counts.
  final spellings = [
    // Raw strings: the backslashes reach the regexp parser untouched.
    [
      r"[\u0062-\u0066]oo",
      r"[\u{0062}-\u{0066}]oo",
      r"[\u{62}-\u{000066}]oo",
    ],
    // Note that we need \\ inside a non-raw string, otherwise the sequence is
    // interpreted as a unicode escape by the string literal itself.
    [
      "[\\u0062-\\u0066]oo",
      "[\\u{0062}-\\u{0066}]oo",
      "[\\u{62}-\\u{000066}]oo",
    ],
    // Though, unicode escapes resolved by the string literal should work too.
    [
      "[\u0062-\u0066]oo",
      "[\u{0062}-\u{0066}]oo",
      "[\u{62}-\u{000066}]oo",
    ],
  ];

  for (final row in spellings) {
    // Only the four-digit form is additionally tried without the unicode flag.
    testRegExpHelper(RegExp(row.first));
    for (final pattern in row) {
      testRegExpHelper(RegExp(pattern, unicode: true));
    }
  }
}

/// Checks how `\u{...}` is treated when the unicode flag is not set.
void TestBraceEscapesWithoutUnicodeFlag() {
  // \u followed by an illegal escape is parsed as a plain "u", so the
  // following {x} becomes a repetition count. Each pattern is tried both as a
  // raw string and with doubled backslashes.

  // {2}: exactly two "u" characters are required.
  for (final pattern in [r"f\u{2}bar", "f\\u{2}bar"]) {
    final exactlyTwo = RegExp(pattern);
    assertFalse(exactlyTwo.hasMatch("fbar"));
    assertFalse(exactlyTwo.hasMatch("fubar"));
    assertTrue(exactlyTwo.hasMatch("fuubar"));
    assertFalse(exactlyTwo.hasMatch("fuuubar"));
  }

  // {1,2}: one or two "u" characters are accepted.
  for (final pattern in [r"f\u{1,2}bar", "f\\u{1,2}bar"]) {
    final oneOrTwo = RegExp(pattern);
    assertFalse(oneOrTwo.hasMatch("fbar"));
    assertTrue(oneOrTwo.hasMatch("fubar"));
    assertTrue(oneOrTwo.hasMatch("fuubar"));
    assertFalse(oneOrTwo.hasMatch("fuuubar"));
  }

  // Inside a character class, "u", "{", "2" and "}" are each matched as
  // individual members.
  for (final pattern in [r"[\u{2}]", "[\\u{2}]"]) {
    final charClass = RegExp(pattern);
    for (final member in ["u", "{", "2", "}"]) {
      assertTrue(charClass.hasMatch(member));
    }
    for (final outsider in ["q", "(", ")"]) {
      assertFalse(charClass.hasMatch(outsider));
    }
  }
}

/// Checks that invalid escapes are identity escapes without the unicode flag
/// and make the [RegExp] constructor throw with it.
void TestInvalidEscapes() {
  // Without the u flag, invalid unicode escapes and other invalid escapes are
  // treated as identity escapes.
  for (final pattern in [
    r"first\u\x\z\8\9second",
    "first\\u\\x\\z\\8\\9second",
  ]) {
    assertTrue(RegExp(pattern).hasMatch("firstuxz89second"));
  }

  // The same holds inside a character class: every escaped character is a
  // member, and unrelated characters are not.
  for (final pattern in [r"[\u\x\z\8\9]", "[\\u\\x\\z\\8\\9]"]) {
    final charClass = RegExp(pattern);
    for (final member in ["u", "x", "z", "8", "9"]) {
      assertTrue(charClass.hasMatch(member));
    }
    for (final outsider in ["q", "7"]) {
      assertFalse(charClass.hasMatch(outsider));
    }
  }

  // However, with the u flag, these are treated as invalid escapes. The raw
  // spellings come first, followed by the doubled-backslash spellings.
  final rejected = [
    r"\u",
    r"\u12",
    r"\ufoo",
    r"\x",
    r"\xfoo",
    r"\z",
    r"\8",
    r"\9",
    "\\u",
    "\\u12",
    "\\ufoo",
    "\\x",
    "\\xfoo",
    "\\z",
    "\\8",
    "\\9",
  ];
  for (final pattern in rejected) {
    assertThrows(() => RegExp(pattern, unicode: true));
  }
}

/// Checks the upper bound on the hex value accepted inside `\u{}`.
void TestTooBigHexEscape() {
  // Each pattern is given as a raw string and with doubled backslashes.
  final atLimit = [r"\u{10ffff}", "\\u{10ffff}"];
  final pastLimit = [r"\u{110000}", "\\u{110000}"];

  // The hex number inside \u{} has a maximum value: 10ffff is still accepted
  // with the u flag, 110000 is rejected.
  for (final pattern in atLimit) {
    RegExp(pattern, unicode: true);
  }
  for (final pattern in pastLimit) {
    assertThrows(() => RegExp(pattern, unicode: true));
  }

  // Without the u flag, they're of course fine ({x} is the count).
  for (final pattern in pastLimit) {
    RegExp(pattern);
  }
}

/// Checks that an escaped `[` matches a literal bracket regardless of the
/// unicode flag.
void TestSyntaxEscapes() {
  // Syntax escapes work the same with or without the u flag. The flag-less
  // runs come first; `unicode: false` is the constructor's default.
  for (final withUnicode in [false, true]) {
    // Raw spelling first, then the doubled-backslash spelling.
    for (final pattern in [r"foo\[bar", "foo\\[bar"]) {
      final escapedBracket = RegExp(pattern, unicode: withUnicode);
      assertTrue(escapedBracket.hasMatch("foo[bar"));
      assertFalse(escapedBracket.hasMatch("foo]bar"));
    }
  }
}

/// Checks that an escaped surrogate pair in a unicode pattern matches the
/// code point it encodes.
void TestUnicodeSurrogates() {
  // U+10E6D corresponds to the surrogate pair [U+D803, U+DE6D]. The pattern is
  // given as a raw string and with doubled backslashes.
  for (final pattern in [r"foo\ud803\ude6dbar", "foo\\ud803\\ude6dbar"]) {
    final surrogatePair = RegExp(pattern, unicode: true);
    assertTrue(surrogatePair.hasMatch("foo\u{10e6d}bar"));
  }
}

/// Runs the escape test groups, then checks non-BMP patterns, literal
/// surrogates and mixed escape notations directly.
void main() {
  TestUnicodeEscapes();
  TestUnicodeEscapesInCharacterClasses();
  TestBraceEscapesWithoutUnicodeFlag();
  TestInvalidEscapes();
  TestTooBigHexEscape();
  TestSyntaxEscapes();
  TestUnicodeSurrogates();

  // Shorthands that compile [pattern] with the unicode flag and apply it to
  // [input].
  bool uHasMatch(String pattern, String input) =>
      RegExp(pattern, unicode: true).hasMatch(input);
  RegExpMatch uFirstMatch(String pattern, String input) =>
      RegExp(pattern, unicode: true).firstMatch(input);

  // Non-BMP patterns.
  // Single character atom.
  assertTrue(uHasMatch("\u{12345}", "\u{12345}"));
  assertTrue(uHasMatch(r"\u{12345}", "\u{12345}"));
  // The two checks below are each performed twice with identical arguments.
  for (var run = 0; run < 2; run++) {
    assertTrue(uHasMatch(r"\u{12345}", "\ud808\udf45"));
  }
  for (var run = 0; run < 2; run++) {
    assertFalse(uHasMatch(r"\u{12345}", "\udf45"));
  }

  // Multi-character atom, then disjunction (second code point wrapped in a
  // non-capturing group). Both patterns face the same four inputs.
  for (final pattern in [
    r"\u{12345}\u{23456}",
    r"\u{12345}(?:\u{23456})",
  ]) {
    assertTrue(uHasMatch(pattern, "a\u{12345}\u{23456}b"));
    assertTrue(uHasMatch(pattern, "b\u{12345}\u{23456}c"));
    assertFalse(uHasMatch(pattern, "a\udf45\u{23456}b"));
    assertFalse(uHasMatch(pattern, "b\udf45\u{23456}c"));
  }

  // Alternative.
  const eitherCodePoint = r"\u{12345}|\u{23456}";
  assertTrue(uHasMatch(eitherCodePoint, "a\u{12345}b"));
  assertTrue(uHasMatch(eitherCodePoint, "b\u{23456}c"));
  assertFalse(uHasMatch(eitherCodePoint, "a\udf45\ud84db"));
  assertFalse(uHasMatch(eitherCodePoint, "b\udf45\ud808c"));

  // Capture.
  final backReferences = [
    "(\u{12345}|\u{23456}).\\1",
    r"(\u{12345}|\u{23456}).\1",
  ];
  for (final pattern in backReferences) {
    assertTrue(uHasMatch(pattern, "\u{12345}b\u{12345}"));
  }
  for (final pattern in backReferences) {
    assertFalse(uHasMatch(pattern, "\u{12345}b\u{23456}"));
  }

  // Quantifier.
  const threeCodePoints = "\u{12345}\u{12345}\u{12345}";
  const codePointThenTrails = "\u{12345}\udf45\udf45";
  assertTrue(uHasMatch("\u{12345}{3}", threeCodePoints));
  assertTrue(uHasMatch(r"\u{12345}{3}", threeCodePoints));
  // This one pattern is compiled without the unicode flag.
  assertTrue(RegExp("\u{12345}{3}").hasMatch(codePointThenTrails));
  assertFalse(uHasMatch(r"\ud808\udf45{3}", codePointThenTrails));
  assertTrue(uHasMatch(r"\ud808\udf45{3}", threeCodePoints));
  assertFalse(uHasMatch("\u{12345}{3}", codePointThenTrails));
  assertFalse(uHasMatch(r"\u{12345}{3}", codePointThenTrails));

  // Literal surrogates.
  const twoU10000 = "\u{10000}\u{10000}";
  for (final pattern in ["\ud800\udc00+", "\\ud800\\udc00+"]) {
    shouldBe(uFirstMatch(pattern, twoU10000), [twoU10000]);
  }

  const rangeInput = "\u{10003}\u{50001}";
  for (final pattern in [
    "[\\ud800\\udc03-\\ud900\\udc01\]+",
    "[\ud800\udc03-\u{50001}\]+",
  ]) {
    shouldBe(uFirstMatch(pattern, rangeInput), [rangeInput]);
  }

  // Unicode escape sequences to represent a non-BMP character cannot have
  // mixed notation, and must follow the rules for RegExpUnicodeEscapeSequence.
  assertThrows(() => RegExp("[\\ud800\udc03-\ud900\\udc01\]+", unicode: true));
  for (final pattern in ["\\ud800\udc00+", "\ud800\\udc00+"]) {
    assertNull(uFirstMatch(pattern, twoU10000));
  }

  for (final pattern in [
    "[\\ud800\udc00]",
    "[\\{ud800}\udc00]",
    "[\ud800\\udc00]",
    "[\ud800\\{udc00}]",
  ]) {
    assertNull(uFirstMatch(pattern, "\u{10000}"));
  }

  for (final pattern in [
    r"\u{d800}\u{dc00}+",
    r"\ud800\u{dc00}+",
    r"\u{d800}\udc00+",
  ]) {
    assertNull(uFirstMatch(pattern, "\ud800\udc00\udc00"));
  }
}
