blob: 158579f99d2ee91abf4fdcdc8a24dd6eda1d8485 [file] [log] [blame]
// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
// This file encapsulates all the interaction with the
// JSC regular expression library (jscre), which is also referred to as pcre.
#include "lib/regexp_jsc.h"
#include "platform/assert.h"
#include "vm/allocation.h"
#include "vm/exceptions.h"
#include "vm/globals.h"
#include "vm/isolate.h"
#include "third_party/jscre/pcre.h"
namespace dart {
// Copies the contents of |str| into a newly allocated uint16_t buffer taken
// from the current isolate's zone and returns that buffer.
//
// The buffer holds exactly str.Length() elements, one per str.CharAt(i); no
// terminator is appended, so callers must pass the length along separately.
static uint16_t* GetTwoByteData(const String& str) {
  const intptr_t length = str.Length();
  Zone* current_zone = Isolate::Current()->current_zone();
  uint16_t* buffer = current_zone->Alloc<uint16_t>(length);
  for (intptr_t index = 0; index < length; ++index) {
    buffer[index] = str.CharAt(index);
  }
  return buffer;
}
// Allocation callback passed to jscre::jsRegExpCompile.
//
// Instead of allocating raw memory, this creates a JSRegExp object sized by
// |size| and returns the start address of that object's data area.
// |size| is expected to be positive (checked by an assertion).
static void* JSREMalloc(size_t size) {
  ASSERT(static_cast<intptr_t>(size) > 0);
  const JSRegExp& regexp_object = JSRegExp::Handle(JSRegExp::New(size));
  return regexp_object.GetDataStartAddress();
}
// Deallocation callback passed to jscre::jsRegExpCompile.
//
// Deliberately a no-op: the memory handed out by JSREMalloc is garbage
// collected, so there is nothing to release here.
static void JSREFree(void* ptr) {
  USE(ptr);  // Mark the parameter as used; no work is required.
}
static void ThrowExceptionOnError(const String& pattern,
const char* error_msg) {
if (error_msg == NULL) {
error_msg = "Unknown regexp compile error. ";
}
const String& errmsg = String::Handle(String::New(error_msg));
const String& message = String::Handle(String::Concat(errmsg, pattern));
const Array& args = Array::Handle(Array::New(1));
args.SetAt(0, message);
Exceptions::ThrowByType(Exceptions::kFormat, args);
}
// Compiles |pattern| with the jscre library and returns the resulting
// JSRegExp object.
//
// pattern:     source text of the regular expression.
// multi_line:  compile with jscre's multiline option when true.
// ignore_case: compile with jscre's ignore-case option when true.
//
// If jscre rejects the pattern, an exception is thrown through
// ThrowExceptionOnError and this function does not return normally.
RawJSRegExp* Jscre::Compile(const String& pattern,
                            bool multi_line,
                            bool ignore_case) {
  // The jscre library expects its input in UTF16 encoding, so convert the
  // pattern before handing it over.
  uint16_t* utf16_pattern = GetTwoByteData(pattern);

  // Translate the boolean flags into the jscre option enums.
  jscre::JSRegExpIgnoreCaseOption case_option = jscre::JSRegExpDoNotIgnoreCase;
  if (ignore_case) {
    case_option = jscre::JSRegExpIgnoreCase;
  }
  jscre::JSRegExpMultilineOption line_option = jscre::JSRegExpSingleLine;
  if (multi_line) {
    line_option = jscre::JSRegExpMultiline;
  }

  // Call into the jscre library. The compiled program is placed in memory
  // obtained through JSREMalloc, i.e. inside a JSRegExp object.
  uint32_t bracket_count = 0;
  const char* compile_error = NULL;
  jscre::JSRegExp* compiled = jscre::jsRegExpCompile(utf16_pattern,
                                                     pattern.Length(),
                                                     case_option,
                                                     line_option,
                                                     &bracket_count,
                                                     &compile_error,
                                                     &JSREMalloc,
                                                     &JSREFree);
  if (compiled == NULL) {
    // Compilation failed: report it by throwing an exception.
    ThrowExceptionOnError(pattern, compile_error);
    UNREACHABLE();
    return JSRegExp::null();
  }

  // Recover the JSRegExp object that surrounds the compiled data and fill in
  // its remaining fields.
  JSRegExp& regexp =
      JSRegExp::Handle(JSRegExp::FromDataStartAddress(compiled));
  regexp.set_pattern(pattern);
  if (multi_line) {
    regexp.set_is_multi_line();
  }
  if (ignore_case) {
    regexp.set_is_ignore_case();
  }
  regexp.set_is_global();   // A Dart regexp is always global.
  regexp.set_is_complex();  // Always use jscre library.
  regexp.set_num_bracket_expressions(bracket_count);
  return regexp.raw();
}
// Runs the compiled regular expression |regex| against |str|, beginning at
// |start_index|, by calling into the jscre library.
//
// Returns Array::null() when there is no match or when jscre's internal match
// limit is exceeded. On a successful match, returns an array of
// 2 * (num_bracket_expressions + 1) Smi values laid out as (start, end + 1)
// pairs. Any other negative jscre result is reported by throwing an exception
// through ThrowExceptionOnError.
RawArray* Jscre::Execute(const JSRegExp& regex,
                         const String& str,
                         intptr_t start_index) {
  // First convert the input str to UTF16 format as the jscre library expects
  // strings to be in UTF16 encoding.
  uint16_t* two_byte_str = GetTwoByteData(str);

  // The compiled jscre program lives in the data area of the JSRegExp object.
  jscre::JSRegExp* jscregexp =
      reinterpret_cast<jscre::JSRegExp*>(regex.GetDataStartAddress());
  ASSERT(jscregexp != NULL);
  const Smi& num_bracket_exprs = Smi::Handle(regex.num_bracket_expressions());
  const intptr_t num_bracket_expressions = num_bracket_exprs.Value();
  Zone* zone = Isolate::Current()->current_zone();

  // The jscre library rounds the passed in size to a multiple of 3 in order
  // to reuse the passed in offsets array as a temporary chunk of working
  // storage during matching, so we just pass in a size which is a multiple
  // of 3.
  const int kJscreMultiple = 3;
  const int offsets_length =
      static_cast<int>((num_bracket_expressions + 1) * kJscreMultiple);
  int* offsets = zone->Alloc<int>(offsets_length);
  const int retval = jscre::jsRegExpExecute(jscregexp,
                                            two_byte_str,
                                            str.Length(),
                                            start_index,
                                            offsets,
                                            offsets_length);

  // The KJS JavaScript engine returns null (ie, a failed match) when
  // JSRE's internal match limit is exceeded. We duplicate that behavior here.
  if (retval == jscre::JSRegExpErrorNoMatch ||
      retval == jscre::JSRegExpErrorHitLimit) {
    return Array::null();
  }

  // Other JSRE errors:
  if (retval < 0) {
    const String& pattern = String::Handle(regex.pattern());
    const int kErrorLength = 256;
    char error_msg[kErrorLength];
    // ThrowExceptionOnError appends the pattern directly to this text, so the
    // message ends with a separator to keep the error code and the pattern
    // from running together.
    OS::SNPrint(error_msg, kErrorLength,
                "jscre::jsRegExpExecute error : %d, pattern: ", retval);
    ThrowExceptionOnError(pattern, error_msg);
    UNREACHABLE();
    return Array::null();
  }

  // The matches come in (start, end + 1) pairs, one pair per slot; copy them
  // out of the jscre offsets vector into a Dart array of Smis.
  const int kMatchPair = 2;
  const intptr_t result_length = kMatchPair * (num_bracket_expressions + 1);
  Array& array = Array::Handle(Array::New(result_length));
  Smi& start = Smi::Handle();
  Smi& end = Smi::Handle();
  for (intptr_t i = 0; i < result_length; i += kMatchPair) {
    start = Smi::New(offsets[i]);
    end = Smi::New(offsets[i + 1]);
    array.SetAt(i, start);
    array.SetAt(i + 1, end);
  }
  return array.raw();
}
} // namespace dart