Helpers for null-terminated Utf8 (#3)
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..79f51c3
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+.dart_tool
+.packages
+pubspec.lock
diff --git a/lib/ffi.dart b/lib/ffi.dart
new file mode 100644
index 0000000..b43c3e9
--- /dev/null
+++ b/lib/ffi.dart
@@ -0,0 +1,5 @@
+// Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+export 'src/utf8.dart';
diff --git a/lib/src/utf8.dart b/lib/src/utf8.dart
new file mode 100644
index 0000000..52d0e95
--- /dev/null
+++ b/lib/src/utf8.dart
@@ -0,0 +1,66 @@
+// Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'dart:convert';
+import 'dart:ffi';
+import 'dart:typed_data';
+
+const int _kMaxSmi64 = (1 << 62) - 1; // strlen view length, 8-byte IntPtr.
+const int _kMaxSmi32 = (1 << 30) - 1; // strlen view length, otherwise.
+final int _maxSize = sizeOf<IntPtr>() == 8 ? _kMaxSmi64 : _kMaxSmi32;
+
+/// [Utf8] implements conversion between Dart strings and null-terminated
+/// Utf8-encoded "char*" strings in C.
+///
+/// [Utf8] is represented as a struct so that `Pointer<Utf8>` can be used in
+/// native function signatures.
+//
+// TODO(https://github.com/dart-lang/ffi/issues/4): No need to use
+// 'asExternalTypedData' when Pointer operations are performant.
+class Utf8 extends Struct<Utf8> {
+  /// Returns the length of the null-terminated [string], that is, the number
+  /// of (one-byte) code units before the first null byte.
+  static int strlen(Pointer<Utf8> string) {
+    // The view is sized to _maxSize because the real length is not known
+    // yet; only the bytes up to the terminator are actually read.
+    final Uint8List bytes =
+        string.cast<Uint8>().asExternalTypedData(count: _maxSize);
+    return bytes.indexOf(0);
+  }
+
+  /// Decodes the null-terminated UTF-8 bytes at [string] into a Dart [String].
+  ///
+  /// The bytes must be valid UTF-8, except that encodings of unpaired
+  /// surrogate code points (which [toUtf8] can produce) are also accepted.
+  /// See [Utf8Decoder] for details on decoding.
+  ///
+  /// Throws a [FormatException] if the bytes cannot be decoded.
+  static String fromUtf8(Pointer<Utf8> string) {
+    final int length = strlen(string);
+    final Uint8List bytes =
+        string.cast<Uint8>().asExternalTypedData(count: length);
+    return utf8.decode(bytes);
+  }
+
+  /// Converts [string] to a null-terminated, UTF-8 encoded C string.
+  ///
+  /// If [string] contains NUL characters, C code reading the result will see
+  /// it end at the first one. Unpaired surrogate code points in [string] are
+  /// preserved in the encoded result. See [Utf8Encoder] for details.
+  ///
+  /// Returns a malloc-allocated pointer that the caller must free.
+  static Pointer<Utf8> toUtf8(String string) {
+    final List<int> units = utf8.encode(string);
+    final int size = units.length + 1; // One extra byte for the terminator.
+    final Pointer<Uint8> result = Pointer<Uint8>.allocate(count: size);
+    final Uint8List nativeString = result.asExternalTypedData(count: size);
+    nativeString.setAll(0, units);
+    nativeString[units.length] = 0;
+    return result.cast();
+  }
+
+  /// Decodes the string at this struct's address. See [fromUtf8].
+  @override
+  String toString() => fromUtf8(addressOf);
+}
diff --git a/pubspec.yaml b/pubspec.yaml
index 0baeb71..b16bad2 100644
--- a/pubspec.yaml
+++ b/pubspec.yaml
@@ -12,3 +12,4 @@
dev_dependencies:
pedantic: ^1.0.0
+ test: ^1.6.8
diff --git a/test/utf8_test.dart b/test/utf8_test.dart
new file mode 100644
index 0000000..cf4e97e
--- /dev/null
+++ b/test/utf8_test.dart
@@ -0,0 +1,77 @@
+// Copyright (c) 2019, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'dart:ffi';
+import 'dart:typed_data';
+
+import 'package:test/test.dart';
+import 'package:ffi/ffi.dart';
+
+/// Copies [ints] into newly allocated native memory and returns its address.
+Pointer<Uint8> _bytesFromList(List<int> ints) {
+  final Pointer<Uint8> bytes = Pointer.allocate(count: ints.length);
+  (bytes.asExternalTypedData(count: ints.length) as Uint8List).setAll(0, ints);
+  return bytes;
+}
+
+// Checks Utf8.toUtf8/fromUtf8 against literal null-terminated byte sequences.
+void main() {
+  test("toUtf8 ASCII", () {
+    final String start = "Hello World!\n";
+    final Pointer<Uint8> converted = Utf8.toUtf8(start).cast();
+    final Uint8List end =
+        converted.asExternalTypedData(count: start.length + 1);
+    expect(
+        end, [72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33, 10, 0]);
+    converted.free();
+  });
+
+  test("fromUtf8 ASCII", () {
+    final Pointer<Utf8> utf8 = _bytesFromList(
+        [72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33, 10, 0]).cast();
+    expect(Utf8.fromUtf8(utf8), "Hello World!\n");
+    utf8.free(); // Release the buffer allocated by _bytesFromList.
+  });
+
+  test("toUtf8 emoji", () {
+    final String start = "\u{1F60E}\u{1F47F}\u{1F4AC}";
+    final Pointer<Utf8> converted = Utf8.toUtf8(start);
+    final int length = Utf8.strlen(converted);
+    final Uint8List end =
+        converted.cast<Uint8>().asExternalTypedData(count: length + 1);
+    expect(
+        end, [240, 159, 152, 142, 240, 159, 145, 191, 240, 159, 146, 172, 0]);
+    converted.free();
+  });
+
+  test("fromUtf8 emoji", () {
+    final Pointer<Utf8> utf8 = _bytesFromList(
+        [240, 159, 152, 142, 240, 159, 145, 191, 240, 159, 146, 172, 0]).cast();
+    expect(Utf8.fromUtf8(utf8), "\u{1F60E}\u{1F47F}\u{1F4AC}");
+    utf8.free();
+  });
+
+  test("toUtf8 unpaired surrogate", () {
+    final String start = String.fromCharCodes([0xD800, 0x1000]);
+    final Pointer<Utf8> converted = Utf8.toUtf8(start);
+    final int length = Utf8.strlen(converted);
+    final Uint8List end =
+        converted.cast<Uint8>().asExternalTypedData(count: length + 1);
+    expect(end, equals([237, 160, 128, 225, 128, 128, 0]));
+    converted.free();
+  });
+
+  test("fromUtf8 unpaired surrogate", () {
+    final Pointer<Utf8> utf8 =
+        _bytesFromList([237, 160, 128, 225, 128, 128, 0]).cast();
+    expect(Utf8.fromUtf8(utf8), equals(String.fromCharCodes([0xD800, 0x1000])));
+    utf8.free();
+  });
+
+  test("fromUtf8 invalid", () {
+    final Pointer<Utf8> utf8 = _bytesFromList([0x80, 0x00]).cast();
+    expect(() => Utf8.fromUtf8(utf8), throwsA(isFormatException));
+    utf8.free();
+  });
+}