diff options
author | Ivan Dlugos <6349682+vaind@users.noreply.github.com> | 2021-07-22 19:36:37 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-07-22 10:36:37 -0700 |
commit | c36672d8034d7f93fed586f80854aff83c93b4b9 (patch) | |
tree | 85ee7ecad1f244f5189a942b4f3ce02c19f74530 | |
parent | 2bfc8e9f019951220f765b5beb72d9c43019c3f1 (diff) | |
download | flatbuffers-c36672d8034d7f93fed586f80854aff83c93b4b9.tar.gz flatbuffers-c36672d8034d7f93fed586f80854aff83c93b4b9.tar.bz2 flatbuffers-c36672d8034d7f93fed586f80854aff83c93b4b9.zip |
Dart - optimize writeString for ASCII (#6736)
* Dart - optimize writeString for ASCII
* Dart - writeString() ASCII optimization optional
-rw-r--r-- | dart/lib/flat_buffers.dart | 61 | ||||
-rw-r--r-- | dart/test/flat_buffers_test.dart | 8 |
2 files changed, 55 insertions, 14 deletions
diff --git a/dart/lib/flat_buffers.dart b/dart/lib/flat_buffers.dart index 1510940c..83866b0b 100644 --- a/dart/lib/flat_buffers.dart +++ b/dart/lib/flat_buffers.dart @@ -668,29 +668,68 @@ class Builder { return result; } - /// Write the given string [value] and return its offset. - int writeString(String value) { + /// Write the given string [value] and return its offset. + /// + /// Dart strings are UTF-16 but must be stored as UTF-8 in FlatBuffers. + /// If the given string consists only of ASCII characters, you can enable + /// [asciiOptimization]. In this mode, [writeString()] first tries to + /// copy the ASCII string directly to the output buffer and if that fails + /// (because there are non-ASCII characters in the string) it falls back to + /// the default UTF-16 -> UTF-8 conversion (with slight performance penalty). + int? writeString(String value, {bool asciiOptimization = false}) { _ensureNoVTable(); if (_strings != null) { - return _strings!.putIfAbsent(value, () => _writeString(value)); + return _strings! + .putIfAbsent(value, () => _writeString(value, asciiOptimization)); } else { - return _writeString(value); + return _writeString(value, asciiOptimization); } } - int _writeString(String value) { - // TODO(scheglov) optimize for ASCII strings - List<int> bytes = utf8.encode(value); - int length = bytes.length; + int _writeString(String value, bool asciiOptimization) { + if (asciiOptimization) { + // [utf8.encode()] is slow (up to at least Dart SDK 2.13). If the given + // string is ASCII we can just write it directly, without any conversion. + final originalTail = _tail; + if (_tryWriteASCIIString(value)) return _tail; + // if non-ASCII: reset the output buffer position for [_writeUTFString()] + _tail = originalTail; + } + _writeUTFString(value); + return _tail; + } + + // Try to write the string as ASCII, return false if there's a non-ascii char.
+ @pragma('vm:prefer-inline') + bool _tryWriteASCIIString(String value) { + _prepare(4, 1, additionalBytes: value.length + 1); + final length = value.length; + var offset = _buf.lengthInBytes - _tail + 4; + for (var i = 0; i < length; i++) { + // utf16 code unit, e.g. for '†' it's [0x20 0x20], which is 8224 decimal. + // ASCII characters go from 0x00 to 0x7F (which is 0 to 127 decimal). + final char = value.codeUnitAt(i); + if ((char & ~0x7F) != 0) { + return false; + } + _buf.setUint8(offset++, char); + } + _buf.setUint8(offset, 0); // trailing zero + _setUint32AtTail(_buf, _tail, value.length); + return true; + } + + @pragma('vm:prefer-inline') + void _writeUTFString(String value) { + final bytes = utf8.encode(value) as Uint8List; + final length = bytes.length; _prepare(4, 1, additionalBytes: length + 1); - final int result = _tail; _setUint32AtTail(_buf, _tail, length); - int offset = _buf.lengthInBytes - _tail + 4; + var offset = _buf.lengthInBytes - _tail + 4; for (int i = 0; i < length; i++) { _buf.setUint8(offset++, bytes[i]); } _buf.setUint8(offset, 0); // trailing zero - return result; } /// Throw an exception if there is not currently a vtable. diff --git a/dart/test/flat_buffers_test.dart b/dart/test/flat_buffers_test.dart index 39130ef2..216065c6 100644 --- a/dart/test/flat_buffers_test.dart +++ b/dart/test/flat_buffers_test.dart @@ -160,7 +160,7 @@ class BuilderTest { final str = fbBuilder.writeString('MyMonster'); fbBuilder.writeString('test1'); - fbBuilder.writeString('test2'); + fbBuilder.writeString('test2', asciiOptimization: true); final testArrayOfString = fbBuilder.endStructVector(2); final fred = fbBuilder.writeString('Fred'); @@ -360,8 +360,10 @@ class BuilderTest { List<int> byteList; { Builder builder = new Builder(initialSize: 0); - int? latinStringOffset = builder.writeString(latinString); - int? unicodeStringOffset = builder.writeString(unicodeString); + int? 
latinStringOffset = + builder.writeString(latinString, asciiOptimization: true); + int? unicodeStringOffset = + builder.writeString(unicodeString, asciiOptimization: true); builder.startTable(2); builder.addOffset(0, latinStringOffset); builder.addOffset(1, unicodeStringOffset); |