summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Evan Wallace <evan.exe@gmail.com>, 2015-10-14 23:07:40 -0700
committer: Evan Wallace <evan.exe@gmail.com>, 2015-10-14 23:07:40 -0700
commit: 4bef5e89c4cc303fdcc985da66d5f2f81c3f18d6 (patch)
tree: 81f3d3f77a1647017edfe6e3f626caf56d628995
parent: 79222bf100aed80940efab986a84904e2bdc8e0e (diff)
download: flatbuffers-4bef5e89c4cc303fdcc985da66d5f2f81c3f18d6.tar.gz
flatbuffers-4bef5e89c4cc303fdcc985da66d5f2f81c3f18d6.tar.bz2
flatbuffers-4bef5e89c4cc303fdcc985da66d5f2f81c3f18d6.zip
Add UTF-8 support to the JavaScript API
This adds an optional argument to generated getters for string fields to specify the encoding type and overloads the createString() function. It's now possible to use either JavaScript UTF-16 string objects or C-style UTF-8 byte arrays (Uint8Array) for string data.
-rw-r--r-- js/flatbuffers.js 83
-rw-r--r-- src/idl_gen_js.cpp 35
-rw-r--r-- tests/JavaScriptTest.js 17
-rw-r--r-- tests/monster_test_generated.js 21
-rw-r--r-- tests/unicode_test.json 8
5 files changed, 116 insertions, 48 deletions
diff --git a/js/flatbuffers.js b/js/flatbuffers.js
index 58323793..efa76d94 100644
--- a/js/flatbuffers.js
+++ b/js/flatbuffers.js
@@ -32,6 +32,14 @@ flatbuffers.SIZEOF_INT = 4;
flatbuffers.FILE_IDENTIFIER_LENGTH = 4;
/**
+ * @enum {number}
+ */
+flatbuffers.Encoding = {
+ UTF8_BYTES: 1,
+ UTF16_STRING: 2
+};
+
+/**
* @type {Int32Array}
* @const
*/
@@ -667,44 +675,49 @@ flatbuffers.Builder.prototype.endVector = function() {
};
/**
- * Encode the string `s` in the buffer using UTF-8.
+ * Encode the string `s` in the buffer using UTF-8. If a Uint8Array is passed
+ * instead of a string, it is assumed to contain valid UTF-8 encoded data.
*
- * @param {string} s The string to encode
+ * @param {string|Uint8Array} s The string to encode
* @return {flatbuffers.Offset} The offset in the buffer where the encoded string starts
*/
flatbuffers.Builder.prototype.createString = function(s) {
- var utf8 = [];
- var i = 0;
+ if (s instanceof Uint8Array) {
+ var utf8 = s;
+ } else {
+ var utf8 = [];
+ var i = 0;
- while (i < s.length) {
- var codePoint;
+ while (i < s.length) {
+ var codePoint;
- // Decode UTF-16
- var a = s.charCodeAt(i++);
- if (a < 0xD800 || a >= 0xDC00) {
- codePoint = a;
- } else {
- var b = s.charCodeAt(i++);
- codePoint = (a << 10) + b + (0x10000 - (0xD800 << 10) - 0xDC00);
- }
+ // Decode UTF-16
+ var a = s.charCodeAt(i++);
+ if (a < 0xD800 || a >= 0xDC00) {
+ codePoint = a;
+ } else {
+ var b = s.charCodeAt(i++);
+ codePoint = (a << 10) + b + (0x10000 - (0xD800 << 10) - 0xDC00);
+ }
- // Encode UTF-8
- if (codePoint < 0x80) {
- utf8.push(codePoint);
- } else {
- if (codePoint < 0x800) {
- utf8.push(((codePoint >> 6) & 0x1F) | 0xC0);
+ // Encode UTF-8
+ if (codePoint < 0x80) {
+ utf8.push(codePoint);
} else {
- if (codePoint < 0x10000) {
- utf8.push(((codePoint >> 12) & 0x0F) | 0xE0);
+ if (codePoint < 0x800) {
+ utf8.push(((codePoint >> 6) & 0x1F) | 0xC0);
} else {
- utf8.push(
- ((codePoint >> 18) & 0x07) | 0xF0,
- ((codePoint >> 12) & 0x3F) | 0x80);
+ if (codePoint < 0x10000) {
+ utf8.push(((codePoint >> 12) & 0x0F) | 0xE0);
+ } else {
+ utf8.push(
+ ((codePoint >> 18) & 0x07) | 0xF0,
+ ((codePoint >> 12) & 0x3F) | 0x80);
+ }
+ utf8.push(((codePoint >> 6) & 0x3F) | 0x80);
}
- utf8.push(((codePoint >> 6) & 0x3F) | 0x80);
+ utf8.push((codePoint & 0x3F) | 0x80);
}
- utf8.push((codePoint & 0x3F) | 0x80);
}
}
@@ -939,13 +952,19 @@ flatbuffers.ByteBuffer.prototype.__union = function(t, offset) {
};
/**
- * Create a JavaScript string from UTF-8 data stored inside the flatbuffer.
+ * Create a JavaScript string from UTF-8 data stored inside the FlatBuffer.
* This allocates a new string and converts to wide chars upon each access.
*
+ * To avoid the conversion to UTF-16, pass flatbuffers.Encoding.UTF8_BYTES as
+ * the "optionalEncoding" argument. This is useful for avoiding conversion to
+ * and from UTF-16 when the data will just be packaged back up in another
+ * FlatBuffer later on.
+ *
* @param {number} offset
- * @returns {string}
+ * @param {flatbuffers.Encoding=} optionalEncoding Defaults to UTF16_STRING
+ * @returns {string|Uint8Array}
*/
-flatbuffers.ByteBuffer.prototype.__string = function(offset) {
+flatbuffers.ByteBuffer.prototype.__string = function(offset, optionalEncoding) {
offset += this.readInt32(offset);
var length = this.readInt32(offset);
@@ -954,6 +973,10 @@ flatbuffers.ByteBuffer.prototype.__string = function(offset) {
offset += flatbuffers.SIZEOF_INT;
+ if (optionalEncoding === flatbuffers.Encoding.UTF8_BYTES) {
+ return this.bytes_.subarray(offset, offset + length);
+ }
+
while (i < length) {
var codePoint;
diff --git a/src/idl_gen_js.cpp b/src/idl_gen_js.cpp
index 0e95830d..7db80e8d 100644
--- a/src/idl_gen_js.cpp
+++ b/src/idl_gen_js.cpp
@@ -226,7 +226,7 @@ enum struct InOut {
static std::string GenTypeName(const Type &type, InOut inOut) {
if (inOut == InOut::OUT) {
if (type.base_type == BASE_TYPE_STRING) {
- return "?string";
+ return "string|Uint8Array";
}
if (type.base_type == BASE_TYPE_STRUCT) {
return WrapInNameSpace(*type.struct_def);
@@ -404,15 +404,25 @@ static void GenStruct(const Parser &parser, StructDef &struct_def,
if (IsScalar(field.value.type.base_type) ||
field.value.type.base_type == BASE_TYPE_STRING) {
GenDocComment(field.doc_comment, code_ptr,
+ std::string(field.value.type.base_type == BASE_TYPE_STRING ?
+ "@param {flatbuffers.Encoding=} optionalEncoding\n" : "") +
"@returns {" + GenTypeName(field.value.type, InOut::OUT) + "}");
code += object_name + ".prototype." + MakeCamel(field.name, false);
- code += " = function() {\n";
+ code += " = function(";
+ if (field.value.type.base_type == BASE_TYPE_STRING) {
+ code += "optionalEncoding";
+ }
+ code += ") {\n";
if (struct_def.fixed) {
code += " return " + GenGetter(field.value.type, "(this.bb_pos" +
MaybeAdd(field.value.offset) + ")") + ";\n";
} else {
+ std::string index = "this.bb_pos + offset";
+ if (field.value.type.base_type == BASE_TYPE_STRING) {
+ index += ", optionalEncoding";
+ }
code += offset_prefix + GenGetter(field.value.type,
- "(this.bb_pos + offset)") + " : " + GenDefaultValue(field.value);
+ "(" + index + ")") + " : " + GenDefaultValue(field.value);
code += ";\n";
}
}
@@ -446,16 +456,20 @@ static void GenStruct(const Parser &parser, StructDef &struct_def,
auto inline_size = InlineSize(vectortype);
auto index = "this.bb.__vector(this.bb_pos + offset) + index" +
MaybeScale(inline_size);
- GenDocComment(field.doc_comment, code_ptr,
- "@param {number} index\n" +
- std::string(vectortype.base_type == BASE_TYPE_STRUCT ?
- "@param {" + vectortypename + "=} obj\n" :
- "") +
- "@returns {" + vectortypename + "}");
+ std::string args = "@param {number} index\n";
+ if (vectortype.base_type == BASE_TYPE_STRUCT) {
+ args += "@param {" + vectortypename + "=} obj\n";
+ } else if (vectortype.base_type == BASE_TYPE_STRING) {
+ args += "@param {flatbuffers.Encoding=} optionalEncoding\n";
+ }
+ GenDocComment(field.doc_comment, code_ptr, args +
+ "@returns {" + vectortypename + "}");
code += object_name + ".prototype." + MakeCamel(field.name, false);
code += " = function(index";
if (vectortype.base_type == BASE_TYPE_STRUCT) {
code += ", obj";
+ } else if (vectortype.base_type == BASE_TYPE_STRING) {
+ code += ", optionalEncoding";
}
code += ") {\n";
if (vectortype.base_type == BASE_TYPE_STRUCT) {
@@ -466,6 +480,9 @@ static void GenStruct(const Parser &parser, StructDef &struct_def,
: "this.bb.__indirect(" + index + ")";
code += ", this.bb)";
} else {
+ if (vectortype.base_type == BASE_TYPE_STRING) {
+ index += ", optionalEncoding";
+ }
code += offset_prefix + GenGetter(vectortype, "(" + index + ")");
}
code += " : ";
diff --git a/tests/JavaScriptTest.js b/tests/JavaScriptTest.js
index 7a5945b9..8eb50f3c 100644
--- a/tests/JavaScriptTest.js
+++ b/tests/JavaScriptTest.js
@@ -125,18 +125,35 @@ function testUnicode() {
var bb = new flatbuffers.ByteBuffer(new Uint8Array(correct));
var monster = MyGame.Example.Monster.getRootAsMonster(bb);
assert.strictEqual(monster.name(), json.name);
+ assert.deepEqual(new Buffer(monster.name(flatbuffers.Encoding.UTF8_BYTES)), new Buffer(json.name));
+ assert.strictEqual(monster.testarrayoftablesLength(), json.testarrayoftables.length);
+ json.testarrayoftables.forEach(function(table, i) {
+ var value = monster.testarrayoftables(i);
+ assert.strictEqual(value.name(), table.name);
+ assert.deepEqual(new Buffer(value.name(flatbuffers.Encoding.UTF8_BYTES)), new Buffer(table.name));
+ });
assert.strictEqual(monster.testarrayofstringLength(), json.testarrayofstring.length);
json.testarrayofstring.forEach(function(string, i) {
assert.strictEqual(monster.testarrayofstring(i), string);
+ assert.deepEqual(new Buffer(monster.testarrayofstring(i, flatbuffers.Encoding.UTF8_BYTES)), new Buffer(string));
});
// Test writing
var fbb = new flatbuffers.Builder();
var name = fbb.createString(json.name);
+ var testarrayoftablesOffsets = json.testarrayoftables.map(function(table) {
+ var name = fbb.createString(new Uint8Array(new Buffer(table.name)));
+ MyGame.Example.Monster.startMonster(fbb);
+ MyGame.Example.Monster.addName(fbb, name);
+ return MyGame.Example.Monster.endMonster(fbb);
+ });
+ var testarrayoftablesOffset = MyGame.Example.Monster.createTestarrayoftablesVector(fbb,
+ testarrayoftablesOffsets);
var testarrayofstringOffset = MyGame.Example.Monster.createTestarrayofstringVector(fbb,
json.testarrayofstring.map(function(string) { return fbb.createString(string); }));
MyGame.Example.Monster.startMonster(fbb);
MyGame.Example.Monster.addTestarrayofstring(fbb, testarrayofstringOffset);
+ MyGame.Example.Monster.addTestarrayoftables(fbb, testarrayoftablesOffset);
MyGame.Example.Monster.addName(fbb, name);
MyGame.Example.Monster.finishMonsterBuffer(fbb, MyGame.Example.Monster.endMonster(fbb));
assert.deepEqual(new Buffer(fbb.asUint8Array()), correct);
diff --git a/tests/monster_test_generated.js b/tests/monster_test_generated.js
index 5687308e..d46a9c68 100644
--- a/tests/monster_test_generated.js
+++ b/tests/monster_test_generated.js
@@ -287,11 +287,12 @@ MyGame.Example.Stat.getRootAsStat = function(bb, obj) {
};
/**
- * @returns {?string}
+ * @param {flatbuffers.Encoding=} optionalEncoding
+ * @returns {string|Uint8Array}
*/
-MyGame.Example.Stat.prototype.id = function() {
+MyGame.Example.Stat.prototype.id = function(optionalEncoding) {
var offset = this.bb.__offset(this.bb_pos, 4);
- return offset ? this.bb.__string(this.bb_pos + offset) : null;
+ return offset ? this.bb.__string(this.bb_pos + offset, optionalEncoding) : null;
};
/**
@@ -419,11 +420,12 @@ MyGame.Example.Monster.prototype.hp = function() {
};
/**
- * @returns {?string}
+ * @param {flatbuffers.Encoding=} optionalEncoding
+ * @returns {string|Uint8Array}
*/
-MyGame.Example.Monster.prototype.name = function() {
+MyGame.Example.Monster.prototype.name = function(optionalEncoding) {
var offset = this.bb.__offset(this.bb_pos, 10);
- return offset ? this.bb.__string(this.bb_pos + offset) : null;
+ return offset ? this.bb.__string(this.bb_pos + offset, optionalEncoding) : null;
};
/**
@@ -488,11 +490,12 @@ MyGame.Example.Monster.prototype.test4Length = function() {
/**
* @param {number} index
- * @returns {?string}
+ * @param {flatbuffers.Encoding=} optionalEncoding
+ * @returns {string|Uint8Array}
*/
-MyGame.Example.Monster.prototype.testarrayofstring = function(index) {
+MyGame.Example.Monster.prototype.testarrayofstring = function(index, optionalEncoding) {
var offset = this.bb.__offset(this.bb_pos, 24);
- return offset ? this.bb.__string(this.bb.__vector(this.bb_pos + offset) + index * 4) : null;
+ return offset ? this.bb.__string(this.bb.__vector(this.bb_pos + offset) + index * 4, optionalEncoding) : null;
};
/**
diff --git a/tests/unicode_test.json b/tests/unicode_test.json
index a78fdf0d..75e467a5 100644
--- a/tests/unicode_test.json
+++ b/tests/unicode_test.json
@@ -1,5 +1,13 @@
{
"name": "unicode_test",
+ "testarrayoftables": [
+ { "name": "Цлїςσδε" },
+ { "name": "フムアムカモケモ" },
+ { "name": "フムヤムカモケモ" },
+ { "name": "㊀㊁㊂㊃㊄" },
+ { "name": "☳☶☲" },
+ { "name": "𡇙𝌆" }
+ ],
"testarrayofstring": [
"Цлїςσδε",
"フムアムカモケモ",