summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Ben Hamilton <beng@fb.com> 2016-08-18 10:14:32 -0700
committer Ben Hamilton <beng@fb.com> 2016-08-18 10:23:26 -0700
commit 94d5643f97102fffd9e0131541e9385d6d0b7e34 (patch)
tree 3988807e9f4f95f37b1aebc5733fa81426674209
parent f0d91fa143eed6de915a41108287c260e82a2689 (diff)
download flatbuffers-94d5643f97102fffd9e0131541e9385d6d0b7e34.tar.gz
flatbuffers-94d5643f97102fffd9e0131541e9385d6d0b7e34.tar.bz2
flatbuffers-94d5643f97102fffd9e0131541e9385d6d0b7e34.zip
Allow GenerateText() to indicate failure to encode flatbuffer to JSON (i.e., non-UTF-8 string data)
-rw-r--r-- include/flatbuffers/idl.h 4
-rw-r--r-- samples/sample_text.cpp 5
-rw-r--r-- src/idl_gen_text.cpp 129
-rw-r--r-- tests/test.cpp 33
4 files changed, 119 insertions, 52 deletions
diff --git a/include/flatbuffers/idl.h b/include/flatbuffers/idl.h
index 5909a4e2..1c1e6349 100644
--- a/include/flatbuffers/idl.h
+++ b/include/flatbuffers/idl.h
@@ -596,7 +596,9 @@ extern void GenComment(const std::vector<std::string> &dc,
// if it is less than 0, no linefeeds will be generated either.
// See idl_gen_text.cpp.
// strict_json adds "quotes" around field names if true.
-extern void GenerateText(const Parser &parser,
+// If the flatbuffer cannot be encoded in JSON (e.g., it contains non-UTF-8
+// byte arrays in String values), returns false.
+extern bool GenerateText(const Parser &parser,
const void *flatbuffer,
std::string *text);
extern bool GenerateTextFile(const Parser &parser,
diff --git a/samples/sample_text.cpp b/samples/sample_text.cpp
index 557077d4..d851120d 100644
--- a/samples/sample_text.cpp
+++ b/samples/sample_text.cpp
@@ -46,7 +46,10 @@ int main(int /*argc*/, const char * /*argv*/[]) {
// to ensure it is correct, we now generate text back from the binary,
// and compare the two:
std::string jsongen;
- GenerateText(parser, parser.builder_.GetBufferPointer(), &jsongen);
+ if (!GenerateText(parser, parser.builder_.GetBufferPointer(), &jsongen)) {
+ printf("Couldn't serialize parsed data to JSON!\n");
+ return 1;
+ }
if (jsongen != jsonfile) {
printf("%s----------------\n%s", jsongen.c_str(), jsonfile.c_str());
diff --git a/src/idl_gen_text.cpp b/src/idl_gen_text.cpp
index 3e41a0a7..4ff13c8a 100644
--- a/src/idl_gen_text.cpp
+++ b/src/idl_gen_text.cpp
@@ -22,7 +22,7 @@
namespace flatbuffers {
-static void GenStruct(const StructDef &struct_def, const Table *table,
+static bool GenStruct(const StructDef &struct_def, const Table *table,
int indent, const IDLOptions &opts,
std::string *_text);
@@ -48,7 +48,7 @@ void OutputIdentifier(const std::string &name, const IDLOptions &opts,
// Print (and its template specialization below for pointers) generate text
// for a single FlatBuffer value into JSON format.
// The general case for scalars:
-template<typename T> void Print(T val, Type type, int /*indent*/,
+template<typename T> bool Print(T val, Type type, int /*indent*/,
StructDef * /*union_sd*/,
const IDLOptions &opts,
std::string *_text) {
@@ -57,7 +57,7 @@ template<typename T> void Print(T val, Type type, int /*indent*/,
auto enum_val = type.enum_def->ReverseLookup(static_cast<int>(val));
if (enum_val) {
OutputIdentifier(enum_val->name, opts, _text);
- return;
+ return true;
}
}
@@ -66,10 +66,12 @@ template<typename T> void Print(T val, Type type, int /*indent*/,
} else {
text += NumToString(val);
}
+
+ return true;
}
// Print a vector a sequence of JSON values, comma separated, wrapped in "[]".
-template<typename T> void PrintVector(const Vector<T> &v, Type type,
+template<typename T> bool PrintVector(const Vector<T> &v, Type type,
int indent, const IDLOptions &opts,
std::string *_text) {
std::string &text = *_text;
@@ -81,19 +83,25 @@ template<typename T> void PrintVector(const Vector<T> &v, Type type,
text += NewLine(opts);
}
text.append(indent + Indent(opts), ' ');
- if (IsStruct(type))
- Print(v.GetStructFromOffset(i * type.struct_def->bytesize), type,
- indent + Indent(opts), nullptr, opts, _text);
- else
- Print(v[i], type, indent + Indent(opts), nullptr,
- opts, _text);
+ if (IsStruct(type)) {
+ if (!Print(v.GetStructFromOffset(i * type.struct_def->bytesize), type,
+ indent + Indent(opts), nullptr, opts, _text)) {
+ return false;
+ }
+ } else {
+ if (!Print(v[i], type, indent + Indent(opts), nullptr,
+ opts, _text)) {
+ return false;
+ }
+ }
}
text += NewLine(opts);
text.append(indent, ' ');
text += "]";
+ return true;
}
-static void EscapeString(const String &s, std::string *_text, const IDLOptions& opts) {
+static bool EscapeString(const String &s, std::string *_text, const IDLOptions& opts) {
std::string &text = *_text;
text += "\"";
for (uoffset_t i = 0; i < s.size(); i++) {
@@ -118,9 +126,19 @@ static void EscapeString(const String &s, std::string *_text, const IDLOptions&
text += "\\x";
text += IntToStringHex(static_cast<uint8_t>(c), 2);
} else {
- // We previously checked for non-UTF-8 and returned a parse error,
- // so we shouldn't reach here.
- assert(0);
+ // There are two cases here:
+ //
+ // 1) We reached here by parsing an IDL file. In that case,
+ // we previously checked for non-UTF-8, so we shouldn't reach
+ // here.
+ //
+ // 2) We reached here by someone calling GenerateText()
+ // on a previously-serialized flatbuffer. The data might have
+ // non-UTF-8 Strings, or might be corrupt.
+ //
+ // In both cases, we have to give up and inform the caller
+ // they have no JSON.
+ return false;
}
} else {
if (ucc <= 0xFFFF) {
@@ -145,10 +163,11 @@ static void EscapeString(const String &s, std::string *_text, const IDLOptions&
}
}
text += "\"";
+ return true;
}
// Specialization of Print above for pointer types.
-template<> void Print<const void *>(const void *val,
+template<> bool Print<const void *>(const void *val,
Type type, int indent,
StructDef *union_sd,
const IDLOptions &opts,
@@ -158,21 +177,27 @@ template<> void Print<const void *>(const void *val,
// If this assert hits, you have an corrupt buffer, a union type field
// was not present or was out of range.
assert(union_sd);
- GenStruct(*union_sd,
- reinterpret_cast<const Table *>(val),
- indent,
- opts,
- _text);
+ if (!GenStruct(*union_sd,
+ reinterpret_cast<const Table *>(val),
+ indent,
+ opts,
+ _text)) {
+ return false;
+ }
break;
case BASE_TYPE_STRUCT:
- GenStruct(*type.struct_def,
- reinterpret_cast<const Table *>(val),
- indent,
- opts,
- _text);
+ if (!GenStruct(*type.struct_def,
+ reinterpret_cast<const Table *>(val),
+ indent,
+ opts,
+ _text)) {
+ return false;
+ }
break;
case BASE_TYPE_STRING: {
- EscapeString(*reinterpret_cast<const String *>(val), _text, opts);
+ if (!EscapeString(*reinterpret_cast<const String *>(val), _text, opts)) {
+ return false;
+ }
break;
}
case BASE_TYPE_VECTOR:
@@ -182,31 +207,35 @@ template<> void Print<const void *>(const void *val,
#define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, \
PTYPE) \
case BASE_TYPE_ ## ENUM: \
- PrintVector<CTYPE>( \
- *reinterpret_cast<const Vector<CTYPE> *>(val), \
- type, indent, opts, _text); break;
+ if (!PrintVector<CTYPE>( \
+ *reinterpret_cast<const Vector<CTYPE> *>(val), \
+ type, indent, opts, _text)) { \
+ return false; \
+ } \
+ break;
FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD)
#undef FLATBUFFERS_TD
}
break;
default: assert(0);
}
+ return true;
}
// Generate text for a scalar field.
-template<typename T> static void GenField(const FieldDef &fd,
+template<typename T> static bool GenField(const FieldDef &fd,
const Table *table, bool fixed,
const IDLOptions &opts,
int indent,
std::string *_text) {
- Print(fixed ?
+ return Print(fixed ?
reinterpret_cast<const Struct *>(table)->GetField<T>(fd.value.offset) :
table->GetField<T>(fd.value.offset, 0), fd.value.type, indent, nullptr,
opts, _text);
}
// Generate text for non-scalar field.
-static void GenFieldOffset(const FieldDef &fd, const Table *table, bool fixed,
+static bool GenFieldOffset(const FieldDef &fd, const Table *table, bool fixed,
int indent, StructDef *union_sd,
const IDLOptions &opts, std::string *_text) {
const void *val = nullptr;
@@ -220,12 +249,12 @@ static void GenFieldOffset(const FieldDef &fd, const Table *table, bool fixed,
? table->GetStruct<const void *>(fd.value.offset)
: table->GetPointer<const void *>(fd.value.offset);
}
- Print(val, fd.value.type, indent, union_sd, opts, _text);
+ return Print(val, fd.value.type, indent, union_sd, opts, _text);
}
// Generate text for a struct or table, values separated by commas, indented,
// and bracketed by "{}"
-static void GenStruct(const StructDef &struct_def, const Table *table,
+static bool GenStruct(const StructDef &struct_def, const Table *table,
int indent, const IDLOptions &opts,
std::string *_text) {
std::string &text = *_text;
@@ -253,8 +282,10 @@ static void GenStruct(const StructDef &struct_def, const Table *table,
#define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, \
PTYPE) \
case BASE_TYPE_ ## ENUM: \
- GenField<CTYPE>(fd, table, struct_def.fixed, \
- opts, indent + Indent(opts), _text); \
+ if (!GenField<CTYPE>(fd, table, struct_def.fixed, \
+ opts, indent + Indent(opts), _text)) { \
+ return false; \
+ } \
break;
FLATBUFFERS_GEN_TYPES_SCALAR(FLATBUFFERS_TD)
#undef FLATBUFFERS_TD
@@ -264,8 +295,10 @@ static void GenStruct(const StructDef &struct_def, const Table *table,
case BASE_TYPE_ ## ENUM:
FLATBUFFERS_GEN_TYPES_POINTER(FLATBUFFERS_TD)
#undef FLATBUFFERS_TD
- GenFieldOffset(fd, table, struct_def.fixed, indent + Indent(opts),
- union_sd, opts, _text);
+ if (!GenFieldOffset(fd, table, struct_def.fixed, indent + Indent(opts),
+ union_sd, opts, _text)) {
+ return false;
+ }
break;
}
if (fd.value.type.base_type == BASE_TYPE_UTYPE) {
@@ -284,20 +317,24 @@ static void GenStruct(const StructDef &struct_def, const Table *table,
text += NewLine(opts);
text.append(indent, ' ');
text += "}";
+ return true;
}
// Generate a text representation of a flatbuffer in JSON format.
-void GenerateText(const Parser &parser, const void *flatbuffer,
+bool GenerateText(const Parser &parser, const void *flatbuffer,
std::string *_text) {
std::string &text = *_text;
assert(parser.root_struct_def_); // call SetRootType()
text.reserve(1024); // Reduce amount of inevitable reallocs.
- GenStruct(*parser.root_struct_def_,
- GetRoot<Table>(flatbuffer),
- 0,
- parser.opts,
- _text);
+ if (!GenStruct(*parser.root_struct_def_,
+ GetRoot<Table>(flatbuffer),
+ 0,
+ parser.opts,
+ _text)) {
+ return false;
+ }
text += NewLine(parser.opts);
+ return true;
}
std::string TextFileName(const std::string &path,
@@ -310,7 +347,9 @@ bool GenerateTextFile(const Parser &parser,
const std::string &file_name) {
if (!parser.builder_.GetSize() || !parser.root_struct_def_) return true;
std::string text;
- GenerateText(parser, parser.builder_.GetBufferPointer(), &text);
+ if (!GenerateText(parser, parser.builder_.GetBufferPointer(), &text)) {
+ return false;
+ }
return flatbuffers::SaveFile(TextFileName(path, file_name).c_str(),
text,
false);
diff --git a/tests/test.cpp b/tests/test.cpp
index fd2352bd..45eb1fe2 100644
--- a/tests/test.cpp
+++ b/tests/test.cpp
@@ -410,7 +410,8 @@ void ParseAndGenerateTextTest() {
// to ensure it is correct, we now generate text back from the binary,
// and compare the two:
std::string jsongen;
- GenerateText(parser, parser.builder_.GetBufferPointer(), &jsongen);
+ auto result = GenerateText(parser, parser.builder_.GetBufferPointer(), &jsongen);
+ TEST_EQ(result, true);
if (jsongen != jsonfile) {
printf("%s----------------\n%s", jsongen.c_str(), jsonfile.c_str());
@@ -827,7 +828,8 @@ void FuzzTest2() {
std::string jsongen;
parser.opts.indent_step = 0;
- GenerateText(parser, parser.builder_.GetBufferPointer(), &jsongen);
+ auto result = GenerateText(parser, parser.builder_.GetBufferPointer(), &jsongen);
+ TEST_EQ(result, true);
if (jsongen != json) {
// These strings are larger than a megabyte, so we show the bytes around
@@ -987,7 +989,8 @@ void UnicodeTest() {
true);
std::string jsongen;
parser.opts.indent_step = -1;
- GenerateText(parser, parser.builder_.GetBufferPointer(), &jsongen);
+ auto result = GenerateText(parser, parser.builder_.GetBufferPointer(), &jsongen);
+ TEST_EQ(result, true);
TEST_EQ(jsongen,
std::string(
"{F: \"\\u20AC\\u00A2\\u30E6\\u30FC\\u30B6\\u30FC"
@@ -1003,13 +1006,31 @@ void UnicodeTestAllowNonUTF8() {
"\\u5225\\u30B5\\u30A4\\u30C8\\x01\\x80\\u0080\\uD83D\\uDE0E\" }"), true);
std::string jsongen;
parser.opts.indent_step = -1;
- GenerateText(parser, parser.builder_.GetBufferPointer(), &jsongen);
+ auto result = GenerateText(parser, parser.builder_.GetBufferPointer(), &jsongen);
+ TEST_EQ(result, true);
TEST_EQ(jsongen,
std::string(
"{F: \"\\u20AC\\u00A2\\u30E6\\u30FC\\u30B6\\u30FC"
"\\u5225\\u30B5\\u30A4\\u30C8\\u0001\\x80\\u0080\\uD83D\\uDE0E\"}"));
}
+void UnicodeTestGenerateTextFailsOnNonUTF8() {
+ flatbuffers::Parser parser;
+ // Allow non-UTF-8 initially to model what happens when we load a binary flatbuffer from disk
+ // which contains non-UTF-8 strings.
+ parser.opts.allow_non_utf8 = true;
+ TEST_EQ(parser.Parse("table T { F:string; }"
+ "root_type T;"
+ "{ F:\"\\u20AC\\u00A2\\u30E6\\u30FC\\u30B6\\u30FC"
+ "\\u5225\\u30B5\\u30A4\\u30C8\\x01\\x80\\u0080\\uD83D\\uDE0E\" }"), true);
+ std::string jsongen;
+ parser.opts.indent_step = -1;
+ // Now, disallow non-UTF-8 (the default behavior) so GenerateText indicates failure.
+ parser.opts.allow_non_utf8 = false;
+ auto result = GenerateText(parser, parser.builder_.GetBufferPointer(), &jsongen);
+ TEST_EQ(result, false);
+}
+
void UnicodeSurrogatesTest() {
flatbuffers::Parser parser;
@@ -1157,7 +1178,8 @@ void UnknownFieldsTest() {
std::string jsongen;
parser.opts.indent_step = -1;
- GenerateText(parser, parser.builder_.GetBufferPointer(), &jsongen);
+ auto result = GenerateText(parser, parser.builder_.GetBufferPointer(), &jsongen);
+ TEST_EQ(result, true);
TEST_EQ(jsongen == "{str: \"test\",i: 10}", true);
}
@@ -1222,6 +1244,7 @@ int main(int /*argc*/, const char * /*argv*/[]) {
IntegerOutOfRangeTest();
UnicodeTest();
UnicodeTestAllowNonUTF8();
+ UnicodeTestGenerateTextFailsOnNonUTF8();
UnicodeSurrogatesTest();
UnicodeInvalidSurrogatesTest();
InvalidUTF8Test();