diff options
author | Ben Hamilton <beng@fb.com> | 2016-08-18 10:14:32 -0700 |
---|---|---|
committer | Ben Hamilton <beng@fb.com> | 2016-08-18 10:23:26 -0700 |
commit | 94d5643f97102fffd9e0131541e9385d6d0b7e34 (patch) | |
tree | 3988807e9f4f95f37b1aebc5733fa81426674209 | |
parent | f0d91fa143eed6de915a41108287c260e82a2689 (diff) | |
download | flatbuffers-94d5643f97102fffd9e0131541e9385d6d0b7e34.tar.gz flatbuffers-94d5643f97102fffd9e0131541e9385d6d0b7e34.tar.bz2 flatbuffers-94d5643f97102fffd9e0131541e9385d6d0b7e34.zip |
Allow GenerateText() to indicate failure to encode a flatbuffer to JSON (e.g., when it contains non-UTF-8 string data)
-rw-r--r-- | include/flatbuffers/idl.h | 4 | ||||
-rw-r--r-- | samples/sample_text.cpp | 5 | ||||
-rw-r--r-- | src/idl_gen_text.cpp | 129 | ||||
-rw-r--r-- | tests/test.cpp | 33 |
4 files changed, 119 insertions, 52 deletions
diff --git a/include/flatbuffers/idl.h b/include/flatbuffers/idl.h index 5909a4e2..1c1e6349 100644 --- a/include/flatbuffers/idl.h +++ b/include/flatbuffers/idl.h @@ -596,7 +596,9 @@ extern void GenComment(const std::vector<std::string> &dc, // if it is less than 0, no linefeeds will be generated either. // See idl_gen_text.cpp. // strict_json adds "quotes" around field names if true. -extern void GenerateText(const Parser &parser, +// If the flatbuffer cannot be encoded in JSON (e.g., it contains non-UTF-8 +// byte arrays in String values), returns false. +extern bool GenerateText(const Parser &parser, const void *flatbuffer, std::string *text); extern bool GenerateTextFile(const Parser &parser, diff --git a/samples/sample_text.cpp b/samples/sample_text.cpp index 557077d4..d851120d 100644 --- a/samples/sample_text.cpp +++ b/samples/sample_text.cpp @@ -46,7 +46,10 @@ int main(int /*argc*/, const char * /*argv*/[]) { // to ensure it is correct, we now generate text back from the binary, // and compare the two: std::string jsongen; - GenerateText(parser, parser.builder_.GetBufferPointer(), &jsongen); + if (!GenerateText(parser, parser.builder_.GetBufferPointer(), &jsongen)) { + printf("Couldn't serialize parsed data to JSON!\n"); + return 1; + } if (jsongen != jsonfile) { printf("%s----------------\n%s", jsongen.c_str(), jsonfile.c_str()); diff --git a/src/idl_gen_text.cpp b/src/idl_gen_text.cpp index 3e41a0a7..4ff13c8a 100644 --- a/src/idl_gen_text.cpp +++ b/src/idl_gen_text.cpp @@ -22,7 +22,7 @@ namespace flatbuffers { -static void GenStruct(const StructDef &struct_def, const Table *table, +static bool GenStruct(const StructDef &struct_def, const Table *table, int indent, const IDLOptions &opts, std::string *_text); @@ -48,7 +48,7 @@ void OutputIdentifier(const std::string &name, const IDLOptions &opts, // Print (and its template specialization below for pointers) generate text // for a single FlatBuffer value into JSON format. 
// The general case for scalars: -template<typename T> void Print(T val, Type type, int /*indent*/, +template<typename T> bool Print(T val, Type type, int /*indent*/, StructDef * /*union_sd*/, const IDLOptions &opts, std::string *_text) { @@ -57,7 +57,7 @@ template<typename T> void Print(T val, Type type, int /*indent*/, auto enum_val = type.enum_def->ReverseLookup(static_cast<int>(val)); if (enum_val) { OutputIdentifier(enum_val->name, opts, _text); - return; + return true; } } @@ -66,10 +66,12 @@ template<typename T> void Print(T val, Type type, int /*indent*/, } else { text += NumToString(val); } + + return true; } // Print a vector a sequence of JSON values, comma separated, wrapped in "[]". -template<typename T> void PrintVector(const Vector<T> &v, Type type, +template<typename T> bool PrintVector(const Vector<T> &v, Type type, int indent, const IDLOptions &opts, std::string *_text) { std::string &text = *_text; @@ -81,19 +83,25 @@ template<typename T> void PrintVector(const Vector<T> &v, Type type, text += NewLine(opts); } text.append(indent + Indent(opts), ' '); - if (IsStruct(type)) - Print(v.GetStructFromOffset(i * type.struct_def->bytesize), type, - indent + Indent(opts), nullptr, opts, _text); - else - Print(v[i], type, indent + Indent(opts), nullptr, - opts, _text); + if (IsStruct(type)) { + if (!Print(v.GetStructFromOffset(i * type.struct_def->bytesize), type, + indent + Indent(opts), nullptr, opts, _text)) { + return false; + } + } else { + if (!Print(v[i], type, indent + Indent(opts), nullptr, + opts, _text)) { + return false; + } + } } text += NewLine(opts); text.append(indent, ' '); text += "]"; + return true; } -static void EscapeString(const String &s, std::string *_text, const IDLOptions& opts) { +static bool EscapeString(const String &s, std::string *_text, const IDLOptions& opts) { std::string &text = *_text; text += "\""; for (uoffset_t i = 0; i < s.size(); i++) { @@ -118,9 +126,19 @@ static void EscapeString(const String &s, std::string 
*_text, const IDLOptions& text += "\\x"; text += IntToStringHex(static_cast<uint8_t>(c), 2); } else { - // We previously checked for non-UTF-8 and returned a parse error, - // so we shouldn't reach here. - assert(0); + // There are two cases here: + // + // 1) We reached here by parsing an IDL file. In that case, + // we previously checked for non-UTF-8, so we shouldn't reach + // here. + // + // 2) We reached here by someone calling GenerateText() + // on a previously-serialized flatbuffer. The data might have + // non-UTF-8 Strings, or might be corrupt. + // + // In both cases, we have to give up and inform the caller + // they have no JSON. + return false; } } else { if (ucc <= 0xFFFF) { @@ -145,10 +163,11 @@ static void EscapeString(const String &s, std::string *_text, const IDLOptions& } } text += "\""; + return true; } // Specialization of Print above for pointer types. -template<> void Print<const void *>(const void *val, +template<> bool Print<const void *>(const void *val, Type type, int indent, StructDef *union_sd, const IDLOptions &opts, @@ -158,21 +177,27 @@ template<> void Print<const void *>(const void *val, // If this assert hits, you have an corrupt buffer, a union type field // was not present or was out of range. 
assert(union_sd); - GenStruct(*union_sd, - reinterpret_cast<const Table *>(val), - indent, - opts, - _text); + if (!GenStruct(*union_sd, + reinterpret_cast<const Table *>(val), + indent, + opts, + _text)) { + return false; + } break; case BASE_TYPE_STRUCT: - GenStruct(*type.struct_def, - reinterpret_cast<const Table *>(val), - indent, - opts, - _text); + if (!GenStruct(*type.struct_def, + reinterpret_cast<const Table *>(val), + indent, + opts, + _text)) { + return false; + } break; case BASE_TYPE_STRING: { - EscapeString(*reinterpret_cast<const String *>(val), _text, opts); + if (!EscapeString(*reinterpret_cast<const String *>(val), _text, opts)) { + return false; + } break; } case BASE_TYPE_VECTOR: @@ -182,31 +207,35 @@ template<> void Print<const void *>(const void *val, #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, \ PTYPE) \ case BASE_TYPE_ ## ENUM: \ - PrintVector<CTYPE>( \ - *reinterpret_cast<const Vector<CTYPE> *>(val), \ - type, indent, opts, _text); break; + if (!PrintVector<CTYPE>( \ + *reinterpret_cast<const Vector<CTYPE> *>(val), \ + type, indent, opts, _text)) { \ + return false; \ + } \ + break; FLATBUFFERS_GEN_TYPES(FLATBUFFERS_TD) #undef FLATBUFFERS_TD } break; default: assert(0); } + return true; } // Generate text for a scalar field. -template<typename T> static void GenField(const FieldDef &fd, +template<typename T> static bool GenField(const FieldDef &fd, const Table *table, bool fixed, const IDLOptions &opts, int indent, std::string *_text) { - Print(fixed ? + return Print(fixed ? reinterpret_cast<const Struct *>(table)->GetField<T>(fd.value.offset) : table->GetField<T>(fd.value.offset, 0), fd.value.type, indent, nullptr, opts, _text); } // Generate text for non-scalar field. 
-static void GenFieldOffset(const FieldDef &fd, const Table *table, bool fixed, +static bool GenFieldOffset(const FieldDef &fd, const Table *table, bool fixed, int indent, StructDef *union_sd, const IDLOptions &opts, std::string *_text) { const void *val = nullptr; @@ -220,12 +249,12 @@ static void GenFieldOffset(const FieldDef &fd, const Table *table, bool fixed, ? table->GetStruct<const void *>(fd.value.offset) : table->GetPointer<const void *>(fd.value.offset); } - Print(val, fd.value.type, indent, union_sd, opts, _text); + return Print(val, fd.value.type, indent, union_sd, opts, _text); } // Generate text for a struct or table, values separated by commas, indented, // and bracketed by "{}" -static void GenStruct(const StructDef &struct_def, const Table *table, +static bool GenStruct(const StructDef &struct_def, const Table *table, int indent, const IDLOptions &opts, std::string *_text) { std::string &text = *_text; @@ -253,8 +282,10 @@ static void GenStruct(const StructDef &struct_def, const Table *table, #define FLATBUFFERS_TD(ENUM, IDLTYPE, CTYPE, JTYPE, GTYPE, NTYPE, \ PTYPE) \ case BASE_TYPE_ ## ENUM: \ - GenField<CTYPE>(fd, table, struct_def.fixed, \ - opts, indent + Indent(opts), _text); \ + if (!GenField<CTYPE>(fd, table, struct_def.fixed, \ + opts, indent + Indent(opts), _text)) { \ + return false; \ + } \ break; FLATBUFFERS_GEN_TYPES_SCALAR(FLATBUFFERS_TD) #undef FLATBUFFERS_TD @@ -264,8 +295,10 @@ static void GenStruct(const StructDef &struct_def, const Table *table, case BASE_TYPE_ ## ENUM: FLATBUFFERS_GEN_TYPES_POINTER(FLATBUFFERS_TD) #undef FLATBUFFERS_TD - GenFieldOffset(fd, table, struct_def.fixed, indent + Indent(opts), - union_sd, opts, _text); + if (!GenFieldOffset(fd, table, struct_def.fixed, indent + Indent(opts), + union_sd, opts, _text)) { + return false; + } break; } if (fd.value.type.base_type == BASE_TYPE_UTYPE) { @@ -284,20 +317,24 @@ static void GenStruct(const StructDef &struct_def, const Table *table, text += NewLine(opts); 
text.append(indent, ' '); text += "}"; + return true; } // Generate a text representation of a flatbuffer in JSON format. -void GenerateText(const Parser &parser, const void *flatbuffer, +bool GenerateText(const Parser &parser, const void *flatbuffer, std::string *_text) { std::string &text = *_text; assert(parser.root_struct_def_); // call SetRootType() text.reserve(1024); // Reduce amount of inevitable reallocs. - GenStruct(*parser.root_struct_def_, - GetRoot<Table>(flatbuffer), - 0, - parser.opts, - _text); + if (!GenStruct(*parser.root_struct_def_, + GetRoot<Table>(flatbuffer), + 0, + parser.opts, + _text)) { + return false; + } text += NewLine(parser.opts); + return true; } std::string TextFileName(const std::string &path, @@ -310,7 +347,9 @@ bool GenerateTextFile(const Parser &parser, const std::string &file_name) { if (!parser.builder_.GetSize() || !parser.root_struct_def_) return true; std::string text; - GenerateText(parser, parser.builder_.GetBufferPointer(), &text); + if (!GenerateText(parser, parser.builder_.GetBufferPointer(), &text)) { + return false; + } return flatbuffers::SaveFile(TextFileName(path, file_name).c_str(), text, false); diff --git a/tests/test.cpp b/tests/test.cpp index fd2352bd..45eb1fe2 100644 --- a/tests/test.cpp +++ b/tests/test.cpp @@ -410,7 +410,8 @@ void ParseAndGenerateTextTest() { // to ensure it is correct, we now generate text back from the binary, // and compare the two: std::string jsongen; - GenerateText(parser, parser.builder_.GetBufferPointer(), &jsongen); + auto result = GenerateText(parser, parser.builder_.GetBufferPointer(), &jsongen); + TEST_EQ(result, true); if (jsongen != jsonfile) { printf("%s----------------\n%s", jsongen.c_str(), jsonfile.c_str()); @@ -827,7 +828,8 @@ void FuzzTest2() { std::string jsongen; parser.opts.indent_step = 0; - GenerateText(parser, parser.builder_.GetBufferPointer(), &jsongen); + auto result = GenerateText(parser, parser.builder_.GetBufferPointer(), &jsongen); + TEST_EQ(result, true); 
if (jsongen != json) { // These strings are larger than a megabyte, so we show the bytes around @@ -987,7 +989,8 @@ void UnicodeTest() { true); std::string jsongen; parser.opts.indent_step = -1; - GenerateText(parser, parser.builder_.GetBufferPointer(), &jsongen); + auto result = GenerateText(parser, parser.builder_.GetBufferPointer(), &jsongen); + TEST_EQ(result, true); TEST_EQ(jsongen, std::string( "{F: \"\\u20AC\\u00A2\\u30E6\\u30FC\\u30B6\\u30FC" @@ -1003,13 +1006,31 @@ void UnicodeTestAllowNonUTF8() { "\\u5225\\u30B5\\u30A4\\u30C8\\x01\\x80\\u0080\\uD83D\\uDE0E\" }"), true); std::string jsongen; parser.opts.indent_step = -1; - GenerateText(parser, parser.builder_.GetBufferPointer(), &jsongen); + auto result = GenerateText(parser, parser.builder_.GetBufferPointer(), &jsongen); + TEST_EQ(result, true); TEST_EQ(jsongen, std::string( "{F: \"\\u20AC\\u00A2\\u30E6\\u30FC\\u30B6\\u30FC" "\\u5225\\u30B5\\u30A4\\u30C8\\u0001\\x80\\u0080\\uD83D\\uDE0E\"}")); } +void UnicodeTestGenerateTextFailsOnNonUTF8() { + flatbuffers::Parser parser; + // Allow non-UTF-8 initially to model what happens when we load a binary flatbuffer from disk + // which contains non-UTF-8 strings. + parser.opts.allow_non_utf8 = true; + TEST_EQ(parser.Parse("table T { F:string; }" + "root_type T;" + "{ F:\"\\u20AC\\u00A2\\u30E6\\u30FC\\u30B6\\u30FC" + "\\u5225\\u30B5\\u30A4\\u30C8\\x01\\x80\\u0080\\uD83D\\uDE0E\" }"), true); + std::string jsongen; + parser.opts.indent_step = -1; + // Now, disallow non-UTF-8 (the default behavior) so GenerateText indicates failure. 
+ parser.opts.allow_non_utf8 = false; + auto result = GenerateText(parser, parser.builder_.GetBufferPointer(), &jsongen); + TEST_EQ(result, false); +} + void UnicodeSurrogatesTest() { flatbuffers::Parser parser; @@ -1157,7 +1178,8 @@ void UnknownFieldsTest() { std::string jsongen; parser.opts.indent_step = -1; - GenerateText(parser, parser.builder_.GetBufferPointer(), &jsongen); + auto result = GenerateText(parser, parser.builder_.GetBufferPointer(), &jsongen); + TEST_EQ(result, true); TEST_EQ(jsongen == "{str: \"test\",i: 10}", true); } @@ -1222,6 +1244,7 @@ int main(int /*argc*/, const char * /*argv*/[]) { IntegerOutOfRangeTest(); UnicodeTest(); UnicodeTestAllowNonUTF8(); + UnicodeTestGenerateTextFailsOnNonUTF8(); UnicodeSurrogatesTest(); UnicodeInvalidSurrogatesTest(); InvalidUTF8Test(); |