diff options
author | Wouter van Oortmerssen <wvo@google.com> | 2021-12-10 14:59:08 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-12-10 14:59:08 -0800 |
commit | e367ca32ad86df16fe5862a57a9697efc18586e3 (patch) | |
tree | a9df92e0b63613908ed891fcaa10dbd6fa17ab5f | |
parent | 705f27f6eef3070dd864c3d5bdbcb25b44e2eb7e (diff) | |
download | flatbuffers-e367ca32ad86df16fe5862a57a9697efc18586e3.tar.gz flatbuffers-e367ca32ad86df16fe5862a57a9697efc18586e3.tar.bz2 flatbuffers-e367ca32ad86df16fe5862a57a9697efc18586e3.zip |
Verifier for FlexBuffers (#6977)
* Verifier for FlexBuffers
* Verifier improvements & fuzzer
-rw-r--r-- | include/flatbuffers/flexbuffers.h | 242 | ||||
-rw-r--r-- | include/flatbuffers/verifier.h | 5 | ||||
-rw-r--r-- | src/flatc.cpp | 9 | ||||
-rw-r--r-- | src/idl_gen_cpp.cpp | 3 | ||||
-rw-r--r-- | src/idl_gen_text.cpp | 6 | ||||
-rw-r--r-- | tests/cpp17/generated_cpp17/monster_test_generated.h | 1 | ||||
-rw-r--r-- | tests/fuzzer/CMakeLists.txt | 5 | ||||
-rw-r--r-- | tests/fuzzer/flexbuffers_verifier_fuzzer.cc | 14 | ||||
-rw-r--r-- | tests/fuzzer/readme.md | 9 | ||||
-rw-r--r-- | tests/monster_test_generated.h | 1 | ||||
-rw-r--r-- | tests/test.cpp | 11 |
11 files changed, 300 insertions, 6 deletions
diff --git a/include/flatbuffers/flexbuffers.h b/include/flatbuffers/flexbuffers.h index 418bc68d..d0859ff9 100644 --- a/include/flatbuffers/flexbuffers.h +++ b/include/flatbuffers/flexbuffers.h @@ -53,7 +53,7 @@ enum Type { FBT_INT = 1, FBT_UINT = 2, FBT_FLOAT = 3, - // Types above stored inline, types below store an offset. + // Types above stored inline, types below (except FBT_BOOL) store an offset. FBT_KEY = 4, FBT_STRING = 5, FBT_INDIRECT_INT = 6, @@ -81,6 +81,8 @@ enum Type { FBT_BOOL = 26, FBT_VECTOR_BOOL = 36, // To Allow the same type of conversion of type to vector type + + FBT_MAX_TYPE = 37 }; inline bool IsInline(Type t) { return t <= FBT_FLOAT || t == FBT_BOOL; } @@ -757,6 +759,8 @@ class Reference { return false; } + friend class Verifier; + const uint8_t *data_; uint8_t parent_width_; uint8_t byte_width_; @@ -1629,6 +1633,242 @@ class Builder FLATBUFFERS_FINAL_CLASS { StringOffsetMap string_pool; }; +// Helper class to verify the integrity of a FlexBuffer +class Verifier FLATBUFFERS_FINAL_CLASS { + public: + Verifier(const uint8_t *buf, size_t buf_len, + // Supplying this vector likely results in faster verification + // of larger buffers with many shared keys/strings, but + // comes at the cost of using additional memory 1/8th the size of + // the buffer being verified, so it is allowed to be null + // for special situations (memory constrained devices or + // really small buffers etc). Do note that when not supplying + // this buffer, you are not protected against buffers crafted + // specifically to DoS you, i.e. recursive sharing that causes + // exponential amounts of verification CPU time. + std::vector<bool> *reuse_tracker) + : buf_(buf), size_(buf_len), reuse_tracker_(reuse_tracker) { + FLATBUFFERS_ASSERT(size_ < FLATBUFFERS_MAX_BUFFER_SIZE); + if (reuse_tracker_) { + reuse_tracker_->clear(); + reuse_tracker_->resize(size_); + } + } + + private: + // Central location where any verification failures register. + bool Check(bool ok) const { + // clang-format off + #ifdef FLATBUFFERS_DEBUG_VERIFICATION_FAILURE + FLATBUFFERS_ASSERT(ok); + #endif + // clang-format on + return ok; + } + + // Verify any range within the buffer. + bool VerifyFrom(size_t elem, size_t elem_len) const { + return Check(elem_len < size_ && elem <= size_ - elem_len); + } + bool VerifyBefore(size_t elem, size_t elem_len) const { + return Check(elem_len <= elem); + } + + bool VerifyFromPointer(const uint8_t *p, size_t len) { + auto o = static_cast<size_t>(p - buf_); + return VerifyFrom(o, len); + } + bool VerifyBeforePointer(const uint8_t *p, size_t len) { + auto o = static_cast<size_t>(p - buf_); + return VerifyBefore(o, len); + } + + bool VerifyByteWidth(size_t width) { + return Check(width == 1 || width == 2 || width == 4 || width == 8); + } + + bool VerifyType(int type) { + return Check(type >= 0 && type < FBT_MAX_TYPE); + } + + bool VerifyOffset(uint64_t off, const uint8_t *p) { + return Check(off <= static_cast<uint64_t>(size_)) && + off <= static_cast<uint64_t>(p - buf_); + } + + bool AlreadyVerified(const uint8_t *p) { + return reuse_tracker_ && (*reuse_tracker_)[p - buf_]; + } + + void MarkVerified(const uint8_t *p) { + if (reuse_tracker_) + (*reuse_tracker_)[p - buf_] = true; + } + + bool VerifyVector(const uint8_t *p, Type elem_type, uint8_t size_byte_width, + uint8_t elem_byte_width) { + // Any kind of nesting goes thru this function, so guard against that + // here. + if (AlreadyVerified(p)) + return true; + if (!VerifyBeforePointer(p, size_byte_width)) + return false; + auto sized = Sized(p, size_byte_width); + auto num_elems = sized.size(); + auto max_elems = SIZE_MAX / elem_byte_width; + if (!Check(num_elems < max_elems)) + return false; // Protect against byte_size overflowing. + auto byte_size = num_elems * elem_byte_width; + if (!VerifyFromPointer(p, byte_size)) + return false; + if (!IsInline(elem_type)) { + if (elem_type == FBT_NULL) { + // Verify type bytes after the vector. + if (!VerifyFromPointer(p + byte_size, num_elems)) return false; + auto v = Vector(p, size_byte_width); + for (size_t i = 0; i < num_elems; i++) + if (!VerifyRef(v[i])) return false; + } else if (elem_type == FBT_KEY) { + auto v = TypedVector(p, elem_byte_width, FBT_KEY); + for (size_t i = 0; i < num_elems; i++) + if (!VerifyRef(v[i])) return false; + } else { + FLATBUFFERS_ASSERT(false); + } + } + MarkVerified(p); + return true; + } + + bool VerifyKeys(const uint8_t *p, uint8_t byte_width) { + // The vector part of the map has already been verified. + const size_t num_prefixed_fields = 3; + if (!VerifyBeforePointer(p, byte_width * num_prefixed_fields)) + return false; + p -= byte_width * num_prefixed_fields; + auto off = ReadUInt64(p, byte_width); + if (!VerifyOffset(off, p)) + return false; + auto key_byte_with = + static_cast<uint8_t>(ReadUInt64(p + byte_width, byte_width)); + if (!VerifyByteWidth(key_byte_with)) + return false; + return VerifyVector(p - off, FBT_KEY, key_byte_with, key_byte_with); + } + + bool VerifyKey(const uint8_t* p) { + if (AlreadyVerified(p)) return true; + MarkVerified(p); + while (p < buf_ + size_) + if (*p++) return true; + return false; + } + + bool VerifyTerminator(const String &s) { + return VerifyFromPointer(reinterpret_cast<const uint8_t *>(s.c_str()), + s.size() + 1); + } + + bool VerifyRef(Reference r) { + // r.parent_width_ and r.data_ already verified. + if (!VerifyByteWidth(r.byte_width_) || !VerifyType(r.type_)) { + return false; + } + if (IsInline(r.type_)) { + // Inline scalars, don't require further verification. + return true; + } + // All remaining types are an offset. + auto off = ReadUInt64(r.data_, r.parent_width_); + if (!VerifyOffset(off, r.data_)) + return false; + auto p = r.Indirect(); + switch (r.type_) { + case FBT_INDIRECT_INT: + case FBT_INDIRECT_UINT: + case FBT_INDIRECT_FLOAT: + return VerifyFromPointer(p, r.byte_width_); + case FBT_KEY: + return VerifyKey(p); + case FBT_MAP: + return VerifyVector(p, FBT_NULL, r.byte_width_, r.byte_width_) && + VerifyKeys(p, r.byte_width_); + case FBT_VECTOR: + return VerifyVector(p, FBT_NULL, r.byte_width_, r.byte_width_); + case FBT_VECTOR_INT: + return VerifyVector(p, FBT_INT, r.byte_width_, r.byte_width_); + case FBT_VECTOR_BOOL: + case FBT_VECTOR_UINT: + return VerifyVector(p, FBT_UINT, r.byte_width_, r.byte_width_); + case FBT_VECTOR_FLOAT: + return VerifyVector(p, FBT_FLOAT, r.byte_width_, r.byte_width_); + case FBT_VECTOR_KEY: + return VerifyVector(p, FBT_KEY, r.byte_width_, r.byte_width_); + case FBT_VECTOR_STRING_DEPRECATED: + // Use of FBT_KEY here intentional, see elsewhere. + return VerifyVector(p, FBT_KEY, r.byte_width_, r.byte_width_); + case FBT_BLOB: + return VerifyVector(p, FBT_UINT, r.byte_width_, 1); + case FBT_STRING: + return VerifyVector(p, FBT_UINT, r.byte_width_, 1) && + VerifyTerminator(String(p, r.byte_width_)); + case FBT_VECTOR_INT2: + case FBT_VECTOR_UINT2: + case FBT_VECTOR_FLOAT2: + case FBT_VECTOR_INT3: + case FBT_VECTOR_UINT3: + case FBT_VECTOR_FLOAT3: + case FBT_VECTOR_INT4: + case FBT_VECTOR_UINT4: + case FBT_VECTOR_FLOAT4: { + uint8_t len = 0; + auto vtype = ToFixedTypedVectorElementType(r.type_, &len); + if (!VerifyType(vtype)) + return false; + return VerifyFromPointer(p, r.byte_width_ * len); + } + default: + return false; + } + } + + public: + bool VerifyBuffer() { + if (!Check(size_ >= 3)) return false; + auto end = buf_ + size_; + auto byte_width = *--end; + auto packed_type = *--end; + return VerifyByteWidth(byte_width) && + Check(end - buf_ >= byte_width) && + VerifyRef(Reference(end - byte_width, byte_width, packed_type)); + } + + private: + const uint8_t *buf_; + size_t size_; + std::vector<bool> *reuse_tracker_; +}; + +// Utility function that contructs the Verifier for you, see above for parameters. +inline bool VerifyBuffer(const uint8_t *buf, size_t buf_len, std::vector<bool> *reuse_tracker) { + Verifier verifier(buf, buf_len, reuse_tracker); + return verifier.VerifyBuffer(); +} + + +#ifdef FLATBUFFERS_H_ +// This is a verifier utility function that works together with the +// FlatBuffers verifier, which should only be present if flatbuffer.h +// has been included (which it typically is in generated code). +inline bool VerifyNestedFlexBuffer(const flatbuffers::Vector<uint8_t> *nv, + flatbuffers::Verifier &verifier) { + if (!nv) return true; + return verifier.Check( + flexbuffers::VerifyBuffer(nv->data(), nv->size(), + &verifier.GetReuseVector())); +} +#endif + } // namespace flexbuffers #if defined(_MSC_VER) diff --git a/include/flatbuffers/verifier.h b/include/flatbuffers/verifier.h index b6971c1d..5198dcce 100644 --- a/include/flatbuffers/verifier.h +++ b/include/flatbuffers/verifier.h @@ -254,6 +254,8 @@ class Verifier FLATBUFFERS_FINAL_CLASS { // clang-format on } + std::vector<bool> &GetReuseVector() { return reuse_tracker_; } + private: const uint8_t *buf_; size_t size_; @@ -263,6 +265,9 @@ class Verifier FLATBUFFERS_FINAL_CLASS { uoffset_t max_tables_; mutable size_t upper_bound_; bool check_alignment_; + // This is here for nested FlexBuffers, cheap if not touched. + // TODO: allow user to supply memory for this. + std::vector<bool> reuse_tracker_; }; } // namespace flatbuffers diff --git a/src/flatc.cpp b/src/flatc.cpp index fec0584d..f278dab7 100644 --- a/src/flatc.cpp +++ b/src/flatc.cpp @@ -510,9 +510,12 @@ int FlatCompiler::Compile(int argc, const char **argv) { LoadBinarySchema(*parser.get(), filename, contents); } else if (opts.use_flexbuffers) { if (opts.lang_to_generate == IDLOptions::kJson) { - parser->flex_root_ = flexbuffers::GetRoot( - reinterpret_cast<const uint8_t *>(contents.c_str()), - contents.size()); + auto data = reinterpret_cast<const uint8_t *>(contents.c_str()); + auto size = contents.size(); + std::vector<bool> reuse_tracker; + if (!flexbuffers::VerifyBuffer(data, size, &reuse_tracker)) + Error("flexbuffers file failed to verify: " + filename, false); + parser->flex_root_ = flexbuffers::GetRoot(data, size); } else { parser->flex_builder_.Clear(); ParseFile(*parser.get(), filename, contents, include_directories); diff --git a/src/idl_gen_cpp.cpp b/src/idl_gen_cpp.cpp index 39d09786..df124e5a 100644 --- a/src/idl_gen_cpp.cpp +++ b/src/idl_gen_cpp.cpp @@ -2006,6 +2006,9 @@ class CppGenerator : public BaseGenerator { // FIXME: file_identifier. code_ += "{{PRE}}verifier.VerifyNestedFlatBuffer<{{CPP_NAME}}>" "({{NAME}}(), nullptr)\\"; + } else if (field.flexbuffer) { + code_ += "{{PRE}}flexbuffers::VerifyNestedFlexBuffer" + "({{NAME}}(), verifier)\\"; } break; } diff --git a/src/idl_gen_text.cpp b/src/idl_gen_text.cpp index f32243c8..805e9343 100644 --- a/src/idl_gen_text.cpp +++ b/src/idl_gen_text.cpp @@ -265,6 +265,12 @@ struct JsonPrinter { val = reinterpret_cast<const Struct *>(table)->GetStruct<const void *>( fd.value.offset); } else if (fd.flexbuffer && opts.json_nested_flexbuffers) { + // We could verify this FlexBuffer before access, but since this sits + // inside a FlatBuffer that we don't know wether it has been verified or + // not, there is little point making this part safer than the parent.. + // The caller should really be verifying the whole. + // If the whole buffer is corrupt, we likely crash before we even get + // here. auto vec = table->GetPointer<const Vector<uint8_t> *>(fd.value.offset); auto root = flexbuffers::GetRoot(vec->data(), vec->size()); root.ToString(true, opts.strict_json, text); diff --git a/tests/cpp17/generated_cpp17/monster_test_generated.h b/tests/cpp17/generated_cpp17/monster_test_generated.h index c2fdf76f..10228db8 100644 --- a/tests/cpp17/generated_cpp17/monster_test_generated.h +++ b/tests/cpp17/generated_cpp17/monster_test_generated.h @@ -1740,6 +1740,7 @@ struct Monster FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { verifier.VerifyVector(testarrayofsortedstruct()) && VerifyOffset(verifier, VT_FLEX) && verifier.VerifyVector(flex()) && + flexbuffers::VerifyNestedFlexBuffer(flex(), verifier) && VerifyOffset(verifier, VT_TEST5) && verifier.VerifyVector(test5()) && VerifyOffset(verifier, VT_VECTOR_OF_LONGS) && diff --git a/tests/fuzzer/CMakeLists.txt b/tests/fuzzer/CMakeLists.txt index 74dcc117..9cc5a5fe 100644 --- a/tests/fuzzer/CMakeLists.txt +++ b/tests/fuzzer/CMakeLists.txt @@ -53,7 +53,7 @@ target_compile_options( fuzzer_config INTERFACE $<$<NOT:$<BOOL:${OSS_FUZZ}>>: - -fsanitize-coverage=edge,trace-cmp + -fsanitize-coverage=trace-cmp > $<$<BOOL:${USE_ASAN}>: -fsanitize=fuzzer,undefined,address @@ -146,6 +146,9 @@ target_link_libraries(parser_fuzzer PRIVATE flatbuffers_fuzzed) add_executable(verifier_fuzzer flatbuffers_verifier_fuzzer.cc) target_link_libraries(verifier_fuzzer PRIVATE flatbuffers_fuzzed) +add_executable(flexverifier_fuzzer flexbuffers_verifier_fuzzer.cc) +target_link_libraries(flexverifier_fuzzer PRIVATE flatbuffers_fuzzed) + add_executable(monster_fuzzer flatbuffers_monster_fuzzer.cc) target_link_libraries(monster_fuzzer PRIVATE flatbuffers_fuzzed) add_custom_command( diff --git a/tests/fuzzer/flexbuffers_verifier_fuzzer.cc b/tests/fuzzer/flexbuffers_verifier_fuzzer.cc new file mode 100644 index 00000000..3853db83 --- /dev/null +++ b/tests/fuzzer/flexbuffers_verifier_fuzzer.cc @@ -0,0 +1,14 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +#include <stddef.h> +#include <stdint.h> +#include <string> + +#include "flatbuffers/flexbuffers.h" + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { + std::vector<bool> reuse_tracker; + flexbuffers::VerifyBuffer(data, size, &reuse_tracker); + return 0; +} diff --git a/tests/fuzzer/readme.md b/tests/fuzzer/readme.md index b2c7db41..1d0b392c 100644 --- a/tests/fuzzer/readme.md +++ b/tests/fuzzer/readme.md @@ -10,10 +10,17 @@ For details about **libFuzzer** see: https://llvm.org/docs/LibFuzzer.html To build and run these tests LLVM compiler (with clang frontend) and CMake should be installed before. -The fuzzer section include three tests: +The fuzzer section include four tests: - `verifier_fuzzer` checks stability of deserialization engine for `Monster` schema; - `parser_fuzzer` checks stability of schema and json parser under various inputs; - `scalar_parser` focused on validation of the parser while parse numeric scalars in schema and/or json files; +- `flexverifier_fuzzer` checks stability of deserialization engine for FlexBuffers only; + +## Build +```sh +cd tests/fuzzer +CC=clang CXX=clang++ cmake . -DCMAKE_BUILD_TYPE=Debug -DUSE_ASAN=ON +``` ## Run tests with a specific locale The grammar of the Flatbuffers library is based on printable-ASCII characters. diff --git a/tests/monster_test_generated.h b/tests/monster_test_generated.h index cec75234..459c5ee0 100644 --- a/tests/monster_test_generated.h +++ b/tests/monster_test_generated.h @@ -1680,6 +1680,7 @@ struct Monster FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { verifier.VerifyVector(testarrayofsortedstruct()) && VerifyOffset(verifier, VT_FLEX) && verifier.VerifyVector(flex()) && + flexbuffers::VerifyNestedFlexBuffer(flex(), verifier) && VerifyOffset(verifier, VT_TEST5) && verifier.VerifyVector(test5()) && VerifyOffset(verifier, VT_VECTOR_OF_LONGS) && diff --git a/tests/test.cpp b/tests/test.cpp index 17c787d5..1e6e9d3a 100644 --- a/tests/test.cpp +++ b/tests/test.cpp @@ -3022,6 +3022,10 @@ void FlexBuffersTest() { #endif // clang-format on + std::vector<bool> reuse_tracker; + TEST_EQ(flexbuffers::VerifyBuffer(slb.GetBuffer().data(), slb.GetBuffer().size(), + &reuse_tracker), true); + auto map = flexbuffers::GetRoot(slb.GetBuffer()).AsMap(); TEST_EQ(map.size(), 7); auto vec = map["vec"].AsVector(); @@ -3079,6 +3083,8 @@ void FlexBuffersTest() { slb.Clear(); auto jsontest = "{ a: [ 123, 456.0 ], b: \"hello\", c: true, d: false }"; TEST_EQ(parser.ParseFlexBuffer(jsontest, nullptr, &slb), true); + TEST_EQ(flexbuffers::VerifyBuffer(slb.GetBuffer().data(), slb.GetBuffer().size(), + &reuse_tracker), true); auto jroot = flexbuffers::GetRoot(slb.GetBuffer()); auto jmap = jroot.AsMap(); auto jvec = jmap["a"].AsVector(); @@ -3116,6 +3122,8 @@ void FlexBuffersFloatingPointTest() { "{ a: [1.0, nan, inf, infinity, -inf, +inf, -infinity, 8.0] }"; TEST_EQ(parser.ParseFlexBuffer(jsontest, nullptr, &slb), true); auto jroot = flexbuffers::GetRoot(slb.GetBuffer()); + TEST_EQ(flexbuffers::VerifyBuffer(slb.GetBuffer().data(), slb.GetBuffer().size(), + nullptr), true); auto jmap = jroot.AsMap(); auto jvec = jmap["a"].AsVector(); TEST_EQ(8, jvec.size()); @@ -3159,6 +3167,9 @@ void FlexBuffersDeprecatedTest() { // same way, the fix lies on the reading side. slb.EndVector(start, true, false); slb.Finish(); + // Verify because why not. + TEST_EQ(flexbuffers::VerifyBuffer(slb.GetBuffer().data(), slb.GetBuffer().size(), + nullptr), true); // So now lets read this data back. // For existing data, since we have no way of knowing what the actual // bit-width of the size field of the string is, we are going to ignore this |