author     Wouter van Oortmerssen <wvo@google.com>  2021-12-10 14:59:08 -0800
committer  GitHub <noreply@github.com>              2021-12-10 14:59:08 -0800
commit     e367ca32ad86df16fe5862a57a9697efc18586e3 (patch)
tree       a9df92e0b63613908ed891fcaa10dbd6fa17ab5f
parent     705f27f6eef3070dd864c3d5bdbcb25b44e2eb7e (diff)
Verifier for FlexBuffers (#6977)
* Verifier for FlexBuffers
* Verifier improvements & fuzzer
-rw-r--r--  include/flatbuffers/flexbuffers.h                      | 242
-rw-r--r--  include/flatbuffers/verifier.h                         |   5
-rw-r--r--  src/flatc.cpp                                          |   9
-rw-r--r--  src/idl_gen_cpp.cpp                                    |   3
-rw-r--r--  src/idl_gen_text.cpp                                   |   6
-rw-r--r--  tests/cpp17/generated_cpp17/monster_test_generated.h  |   1
-rw-r--r--  tests/fuzzer/CMakeLists.txt                            |   5
-rw-r--r--  tests/fuzzer/flexbuffers_verifier_fuzzer.cc            |  14
-rw-r--r--  tests/fuzzer/readme.md                                 |   9
-rw-r--r--  tests/monster_test_generated.h                         |   1
-rw-r--r--  tests/test.cpp                                         |  11
11 files changed, 300 insertions, 6 deletions
diff --git a/include/flatbuffers/flexbuffers.h b/include/flatbuffers/flexbuffers.h
index 418bc68d..d0859ff9 100644
--- a/include/flatbuffers/flexbuffers.h
+++ b/include/flatbuffers/flexbuffers.h
@@ -53,7 +53,7 @@ enum Type {
FBT_INT = 1,
FBT_UINT = 2,
FBT_FLOAT = 3,
- // Types above stored inline, types below store an offset.
+ // Types above stored inline, types below (except FBT_BOOL) store an offset.
FBT_KEY = 4,
FBT_STRING = 5,
FBT_INDIRECT_INT = 6,
@@ -81,6 +81,8 @@ enum Type {
FBT_BOOL = 26,
FBT_VECTOR_BOOL =
36, // To Allow the same type of conversion of type to vector type
+
+ FBT_MAX_TYPE = 37
};
inline bool IsInline(Type t) { return t <= FBT_FLOAT || t == FBT_BOOL; }
@@ -757,6 +759,8 @@ class Reference {
return false;
}
+ friend class Verifier;
+
const uint8_t *data_;
uint8_t parent_width_;
uint8_t byte_width_;
@@ -1629,6 +1633,242 @@ class Builder FLATBUFFERS_FINAL_CLASS {
StringOffsetMap string_pool;
};
+// Helper class to verify the integrity of a FlexBuffer
+class Verifier FLATBUFFERS_FINAL_CLASS {
+ public:
+ Verifier(const uint8_t *buf, size_t buf_len,
+ // Supplying this vector likely results in faster verification
+ // of larger buffers with many shared keys/strings, but
+ // comes at the cost of using additional memory 1/8th the size of
+ // the buffer being verified, so it is allowed to be null
+ // for special situations (memory-constrained devices or
+ // really small buffers, etc.). Do note that when not supplying
+ // this buffer, you are not protected against buffers crafted
+ // specifically to DoS you, i.e. recursive sharing that causes
+ // exponential amounts of verification CPU time.
+ std::vector<bool> *reuse_tracker)
+ : buf_(buf), size_(buf_len), reuse_tracker_(reuse_tracker) {
+ FLATBUFFERS_ASSERT(size_ < FLATBUFFERS_MAX_BUFFER_SIZE);
+ if (reuse_tracker_) {
+ reuse_tracker_->clear();
+ reuse_tracker_->resize(size_);
+ }
+ }
+
+ private:
+ // Central location where any verification failures register.
+ bool Check(bool ok) const {
+ // clang-format off
+ #ifdef FLATBUFFERS_DEBUG_VERIFICATION_FAILURE
+ FLATBUFFERS_ASSERT(ok);
+ #endif
+ // clang-format on
+ return ok;
+ }
+
+ // Verify any range within the buffer.
+ bool VerifyFrom(size_t elem, size_t elem_len) const {
+ return Check(elem_len < size_ && elem <= size_ - elem_len);
+ }
+ bool VerifyBefore(size_t elem, size_t elem_len) const {
+ return Check(elem_len <= elem);
+ }
+
+ bool VerifyFromPointer(const uint8_t *p, size_t len) {
+ auto o = static_cast<size_t>(p - buf_);
+ return VerifyFrom(o, len);
+ }
+ bool VerifyBeforePointer(const uint8_t *p, size_t len) {
+ auto o = static_cast<size_t>(p - buf_);
+ return VerifyBefore(o, len);
+ }
+
+ bool VerifyByteWidth(size_t width) {
+ return Check(width == 1 || width == 2 || width == 4 || width == 8);
+ }
+
+ bool VerifyType(int type) {
+ return Check(type >= 0 && type < FBT_MAX_TYPE);
+ }
+
+ bool VerifyOffset(uint64_t off, const uint8_t *p) {
+ return Check(off <= static_cast<uint64_t>(size_)) &&
+ Check(off <= static_cast<uint64_t>(p - buf_));
+ }
+
+ bool AlreadyVerified(const uint8_t *p) {
+ return reuse_tracker_ && (*reuse_tracker_)[p - buf_];
+ }
+
+ void MarkVerified(const uint8_t *p) {
+ if (reuse_tracker_)
+ (*reuse_tracker_)[p - buf_] = true;
+ }
+
+ bool VerifyVector(const uint8_t *p, Type elem_type, uint8_t size_byte_width,
+ uint8_t elem_byte_width) {
+ // Any kind of nesting goes through this function, so guard against that
+ // here.
+ if (AlreadyVerified(p))
+ return true;
+ if (!VerifyBeforePointer(p, size_byte_width))
+ return false;
+ auto sized = Sized(p, size_byte_width);
+ auto num_elems = sized.size();
+ auto max_elems = SIZE_MAX / elem_byte_width;
+ if (!Check(num_elems < max_elems))
+ return false; // Protect against byte_size overflowing.
+ auto byte_size = num_elems * elem_byte_width;
+ if (!VerifyFromPointer(p, byte_size))
+ return false;
+ if (!IsInline(elem_type)) {
+ if (elem_type == FBT_NULL) {
+ // Verify type bytes after the vector.
+ if (!VerifyFromPointer(p + byte_size, num_elems)) return false;
+ auto v = Vector(p, size_byte_width);
+ for (size_t i = 0; i < num_elems; i++)
+ if (!VerifyRef(v[i])) return false;
+ } else if (elem_type == FBT_KEY) {
+ auto v = TypedVector(p, elem_byte_width, FBT_KEY);
+ for (size_t i = 0; i < num_elems; i++)
+ if (!VerifyRef(v[i])) return false;
+ } else {
+ FLATBUFFERS_ASSERT(false);
+ }
+ }
+ MarkVerified(p);
+ return true;
+ }
+
+ bool VerifyKeys(const uint8_t *p, uint8_t byte_width) {
+ // The vector part of the map has already been verified.
+ const size_t num_prefixed_fields = 3;
+ if (!VerifyBeforePointer(p, byte_width * num_prefixed_fields))
+ return false;
+ p -= byte_width * num_prefixed_fields;
+ auto off = ReadUInt64(p, byte_width);
+ if (!VerifyOffset(off, p))
+ return false;
+ auto key_byte_width =
+ static_cast<uint8_t>(ReadUInt64(p + byte_width, byte_width));
+ if (!VerifyByteWidth(key_byte_width))
+ return false;
+ return VerifyVector(p - off, FBT_KEY, key_byte_width, key_byte_width);
+ }
+
+ bool VerifyKey(const uint8_t* p) {
+ if (AlreadyVerified(p)) return true;
+ MarkVerified(p);
+ while (p < buf_ + size_)
+ if (*p++) return true;
+ return false;
+ }
+
+ bool VerifyTerminator(const String &s) {
+ return VerifyFromPointer(reinterpret_cast<const uint8_t *>(s.c_str()),
+ s.size() + 1);
+ }
+
+ bool VerifyRef(Reference r) {
+ // r.parent_width_ and r.data_ already verified.
+ if (!VerifyByteWidth(r.byte_width_) || !VerifyType(r.type_)) {
+ return false;
+ }
+ if (IsInline(r.type_)) {
+ // Inline scalars don't require further verification.
+ return true;
+ }
+ // All remaining types are an offset.
+ auto off = ReadUInt64(r.data_, r.parent_width_);
+ if (!VerifyOffset(off, r.data_))
+ return false;
+ auto p = r.Indirect();
+ switch (r.type_) {
+ case FBT_INDIRECT_INT:
+ case FBT_INDIRECT_UINT:
+ case FBT_INDIRECT_FLOAT:
+ return VerifyFromPointer(p, r.byte_width_);
+ case FBT_KEY:
+ return VerifyKey(p);
+ case FBT_MAP:
+ return VerifyVector(p, FBT_NULL, r.byte_width_, r.byte_width_) &&
+ VerifyKeys(p, r.byte_width_);
+ case FBT_VECTOR:
+ return VerifyVector(p, FBT_NULL, r.byte_width_, r.byte_width_);
+ case FBT_VECTOR_INT:
+ return VerifyVector(p, FBT_INT, r.byte_width_, r.byte_width_);
+ case FBT_VECTOR_BOOL:
+ case FBT_VECTOR_UINT:
+ return VerifyVector(p, FBT_UINT, r.byte_width_, r.byte_width_);
+ case FBT_VECTOR_FLOAT:
+ return VerifyVector(p, FBT_FLOAT, r.byte_width_, r.byte_width_);
+ case FBT_VECTOR_KEY:
+ return VerifyVector(p, FBT_KEY, r.byte_width_, r.byte_width_);
+ case FBT_VECTOR_STRING_DEPRECATED:
+ // Use of FBT_KEY here intentional, see elsewhere.
+ return VerifyVector(p, FBT_KEY, r.byte_width_, r.byte_width_);
+ case FBT_BLOB:
+ return VerifyVector(p, FBT_UINT, r.byte_width_, 1);
+ case FBT_STRING:
+ return VerifyVector(p, FBT_UINT, r.byte_width_, 1) &&
+ VerifyTerminator(String(p, r.byte_width_));
+ case FBT_VECTOR_INT2:
+ case FBT_VECTOR_UINT2:
+ case FBT_VECTOR_FLOAT2:
+ case FBT_VECTOR_INT3:
+ case FBT_VECTOR_UINT3:
+ case FBT_VECTOR_FLOAT3:
+ case FBT_VECTOR_INT4:
+ case FBT_VECTOR_UINT4:
+ case FBT_VECTOR_FLOAT4: {
+ uint8_t len = 0;
+ auto vtype = ToFixedTypedVectorElementType(r.type_, &len);
+ if (!VerifyType(vtype))
+ return false;
+ return VerifyFromPointer(p, r.byte_width_ * len);
+ }
+ default:
+ return false;
+ }
+ }
+
+ public:
+ bool VerifyBuffer() {
+ if (!Check(size_ >= 3)) return false;
+ auto end = buf_ + size_;
+ auto byte_width = *--end;
+ auto packed_type = *--end;
+ return VerifyByteWidth(byte_width) &&
+ Check(end - buf_ >= byte_width) &&
+ VerifyRef(Reference(end - byte_width, byte_width, packed_type));
+ }
+
+ private:
+ const uint8_t *buf_;
+ size_t size_;
+ std::vector<bool> *reuse_tracker_;
+};
+
+// Utility function that constructs the Verifier for you, see above for parameters.
+inline bool VerifyBuffer(const uint8_t *buf, size_t buf_len, std::vector<bool> *reuse_tracker) {
+ Verifier verifier(buf, buf_len, reuse_tracker);
+ return verifier.VerifyBuffer();
+}
+
+
+#ifdef FLATBUFFERS_H_
+// This is a verifier utility function that works together with the
+// FlatBuffers verifier. It is only available if flatbuffers.h
+// has been included (which it typically is in generated code).
+inline bool VerifyNestedFlexBuffer(const flatbuffers::Vector<uint8_t> *nv,
+ flatbuffers::Verifier &verifier) {
+ if (!nv) return true;
+ return verifier.Check(
+ flexbuffers::VerifyBuffer(nv->data(), nv->size(),
+ &verifier.GetReuseVector()));
+}
+#endif
+
} // namespace flexbuffers
#if defined(_MSC_VER)
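For orientation, a minimal usage sketch of the new standalone API (the `ReadUntrusted` helper and its arguments are placeholders, not part of this change): verify the raw bytes first, then walk the root only if verification succeeds.

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

#include "flatbuffers/flexbuffers.h"

// Sketch: verify an untrusted FlexBuffer before reading it.
bool ReadUntrusted(const uint8_t *data, size_t size) {
  // Optional reuse tracker (~1/8th of the buffer size in memory); it guards
  // against maliciously shared/recursive offsets. Pass nullptr to skip it.
  std::vector<bool> reuse_tracker;
  if (!flexbuffers::VerifyBuffer(data, size, &reuse_tracker)) return false;
  auto root = flexbuffers::GetRoot(data, size);  // now safe to traverse
  return root.IsMap();
}
```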
diff --git a/include/flatbuffers/verifier.h b/include/flatbuffers/verifier.h
index b6971c1d..5198dcce 100644
--- a/include/flatbuffers/verifier.h
+++ b/include/flatbuffers/verifier.h
@@ -254,6 +254,8 @@ class Verifier FLATBUFFERS_FINAL_CLASS {
// clang-format on
}
+ std::vector<bool> &GetReuseVector() { return reuse_tracker_; }
+
private:
const uint8_t *buf_;
size_t size_;
@@ -263,6 +265,9 @@ class Verifier FLATBUFFERS_FINAL_CLASS {
uoffset_t max_tables_;
mutable size_t upper_bound_;
bool check_alignment_;
+ // This is here for nested FlexBuffers, cheap if not touched.
+ // TODO: allow user to supply memory for this.
+ std::vector<bool> reuse_tracker_;
};
} // namespace flatbuffers
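To see how the two verifiers cooperate, here is a sketch of the end-to-end call (assuming the generated `monster_test_generated.h` from this repository's tests; the buffer arguments are placeholders). The generated `Monster::Verify()` now calls `flexbuffers::VerifyNestedFlexBuffer(flex(), verifier)`, which borrows `verifier.GetReuseVector()` as the FlexBuffers reuse tracker, so one outer verification pass covers the nested FlexBuffer as well.

```cpp
#include "flatbuffers/flatbuffers.h"
#include "monster_test_generated.h"

// Sketch: verifying the outer FlatBuffer now also verifies the nested
// FlexBuffer stored in the Monster's `flex` field.
bool VerifyMonsterWithNestedFlex(const uint8_t *buf, size_t len) {
  flatbuffers::Verifier verifier(buf, len);
  return MyGame::Example::VerifyMonsterBuffer(verifier);
}
```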
diff --git a/src/flatc.cpp b/src/flatc.cpp
index fec0584d..f278dab7 100644
--- a/src/flatc.cpp
+++ b/src/flatc.cpp
@@ -510,9 +510,12 @@ int FlatCompiler::Compile(int argc, const char **argv) {
LoadBinarySchema(*parser.get(), filename, contents);
} else if (opts.use_flexbuffers) {
if (opts.lang_to_generate == IDLOptions::kJson) {
- parser->flex_root_ = flexbuffers::GetRoot(
- reinterpret_cast<const uint8_t *>(contents.c_str()),
- contents.size());
+ auto data = reinterpret_cast<const uint8_t *>(contents.c_str());
+ auto size = contents.size();
+ std::vector<bool> reuse_tracker;
+ if (!flexbuffers::VerifyBuffer(data, size, &reuse_tracker))
+ Error("flexbuffers file failed to verify: " + filename, false);
+ parser->flex_root_ = flexbuffers::GetRoot(data, size);
} else {
parser->flex_builder_.Clear();
ParseFile(*parser.get(), filename, contents, include_directories);
diff --git a/src/idl_gen_cpp.cpp b/src/idl_gen_cpp.cpp
index 39d09786..df124e5a 100644
--- a/src/idl_gen_cpp.cpp
+++ b/src/idl_gen_cpp.cpp
@@ -2006,6 +2006,9 @@ class CppGenerator : public BaseGenerator {
// FIXME: file_identifier.
code_ += "{{PRE}}verifier.VerifyNestedFlatBuffer<{{CPP_NAME}}>"
"({{NAME}}(), nullptr)\\";
+ } else if (field.flexbuffer) {
+ code_ += "{{PRE}}flexbuffers::VerifyNestedFlexBuffer"
+ "({{NAME}}(), verifier)\\";
}
break;
}
diff --git a/src/idl_gen_text.cpp b/src/idl_gen_text.cpp
index f32243c8..805e9343 100644
--- a/src/idl_gen_text.cpp
+++ b/src/idl_gen_text.cpp
@@ -265,6 +265,12 @@ struct JsonPrinter {
val = reinterpret_cast<const Struct *>(table)->GetStruct<const void *>(
fd.value.offset);
} else if (fd.flexbuffer && opts.json_nested_flexbuffers) {
+ // We could verify this FlexBuffer before access, but since it sits
+ // inside a FlatBuffer that we don't know whether it has been verified
+ // or not, there is little point in making this part safer than the
+ // parent: the caller should really be verifying the whole buffer.
+ // If the whole buffer is corrupt, we likely crash before we even get
+ // here.
auto vec = table->GetPointer<const Vector<uint8_t> *>(fd.value.offset);
auto root = flexbuffers::GetRoot(vec->data(), vec->size());
root.ToString(true, opts.strict_json, text);
diff --git a/tests/cpp17/generated_cpp17/monster_test_generated.h b/tests/cpp17/generated_cpp17/monster_test_generated.h
index c2fdf76f..10228db8 100644
--- a/tests/cpp17/generated_cpp17/monster_test_generated.h
+++ b/tests/cpp17/generated_cpp17/monster_test_generated.h
@@ -1740,6 +1740,7 @@ struct Monster FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
verifier.VerifyVector(testarrayofsortedstruct()) &&
VerifyOffset(verifier, VT_FLEX) &&
verifier.VerifyVector(flex()) &&
+ flexbuffers::VerifyNestedFlexBuffer(flex(), verifier) &&
VerifyOffset(verifier, VT_TEST5) &&
verifier.VerifyVector(test5()) &&
VerifyOffset(verifier, VT_VECTOR_OF_LONGS) &&
diff --git a/tests/fuzzer/CMakeLists.txt b/tests/fuzzer/CMakeLists.txt
index 74dcc117..9cc5a5fe 100644
--- a/tests/fuzzer/CMakeLists.txt
+++ b/tests/fuzzer/CMakeLists.txt
@@ -53,7 +53,7 @@ target_compile_options(
fuzzer_config
INTERFACE
$<$<NOT:$<BOOL:${OSS_FUZZ}>>:
- -fsanitize-coverage=edge,trace-cmp
+ -fsanitize-coverage=trace-cmp
>
$<$<BOOL:${USE_ASAN}>:
-fsanitize=fuzzer,undefined,address
@@ -146,6 +146,9 @@ target_link_libraries(parser_fuzzer PRIVATE flatbuffers_fuzzed)
add_executable(verifier_fuzzer flatbuffers_verifier_fuzzer.cc)
target_link_libraries(verifier_fuzzer PRIVATE flatbuffers_fuzzed)
+add_executable(flexverifier_fuzzer flexbuffers_verifier_fuzzer.cc)
+target_link_libraries(flexverifier_fuzzer PRIVATE flatbuffers_fuzzed)
+
add_executable(monster_fuzzer flatbuffers_monster_fuzzer.cc)
target_link_libraries(monster_fuzzer PRIVATE flatbuffers_fuzzed)
add_custom_command(
diff --git a/tests/fuzzer/flexbuffers_verifier_fuzzer.cc b/tests/fuzzer/flexbuffers_verifier_fuzzer.cc
new file mode 100644
index 00000000..3853db83
--- /dev/null
+++ b/tests/fuzzer/flexbuffers_verifier_fuzzer.cc
@@ -0,0 +1,14 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+#include <stddef.h>
+#include <stdint.h>
+#include <string>
+
+#include "flatbuffers/flexbuffers.h"
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+ std::vector<bool> reuse_tracker;
+ flexbuffers::VerifyBuffer(data, size, &reuse_tracker);
+ return 0;
+}
diff --git a/tests/fuzzer/readme.md b/tests/fuzzer/readme.md
index b2c7db41..1d0b392c 100644
--- a/tests/fuzzer/readme.md
+++ b/tests/fuzzer/readme.md
@@ -10,10 +10,17 @@ For details about **libFuzzer** see: https://llvm.org/docs/LibFuzzer.html
To build and run these tests LLVM compiler (with clang frontend) and CMake should be installed before.
-The fuzzer section include three tests:
+The fuzzer section includes four tests:
- `verifier_fuzzer` checks stability of deserialization engine for `Monster` schema;
- `parser_fuzzer` checks stability of schema and json parser under various inputs;
- `scalar_parser` focused on validation of the parser while parse numeric scalars in schema and/or json files;
+- `flexverifier_fuzzer` checks stability of deserialization engine for FlexBuffers only;
+
+## Build
+```sh
+cd tests/fuzzer
+CC=clang CXX=clang++ cmake . -DCMAKE_BUILD_TYPE=Debug -DUSE_ASAN=ON
+```
## Run tests with a specific locale
The grammar of the Flatbuffers library is based on printable-ASCII characters.
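Once configured as above, one possible way to build and exercise the new target (the corpus directory and flags below are illustrative, not part of this change):

```sh
cmake --build . --target flexverifier_fuzzer
mkdir -p flex_corpus
./flexverifier_fuzzer -max_len=4096 flex_corpus
```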
diff --git a/tests/monster_test_generated.h b/tests/monster_test_generated.h
index cec75234..459c5ee0 100644
--- a/tests/monster_test_generated.h
+++ b/tests/monster_test_generated.h
@@ -1680,6 +1680,7 @@ struct Monster FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
verifier.VerifyVector(testarrayofsortedstruct()) &&
VerifyOffset(verifier, VT_FLEX) &&
verifier.VerifyVector(flex()) &&
+ flexbuffers::VerifyNestedFlexBuffer(flex(), verifier) &&
VerifyOffset(verifier, VT_TEST5) &&
verifier.VerifyVector(test5()) &&
VerifyOffset(verifier, VT_VECTOR_OF_LONGS) &&
diff --git a/tests/test.cpp b/tests/test.cpp
index 17c787d5..1e6e9d3a 100644
--- a/tests/test.cpp
+++ b/tests/test.cpp
@@ -3022,6 +3022,10 @@ void FlexBuffersTest() {
#endif
// clang-format on
+ std::vector<bool> reuse_tracker;
+ TEST_EQ(flexbuffers::VerifyBuffer(slb.GetBuffer().data(), slb.GetBuffer().size(),
+ &reuse_tracker), true);
+
auto map = flexbuffers::GetRoot(slb.GetBuffer()).AsMap();
TEST_EQ(map.size(), 7);
auto vec = map["vec"].AsVector();
@@ -3079,6 +3083,8 @@ void FlexBuffersTest() {
slb.Clear();
auto jsontest = "{ a: [ 123, 456.0 ], b: \"hello\", c: true, d: false }";
TEST_EQ(parser.ParseFlexBuffer(jsontest, nullptr, &slb), true);
+ TEST_EQ(flexbuffers::VerifyBuffer(slb.GetBuffer().data(), slb.GetBuffer().size(),
+ &reuse_tracker), true);
auto jroot = flexbuffers::GetRoot(slb.GetBuffer());
auto jmap = jroot.AsMap();
auto jvec = jmap["a"].AsVector();
@@ -3116,6 +3122,8 @@ void FlexBuffersFloatingPointTest() {
"{ a: [1.0, nan, inf, infinity, -inf, +inf, -infinity, 8.0] }";
TEST_EQ(parser.ParseFlexBuffer(jsontest, nullptr, &slb), true);
auto jroot = flexbuffers::GetRoot(slb.GetBuffer());
+ TEST_EQ(flexbuffers::VerifyBuffer(slb.GetBuffer().data(), slb.GetBuffer().size(),
+ nullptr), true);
auto jmap = jroot.AsMap();
auto jvec = jmap["a"].AsVector();
TEST_EQ(8, jvec.size());
@@ -3159,6 +3167,9 @@ void FlexBuffersDeprecatedTest() {
// same way, the fix lies on the reading side.
slb.EndVector(start, true, false);
slb.Finish();
+ // Verify because why not.
+ TEST_EQ(flexbuffers::VerifyBuffer(slb.GetBuffer().data(), slb.GetBuffer().size(),
+ nullptr), true);
// So now lets read this data back.
// For existing data, since we have no way of knowing what the actual
// bit-width of the size field of the string is, we are going to ignore this