diff options
author | Tyutyunkov Vyacheslav <tve@softmotions.com> | 2015-07-13 17:39:47 +0600 |
---|---|---|
committer | Tyutyunkov Vyacheslav <tve@softmotions.com> | 2015-07-13 17:39:47 +0600 |
commit | 0e05906911a3e739262ceb75fba44ebe9a726fd4 (patch) | |
tree | 5e06442dbe3331a27203e0b07a66a39df24bb98c | |
parent | 5cc701a95c261840c75075a00c2908896d1f9fb8 (diff) | |
download | ejdb-0e05906911a3e739262ceb75fba44ebe9a726fd4.tar.gz ejdb-0e05906911a3e739262ceb75fba44ebe9a726fd4.tar.bz2 ejdb-0e05906911a3e739262ceb75fba44ebe9a726fd4.zip |
#148 - fix duplicate paths in bson
-rw-r--r-- | src/bson/CMakeLists.txt | 0 | ||||
-rw-r--r-- | src/bson/bson.c | 149 | ||||
-rw-r--r-- | src/bson/bson.h | 67 | ||||
-rw-r--r-- | src/bson/bsontest.c | 0 | ||||
-rw-r--r-- | src/bson/tests/CMakeLists.txt | 14 | ||||
-rw-r--r-- | src/bson/tests/bsontest.c | 183 | ||||
-rw-r--r-- | src/ejdb/tests/ejdbtest2.c | 76 |
7 files changed, 488 insertions, 1 deletions
diff --git a/src/bson/CMakeLists.txt b/src/bson/CMakeLists.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/bson/CMakeLists.txt
diff --git a/src/bson/bson.c b/src/bson/bson.c
index d16512f..6e9feef 100644
--- a/src/bson/bson.c
+++ b/src/bson/bson.c
@@ -1621,6 +1621,19 @@ int bson_merge_fieldpaths(const void *bsdata1, const void *bsdata2, bson *out) {
         }
     }
     tcmapdel(mfields);
+
+    if (!out->err) {
+        // check duplicate paths
+        bson_finish(out);
+        if (bson_check_duplicate_keys(out)) {
+            bson bstmp;
+            bson_copy(&bstmp, out);
+            bson_destroy(out);
+            bson_init(out);
+            bson_fix_duplicate_keys(&bstmp, out);
+            bson_destroy(&bstmp);
+        }
+    }
     return out->err;
 }
 
@@ -1724,6 +1737,142 @@ int bson_merge_recursive(const bson *b1, const bson *b2, bson_bool_t overwrite,
     return bson_merge_recursive2(bson_data(b1), bson_data(b2), overwrite, out);
 }
 
+static bson_bool_t _bson_check_duplicate_keys(bson_iterator *it) { // true if the level walked by `it`, or any container nested in it, repeats a key
+    bson_iterator it2;
+    bson_type bt, bt2;
+    while ((bt = bson_iterator_next(it)) != BSON_EOO) {
+        BSON_ITERATOR_CLONE(it, &it2); // compare only against the siblings that follow the current field
+        while((bt2 = bson_iterator_next(&it2)) != BSON_EOO) {
+            if (!strcmp(BSON_ITERATOR_KEY(it), BSON_ITERATOR_KEY(&it2))) {
+                return true;
+            }
+        }
+        if (bt == BSON_OBJECT || bt == BSON_ARRAY) { // containers are checked recursively
+            BSON_ITERATOR_SUBITERATOR(it, &it2);
+            if (_bson_check_duplicate_keys(&it2)) {
+                return true;
+            }
+        }
+    }
+
+    return false;
+}
+
+bson_bool_t bson_check_duplicate_keys(const bson *bs) {
+    bson_iterator it;
+    BSON_ITERATOR_INIT(&it, bs);
+    return _bson_check_duplicate_keys(&it);
+}
+
+static void _bson_fix_duplicate_keys(bson_iterator *it, bson *bso) { // appends the level walked by `it` to `bso`, emitting every key once
+    bson_iterator it2;
+    bson_type bt, bt2;
+
+    TCMAP *keys = tcmapnew(); // keys already emitted at this level
+    while((bt = bson_iterator_next(it)) != BSON_EOO) {
+        if (NULL != tcmapget2(keys, BSON_ITERATOR_KEY(it))) {
+            continue; // later occurrence: already handled together with the first one
+        }
+        tcmapput2(keys, BSON_ITERATOR_KEY(it), BSON_ITERATOR_KEY(it));
+
+        TCLIST *dups = tclistnew(); // offsets, relative to it->cur, of the occurrences that will be emitted
+        off_t itoff = 0;
+        tclistpush(dups, &itoff, sizeof(itoff));
+
+        BSON_ITERATOR_CLONE(it, &it2);
+        while((bt2 = bson_iterator_next(&it2)) != BSON_EOO) {
+            if (!strcmp(BSON_ITERATOR_KEY(it), BSON_ITERATOR_KEY(&it2))) {
+                bt2 = BSON_ITERATOR_TYPE(&it2);
+                if (bt != bt2 || (bt != BSON_OBJECT && bt != BSON_ARRAY)) {
+                    tclistclear(dups); // type changed or value is not a container: drop the earlier occurrences
+                    bt = bt2;
+                }
+                itoff = it2.cur - it->cur;
+                tclistpush(dups, &itoff, sizeof(itoff));
+            }
+        }
+
+        const char *buf;
+        int bufsz;
+
+        buf = tclistval(dups, TCLISTNUM(dups) - 1, &bufsz); // the last retained occurrence decides the output type
+        memcpy(&itoff, buf, sizeof(itoff));
+        it2.cur = it->cur + itoff;
+        it2.first = itoff == 0 ? it->first : 0;
+
+        bt2 = BSON_ITERATOR_TYPE(&it2);
+        if (bt2 == BSON_OBJECT) {
+            bson bst; // temporary object holding the fields of all retained occurrences
+            bson_init(&bst);
+            int j = -1;
+            while(++j < TCLISTNUM(dups)) {
+                buf = tclistval(dups, j, &bufsz);
+                memcpy(&itoff, buf, sizeof(itoff));
+                it2.cur = it->cur + itoff;
+                it2.first = itoff == 0 ? it->first : 0;
+
+                bson_iterator sit;
+                BSON_ITERATOR_SUBITERATOR(&it2, &sit);
+                while(bson_iterator_next(&sit) != BSON_EOO){
+                    bson_append_field_from_iterator(&sit, &bst);
+                }
+            }
+            bson_finish(&bst);
+
+            bson_append_start_object(bso, BSON_ITERATOR_KEY(it));
+            BSON_ITERATOR_INIT(&it2, &bst);
+            _bson_fix_duplicate_keys(&it2, bso); // the combined fields may themselves repeat keys
+            bson_append_finish_object(bso);
+            bson_destroy(&bst); // the merged copy has been written to bso; release it
+        } else if (bt2 == BSON_ARRAY) {
+            char ibuf[TCNUMBUFSIZ];
+            memset(ibuf, '\0', TCNUMBUFSIZ);
+
+            bson_append_start_array(bso, BSON_ITERATOR_KEY(it));
+            int ind = 0; // running index: merged arrays are renumbered from 0
+            int j = -1;
+            while(++j < TCLISTNUM(dups)) {
+                buf = tclistval(dups, j, &bufsz); // visit each retained occurrence in turn, not only the last one
+                memcpy(&itoff, buf, sizeof(itoff));
+                it2.cur = it->cur + itoff;
+                it2.first = itoff == 0 ? it->first : 0;
+
+                bson_iterator sit, sit2;
+                bson_type sbt;
+                BSON_ITERATOR_SUBITERATOR(&it2, &sit);
+                while((sbt = bson_iterator_next(&sit)) != BSON_EOO) {
+                    bson_numstrn(ibuf, TCNUMBUFSIZ, ind++);
+                    if (sbt == BSON_OBJECT) {
+                        bson_append_start_object(bso, ibuf);
+                        BSON_ITERATOR_SUBITERATOR(&sit, &sit2);
+                        _bson_fix_duplicate_keys(&sit2, bso);
+                        bson_append_finish_object(bso);
+                    } else if(sbt == BSON_ARRAY) {
+                        bson_append_start_array(bso, ibuf);
+                        BSON_ITERATOR_SUBITERATOR(&sit, &sit2);
+                        _bson_fix_duplicate_keys(&sit2, bso);
+                        bson_append_finish_array(bso);
+                    } else {
+                        bson_append_field_from_iterator2(ibuf, &sit, bso);
+                    }
+                }
+            }
+            bson_append_finish_array(bso);
+        } else {
+            bson_append_field_from_iterator(&it2, bso); // plain value: the last occurrence is copied as is
+        }
+        tclistdel(dups);
+    }
+    tcmapdel(keys);
+}
+
+void bson_fix_duplicate_keys(const bson *bsi, bson *bso) {
+    bson_iterator it;
+
+    BSON_ITERATOR_INIT(&it, bsi);
+    _bson_fix_duplicate_keys(&it, bso);
+}
+
 typedef struct {
     int nstack; //nested object stack pos
     int matched; //number of matched include fields
diff --git a/src/bson/bson.h b/src/bson/bson.h
index f7403f7..662af64 100644
--- a/src/bson/bson.h
+++ b/src/bson/bson.h
@@ -148,6 +148,9 @@ EJDB_EXPORT const char* bson_first_errormsg(bson *bson);
     (_bs_I)->cur = (_bs)->data + 4; \
     (_bs_I)->first = 1;
 
+#define BSON_ITERATOR_CLONE(_bs_I_S, _bs_I_T) \
+    (_bs_I_T)->cur = (_bs_I_S)->cur; \
+    (_bs_I_T)->first = (_bs_I_S)->first;
 
 /* --------------------------------
    READING
@@ -1008,6 +1011,70 @@ EJDB_EXPORT int bson_append_finish_object(bson *b);
  */
 EJDB_EXPORT int bson_append_finish_array(bson *b);
 
+EJDB_EXPORT int bson_merge_recursive(const bson *b1, const bson *b2, bson_bool_t overwrite, bson *out);
+
+/**
+ * Check duplicate keys
+ * @return true if bson contains duplicate keys
+ */
+EJDB_EXPORT bson_bool_t bson_check_duplicate_keys(const bson *bs);
+
+/**
+ * Remove duplicate keys from bson:
+ * - merge objects and arrays with same key:
+ *   { a : { b : "value 1" }, a : { c : "value 2" } }
-> { a : { b : "value 1", c : "value 2" } }
+ * - keep last value for non object and non array values
+ *   { a : "value 1", a : "value 2" } -> { a : "value 2" }
+ *
+ * Example:
+ * {
+ *     a : {
+ *         b : 1,
+ *         c : "c"
+ *     },
+ *     b : NULL,
+ *     c : [
+ *         {
+ *             a : 1,
+ *             b : 2,
+ *             a : 0
+ *         },
+ *         {
+ *             a : 0,
+ *             b : 1,
+ *             c : 3
+ *         }
+ *     ],
+ *     a : {
+ *         d : 0,
+ *         c : 1
+ *     }
+ * }
+ *
+ * =>
+ *
+ * {
+ *     a : {
+ *         b : 1,
+ *         c : 1,
+ *         d : 0
+ *     },
+ *     b : NULL,
+ *     c : [
+ *         {
+ *             a : 0,
+ *             b : 2
+ *         },
+ *         {
+ *             a : 0,
+ *             b : 1,
+ *             c : 3
+ *         }
+ *     ]
+ * }
+ */
+EJDB_EXPORT void bson_fix_duplicate_keys(const bson *bsi, bson *bso);
+
 EJDB_EXPORT void bson_numstr(char *str, int64_t i);
 EJDB_EXPORT int bson_numstrn(char *str, int maxbuf, int64_t i);
 
diff --git a/src/bson/bsontest.c b/src/bson/bsontest.c
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/bson/bsontest.c
diff --git a/src/bson/tests/CMakeLists.txt b/src/bson/tests/CMakeLists.txt
new file mode 100644
index 0000000..2096305
--- /dev/null
+++ b/src/bson/tests/CMakeLists.txt
@@ -0,0 +1,14 @@
+link_libraries(ejdb_p ${CUNIT_LIBRARIES})
+include_directories(${CUNIT_INCLUDE_DIRS})
+
+set(TEST_DATA_DIR ${CMAKE_CURRENT_BINARY_DIR})
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${TEST_DATA_DIR})
+
+add_executable(bsontest bsontest.c)
+set_target_properties(bsontest
+                      PROPERTIES
+                      COMPILE_FLAGS "-DEJDB_STATIC")
+add_test(NAME bsontest
+         WORKING_DIRECTORY ${TEST_DATA_DIR}
+         COMMAND ${TEST_TOOL_CMD} $<TARGET_FILE:bsontest>)
+
diff --git a/src/bson/tests/bsontest.c b/src/bson/tests/bsontest.c
new file mode 100644
index 0000000..935c3da
--- /dev/null
+++ b/src/bson/tests/bsontest.c
@@ -0,0 +1,183 @@
+#include "myconf.h"
+#include "bson.h"
+#include "CUnit/Basic.h"
+
+
+/*
+ * CUnit Test Suite
+ */
+
+int init_suite(void) {
+    return 0;
+}
+
+int clean_suite(void) {
+    return 0;
+}
+
+void testCheckDuplicates(void) {
+    bson bs, bs2;
+    bson_iterator it;
+    bson_type bt;
+
+    bson_init(&bs); // scenario 1: "a" and "d" reappear only inside the nested object "d" -> no duplicates
+    bson_append_string(&bs, "a", "a");
+    bson_append_int(&bs, "b", 2);
+    bson_append_null(&bs, "c");
+    bson_append_start_object(&bs, "d");
+    bson_append_string(&bs, "a", "a");
+    bson_append_int(&bs, "e", 0);
+    bson_append_int(&bs, "d", 1);
+    bson_append_finish_object(&bs);
+    bson_finish(&bs);
+    CU_ASSERT_FALSE_FATAL(bs.err);
+
+    CU_ASSERT_FALSE(bson_check_duplicate_keys(&bs));
+
+    bson_destroy(&bs);
+
+    bson_init(&bs); // scenario 2: "e" appears twice inside "d"
+    bson_append_string(&bs, "a", "a");
+    bson_append_int(&bs, "b", 2);
+    bson_append_null(&bs, "c");
+    bson_append_start_object(&bs, "d");
+    bson_append_string(&bs, "a", "a");
+    bson_append_int(&bs, "e", 0);
+    bson_append_int(&bs, "e", 1);
+    bson_append_finish_object(&bs);
+    bson_finish(&bs);
+    CU_ASSERT_FALSE_FATAL(bs.err);
+
+    CU_ASSERT_TRUE(bson_check_duplicate_keys(&bs));
+
+    bson_init(&bs2);
+    bson_fix_duplicate_keys(&bs, &bs2);
+    bson_finish(&bs2);
+
+    CU_ASSERT_FALSE(bson_check_duplicate_keys(&bs2));
+    BSON_ITERATOR_INIT(&it, &bs2);
+    bt = bson_find_fieldpath_value("d.e", &it);
+    CU_ASSERT_TRUE(BSON_IS_NUM_TYPE(bt));
+    CU_ASSERT_EQUAL(bson_iterator_int(&it), 1); // the later value of "e" is the one kept
+
+    bson_destroy(&bs2);
+
+    bson_init(&bs); // scenario 3: array of objects without duplicates. NOTE(review): bs from scenario 2 was not destroyed before this re-init
+    bson_append_string(&bs, "a", "a");
+    bson_append_int(&bs, "b", 2);
+    bson_append_null(&bs, "c");
+    bson_append_start_object(&bs, "d");
+    bson_append_string(&bs, "a", "a");
+    bson_append_int(&bs, "e", 0);
+    bson_append_int(&bs, "d", 1);
+    bson_append_finish_object(&bs);
+    bson_append_start_array(&bs, "f");
+    bson_append_start_object(&bs, "0");
+    bson_append_string(&bs, "a", "a");
+    bson_append_string(&bs, "b", "b");
+    bson_append_int(&bs, "c", 1);
+    bson_append_finish_object(&bs);
+    bson_append_start_object(&bs, "1");
+    bson_append_string(&bs, "a", "a");
+    bson_append_string(&bs, "b", "b");
+    bson_append_int(&bs, "c", 1);
+    bson_append_finish_object(&bs);
+    bson_append_finish_array(&bs);
+    bson_finish(&bs);
+    CU_ASSERT_FALSE_FATAL(bs.err);
+
+    CU_ASSERT_FALSE(bson_check_duplicate_keys(&bs));
+
+    bson_init(&bs); // scenario 4: duplicates at the top level ("a", "d") and inside "f.0". NOTE(review): bs from scenario 3 was not destroyed before this re-init
+    bson_append_string(&bs, "a", "a");
+    bson_append_int(&bs, "b", 2);
+    bson_append_null(&bs, "c");
+    bson_append_start_object(&bs, "d");
+    bson_append_string(&bs, "a", "a");
+    bson_append_int(&bs, "e", 0);
+    bson_append_int(&bs, "d", 1);
+    bson_append_start_object(&bs, "q");
+    bson_append_int(&bs, "w", 0);
+    bson_append_finish_object(&bs);
+    bson_append_finish_object(&bs);
+    bson_append_start_array(&bs, "f");
+    bson_append_start_object(&bs, "0");
+    bson_append_string(&bs, "a", "a");
+    bson_append_string(&bs, "b", "b");
+    bson_append_int(&bs, "a", 1);
+    bson_append_finish_object(&bs);
+    bson_append_start_object(&bs, "1");
+    bson_append_string(&bs, "a", "a");
+    bson_append_string(&bs, "b", "b");
+    bson_append_int(&bs, "c", 1);
+    bson_append_finish_object(&bs);
+    bson_append_finish_array(&bs);
+    bson_append_start_object(&bs, "a");
+    bson_append_finish_object(&bs);
+    bson_append_start_object(&bs, "d");
+    bson_append_start_object(&bs, "q");
+    bson_append_int(&bs, "e", 1);
+    bson_append_finish_object(&bs);
+    bson_append_finish_object(&bs);
+    bson_finish(&bs);
+    CU_ASSERT_FALSE_FATAL(bs.err);
+
+    CU_ASSERT_TRUE(bson_check_duplicate_keys(&bs));
+
+    bson_init(&bs2);
+    bson_fix_duplicate_keys(&bs, &bs2);
+    bson_finish(&bs2);
+
+    CU_ASSERT_FALSE(bson_check_duplicate_keys(&bs2));
+    BSON_ITERATOR_INIT(&it, &bs2);
+    bt = bson_find_fieldpath_value("f.0.a", &it);
+    CU_ASSERT_TRUE(BSON_IS_NUM_TYPE(bt));
+    CU_ASSERT_EQUAL(bson_iterator_int(&it), 1);
+    BSON_ITERATOR_INIT(&it, &bs2);
+    bt = bson_find_fieldpath_value("f.1.a", &it);
+    CU_ASSERT_TRUE(BSON_IS_STRING_TYPE(bt));
+    CU_ASSERT_FALSE(strcmp(bson_iterator_string(&it), "a"));
+
+    BSON_ITERATOR_INIT(&it, &bs2);
+    bt = bson_find_fieldpath_value("a", &it);
+    CU_ASSERT_EQUAL(bt, BSON_OBJECT); // the later object-typed "a" replaces the earlier string "a"
+
+    BSON_ITERATOR_INIT(&it, &bs2);
+    bt = bson_find_fieldpath_value("d.q.w", &it); // both "d" objects, and their "q" members, are expected to be merged
+    CU_ASSERT_TRUE(BSON_IS_NUM_TYPE(bt));
+    CU_ASSERT_EQUAL(bson_iterator_int(&it), 0);
+    BSON_ITERATOR_INIT(&it, &bs2);
+    bt = bson_find_fieldpath_value("d.q.e", &it);
+    CU_ASSERT_TRUE(BSON_IS_NUM_TYPE(bt));
+    CU_ASSERT_EQUAL(bson_iterator_int(&it), 1);
+}
+
+int main() {
+    setlocale(LC_ALL, "en_US.UTF-8");
+    CU_pSuite pSuite = NULL;
+
+    /* Initialize the CUnit test registry */
+    if (CUE_SUCCESS != CU_initialize_registry())
+        return CU_get_error();
+
+    /* Add a suite to the registry */
+    pSuite = CU_add_suite("bsontest", init_suite, clean_suite);
+    if (NULL == pSuite) {
+        CU_cleanup_registry();
+        return CU_get_error();
+    }
+
+    /* Add the tests to the suite */
+    if ((NULL == CU_add_test(pSuite, "testCheckDuplicates", testCheckDuplicates))
+            ) {
+        CU_cleanup_registry();
+        return CU_get_error();
+    }
+
+    /* Run all tests using the CUnit Basic interface */
+    CU_basic_set_mode(CU_BRM_VERBOSE);
+    CU_basic_run_tests();
+    int ret = CU_get_error() || CU_get_number_of_failures(); // non-zero when the framework reported an error or any test failed
+    CU_cleanup_registry();
+    return ret;
+}
diff --git a/src/ejdb/tests/ejdbtest2.c b/src/ejdb/tests/ejdbtest2.c
index e46fe48..51e0c14 100644
--- a/src/ejdb/tests/ejdbtest2.c
+++ b/src/ejdb/tests/ejdbtest2.c
@@ -6030,6 +6030,79 @@ void testTicket117(void) {
     tcxstrdel(log);
 }
 
+void testTicket148(void) {
+    EJCOLL *coll = ejdbcreatecoll(jb, "ticket148", NULL);
+    CU_ASSERT_PTR_NOT_NULL_FATAL(coll);
+
+    bson bs;
+    bson_oid_t oid;
+
+    bson_init(&bs); // save one empty document to be updated below
+    bson_finish(&bs);
+    CU_ASSERT_FALSE_FATAL(bs.err);
+    CU_ASSERT_TRUE_FATAL(ejdbsavebson(coll, &bs, &oid));
+    bson_destroy(&bs);
+
+    bson bsq;
+    bson_init_as_query(&bsq);
+    bson_append_start_object(&bsq, "$set"); // two field paths sharing the "info.name" prefix
+    bson_append_int(&bsq, "info.name.par.age", 40);
+    bson_append_int(&bsq, "info.name.mot.age", 35);
+    bson_append_finish_object(&bsq);
+    bson_finish(&bsq);
+    CU_ASSERT_FALSE_FATAL(bsq.err);
+
+    uint32_t count = ejdbupdate(coll, &bsq, 0, 0, 0, 0);
+    bson_destroy(&bsq);
+    CU_ASSERT_EQUAL(count, 1);
+
+    bson_init_as_query(&bsq);
+    bson_finish(&bsq);
+    EJQ *q1 = ejdbcreatequery(jb, &bsq, NULL, 0, NULL);
+    bson_destroy(&bsq);
+    CU_ASSERT_PTR_NOT_NULL_FATAL(q1);
+    TCLIST *q1res = ejdbqryexecute(coll, q1, &count, 0, NULL); // NOTE(review): neither q1 nor q1res is released later in this test
+    CU_ASSERT_EQUAL(TCLISTNUM(q1res), 1);
+
+    void *bsdata = TCLISTVALPTR(q1res, 0);
+    CU_ASSERT_PTR_NOT_NULL_FATAL(bsdata);
+
+    bson_iterator it, sit;
+    bson_type bt;
+    BSON_ITERATOR_FROM_BUFFER(&it, bsdata);
+    while((bt = bson_iterator_next(&it)) != BSON_EOO) { // skip leading OID fields
+        if (bt == BSON_OID) {
+            continue;
+        }
+        break;
+    }
+    CU_ASSERT_EQUAL(bt, BSON_OBJECT);
+    CU_ASSERT_FALSE(strcmp(BSON_ITERATOR_KEY(&it), "info"));
+
+    BSON_ITERATOR_SUBITERATOR(&it, &sit);
+    bt = bson_iterator_next(&sit);
+    CU_ASSERT_EQUAL(bt, BSON_OBJECT);
+    CU_ASSERT_FALSE(strcmp(BSON_ITERATOR_KEY(&sit), "name"));
+
+    BSON_ITERATOR_SUBITERATOR(&sit, &sit); // descends in place: source and target are the same iterator
+    bt = bson_iterator_next(&sit);
+    CU_ASSERT_EQUAL(bt, BSON_OBJECT);
+    CU_ASSERT_FALSE(strcmp(BSON_ITERATOR_KEY(&sit) , "par") && strcmp(BSON_ITERATOR_KEY(&sit) , "mot")); // key must be "par" or "mot"
+    bt = bson_iterator_next(&sit);
+    CU_ASSERT_EQUAL(bt, BSON_OBJECT);
+    CU_ASSERT_FALSE(strcmp(BSON_ITERATOR_KEY(&sit) , "par") && strcmp(BSON_ITERATOR_KEY(&sit) , "mot")); // key must be "par" or "mot"
+
+    BSON_ITERATOR_FROM_BUFFER(&it, bsdata);
+    bt = bson_find_fieldpath_value("info.name.par.age", &it);
+    CU_ASSERT_TRUE(BSON_IS_NUM_TYPE(bt));
+    CU_ASSERT_EQUAL(bson_iterator_int(&it), 40);
+
+    BSON_ITERATOR_FROM_BUFFER(&it, bsdata);
+    bt = bson_find_fieldpath_value("info.name.mot.age", &it);
+    CU_ASSERT_TRUE(BSON_IS_NUM_TYPE(bt));
+    CU_ASSERT_EQUAL(bson_iterator_int(&it), 35);
+}
+
 int main() {
     setlocale(LC_ALL, "en_US.UTF-8");
 
@@ -6124,7 +6197,8 @@ int main() {
             (NULL == CU_add_test(pSuite, "testDistinct", testDistinct)) ||
             (NULL == CU_add_test(pSuite, "testSlice", testSlice)) ||
             (NULL == CU_add_test(pSuite, "testTicket117", testTicket117)) ||
-            (NULL == CU_add_test(pSuite, "testMetaInfo", testMetaInfo))
+            (NULL == CU_add_test(pSuite, "testMetaInfo", testMetaInfo)) ||
+            (NULL == CU_add_test(pSuite, "testTicket148", testTicket148))
             ) {
         CU_cleanup_registry();
         return CU_get_error();
|