summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Tyutyunkov Vyacheslav <tve@softmotions.com> 2015-07-13 17:39:47 +0600
committer Tyutyunkov Vyacheslav <tve@softmotions.com> 2015-07-13 17:39:47 +0600
commit 0e05906911a3e739262ceb75fba44ebe9a726fd4 (patch)
tree 5e06442dbe3331a27203e0b07a66a39df24bb98c
parent 5cc701a95c261840c75075a00c2908896d1f9fb8 (diff)
download ejdb-0e05906911a3e739262ceb75fba44ebe9a726fd4.tar.gz
ejdb-0e05906911a3e739262ceb75fba44ebe9a726fd4.tar.bz2
ejdb-0e05906911a3e739262ceb75fba44ebe9a726fd4.zip
#148 - fix duplicate paths in bson
-rw-r--r-- src/bson/CMakeLists.txt 0
-rw-r--r-- src/bson/bson.c 149
-rw-r--r-- src/bson/bson.h 67
-rw-r--r-- src/bson/bsontest.c 0
-rw-r--r-- src/bson/tests/CMakeLists.txt 14
-rw-r--r-- src/bson/tests/bsontest.c 183
-rw-r--r-- src/ejdb/tests/ejdbtest2.c 76
7 files changed, 488 insertions, 1 deletions
diff --git a/src/bson/CMakeLists.txt b/src/bson/CMakeLists.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/bson/CMakeLists.txt
diff --git a/src/bson/bson.c b/src/bson/bson.c
index d16512f..6e9feef 100644
--- a/src/bson/bson.c
+++ b/src/bson/bson.c
@@ -1621,6 +1621,19 @@ int bson_merge_fieldpaths(const void *bsdata1, const void *bsdata2, bson *out) {
}
}
tcmapdel(mfields);
+
+ if (!out->err) {
+ // check duplicate paths
+ bson_finish(out);
+ if (bson_check_duplicate_keys(out)) {
+ bson bstmp;
+ bson_copy(&bstmp, out);
+ bson_destroy(out);
+ bson_init(out);
+ bson_fix_duplicate_keys(&bstmp, out);
+ bson_destroy(&bstmp);
+ }
+ }
return out->err;
}
@@ -1724,6 +1737,142 @@ int bson_merge_recursive(const bson *b1, const bson *b2, bson_bool_t overwrite,
return bson_merge_recursive2(bson_data(b1), bson_data(b2), overwrite, out);
}
+/**
+ * Recursive worker for bson_check_duplicate_keys().
+ *
+ * Walks the fields reachable from `it`. Each field's key is compared with
+ * the keys of all fields that follow it at the same nesting level; nested
+ * objects and arrays are then checked on their own.
+ *
+ * @param it Iterator positioned before the first field to examine.
+ *           It is advanced by this call.
+ * @return true as soon as two fields of one (sub)document share a key,
+ *         false if no duplicate was found.
+ */
+static bson_bool_t _bson_check_duplicate_keys(bson_iterator *it) {
+    bson_type type;
+
+    while ((type = bson_iterator_next(it)) != BSON_EOO) {
+        const char *key = BSON_ITERATOR_KEY(it);
+
+        /* Scan the remaining siblings for the same key */
+        bson_iterator probe;
+        BSON_ITERATOR_CLONE(it, &probe);
+        while (bson_iterator_next(&probe) != BSON_EOO) {
+            if (strcmp(key, BSON_ITERATOR_KEY(&probe)) == 0) {
+                return true;
+            }
+        }
+
+        /* Containers may hide duplicates of their own */
+        if (type == BSON_OBJECT || type == BSON_ARRAY) {
+            bson_iterator nested;
+            BSON_ITERATOR_SUBITERATOR(it, &nested);
+            if (_bson_check_duplicate_keys(&nested)) {
+                return true;
+            }
+        }
+    }
+
+    return false;
+}
+
+/* Public entry point: scan `bs` from its first field for duplicate keys. */
+bson_bool_t bson_check_duplicate_keys(const bson *bs) {
+    bson_iterator root;
+
+    BSON_ITERATOR_INIT(&root, bs);
+    return _bson_check_duplicate_keys(&root);
+}
+
+/**
+ * Recursive worker for bson_fix_duplicate_keys().
+ *
+ * Copies the fields reachable from `it` into `bso`, emitting every key only
+ * once per nesting level. For a key that occurs several times:
+ *  - if the trailing occurrences are all objects, their fields are
+ *    concatenated and the result is de-duplicated recursively;
+ *  - if the trailing occurrences are all arrays, their elements are appended
+ *    one after another and re-indexed from 0;
+ *  - otherwise (scalar value, or the type changes between occurrences) only
+ *    the last occurrence is kept.
+ *
+ * @param it  Iterator positioned before the first field to process.
+ *            It is advanced to the end of its (sub)document.
+ * @param bso Output document; fields are appended to whatever object/array
+ *            is currently open in it.
+ */
+static void _bson_fix_duplicate_keys(bson_iterator *it, bson *bso) {
+    bson_iterator it2;
+    bson_type bt, bt2;
+
+    // Keys already written at this nesting level
+    TCMAP *keys = tcmapnew();
+    while ((bt = bson_iterator_next(it)) != BSON_EOO) {
+        if (NULL != tcmapget2(keys, BSON_ITERATOR_KEY(it))) {
+            // A later occurrence of a key that was handled at its first occurrence
+            continue;
+        }
+        tcmapput2(keys, BSON_ITERATOR_KEY(it), BSON_ITERATOR_KEY(it));
+
+        // Offsets (relative to it->cur) of the occurrences that take part in
+        // the merge; offset 0 is the current field itself.
+        TCLIST *dups = tclistnew();
+        off_t itoff = 0;
+        tclistpush(dups, &itoff, sizeof(itoff));
+
+        BSON_ITERATOR_CLONE(it, &it2);
+        while ((bt2 = bson_iterator_next(&it2)) != BSON_EOO) {
+            if (!strcmp(BSON_ITERATOR_KEY(it), BSON_ITERATOR_KEY(&it2))) {
+                bt2 = BSON_ITERATOR_TYPE(&it2);
+                if (bt != bt2 || (bt != BSON_OBJECT && bt != BSON_ARRAY)) {
+                    // Not mergeable with what was collected so far:
+                    // forget the earlier occurrences, the later value wins.
+                    tclistclear(dups);
+                    bt = bt2;
+                }
+                itoff = it2.cur - it->cur;
+                tclistpush(dups, &itoff, sizeof(itoff));
+            }
+        }
+
+        const char *buf;
+        int bufsz;
+
+        // The last collected occurrence decides how the key is emitted
+        buf = tclistval(dups, TCLISTNUM(dups) - 1, &bufsz);
+        memcpy(&itoff, buf, sizeof(itoff));
+        it2.cur = it->cur + itoff;
+        it2.first = itoff == 0 ? it->first : 0;
+
+        bt2 = BSON_ITERATOR_TYPE(&it2);
+        if (bt2 == BSON_OBJECT) {
+            // Concatenate the fields of every collected object into a
+            // temporary document, then de-duplicate that document.
+            bson bst;
+            bson_init(&bst);
+            int j = -1;
+            while (++j < TCLISTNUM(dups)) {
+                buf = tclistval(dups, j, &bufsz);
+                memcpy(&itoff, buf, sizeof(itoff));
+                it2.cur = it->cur + itoff;
+                it2.first = itoff == 0 ? it->first : 0;
+
+                bson_iterator sit;
+                BSON_ITERATOR_SUBITERATOR(&it2, &sit);
+                while (bson_iterator_next(&sit) != BSON_EOO) {
+                    bson_append_field_from_iterator(&sit, &bst);
+                }
+            }
+            bson_finish(&bst);
+
+            bson_append_start_object(bso, BSON_ITERATOR_KEY(it));
+            BSON_ITERATOR_INIT(&it2, &bst);
+            _bson_fix_duplicate_keys(&it2, bso);
+            bson_append_finish_object(bso);
+            // Release the temporary document (it was leaked before)
+            bson_destroy(&bst);
+        } else if (bt2 == BSON_ARRAY) {
+            char ibuf[TCNUMBUFSIZ];
+            memset(ibuf, '\0', TCNUMBUFSIZ);
+
+            bson_append_start_array(bso, BSON_ITERATOR_KEY(it));
+            int ind = 0; // running index of the merged array
+            int j = -1;
+            while (++j < TCLISTNUM(dups)) {
+                // Visit the j-th collected array. The index used to be
+                // TCLISTNUM(dups) - 1, which emitted the last array once per
+                // occurrence instead of concatenating all of them.
+                buf = tclistval(dups, j, &bufsz);
+                memcpy(&itoff, buf, sizeof(itoff));
+                it2.cur = it->cur + itoff;
+                it2.first = itoff == 0 ? it->first : 0;
+
+                bson_iterator sit, sit2;
+                bson_type sbt;
+                BSON_ITERATOR_SUBITERATOR(&it2, &sit);
+                while ((sbt = bson_iterator_next(&sit)) != BSON_EOO) {
+                    // Elements are renumbered sequentially in the output
+                    bson_numstrn(ibuf, TCNUMBUFSIZ, ind++);
+                    if (sbt == BSON_OBJECT) {
+                        bson_append_start_object(bso, ibuf);
+                        BSON_ITERATOR_SUBITERATOR(&sit, &sit2);
+                        _bson_fix_duplicate_keys(&sit2, bso);
+                        bson_append_finish_object(bso);
+                    } else if (sbt == BSON_ARRAY) {
+                        bson_append_start_array(bso, ibuf);
+                        BSON_ITERATOR_SUBITERATOR(&sit, &sit2);
+                        _bson_fix_duplicate_keys(&sit2, bso);
+                        bson_append_finish_array(bso);
+                    } else {
+                        bson_append_field_from_iterator2(ibuf, &sit, bso);
+                    }
+                }
+            }
+            bson_append_finish_array(bso);
+        } else {
+            // Scalar: copy the last occurrence as is
+            bson_append_field_from_iterator(&it2, bso);
+        }
+        tclistdel(dups);
+    }
+    tcmapdel(keys);
+}
+
+/*
+ * Public entry point: write a copy of `bsi` with duplicate keys resolved
+ * into `bso`. This function does not call bson_finish() on `bso`.
+ */
+void bson_fix_duplicate_keys(const bson *bsi, bson *bso) {
+    bson_iterator root;
+    BSON_ITERATOR_INIT(&root, bsi);
+
+    _bson_fix_duplicate_keys(&root, bso);
+}
+
+
typedef struct {
int nstack; //nested object stack pos
int matched; //number of matched include fields
diff --git a/src/bson/bson.h b/src/bson/bson.h
index f7403f7..662af64 100644
--- a/src/bson/bson.h
+++ b/src/bson/bson.h
@@ -148,6 +148,9 @@ EJDB_EXPORT const char* bson_first_errormsg(bson *bson);
(_bs_I)->cur = (_bs)->data + 4; \
(_bs_I)->first = 1;
+#define BSON_ITERATOR_CLONE(_bs_I_S, _bs_I_T) \
+ (_bs_I_T)->cur = (_bs_I_S)->cur; \
+ (_bs_I_T)->first = (_bs_I_S)->first;
/* --------------------------------
READING
@@ -1008,6 +1011,70 @@ EJDB_EXPORT int bson_append_finish_object(bson *b);
*/
EJDB_EXPORT int bson_append_finish_array(bson *b);
+EJDB_EXPORT int bson_merge_recursive(const bson *b1, const bson *b2, bson_bool_t overwrite, bson *out);
+
+/**
+ * Check whether a BSON document contains duplicate keys.
+ * Nested objects and arrays are checked as well; a key is only compared
+ * with the other keys of the same (sub)document.
+ * @param bs BSON document to inspect
+ * @return true if bson contains duplicate keys
+ */
+EJDB_EXPORT bson_bool_t bson_check_duplicate_keys(const bson *bs);
+
+/**
+ * Remove duplicate keys from bson:
+ * - merge objects and arrays with same key:
+ * { a : { b : "value 1" }, a : { c : "value 2" } } -> { a : { b : "value 1", c : "value 2" } }
+ * - keep last value for non object and non array values
+ * { a : "value 1", a : "value 2" } -> { a : "value 2" }
+ *
+ * Example:
+ * {
+ * a : {
+ * b : 1,
+ * c : "c"
+ * },
+ * b : NULL,
+ * c : [
+ * {
+ * a : 1,
+ * b : 2,
+ * a : 0
+ * },
+ * {
+ * a : 0,
+ * b : 1,
+ * c : 3
+ * }
+ * ],
+ * a : {
+ * d : 0,
+ * c : 1
+ * }
+ * }
+ *
+ * =>
+ *
+ * {
+ * a : {
+ * b : 1,
+ * c : 1,
+ * d : 0
+ * },
+ * b : NULL,
+ * c : [
+ * {
+ * a : 0,
+ * b : 2
+ * },
+ * {
+ * a : 0,
+ * b : 1,
+ * c : 3
+ * }
+ * ]
+ * }
+ */
+EJDB_EXPORT void bson_fix_duplicate_keys(const bson *bsi, bson *bso);
+
EJDB_EXPORT void bson_numstr(char *str, int64_t i);
EJDB_EXPORT int bson_numstrn(char *str, int maxbuf, int64_t i);
diff --git a/src/bson/bsontest.c b/src/bson/bsontest.c
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/bson/bsontest.c
diff --git a/src/bson/tests/CMakeLists.txt b/src/bson/tests/CMakeLists.txt
new file mode 100644
index 0000000..2096305
--- /dev/null
+++ b/src/bson/tests/CMakeLists.txt
@@ -0,0 +1,14 @@
+# Build and register the BSON unit test (bsontest).
+# Targets declared below link against ejdb_p and the CUnit libraries.
+link_libraries(ejdb_p ${CUNIT_LIBRARIES})
+include_directories(${CUNIT_INCLUDE_DIRS})
+
+# The test binary is emitted into, and executed from, this build directory.
+set(TEST_DATA_DIR ${CMAKE_CURRENT_BINARY_DIR})
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${TEST_DATA_DIR})
+
+add_executable(bsontest bsontest.c)
+# Compile the test with EJDB_STATIC defined.
+set_target_properties(bsontest
+ PROPERTIES
+ COMPILE_FLAGS "-DEJDB_STATIC")
+# TEST_TOOL_CMD is defined outside this file; presumably an optional wrapper
+# command for the test run -- confirm in the parent CMake files.
+add_test(NAME bsontest
+ WORKING_DIRECTORY ${TEST_DATA_DIR}
+ COMMAND ${TEST_TOOL_CMD} $<TARGET_FILE:bsontest>)
+
diff --git a/src/bson/tests/bsontest.c b/src/bson/tests/bsontest.c
new file mode 100644
index 0000000..935c3da
--- /dev/null
+++ b/src/bson/tests/bsontest.c
@@ -0,0 +1,183 @@
+#include "myconf.h"
+#include "bson.h"
+#include "CUnit/Basic.h"
+
+
+/*
+ * CUnit Test Suite
+ */
+
+/* Suite initialization callback passed to CU_add_suite(); there is nothing to set up, so it always returns 0. */
+int init_suite(void) {
+ return 0;
+}
+
+/* Suite cleanup callback passed to CU_add_suite(); there is nothing to tear down, so it always returns 0. */
+int clean_suite(void) {
+ return 0;
+}
+
+/*
+ * Exercises bson_check_duplicate_keys() and bson_fix_duplicate_keys() on
+ * four documents: two without duplicates and two with duplicates (a scalar
+ * duplicate inside a nested object; duplicates inside an array element plus
+ * a duplicated top-level object key).
+ *
+ * Every document is destroyed before its variable is initialised again and
+ * at the end of the test, so the test itself does not leak.
+ */
+void testCheckDuplicates(void) {
+    bson bs, bs2;
+    bson_iterator it;
+    bson_type bt;
+
+    /* 1. No duplicates: "d" at top level and "d" inside object "d" are on different levels */
+    bson_init(&bs);
+    bson_append_string(&bs, "a", "a");
+    bson_append_int(&bs, "b", 2);
+    bson_append_null(&bs, "c");
+    bson_append_start_object(&bs, "d");
+    bson_append_string(&bs, "a", "a");
+    bson_append_int(&bs, "e", 0);
+    bson_append_int(&bs, "d", 1);
+    bson_append_finish_object(&bs);
+    bson_finish(&bs);
+    CU_ASSERT_FALSE_FATAL(bs.err);
+
+    CU_ASSERT_FALSE(bson_check_duplicate_keys(&bs));
+
+    bson_destroy(&bs);
+
+    /* 2. Scalar key "e" duplicated inside nested object "d": the last value must win */
+    bson_init(&bs);
+    bson_append_string(&bs, "a", "a");
+    bson_append_int(&bs, "b", 2);
+    bson_append_null(&bs, "c");
+    bson_append_start_object(&bs, "d");
+    bson_append_string(&bs, "a", "a");
+    bson_append_int(&bs, "e", 0);
+    bson_append_int(&bs, "e", 1);
+    bson_append_finish_object(&bs);
+    bson_finish(&bs);
+    CU_ASSERT_FALSE_FATAL(bs.err);
+
+    CU_ASSERT_TRUE(bson_check_duplicate_keys(&bs));
+
+    bson_init(&bs2);
+    bson_fix_duplicate_keys(&bs, &bs2);
+    bson_finish(&bs2);
+
+    CU_ASSERT_FALSE(bson_check_duplicate_keys(&bs2));
+    BSON_ITERATOR_INIT(&it, &bs2);
+    bt = bson_find_fieldpath_value("d.e", &it);
+    CU_ASSERT_TRUE(BSON_IS_NUM_TYPE(bt));
+    CU_ASSERT_EQUAL(bson_iterator_int(&it), 1);
+
+    bson_destroy(&bs2);
+    bson_destroy(&bs);
+
+    /* 3. No duplicates: array elements carry equal keys, but in separate objects */
+    bson_init(&bs);
+    bson_append_string(&bs, "a", "a");
+    bson_append_int(&bs, "b", 2);
+    bson_append_null(&bs, "c");
+    bson_append_start_object(&bs, "d");
+    bson_append_string(&bs, "a", "a");
+    bson_append_int(&bs, "e", 0);
+    bson_append_int(&bs, "d", 1);
+    bson_append_finish_object(&bs);
+    bson_append_start_array(&bs, "f");
+    bson_append_start_object(&bs, "0");
+    bson_append_string(&bs, "a", "a");
+    bson_append_string(&bs, "b", "b");
+    bson_append_int(&bs, "c", 1);
+    bson_append_finish_object(&bs);
+    bson_append_start_object(&bs, "1");
+    bson_append_string(&bs, "a", "a");
+    bson_append_string(&bs, "b", "b");
+    bson_append_int(&bs, "c", 1);
+    bson_append_finish_object(&bs);
+    bson_append_finish_array(&bs);
+    bson_finish(&bs);
+    CU_ASSERT_FALSE_FATAL(bs.err);
+
+    CU_ASSERT_FALSE(bson_check_duplicate_keys(&bs));
+
+    bson_destroy(&bs);
+
+    /*
+     * 4. Duplicates on several levels: "a" inside f[0], top-level "a"
+     *    (string, then empty object) and top-level object "d" given twice.
+     */
+    bson_init(&bs);
+    bson_append_string(&bs, "a", "a");
+    bson_append_int(&bs, "b", 2);
+    bson_append_null(&bs, "c");
+    bson_append_start_object(&bs, "d");
+    bson_append_string(&bs, "a", "a");
+    bson_append_int(&bs, "e", 0);
+    bson_append_int(&bs, "d", 1);
+    bson_append_start_object(&bs, "q");
+    bson_append_int(&bs, "w", 0);
+    bson_append_finish_object(&bs);
+    bson_append_finish_object(&bs);
+    bson_append_start_array(&bs, "f");
+    bson_append_start_object(&bs, "0");
+    bson_append_string(&bs, "a", "a");
+    bson_append_string(&bs, "b", "b");
+    bson_append_int(&bs, "a", 1);
+    bson_append_finish_object(&bs);
+    bson_append_start_object(&bs, "1");
+    bson_append_string(&bs, "a", "a");
+    bson_append_string(&bs, "b", "b");
+    bson_append_int(&bs, "c", 1);
+    bson_append_finish_object(&bs);
+    bson_append_finish_array(&bs);
+    bson_append_start_object(&bs, "a");
+    bson_append_finish_object(&bs);
+    bson_append_start_object(&bs, "d");
+    bson_append_start_object(&bs, "q");
+    bson_append_int(&bs, "e", 1);
+    bson_append_finish_object(&bs);
+    bson_append_finish_object(&bs);
+    bson_finish(&bs);
+    CU_ASSERT_FALSE_FATAL(bs.err);
+
+    CU_ASSERT_TRUE(bson_check_duplicate_keys(&bs));
+
+    bson_init(&bs2);
+    bson_fix_duplicate_keys(&bs, &bs2);
+    bson_finish(&bs2);
+
+    CU_ASSERT_FALSE(bson_check_duplicate_keys(&bs2));
+    /* Inside f[0] the later int value replaces the earlier string */
+    BSON_ITERATOR_INIT(&it, &bs2);
+    bt = bson_find_fieldpath_value("f.0.a", &it);
+    CU_ASSERT_TRUE(BSON_IS_NUM_TYPE(bt));
+    CU_ASSERT_EQUAL(bson_iterator_int(&it), 1);
+    BSON_ITERATOR_INIT(&it, &bs2);
+    bt = bson_find_fieldpath_value("f.1.a", &it);
+    CU_ASSERT_TRUE(BSON_IS_STRING_TYPE(bt));
+    CU_ASSERT_FALSE(strcmp(bson_iterator_string(&it), "a"));
+
+    /* Top-level "a": the later (object) occurrence wins over the string */
+    BSON_ITERATOR_INIT(&it, &bs2);
+    bt = bson_find_fieldpath_value("a", &it);
+    CU_ASSERT_EQUAL(bt, BSON_OBJECT);
+
+    /* Both "d" objects, and their nested "q" objects, are merged */
+    BSON_ITERATOR_INIT(&it, &bs2);
+    bt = bson_find_fieldpath_value("d.q.w", &it);
+    CU_ASSERT_TRUE(BSON_IS_NUM_TYPE(bt));
+    CU_ASSERT_EQUAL(bson_iterator_int(&it), 0);
+    BSON_ITERATOR_INIT(&it, &bs2);
+    bt = bson_find_fieldpath_value("d.q.e", &it);
+    CU_ASSERT_TRUE(BSON_IS_NUM_TYPE(bt));
+    CU_ASSERT_EQUAL(bson_iterator_int(&it), 1);
+
+    bson_destroy(&bs2);
+    bson_destroy(&bs);
+}
+
+/*
+ * Test runner: registers the "bsontest" suite with its single test, runs it
+ * through the CUnit Basic interface in verbose mode and returns non-zero if
+ * CUnit reported an error or any assertion failed.
+ */
+int main() {
+    setlocale(LC_ALL, "en_US.UTF-8");
+
+    /* The registry has to exist before a suite can be added */
+    if (CU_initialize_registry() != CUE_SUCCESS) {
+        return CU_get_error();
+    }
+
+    CU_pSuite suite = CU_add_suite("bsontest", init_suite, clean_suite);
+    if (suite == NULL) {
+        CU_cleanup_registry();
+        return CU_get_error();
+    }
+
+    if (CU_add_test(suite, "testCheckDuplicates", testCheckDuplicates) == NULL) {
+        CU_cleanup_registry();
+        return CU_get_error();
+    }
+
+    /* Execute everything that was registered */
+    CU_basic_set_mode(CU_BRM_VERBOSE);
+    CU_basic_run_tests();
+
+    int ret = CU_get_error() || CU_get_number_of_failures();
+    CU_cleanup_registry();
+    return ret;
+}
diff --git a/src/ejdb/tests/ejdbtest2.c b/src/ejdb/tests/ejdbtest2.c
index e46fe48..51e0c14 100644
--- a/src/ejdb/tests/ejdbtest2.c
+++ b/src/ejdb/tests/ejdbtest2.c
@@ -6030,6 +6030,79 @@ void testTicket117(void) {
tcxstrdel(log);
}
+/*
+ * Regression test for ticket #148: a single $set with two field paths that
+ * share the prefix "info.name" must produce one "info" object holding one
+ * "name" object with both "par" and "mot" children, not duplicated
+ * "info"/"name" keys.
+ *
+ * The query and its result list are released at the end of the test. The
+ * result list is asserted (fatally) to hold exactly one record before its
+ * first element is accessed.
+ */
+void testTicket148(void) {
+    EJCOLL *coll = ejdbcreatecoll(jb, "ticket148", NULL);
+    CU_ASSERT_PTR_NOT_NULL_FATAL(coll);
+
+    bson bs;
+    bson_oid_t oid;
+
+    /* Start from an empty document */
+    bson_init(&bs);
+    bson_finish(&bs);
+    CU_ASSERT_FALSE_FATAL(bs.err);
+    CU_ASSERT_TRUE_FATAL(ejdbsavebson(coll, &bs, &oid));
+    bson_destroy(&bs);
+
+    /* $set two paths with a common prefix */
+    bson bsq;
+    bson_init_as_query(&bsq);
+    bson_append_start_object(&bsq, "$set");
+    bson_append_int(&bsq, "info.name.par.age", 40);
+    bson_append_int(&bsq, "info.name.mot.age", 35);
+    bson_append_finish_object(&bsq);
+    bson_finish(&bsq);
+    CU_ASSERT_FALSE_FATAL(bsq.err);
+
+    uint32_t count = ejdbupdate(coll, &bsq, 0, 0, 0, 0);
+    bson_destroy(&bsq);
+    CU_ASSERT_EQUAL(count, 1);
+
+    /* Read the record back with an empty query */
+    bson_init_as_query(&bsq);
+    bson_finish(&bsq);
+    EJQ *q1 = ejdbcreatequery(jb, &bsq, NULL, 0, NULL);
+    bson_destroy(&bsq);
+    CU_ASSERT_PTR_NOT_NULL_FATAL(q1);
+    TCLIST *q1res = ejdbqryexecute(coll, q1, &count, 0, NULL);
+    CU_ASSERT_PTR_NOT_NULL_FATAL(q1res);
+    /* Fatal: element 0 is accessed right below */
+    CU_ASSERT_EQUAL_FATAL(TCLISTNUM(q1res), 1);
+
+    void *bsdata = TCLISTVALPTR(q1res, 0);
+    CU_ASSERT_PTR_NOT_NULL_FATAL(bsdata);
+
+    bson_iterator it, sit, nit;
+    bson_type bt;
+    BSON_ITERATOR_FROM_BUFFER(&it, bsdata);
+    /* Skip the leading OID field(s); the next field must be "info" */
+    while ((bt = bson_iterator_next(&it)) != BSON_EOO) {
+        if (bt == BSON_OID) {
+            continue;
+        }
+        break;
+    }
+    CU_ASSERT_EQUAL(bt, BSON_OBJECT);
+    CU_ASSERT_FALSE(strcmp(BSON_ITERATOR_KEY(&it), "info"));
+
+    BSON_ITERATOR_SUBITERATOR(&it, &sit);
+    bt = bson_iterator_next(&sit);
+    CU_ASSERT_EQUAL(bt, BSON_OBJECT);
+    CU_ASSERT_FALSE(strcmp(BSON_ITERATOR_KEY(&sit), "name"));
+
+    /* Separate iterator for the children of "name" (no self-aliasing) */
+    BSON_ITERATOR_SUBITERATOR(&sit, &nit);
+    bt = bson_iterator_next(&nit);
+    CU_ASSERT_EQUAL(bt, BSON_OBJECT);
+    CU_ASSERT_FALSE(strcmp(BSON_ITERATOR_KEY(&nit) , "par") && strcmp(BSON_ITERATOR_KEY(&nit) , "mot"));
+    bt = bson_iterator_next(&nit);
+    CU_ASSERT_EQUAL(bt, BSON_OBJECT);
+    CU_ASSERT_FALSE(strcmp(BSON_ITERATOR_KEY(&nit) , "par") && strcmp(BSON_ITERATOR_KEY(&nit) , "mot"));
+
+    BSON_ITERATOR_FROM_BUFFER(&it, bsdata);
+    bt = bson_find_fieldpath_value("info.name.par.age", &it);
+    CU_ASSERT_TRUE(BSON_IS_NUM_TYPE(bt));
+    CU_ASSERT_EQUAL(bson_iterator_int(&it), 40);
+
+    BSON_ITERATOR_FROM_BUFFER(&it, bsdata);
+    bt = bson_find_fieldpath_value("info.name.mot.age", &it);
+    CU_ASSERT_TRUE(BSON_IS_NUM_TYPE(bt));
+    CU_ASSERT_EQUAL(bson_iterator_int(&it), 35);
+
+    /* bsdata points into q1res, so release the list only after the last read */
+    tclistdel(q1res);
+    ejdbquerydel(q1);
+}
+
int main() {
setlocale(LC_ALL, "en_US.UTF-8");
@@ -6124,7 +6197,8 @@ int main() {
(NULL == CU_add_test(pSuite, "testDistinct", testDistinct)) ||
(NULL == CU_add_test(pSuite, "testSlice", testSlice)) ||
(NULL == CU_add_test(pSuite, "testTicket117", testTicket117)) ||
- (NULL == CU_add_test(pSuite, "testMetaInfo", testMetaInfo))
+ (NULL == CU_add_test(pSuite, "testMetaInfo", testMetaInfo)) ||
+ (NULL == CU_add_test(pSuite, "testTicket148", testTicket148))
) {
CU_cleanup_registry();
return CU_get_error();