summaryrefslogtreecommitdiff
path: root/testing
diff options
context:
space:
mode:
Diffstat (limited to 'testing')
-rw-r--r--testing/Makefile8
-rw-r--r--testing/checksum_test.cc756
-rw-r--r--testing/checksum_test_c.c174
-rw-r--r--testing/cmp.h53
-rw-r--r--testing/delta.h73
-rw-r--r--testing/file.h385
-rw-r--r--testing/modify.h385
-rw-r--r--testing/random.h143
-rw-r--r--testing/regtest.cc1306
-rw-r--r--testing/regtest_c.c2
-rwxr-xr-xtesting/run_release.sh2
-rw-r--r--testing/segment.h98
-rw-r--r--testing/sizes.h111
-rw-r--r--testing/test.h70
-rwxr-xr-xtesting/xdelta3-regtest.py1273
-rwxr-xr-xtesting/xdelta3-test.py155
16 files changed, 4994 insertions, 0 deletions
diff --git a/testing/Makefile b/testing/Makefile
new file mode 100644
index 0000000..d0b9c9e
--- /dev/null
+++ b/testing/Makefile
@@ -0,0 +1,8 @@
+all:
+ (cd .. && make all)
+
+xdelta3regtest:
+ (cd .. && make xdelta3regtest)
+
+xdelta3checksum:
+ (cd .. && make xdelta3checksum)
diff --git a/testing/checksum_test.cc b/testing/checksum_test.cc
new file mode 100644
index 0000000..9418d24
--- /dev/null
+++ b/testing/checksum_test.cc
@@ -0,0 +1,756 @@
+/* Copyright (C) 2007 Josh MacDonald */
+
+#include "test.h"
+#include <assert.h>
+#include <list>
+#include <vector>
+#include <algorithm>
+
+#include "../cpp-btree/btree_map.h"
+
+extern "C" {
+uint32_t xd3_large32_cksum_old (xd3_hash_cfg *cfg, const uint8_t *base, const usize_t look);
+uint32_t xd3_large32_cksum_update_old (xd3_hash_cfg *cfg, uint32_t cksum,
+ const uint8_t *base, const usize_t look);
+
+uint64_t xd3_large64_cksum_old (xd3_hash_cfg *cfg, const uint8_t *base, const usize_t look);
+uint64_t xd3_large64_cksum_update_old (xd3_hash_cfg *cfg, uint64_t cksum,
+ const uint8_t *base, const usize_t look);
+}
+
+using btree::btree_map;
+using std::list;
+using std::vector;
+
+// MLCG parameters
+// a, a*
+uint32_t good_32bit_values[] = {
+ 1597334677U, // ...
+ 741103597U, 887987685U,
+};
+
+// a, a*
+uint64_t good_64bit_values[] = {
+ 1181783497276652981ULL, 4292484099903637661ULL,
+ 7664345821815920749ULL, // ...
+};
+
+void print_header() {
+ static int hdr_cnt = 0;
+ if (hdr_cnt++ % 20 == 0) {
+ printf("%-32sConf\t\tCount\tUniq\tFull\tCover\tColls"
+ "\tMB/s\tIters\t#Colls\n", "Name");
+ }
+}
+
+struct true_type { };
+struct false_type { };
+
+template <typename Word>
+usize_t bitsof();
+
+template<>
+usize_t bitsof<unsigned int>() {
+ return sizeof(unsigned int) * 8;
+}
+
+template<>
+usize_t bitsof<unsigned long>() {
+ return sizeof(unsigned long) * 8;
+}
+
+template<>
+usize_t bitsof<unsigned long long>() {
+ return sizeof(unsigned long long) * 8;
+}
+
+template <typename Word>
+struct hhash { // shift "s" bits leaving the high bits as a hash value for
+ // this checksum, which are the most "distant" in terms of the
+ // spectral test for the rabin_karp MLCG. For short windows,
+ // the high bits aren't enough, XOR "mask" worth of these in.
+ Word operator()(const Word t, const Word s, const Word mask) {
+ return (t >> s) ^ (t & mask);
+ }
+};
+
+template <typename Word>
+Word good_word();
+
+template<>
+uint32_t good_word<uint32_t>() {
+ return good_32bit_values[0];
+}
+
+template<>
+uint64_t good_word<uint64_t>() {
+ return good_64bit_values[0];
+}
+
+// CLASSES
+
+#define SELF Word, CksumSize, CksumSkip, Hash, Compaction
+#define MEMBER template <typename Word, \
+ int CksumSize, \
+ int CksumSkip, \
+ typename Hash, \
+ int Compaction>
+
+MEMBER
+struct cksum_params {
+ typedef Word word_type;
+ typedef Hash hash_type;
+
+ static const int cksum_size = CksumSize;
+ static const int cksum_skip = CksumSkip;
+ static const int compaction = Compaction;
+};
+
+MEMBER
+struct rabin_karp : public cksum_params<SELF> {
+ // (a^cksum_size-1 c_0) + (a^cksum_size-2 c_1) ...
+ rabin_karp()
+ : powers(make_powers()),
+ product(powers[0] * good_word<Word>()),
+ incr_state(0) { }
+
+ static Word* make_powers() {
+ Word *p = new Word[CksumSize];
+ p[CksumSize - 1] = 1;
+ for (int i = CksumSize - 2; i >= 0; i--) {
+ p[i] = p[i + 1] * good_word<Word>();
+ }
+ return p;
+ }
+
+ ~rabin_karp() {
+ delete [] powers;
+ }
+
+ Word step(const uint8_t *ptr) {
+ Word h = 0;
+ for (int i = 0; i < CksumSize; i++) {
+ h += (ptr[i]) * powers[i];
+ }
+ return h;
+ }
+
+ Word state0(const uint8_t *ptr) {
+ incr_state = step(ptr);
+ return incr_state;
+ }
+
+ Word incr(const uint8_t *ptr) {
+ incr_state = good_word<Word>() * incr_state -
+ product * (ptr[-1]) + (ptr[CksumSize - 1]);
+ return incr_state;
+ }
+
+ const Word *const powers;
+ const Word product;
+ Word incr_state;
+};
+
+MEMBER
+struct with_stream : public cksum_params<SELF> {
+ xd3_stream stream;
+
+ with_stream()
+ {
+ xd3_config cfg;
+ memset (&stream, 0, sizeof (stream));
+ xd3_init_config (&cfg, 0);
+ cfg.smatch_cfg = XD3_SMATCH_SOFT;
+ cfg.smatcher_soft.large_look = CksumSize;
+ cfg.smatcher_soft.large_step = CksumSkip;
+ cfg.smatcher_soft.small_look = 4;
+ cfg.smatcher_soft.small_chain = 4;
+ cfg.smatcher_soft.small_lchain = 4;
+ cfg.smatcher_soft.max_lazy = 4;
+ cfg.smatcher_soft.long_enough = 4;
+ CHECK_EQ(0, xd3_config_stream (&stream, &cfg));
+
+ CHECK_EQ(0, xd3_size_hashtable (&stream,
+ 1<<10 /* ignored */,
+ stream.smatcher.large_look,
+ & stream.large_hash));
+ }
+ ~with_stream()
+ {
+ xd3_free_stream (&stream);
+ }
+};
+
+MEMBER
+struct large_cksum : public with_stream<SELF> {
+ Word step(const uint8_t *ptr) {
+ return xd3_large_cksum (&this->stream.large_hash, ptr, CksumSize);
+ }
+
+ Word state0(const uint8_t *ptr) {
+ incr_state = step(ptr);
+ return incr_state;
+ }
+
+ Word incr(const uint8_t *ptr) {
+ incr_state = xd3_large_cksum_update (&this->stream.large_hash,
+ incr_state, ptr - 1, CksumSize);
+ return incr_state;
+ }
+
+ Word incr_state;
+};
+
+#if SIZEOF_USIZE_T == 4
+#define xd3_large_cksum_old xd3_large32_cksum_old
+#define xd3_large_cksum_update_old xd3_large32_cksum_update_old
+#elif SIZEOF_USIZE_T == 8
+#define xd3_large_cksum_old xd3_large64_cksum_old
+#define xd3_large_cksum_update_old xd3_large64_cksum_update_old
+#endif
+
+MEMBER
+struct large_cksum_old : public with_stream<SELF> {
+ Word step(const uint8_t *ptr) {
+ return xd3_large_cksum_old (&this->stream.large_hash, ptr, CksumSize);
+ }
+
+ Word state0(const uint8_t *ptr) {
+ incr_state = step(ptr);
+ return incr_state;
+ }
+
+ Word incr(const uint8_t *ptr) {
+ incr_state = xd3_large_cksum_update_old (&this->stream.large_hash,
+ incr_state, ptr - 1, CksumSize);
+ return incr_state;
+ }
+
+ Word incr_state;
+};
+
+// TESTS
+
+template <typename Word>
+struct file_stats {
+ typedef const uint8_t* ptr_type;
+ typedef Word word_type;
+ typedef btree::btree_multimap<word_type, ptr_type> table_type;
+ typedef typename table_type::iterator table_iterator;
+
+ usize_t cksum_size;
+ usize_t cksum_skip;
+ usize_t unique;
+ usize_t unique_values;
+ usize_t count;
+ table_type table;
+
+ file_stats(usize_t size, usize_t skip)
+ : cksum_size(size),
+ cksum_skip(skip),
+ unique(0),
+ unique_values(0),
+ count(0) {
+ }
+
+ void reset() {
+ unique = 0;
+ unique_values = 0;
+ count = 0;
+ table.clear();
+ }
+
+ void update(word_type word, ptr_type ptr) {
+ table_iterator t_i = table.find(word);
+
+ count++;
+ if (t_i != table.end()) {
+ int collisions = 0;
+ for (table_iterator p_i = t_i;
+ p_i != table.end() && p_i->first == word;
+ ++p_i) {
+ if (memcmp(p_i->second, ptr, cksum_size) == 0) {
+ return;
+ }
+ collisions++;
+ }
+ if (collisions >= 1000) {
+ fprintf(stderr, "Something is not right, lots of collisions=%d\n",
+ collisions);
+ abort();
+ }
+ } else {
+ unique_values++;
+ }
+ unique++;
+ table.insert(std::make_pair(word, ptr));
+ return;
+ }
+
+ void freeze() {
+ table.clear();
+ }
+};
+
+struct test_result_base;
+
+static vector<test_result_base*> all_tests;
+
+struct test_result_base {
+ virtual ~test_result_base() {
+ }
+ virtual void reset() = 0;
+ virtual void print() = 0;
+ virtual void get(const uint8_t* buf, const size_t buf_size,
+ usize_t iters) = 0;
+ virtual void stat() = 0;
+ virtual usize_t count() = 0;
+ virtual usize_t dups() = 0;
+ virtual double uniqueness() = 0;
+ virtual double fullness() = 0;
+ virtual double collisions() = 0;
+ virtual double coverage() = 0;
+ virtual double compression() = 0;
+ virtual double time() = 0;
+ virtual double total_time() = 0;
+ virtual usize_t total_count() = 0;
+ virtual usize_t total_dups() = 0;
+};
+
+template <typename Checksum>
+struct test_result : public test_result_base {
+ Checksum cksum;
+ const char *test_name;
+ file_stats<typename Checksum::word_type> fstats;
+ usize_t test_size;
+ usize_t n_steps;
+ usize_t n_incrs;
+ typename Checksum::word_type s_bits;
+ typename Checksum::word_type s_mask;
+ usize_t t_entries;
+ usize_t h_bits;
+ usize_t h_buckets_full;
+ char *hash_table;
+ long accum_millis;
+ usize_t accum_iters;
+
+ // These are not reset
+ double accum_time;
+ usize_t accum_count;
+ usize_t accum_dups;
+ usize_t accum_colls;
+ size_t accum_size;
+
+ test_result(const char *name)
+ : test_name(name),
+ fstats(Checksum::cksum_size, Checksum::cksum_skip),
+ hash_table(NULL),
+ accum_millis(0),
+ accum_iters(0),
+ accum_time(0.0),
+ accum_count(0),
+ accum_dups(0),
+ accum_colls(0),
+ accum_size(0) {
+ all_tests.push_back(this);
+ }
+
+ ~test_result() {
+ reset();
+ }
+
+ void reset() {
+ // size of file
+ test_size = 0;
+
+ // count
+ n_steps = 0;
+ n_incrs = 0;
+
+ // four values used by new_table()/summarize_table()
+ s_bits = 0;
+ s_mask = 0;
+ t_entries = 0;
+ h_bits = 0;
+ h_buckets_full = 0;
+
+ accum_millis = 0;
+ accum_iters = 0;
+
+ fstats.reset();
+
+ // temporary
+ if (hash_table) {
+ delete(hash_table);
+ hash_table = NULL;
+ }
+ }
+
+ usize_t count() {
+ if (Checksum::cksum_skip == 1) {
+ return n_incrs;
+ } else {
+ return n_steps;
+ }
+ }
+
+ usize_t dups() {
+ return fstats.count - fstats.unique;
+ }
+
+ /* Fraction of distinct strings of length cksum_size which are not
+ * represented in the hash table. */
+ double collisions() {
+ return (fstats.unique - fstats.unique_values) / (double) fstats.unique;
+ }
+ usize_t colls() {
+ return (fstats.unique - fstats.unique_values);
+ }
+
+ double uniqueness() {
+ return 1.0 - (double) dups() / count();
+ }
+
+ double fullness() {
+ return (double) h_buckets_full / (1 << h_bits);
+ }
+
+ double coverage() {
+ return (double) h_buckets_full / uniqueness() / count();
+ }
+
+ double compression() {
+ return 1.0 - coverage();
+ }
+
+ double time() {
+ return (double) accum_millis / accum_iters;
+ }
+
+ double total_time() {
+ return accum_time;
+ }
+
+ usize_t total_count() {
+ return accum_count;
+ }
+
+ usize_t total_dups() {
+ return accum_dups;
+ }
+
+ usize_t total_colls() {
+ return accum_dups;
+ }
+
+ void stat() {
+ accum_time += time();
+ accum_count += count();
+ accum_dups += dups();
+ accum_colls += colls();
+ accum_size += test_size;
+ }
+
+ void print() {
+ if (fstats.count != count()) {
+ fprintf(stderr, "internal error: %" W "d != %" W "d\n", fstats.count, count());
+ abort();
+ }
+ print_header();
+ printf("%-32s%d/%d 2^%" W "u\t%" W "u\t%0.4f\t%.4f\t%.4f\t%.1e\t%.2f\t"
+ "%" W "u\t%" W "u\n",
+ test_name,
+ Checksum::cksum_size,
+ Checksum::cksum_skip,
+ h_bits,
+ count(),
+ uniqueness(),
+ fullness(),
+ coverage(),
+ collisions(),
+ 0.001 * accum_iters * test_size / accum_millis,
+ accum_iters,
+ colls());
+ }
+
+ usize_t size_log2 (usize_t slots) {
+ usize_t bits = bitsof<typename Checksum::word_type>() - 1;
+ usize_t i;
+
+ for (i = 3; i <= bits; i += 1) {
+ if (slots <= (1U << i)) {
+ return i - Checksum::compaction;
+ }
+ }
+
+ return bits;
+ }
+
+ void new_table(usize_t entries) {
+ t_entries = entries;
+ h_bits = size_log2(entries);
+
+ usize_t n = 1 << h_bits;
+
+ s_bits = bitsof<typename Checksum::word_type>() - h_bits;
+ s_mask = n - 1U;
+
+ hash_table = new char[n / 8];
+ memset(hash_table, 0, n / 8);
+ }
+
+ int get_table_bit(usize_t i) {
+ return hash_table[i/8] & (1 << i%8);
+ }
+
+ int set_table_bit(usize_t i) {
+ return hash_table[i/8] |= (1 << i%8);
+ }
+
+ void summarize_table() {
+ usize_t n = 1 << h_bits;
+ usize_t f = 0;
+ for (usize_t i = 0; i < n; i++) {
+ if (get_table_bit(i)) {
+ f++;
+ }
+ }
+ h_buckets_full = f;
+ }
+
+ void get(const uint8_t* buf, const size_t buf_size, usize_t test_iters) {
+ typename Checksum::hash_type hash;
+ const uint8_t *ptr;
+ const uint8_t *end;
+ usize_t periods;
+ int64_t last_offset;
+ int64_t stop;
+
+ test_size = buf_size;
+ last_offset = buf_size - Checksum::cksum_size;
+
+ if (last_offset < 0) {
+ periods = 0;
+ n_steps = 0;
+ n_incrs = 0;
+ stop = -Checksum::cksum_size;
+ } else {
+ periods = last_offset / Checksum::cksum_skip;
+ n_steps = periods + 1;
+ n_incrs = last_offset + 1;
+ stop = last_offset - (periods + 1) * Checksum::cksum_skip;
+ }
+
+ // Compute file stats once.
+ if (fstats.unique_values == 0) {
+ if (Checksum::cksum_skip == 1) {
+ for (size_t i = 0; i <= buf_size - Checksum::cksum_size; i++) {
+ fstats.update(hash(cksum.step(buf + i), s_bits, s_mask), buf + i);
+ }
+ } else {
+ ptr = buf + last_offset;
+ end = buf + stop;
+
+ for (; ptr != end; ptr -= Checksum::cksum_skip) {
+ fstats.update(hash(cksum.step(ptr), s_bits, s_mask), ptr);
+ }
+ }
+ fstats.freeze();
+ }
+
+ long start_test = get_millisecs_now();
+
+ if (Checksum::cksum_skip != 1) {
+ new_table(n_steps);
+
+ for (usize_t i = 0; i < test_iters; i++) {
+ ptr = buf + last_offset;
+ end = buf + stop;
+
+ for (; ptr != end; ptr -= Checksum::cksum_skip) {
+ set_table_bit(hash(cksum.step(ptr), s_bits, s_mask));
+ }
+ }
+
+ summarize_table();
+ }
+
+ stop = buf_size - Checksum::cksum_size + 1;
+ if (stop < 0) {
+ stop = 0;
+ }
+
+ if (Checksum::cksum_skip == 1) {
+ new_table(n_incrs);
+
+ for (usize_t i = 0; i < test_iters; i++) {
+ ptr = buf;
+ end = buf + stop;
+
+ if (ptr != end) {
+ set_table_bit(hash(cksum.state0(ptr++), s_bits, s_mask));
+ }
+
+ for (; ptr != end; ptr++) {
+ typename Checksum::word_type w = cksum.incr(ptr);
+ CHECK_EQ(w, cksum.step(ptr));
+ set_table_bit(hash(w, s_bits, s_mask));
+ }
+ }
+
+ summarize_table();
+ }
+
+ accum_iters += test_iters;
+ accum_millis += get_millisecs_now() - start_test;
+ }
+};
+
+static int read_whole_file(const char *name,
+ uint8_t **buf_ptr,
+ size_t *buf_len) {
+ main_file file;
+ int ret;
+ xoff_t len;
+ size_t nread;
+ main_file_init(&file);
+ file.filename = name;
+ ret = main_file_open(&file, name, XO_READ);
+ if (ret != 0) {
+ fprintf(stderr, "open failed\n");
+ goto exit;
+ }
+ ret = main_file_stat(&file, &len);
+ if (ret != 0) {
+ fprintf(stderr, "stat failed\n");
+ goto exit;
+ }
+
+ (*buf_len) = (size_t)len;
+ (*buf_ptr) = (uint8_t*) main_malloc(*buf_len);
+ ret = main_file_read(&file, *buf_ptr, *buf_len, &nread,
+ "read failed");
+ if (ret == 0 && *buf_len == nread) {
+ ret = 0;
+ } else {
+ fprintf(stderr, "invalid read\n");
+ ret = XD3_INTERNAL;
+ }
+ exit:
+ main_file_cleanup(&file);
+ return ret;
+}
+
+int main(int argc, char** argv) {
+ int i;
+ uint8_t *buf = NULL;
+ size_t buf_len = 0;
+ int ret;
+
+ if (argc <= 1) {
+ fprintf(stderr, "usage: %s file ...\n", argv[0]);
+ return 1;
+ }
+
+// TODO: The xdelta3-hash.h code is identical now; add sameness test.
+// using rabin_karp<> template.
+#define TEST(T,Z,S,C) \
+ test_result<large_cksum<T,Z,S,hhash<T>,C>> \
+ _xck_ ## T ## _ ## Z ## _ ## S ## _ ## C \
+ ("xck_" #T "_" #Z "_" #S "_" #C); \
+ test_result<large_cksum_old<T,Z,S,hhash<T>,C>> \
+ _old_ ## T ## _ ## Z ## _ ## S ## _ ## C \
+ ("old_" #T "_" #Z "_" #S "_" #C)
+
+#define TESTS(SIZE, SKIP) \
+ TEST(usize_t, SIZE, SKIP, 1); \
+ TEST(usize_t, SIZE, SKIP, 2)
+
+ TESTS(5, 1);
+ TESTS(6, 1);
+ TESTS(7, 1);
+ TESTS(8, 1);
+ TESTS(9, 1);
+ TESTS(10, 1);
+ TESTS(11, 1);
+ TESTS(12, 1);
+ TESTS(13, 1);
+ TESTS(14, 1);
+ TESTS(15, 1);
+ TESTS(16, 1);
+ TESTS(17, 1);
+ TESTS(18, 1);
+ TESTS(19, 1);
+ TESTS(20, 1);
+ TESTS(21, 1);
+ TESTS(22, 1);
+ TESTS(23, 1);
+ TESTS(24, 1);
+ TESTS(25, 1);
+ TESTS(26, 1);
+ TESTS(27, 1);
+ TESTS(28, 1);
+ TESTS(29, 1);
+ TESTS(30, 1);
+ TESTS(31, 1);
+ TESTS(32, 1);
+ TESTS(33, 1);
+ TESTS(34, 1);
+ TESTS(35, 1);
+ TESTS(36, 1);
+ TESTS(37, 1);
+ TESTS(38, 1);
+ TESTS(39, 1);
+
+
+ for (i = 1; i < argc; i++) {
+ if ((ret = read_whole_file(argv[i],
+ & buf,
+ & buf_len))) {
+ return 1;
+ }
+
+ fprintf(stderr, "file %s is %zu bytes\n",
+ argv[i], buf_len);
+
+ double min_time = -1.0;
+ double min_compression = 0.0;
+
+ for (vector<test_result_base*>::iterator iter = all_tests.begin();
+ iter != all_tests.end(); ++iter) {
+ test_result_base *test = *iter;
+ test->reset();
+
+ usize_t iters = 1;
+ long start_test = get_millisecs_now();
+
+ do {
+ test->get(buf, buf_len, iters);
+ iters *= 3;
+ iters /= 2;
+ } while (get_millisecs_now() - start_test < 2000);
+
+ test->stat();
+
+ if (min_time < 0.0) {
+ min_compression = test->compression();
+ min_time = test->time();
+ }
+
+ if (min_time > test->time()) {
+ min_time = test->time();
+ }
+
+ if (min_compression > test->compression()) {
+ min_compression = test->compression();
+ }
+
+ test->print();
+ }
+
+ main_free(buf);
+ buf = NULL;
+ }
+
+ return 0;
+}
diff --git a/testing/checksum_test_c.c b/testing/checksum_test_c.c
new file mode 100644
index 0000000..8f0507a
--- /dev/null
+++ b/testing/checksum_test_c.c
@@ -0,0 +1,174 @@
+#include "../xdelta3.c"
+
+// OLD CHECKSUM CODE
+
+#define PERMUTE32(x) (__single_hash32[x])
+#define PERMUTE64(x) (__single_hash64[x])
+
+const uint16_t __single_hash32[256] =
+{
+ /* This hashes the input alphabet (Scheme SLIB pseudo-random). */
+ 0xbcd1, 0xbb65, 0x42c2, 0xdffe, 0x9666, 0x431b, 0x8504, 0xeb46,
+ 0x6379, 0xd460, 0xcf14, 0x53cf, 0xdb51, 0xdb08, 0x12c8, 0xf602,
+ 0xe766, 0x2394, 0x250d, 0xdcbb, 0xa678, 0x02af, 0xa5c6, 0x7ea6,
+ 0xb645, 0xcb4d, 0xc44b, 0xe5dc, 0x9fe6, 0x5b5c, 0x35f5, 0x701a,
+ 0x220f, 0x6c38, 0x1a56, 0x4ca3, 0xffc6, 0xb152, 0x8d61, 0x7a58,
+ 0x9025, 0x8b3d, 0xbf0f, 0x95a3, 0xe5f4, 0xc127, 0x3bed, 0x320b,
+ 0xb7f3, 0x6054, 0x333c, 0xd383, 0x8154, 0x5242, 0x4e0d, 0x0a94,
+ 0x7028, 0x8689, 0x3a22, 0x0980, 0x1847, 0xb0f1, 0x9b5c, 0x4176,
+ 0xb858, 0xd542, 0x1f6c, 0x2497, 0x6a5a, 0x9fa9, 0x8c5a, 0x7743,
+ 0xa8a9, 0x9a02, 0x4918, 0x438c, 0xc388, 0x9e2b, 0x4cad, 0x01b6,
+ 0xab19, 0xf777, 0x365f, 0x1eb2, 0x091e, 0x7bf8, 0x7a8e, 0x5227,
+ 0xeab1, 0x2074, 0x4523, 0xe781, 0x01a3, 0x163d, 0x3b2e, 0x287d,
+ 0x5e7f, 0xa063, 0xb134, 0x8fae, 0x5e8e, 0xb7b7, 0x4548, 0x1f5a,
+ 0xfa56, 0x7a24, 0x900f, 0x42dc, 0xcc69, 0x02a0, 0x0b22, 0xdb31,
+ 0x71fe, 0x0c7d, 0x1732, 0x1159, 0xcb09, 0xe1d2, 0x1351, 0x52e9,
+ 0xf536, 0x5a4f, 0xc316, 0x6bf9, 0x8994, 0xb774, 0x5f3e, 0xf6d6,
+ 0x3a61, 0xf82c, 0xcc22, 0x9d06, 0x299c, 0x09e5, 0x1eec, 0x514f,
+ 0x8d53, 0xa650, 0x5c6e, 0xc577, 0x7958, 0x71ac, 0x8916, 0x9b4f,
+ 0x2c09, 0x5211, 0xf6d8, 0xcaaa, 0xf7ef, 0x287f, 0x7a94, 0xab49,
+ 0xfa2c, 0x7222, 0xe457, 0xd71a, 0x00c3, 0x1a76, 0xe98c, 0xc037,
+ 0x8208, 0x5c2d, 0xdfda, 0xe5f5, 0x0b45, 0x15ce, 0x8a7e, 0xfcad,
+ 0xaa2d, 0x4b5c, 0xd42e, 0xb251, 0x907e, 0x9a47, 0xc9a6, 0xd93f,
+ 0x085e, 0x35ce, 0xa153, 0x7e7b, 0x9f0b, 0x25aa, 0x5d9f, 0xc04d,
+ 0x8a0e, 0x2875, 0x4a1c, 0x295f, 0x1393, 0xf760, 0x9178, 0x0f5b,
+ 0xfa7d, 0x83b4, 0x2082, 0x721d, 0x6462, 0x0368, 0x67e2, 0x8624,
+ 0x194d, 0x22f6, 0x78fb, 0x6791, 0xb238, 0xb332, 0x7276, 0xf272,
+ 0x47ec, 0x4504, 0xa961, 0x9fc8, 0x3fdc, 0xb413, 0x007a, 0x0806,
+ 0x7458, 0x95c6, 0xccaa, 0x18d6, 0xe2ae, 0x1b06, 0xf3f6, 0x5050,
+ 0xc8e8, 0xf4ac, 0xc04c, 0xf41c, 0x992f, 0xae44, 0x5f1b, 0x1113,
+ 0x1738, 0xd9a8, 0x19ea, 0x2d33, 0x9698, 0x2fe9, 0x323f, 0xcde2,
+ 0x6d71, 0xe37d, 0xb697, 0x2c4f, 0x4373, 0x9102, 0x075d, 0x8e25,
+ 0x1672, 0xec28, 0x6acb, 0x86cc, 0x186e, 0x9414, 0xd674, 0xd1a5
+};
+
+const uint32_t __single_hash64[256] =
+{
+ /* http://random.org 2014.10.24 */
+ 0xd25e9f0a, 0xb1af9d5e, 0xb753dfa2, 0x157050f7, /* 0 */
+ 0xc84b072c, 0xdd14fe7c, 0xf92208c3, 0xdf08a0c0,
+ 0x63a5c118, 0x76f5d90f, 0xa2f8b93e, 0xb6c12d22,
+ 0xaf074957, 0x966fb7d9, 0x62f7b785, 0xb40e8a09,
+ 0x0a811d5d, 0x323a6daa, 0xb62f7c5b, 0xfdcb9a53,
+ 0xf25a9067, 0x4506bc7a, 0xff58a74b, 0x5ae62817,
+ 0x74097675, 0x722c0fd9, 0x116a2a66, 0x65f76728,
+ 0x72c79651, 0xe043cf9d, 0x64b867c7, 0x6604834f,
+ 0xcdca58a6, 0x0f164e2d, 0x24515f05, 0x632cdbf8,
+ 0x18091d4a, 0x3eff4128, 0x673d1c33, 0xd8e10c71,
+ 0x1a3edf11, 0xba52892f, 0xa56949e0, 0xf3e1dd77, /* 10 */
+ 0x86fcbe3e, 0x138d66d0, 0x4fc98359, 0xc22e5dd6,
+ 0xc59f2267, 0x6c6dd739, 0xe03da190, 0x07e8469c,
+ 0xadcfb02c, 0x00d3b0d9, 0xa1f44918, 0x8bd84d87,
+ 0x08ec9ec1, 0xbbcd156f, 0xb57718e3, 0x3177e752,
+ 0xf52a4d70, 0xde7aaad9, 0x075f1da0, 0x21ba00c6,
+ 0xb9469a5c, 0xcf08d5ba, 0x91ac9edc, 0xc6167b63,
+ 0xc1974919, 0xc8c8d195, 0x4b1996dd, 0xeff8991c,
+ 0xf7f66c6b, 0x25b012e2, 0x59d12a98, 0xea40d3cc,
+ 0x41f9970b, 0xec48101a, 0xa3bdcf90, 0x99f16905,
+ 0x27af6c97, 0xc849af37, 0x49cad89b, 0xf48c2278, /* 20 */
+ 0x5529c3d8, 0x9e7d6dce, 0x16feb52d, 0xf1b0aca1,
+ 0xaf28fccb, 0x48e4ce3c, 0xc4436617, 0x64524e3e,
+ 0x61806681, 0x6384f2d7, 0x1172880f, 0x34a5ef5f,
+ 0xcc8cc0a8, 0x66e8f100, 0x2866085f, 0xba9b1b2d,
+ 0x51285949, 0x2be4b574, 0x889b1ef5, 0x3dbe920d,
+ 0x9277a62f, 0x0584a9f6, 0x085d8fc4, 0x4b5d403d,
+ 0x4e46ca78, 0x3294c2f9, 0x29313e70, 0xe4f09b24,
+ 0xe73b331c, 0x072f5552, 0x2e390b78, 0xea0021ca,
+ 0xd8f40320, 0xed0e16fd, 0x7de9cf7a, 0xf17e3d6c,
+ 0x8df1bd85, 0x052cae67, 0x3486e512, 0x3a1c09b8, /* 30 */
+ 0x6c2a7b4e, 0x83455753, 0xbc0353ac, 0x0ffe20b6,
+ 0x5fdcef85, 0x010f506c, 0x595ce972, 0xe28680d0,
+ 0xa7e216b2, 0xa392ee0f, 0x25b73faa, 0x2b1f4983,
+ 0xeeaefe98, 0x1d3d9cbc, 0x6aebe97b, 0x8b7b3584,
+ 0x9e6a9a07, 0xd37f1e99, 0x4ac2a441, 0x8ae9a213,
+ 0x7d0e27d7, 0x5de54b9a, 0x8621de1f, 0xf0f2f866,
+ 0xcb08d275, 0x49c3f87e, 0xd5ee68c1, 0x9802fc77,
+ 0x68be6c5e, 0x65aa8c27, 0xf423d5f7, 0x10ec5502,
+ 0x9909bce1, 0x509cdf1b, 0x338fea72, 0x2733e9bf,
+ 0xf92f4fd7, 0x87738ea2, 0x931a8bbc, 0x0a5c9155, /* 40 */
+ 0xbe5edd9b, 0xadbf5838, 0x0338f8d2, 0x290da210,
+ 0x390c37d8, 0xe7cffae8, 0x20617ebe, 0x464322dd,
+ 0x7b3c4e78, 0xac142dcb, 0x2d5cef76, 0xd8fe49fc,
+ 0x60f4e9a9, 0x7473816f, 0x0dc35f39, 0x5eed80c1,
+ 0x0cb55ab6, 0x1d3ac541, 0x13c7f529, 0x7bffdf4a,
+ 0xe334785b, 0x85263ec1, 0xd132ae56, 0x7c868b9e,
+ 0x47f60638, 0x1012b979, 0x81c31dd3, 0x1af868c8,
+ 0x0c5d0742, 0xd1b3e1a2, 0x5873200a, 0xf848465c,
+ 0x0fc4d596, 0x609c18af, 0xc9f5a480, 0xd1a94a84,
+ 0xa1431a3f, 0x7de8bb1a, 0x25f1256b, 0x1dcc732c, /* 50 */
+ 0x6aa1549a, 0xa2367281, 0x32f2a77e, 0x82e62a0f,
+ 0x045cbb56, 0x74b2027c, 0xd71a32d9, 0x022e7cb5,
+ 0xe99be177, 0x60222fdf, 0xd69681ca, 0x9008ee2c,
+ 0x32923db4, 0xcf82bf97, 0x38960a5b, 0xb3503d5b,
+ 0x9bd4c7f2, 0x33c029c8, 0x1ef504a3, 0xdb249d3b,
+ 0x91e89676, 0x4ca43b36, 0x9191433c, 0x465d5dc4,
+ 0xf4dcb118, 0x9d11dd00, 0xb592f058, 0xdbe5ce30,
+ 0x74790d92, 0x779850a8, 0x7180d25b, 0xfa951d99,
+ 0x5990935a, 0x921cb022, 0x3b7c39bc, 0x6a38a7c7,
+ 0xdc22703b, 0x142bab3b, 0x4e3d9479, 0x44bb8482, /* 60 */
+ 0x8043abce, 0xfebe832a, 0x8e6a2f98, 0x4d43c4fe,
+ 0xd192a70a, 0x802f3c3a, 0x5d11bbab, 0x2665d241,
+ 0xb3f3a680, 0x3a8d223f, 0xcf82cdb4, 0x4ed28743,
+};
+
+uint64_t
+xd3_large64_cksum_old (xd3_hash_cfg *ignore, const uint8_t *base, const usize_t look)
+{
+ static const uint64_t kBits = 32;
+ static const uint64_t kMask = 0xffffffff;
+ usize_t i = 0;
+ uint64_t low = 0;
+ uint64_t high = 0;
+
+ for (; i < look; i += 1)
+ {
+ low += PERMUTE64(*base++);
+ high += low;
+ }
+
+ return ((high & kMask) << kBits) | (low & kMask);
+}
+
+uint64_t
+xd3_large64_cksum_update_old (xd3_hash_cfg *ignore, const uint64_t cksum,
+ const uint8_t *base, const usize_t look)
+{
+ static const uint64_t kBits = 32;
+ static const uint64_t kMask = 0xffffffff;
+ uint64_t old_c = PERMUTE64(base[0]);
+ uint64_t new_c = PERMUTE64(base[look]);
+ uint64_t low = ((cksum & kMask) - old_c + new_c) & kMask;
+ uint64_t high = ((cksum >> kBits) - (old_c * look) + low) & kMask;
+ return (high << kBits) | low;
+}
+
+uint32_t
+xd3_large32_cksum_old (xd3_hash_cfg *ignore, const uint8_t *base, const usize_t look)
+{
+ static const uint32_t kBits = 16;
+ static const uint32_t kMask = 0xffff;
+ usize_t i = 0;
+ uint32_t low = 0;
+ uint32_t high = 0;
+
+ for (; i < look; i += 1)
+ {
+ low += PERMUTE32(*base++);
+ high += low;
+ }
+
+ return ((high & kMask) << kBits) | (low & kMask);
+}
+
+uint32_t
+xd3_large32_cksum_update_old (xd3_hash_cfg *ignore, const uint32_t cksum,
+ const uint8_t *base, const usize_t look)
+{
+ static const uint32_t kBits = 16;
+ static const uint32_t kMask = 0xffff;
+ uint32_t old_c = PERMUTE32(base[0]);
+ uint32_t new_c = PERMUTE32(base[look]);
+ uint32_t low = ((cksum & kMask) - old_c + new_c) & kMask;
+ uint32_t high = ((cksum >> kBits) - (old_c * look) + low) & kMask;
+ return (high << kBits) | low;
+}
diff --git a/testing/cmp.h b/testing/cmp.h
new file mode 100644
index 0000000..53ae671
--- /dev/null
+++ b/testing/cmp.h
@@ -0,0 +1,53 @@
+/* -*- Mode: C++ -*- */
+static size_t CmpDifferentBlockBytes(const Block &a, const Block &b) {
+ size_t total = 0;
+ size_t i = 0;
+ size_t m = min(a.Size(), b.Size());
+
+ for (; i < m; i++) {
+ if (a[i] != b[i]) {
+ total++;
+ }
+ }
+
+ total += a.Size() - i;
+ total += b.Size() - i;
+
+ return total;
+}
+
+static xoff_t CmpDifferentBytes(const FileSpec &a, const FileSpec &b) {
+ Block block_a, block_b;
+ xoff_t total = 0;
+ typename FileSpec::iterator a_i(a), b_i(b);
+
+ for (; !a_i.Done() && !b_i.Done(); a_i.Next(), b_i.Next()) {
+
+ a_i.Get(&block_a);
+ b_i.Get(&block_b);
+
+ total += CmpDifferentBlockBytes(block_a, block_b);
+ }
+
+ for (; !a_i.Done(); a_i.Next()) {
+ total += a_i.BytesOnBlock();
+ }
+ for (; !b_i.Done(); b_i.Next()) {
+ total += b_i.BytesOnBlock();
+ }
+
+ return total;
+}
+
+static size_t CmpDifferentBlockBytesAtOffset(const Block &a,
+ const FileSpec &b_spec,
+ xoff_t offset) {
+ Block b;
+ size_t size = a.Size();
+ CHECK_LE(offset, b_spec.Size());
+ if (b_spec.Size() < offset + size) {
+ size = b_spec.Size() - offset;
+ }
+ b_spec.Get(&b, offset, size);
+ return CmpDifferentBlockBytes(a, b);
+}
diff --git a/testing/delta.h b/testing/delta.h
new file mode 100644
index 0000000..deac217
--- /dev/null
+++ b/testing/delta.h
@@ -0,0 +1,73 @@
+// Mode: -*- C++ -*-
+
+class Delta {
+public:
+ Delta(const Block &block) {
+ int ret;
+ xd3_config config;
+ memset(&stream_, 0, sizeof (stream_));
+ memset(&config, 0, sizeof (config));
+
+ xd3_init_config(&config, XD3_SKIP_EMIT | XD3_ADLER32_NOVER);
+
+ CHECK_EQ(0, xd3_config_stream (&stream_, &config));
+
+ xd3_avail_input (&stream_, block.Data(), block.Size());
+
+ bool done = false;
+ while (!done) {
+ ret = xd3_decode_input(&stream_);
+
+ switch (ret) {
+ case XD3_INPUT:
+ done = true;
+ break;
+ case XD3_OUTPUT:
+ CHECK_EQ(0, xd3_whole_append_window (&stream_));
+ break;
+ case XD3_GOTHEADER:
+ case XD3_WINSTART:
+ case XD3_WINFINISH:
+ break;
+ default:
+ cerr << "decode: " << done;
+ abort();
+ }
+ }
+ }
+
+ ~Delta() {
+ xd3_free_stream(&stream_);
+ }
+
+ xoff_t AddedBytes() const {
+ return stream_.whole_target.addslen;
+ }
+
+ xoff_t Windows() const {
+ return stream_.whole_target.wininfolen;
+ }
+
+// Note: This does not benefit from -Wformat= checking, due to the
+// enclosing template. Further, it was not used.
+// void Print() const {
+// for (size_t i = 0; i < stream_.whole_target.instlen; i++) {
+// xd3_winst &winst = stream_.whole_target.inst[i];
+// switch (winst.type) {
+// case XD3_RUN:
+// DP(RINT, "%" Q "u run %" W "u\n", winst.position, winst.size);
+// break;
+// case XD3_ADD:
+// DP(RINT "%" Q "u add %" W "u\n", winst.position, winst.size);
+// break;
+// default:
+// DP(RINT "%" Q "u copy %" W "u @ %" Q "u (mode %u)\n",
+// winst.position, winst.size, winst.addr, winst.mode);
+// break;
+// }
+// }
+// }
+
+private:
+ xd3_stream stream_;
+};
diff --git a/testing/file.h b/testing/file.h
new file mode 100644
index 0000000..2188f7a
--- /dev/null
+++ b/testing/file.h
@@ -0,0 +1,385 @@
+/* -*- Mode: C++ -*- */
+class Block;
+class BlockIterator;
+class TmpFile;
+
+class Block {
+public:
+ Block()
+ : data_(NULL),
+ data_size_(0),
+ size_(0) { }
+
+ ~Block() {
+ if (data_) {
+ delete [] data_;
+ }
+ }
+
+ size_t Size() const {
+ return size_;
+ }
+
+ uint8_t operator[](size_t i) const {
+ CHECK_LT(i, size_);
+ return data_[i];
+ }
+
+ uint8_t* Data() const {
+ if (data_ == NULL) {
+ CHECK_EQ(0, size_);
+ data_size_ = 1;
+ data_ = new uint8_t[1];
+ }
+ return data_;
+ }
+
+ // For writing to blocks
+ void Append(const uint8_t *data, size_t size) {
+ if (data_ == NULL) {
+ CHECK_EQ(0, size_);
+ CHECK_EQ(0, data_size_);
+ data_ = new uint8_t[Constants::BLOCK_SIZE];
+ data_size_ = Constants::BLOCK_SIZE;
+ }
+
+ if (size_ + size > data_size_) {
+ uint8_t *tmp = data_;
+ while (size_ + size > data_size_) {
+ data_size_ *= 2;
+ }
+ data_ = new uint8_t[data_size_];
+ memcpy(data_, tmp, size_);
+ delete [] tmp;
+ }
+
+ memcpy(data_ + size_, data, size);
+ size_ += size;
+ }
+
+ // For cleaing a block
+ void Reset() {
+ size_ = 0;
+ }
+
+ // Note: This does not benefit from -Wformat= checking, due to the
+ // enclosing template. Further, it was not used.
+ // void Print() const {
+ // xoff_t pos = 0;
+ // for (size_t i = 0; i < Size(); i++) {
+ // if (pos % 16 == 0) {
+ // DP(RINT "%5" Q "x: ", pos);
+ // }
+ // DP(RINT "%02x ", (*this)[i]);
+ // if (pos % 16 == 15) {
+ // DP(RINT "\n");
+ // }
+ // pos++;
+ // }
+ // DP(RINT "\n");
+ // }
+
+ void WriteTmpFile(TmpFile *f) const {
+ f->Append(this);
+ }
+
+ void SetSize(size_t size) {
+ uint8_t *t = NULL;
+ if (data_size_ < size) {
+ if (data_) {
+ t = data_;
+ }
+ data_ = new uint8_t[size];
+ data_size_ = size;
+ }
+ if (t && size < size_) {
+ memcpy(data_, t, size);
+ }
+ delete [] t;
+ size_ = size;
+ }
+
+private:
+ friend class BlockIterator;
+
+ mutable uint8_t *data_;
+ mutable size_t data_size_;
+ size_t size_;
+};
+
+class FileSpec {
+ public:
+ FileSpec(MTRandom *rand)
+ : rand_(rand) {
+ }
+
+ // Generates a file with a known size
+ void GenerateFixedSize(xoff_t size) {
+ Reset();
+
+ for (xoff_t p = 0; p < size; ) {
+ xoff_t t = min(Constants::BLOCK_SIZE, size - p);
+ table_.insert(make_pair(p, Segment(t, rand_)));
+ p += t;
+ }
+ }
+
+ // Generates a file with exponential-random distributed size
+ void GenerateRandomSize(xoff_t mean) {
+ GenerateFixedSize(rand_->ExpRand(mean));
+ }
+
+ // Returns the size of the file
+ xoff_t Size() const {
+ if (table_.empty()) {
+ return 0;
+ }
+ ConstSegmentMapIterator i = --table_.end();
+ return i->first + i->second.Size();
+ }
+
+ // Returns the number of blocks
+ xoff_t Blocks(size_t blksize = Constants::BLOCK_SIZE) const {
+ if (table_.empty()) {
+ return 0;
+ }
+ return ((Size() - 1) / blksize) + 1;
+ }
+
+ // Returns the number of segments
+ xoff_t Segments() const {
+ return table_.size();
+ }
+
+ // Create a mutation according to "what".
+ void ModifyTo(const Mutator &mutator,
+ FileSpec *modify) const {
+ modify->Reset();
+ mutator.Mutate(&modify->table_, &table_, rand_);
+ modify->CheckSegments();
+ }
+
+ void CheckSegments() const {
+ for (ConstSegmentMapIterator iter(table_.begin());
+ iter != table_.end(); ) {
+ ConstSegmentMapIterator iter0(iter++);
+ if (iter == table_.end()) {
+ break;
+ }
+ CHECK_EQ(iter0->first + iter0->second.Size(), iter->first);
+ }
+ }
+
+ void Reset() {
+ table_.clear();
+ }
+
+ void Print() const {
+ for (ConstSegmentMapIterator iter(table_.begin());
+ iter != table_.end();
+ ++iter) {
+ const Segment &seg = iter->second;
+ cerr << "Segment at " << iter->first
+ << " (" << seg.ToString() << ")" << endl;
+ }
+ }
+
+ void PrintData() const {
+ Block block;
+ for (BlockIterator iter(*this); !iter.Done(); iter.Next()) {
+ iter.Get(&block);
+ block.Print();
+ }
+ }
+
+ void WriteTmpFile(TmpFile *f) const {
+ Block block;
+ for (BlockIterator iter(*this); !iter.Done(); iter.Next()) {
+ iter.Get(&block);
+ f->Append(&block);
+ }
+ }
+
+ void Get(Block *block, xoff_t offset, size_t size) const {
+ size_t got = 0;
+ block->SetSize(size);
+
+ ConstSegmentMapIterator pos = table_.upper_bound(offset);
+ if (pos == table_.begin()) {
+ CHECK_EQ(0, Size());
+ return;
+ }
+ --pos;
+
+ while (got < size) {
+ CHECK(pos != table_.end());
+ CHECK_GE(offset, pos->first);
+
+ const Segment &seg = pos->second;
+
+ // The position of this segment may start before this block starts,
+ // and then the position of the data may be offset from the seeding
+ // position.
+ size_t seg_offset = offset - pos->first;
+ size_t advance = min(seg.Size() - seg_offset,
+ size - got);
+
+ seg.Fill(seg_offset, advance, block->Data() + got);
+
+ got += advance;
+ offset += advance;
+ ++pos;
+ }
+ }
+
+ typedef BlockIterator iterator;
+
+ private:
+ friend class BlockIterator;
+
+ MTRandom *rand_;
+ SegmentMap table_;
+};
+
+class BlockIterator {
+public:
+ explicit BlockIterator(const FileSpec& spec)
+ : spec_(spec),
+ blkno_(0),
+ blksize_(Constants::BLOCK_SIZE) { }
+
+ BlockIterator(const FileSpec& spec,
+ size_t blksize)
+ : spec_(spec),
+ blkno_(0),
+ blksize_(blksize) { }
+
+ bool Done() const {
+ return blkno_ >= spec_.Blocks(blksize_);
+ }
+
+ void Next() {
+ blkno_++;
+ }
+
+ xoff_t Blkno() const {
+ return blkno_;
+ }
+
+ xoff_t Blocks() const {
+ return spec_.Blocks(blksize_);
+ }
+
+ xoff_t Offset() const {
+ return blkno_ * blksize_;
+ }
+
+ void SetBlock(xoff_t blkno) {
+ CHECK_LE(blkno, Blocks());
+ blkno_ = blkno;
+ }
+
+ void Get(Block *block) const {
+ spec_.Get(block, blkno_ * blksize_, BytesOnBlock());
+ }
+
+ size_t BytesOnBlock() const {
+ xoff_t blocks = spec_.Blocks(blksize_);
+ xoff_t size = spec_.Size();
+
+ DCHECK((blkno_ < blocks) ||
+ (blkno_ == blocks && size % blksize_ == 0));
+
+ if (blkno_ == blocks) {
+ return 0;
+ }
+ if (blkno_ + 1 == blocks) {
+ return ((size - 1) % blksize_) + 1;
+ }
+ return blksize_;
+ }
+
+ size_t BlockSize() const {
+ return blksize_;
+ }
+
+private:
+ const FileSpec& spec_;
+ xoff_t blkno_;
+ size_t blksize_;
+};
+
+class ExtFile {
+public:
+ ExtFile() {
+ static int static_counter = 0;
+ pid_t pid = getpid();
+ char buf[64];
+ xoff_t xpid = pid;
+ snprintf(buf, 64, "/tmp/regtest.%" Q "u.%d", xpid, static_counter++);
+ filename_.append(buf);
+ unlink(filename_.c_str());
+ }
+
+ ~ExtFile() {
+ unlink(filename_.c_str());
+ }
+
+ const char* Name() const {
+ return filename_.c_str();
+ }
+
+ // Check whether a real file matches a file spec.
+ bool EqualsSpec(const FileSpec &spec) const {
+ main_file t;
+ main_file_init(&t);
+ CHECK_EQ(0, main_file_open(&t, Name(), XO_READ));
+
+ Block tblock;
+ Block sblock;
+ for (BlockIterator iter(spec); !iter.Done(); iter.Next()) {
+ iter.Get(&sblock);
+ tblock.SetSize(sblock.Size());
+ size_t tread;
+ CHECK_EQ(0, main_file_read(&t,
+ tblock.Data(),
+ tblock.Size(), &tread, "read failed"));
+ CHECK_EQ(0, CmpDifferentBlockBytes(tblock, sblock));
+ }
+
+ CHECK_EQ(0, main_file_close(&t));
+ main_file_cleanup(&t);
+ return true;
+ }
+
+protected:
+ string filename_;
+};
+
+class TmpFile : public ExtFile {
+public:
+ TmpFile() {
+ main_file_init(&file_);
+ CHECK_EQ(0, main_file_open(&file_, Name(), XO_WRITE));
+ }
+
+ ~TmpFile() {
+ main_file_cleanup(&file_);
+ }
+
+ void Append(const Block *block) {
+ CHECK_EQ(0, main_file_write(&file_,
+ block->Data(), block->Size(),
+ "tmpfile write failed"));
+ }
+
+ const char* Name() const {
+ if (main_file_isopen(&file_)) {
+ CHECK_EQ(0, main_file_close(&file_));
+ }
+ return ExtFile::Name();
+ }
+
+private:
+ mutable main_file file_;
+};
diff --git a/testing/modify.h b/testing/modify.h
new file mode 100644
index 0000000..93f7bde
--- /dev/null
+++ b/testing/modify.h
@@ -0,0 +1,385 @@
+// -*- Mode: C++ -*-
+class Mutator {
+public:
+ virtual ~Mutator() { }
+ virtual void Mutate(SegmentMap *table,
+ const SegmentMap *source_table,
+ MTRandom *rand) const = 0;
+};
+
+class Change {
+public:
+ enum Kind {
+ MODIFY = 1, // Mutate a certain range w/ random or supplied data
+ ADD = 2, // Insert random or supplied data
+ DELRANGE = 3, // Delete a specified range of data
+ COPY = 4, // Copy from one region, inserting elsewhere
+ MOVE = 5, // Copy then delete copied-from range
+ COPYOVER = 6 // Copy then delete copied-to range
+
+ // ADD, DELRANGE, and COPY change the file size
+ // MODIFY, MOVE, COPYOVER preserve the file size
+ };
+
+ // Constructor for modify, add, delete.
+ Change(Kind kind0, xoff_t size0, xoff_t addr1_0)
+ : kind(kind0),
+ size(size0),
+ addr1(addr1_0),
+ addr2(0),
+ insert(NULL) {
+ CHECK(kind != MOVE && kind != COPY && kind != COPYOVER);
+ }
+
+ // Constructor for modify, add w/ provided data.
+ Change(Kind kind0, xoff_t size0, xoff_t addr1_0, Segment *insert0)
+ : kind(kind0),
+ size(size0),
+ addr1(addr1_0),
+ addr2(0),
+ insert(insert0) {
+ CHECK(kind != MOVE && kind != COPY && kind != COPYOVER);
+ }
+
+ // Constructor for move, copy, overwrite
+ Change(Kind kind0, xoff_t size0, xoff_t addr1_0, xoff_t addr2_0)
+ : kind(kind0),
+ size(size0),
+ addr1(addr1_0),
+ addr2(addr2_0),
+ insert(NULL) {
+ CHECK(kind == MOVE || kind == COPY || kind == COPYOVER);
+ }
+
+ Kind kind;
+ xoff_t size;
+ xoff_t addr1;
+ xoff_t addr2;
+ Segment *insert; // For modify and/or add
+};
+
+typedef list<Change> ChangeList;
+typedef typename ChangeList::const_iterator ConstChangeListIterator;
+typedef typename ChangeList::iterator ChangeListIterator;
+
+class ChangeListMutator : public Mutator {
+public:
+ ChangeListMutator(const ChangeList &cl)
+ : cl_(cl) { }
+
+ ChangeListMutator() { }
+
+ void Mutate(SegmentMap *table,
+ const SegmentMap *source_table,
+ MTRandom *rand) const {
+ // The speed of processing gigabytes of data is so slow compared with
+ // these table-copy operations, no attempt to make this fast.
+ SegmentMap tmp;
+
+ for (ConstChangeListIterator iter(cl_.begin());
+ iter != cl_.end(); ++iter) {
+ const Change &ch = *iter;
+ tmp.clear();
+ Mutate(ch, &tmp, source_table, rand);
+ tmp.swap(*table);
+ source_table = table;
+ }
+ }
+
+ static void Mutate(const Change &ch,
+ SegmentMap *table,
+ const SegmentMap *source_table,
+ MTRandom *rand) {
+ switch (ch.kind) {
+ case Change::ADD:
+ AddChange(ch, table, source_table, rand);
+ break;
+ case Change::MODIFY:
+ ModifyChange(ch, table, source_table, rand);
+ break;
+ case Change::DELRANGE:
+ DeleteChange(ch, table, source_table, rand);
+ break;
+ case Change::COPY:
+ CopyChange(ch, table, source_table, rand);
+ break;
+ case Change::MOVE:
+ MoveChange(ch, table, source_table, rand);
+ break;
+ case Change::COPYOVER:
+ OverwriteChange(ch, table, source_table, rand);
+ break;
+ }
+ }
+
+ static void ModifyChange(const Change &ch,
+ SegmentMap *table,
+ const SegmentMap *source_table,
+ MTRandom *rand) {
+ xoff_t m_start = ch.addr1;
+ xoff_t m_end = m_start + ch.size;
+ xoff_t i_start = 0;
+ xoff_t i_end = 0;
+
+ for (ConstSegmentMapIterator iter(source_table->begin());
+ iter != source_table->end();
+ ++iter) {
+ const Segment &seg = iter->second;
+ i_start = iter->first;
+ i_end = i_start + seg.Size();
+
+ if (i_end <= m_start || i_start >= m_end) {
+ table->insert(table->end(), make_pair(i_start, seg));
+ continue;
+ }
+
+ if (i_start < m_start) {
+ table->insert(table->end(),
+ make_pair(i_start,
+ seg.Subseg(0, m_start - i_start)));
+ }
+
+ // Insert the entire segment, even though it may extend into later
+ // segments. This condition avoids inserting it during later
+ // segments.
+ if (m_start >= i_start) {
+ if (ch.insert != NULL) {
+ table->insert(table->end(), make_pair(m_start, *ch.insert));
+ } else {
+ Segment part(m_end - m_start, rand);
+ table->insert(table->end(), make_pair(m_start, part));
+ }
+ }
+
+ if (i_end > m_end) {
+ table->insert(table->end(),
+ make_pair(m_end,
+ seg.Subseg(m_end - i_start, i_end - m_end)));
+ }
+ }
+
+ // This check verifies that the modify does not extend past the
+ // source_table EOF.
+ CHECK_LE(m_end, i_end);
+ }
+
+ static void AddChange(const Change &ch,
+ SegmentMap *table,
+ const SegmentMap *source_table,
+ MTRandom *rand) {
+ xoff_t m_start = ch.addr1;
+ xoff_t i_start = 0;
+ xoff_t i_end = 0;
+
+ for (ConstSegmentMapIterator iter(source_table->begin());
+ iter != source_table->end();
+ ++iter) {
+ const Segment &seg = iter->second;
+ i_start = iter->first;
+ i_end = i_start + seg.Size();
+
+ if (i_end <= m_start) {
+ table->insert(table->end(), make_pair(i_start, seg));
+ continue;
+ }
+
+ if (i_start > m_start) {
+ table->insert(table->end(), make_pair(i_start + ch.size, seg));
+ continue;
+ }
+
+ if (i_start < m_start) {
+ table->insert(table->end(),
+ make_pair(i_start,
+ seg.Subseg(0, m_start - i_start)));
+ }
+
+ if (ch.insert != NULL) {
+ table->insert(table->end(), make_pair(m_start, *ch.insert));
+ } else {
+ Segment addseg(ch.size, rand);
+ table->insert(table->end(), make_pair(m_start, addseg));
+ }
+
+ if (m_start < i_end) {
+ table->insert(table->end(),
+ make_pair(m_start + ch.size,
+ seg.Subseg(m_start - i_start,
+ i_end - m_start)));
+ }
+ }
+
+ CHECK_LE(m_start, i_end);
+
+ // Special case for add at end-of-input.
+ if (m_start == i_end) {
+ Segment addseg(ch.size, rand);
+ table->insert(table->end(), make_pair(m_start, addseg));
+ }
+ }
+
+ static void DeleteChange(const Change &ch,
+ SegmentMap *table,
+ const SegmentMap *source_table,
+ MTRandom *rand) {
+ xoff_t m_start = ch.addr1;
+ xoff_t m_end = m_start + ch.size;
+ xoff_t i_start = 0;
+ xoff_t i_end = 0;
+
+ for (ConstSegmentMapIterator iter(source_table->begin());
+ iter != source_table->end();
+ ++iter) {
+ const Segment &seg = iter->second;
+ i_start = iter->first;
+ i_end = i_start + seg.Size();
+
+ if (i_end <= m_start) {
+ table->insert(table->end(), make_pair(i_start, seg));
+ continue;
+ }
+
+ if (i_start >= m_end) {
+ table->insert(table->end(), make_pair(i_start - ch.size, seg));
+ continue;
+ }
+
+ if (i_start < m_start) {
+ table->insert(table->end(),
+ make_pair(i_start,
+ seg.Subseg(0, m_start - i_start)));
+ }
+
+ if (i_end > m_end) {
+ table->insert(table->end(),
+ make_pair(m_end - ch.size,
+ seg.Subseg(m_end - i_start, i_end - m_end)));
+ }
+ }
+
+ CHECK_LT(m_start, i_end);
+ CHECK_LE(m_end, i_end);
+ }
+
+ // A move is a copy followed by delete of the copied-from range.
+ static void MoveChange(const Change &ch,
+ SegmentMap *table,
+ const SegmentMap *source_table,
+ MTRandom *rand) {
+ SegmentMap tmp;
+ CHECK_NE(ch.addr1, ch.addr2);
+ CopyChange(ch, &tmp, source_table, rand);
+ Change d(Change::DELRANGE, ch.size,
+ ch.addr1 < ch.addr2 ? ch.addr1 : ch.addr1 + ch.size);
+ DeleteChange(d, table, &tmp, rand);
+ }
+
+ // An overwrite is a copy followed by a delete of the copied-to range.
+ static void OverwriteChange(const Change &ch,
+ SegmentMap *table,
+ const SegmentMap *source_table,
+ MTRandom *rand) {
+ SegmentMap tmp;
+ CHECK_NE(ch.addr1, ch.addr2);
+ CopyChange(ch, &tmp, source_table, rand);
+ Change d(Change::DELRANGE, ch.size, ch.addr2 + ch.size);
+ DeleteChange(d, table, &tmp, rand);
+ }
+
+ static void CopyChange(const Change &ch,
+ SegmentMap *table,
+ const SegmentMap *source_table,
+ MTRandom *ignore) {
+ xoff_t m_start = ch.addr2;
+ xoff_t c_start = ch.addr1;
+ xoff_t i_start = 0;
+ xoff_t i_end = 0;
+
+ // Like AddChange() with AppendCopy instead of a random segment.
+ for (ConstSegmentMapIterator iter(source_table->begin());
+ iter != source_table->end();
+ ++iter) {
+ const Segment &seg = iter->second;
+ i_start = iter->first;
+ i_end = i_start + seg.Size();
+
+ if (i_end <= m_start) {
+ table->insert(table->end(), make_pair(i_start, seg));
+ continue;
+ }
+
+ if (i_start > m_start) {
+ table->insert(table->end(), make_pair(i_start + ch.size, seg));
+ continue;
+ }
+
+ if (i_start < m_start) {
+ table->insert(table->end(),
+ make_pair(i_start,
+ seg.Subseg(0, m_start - i_start)));
+ }
+
+ AppendCopy(table, source_table, c_start, m_start, ch.size);
+
+ if (m_start < i_end) {
+ table->insert(table->end(),
+ make_pair(m_start + ch.size,
+ seg.Subseg(m_start - i_start, i_end - m_start)));
+ }
+ }
+
+ CHECK_LE(m_start, i_end);
+
+ // Special case for copy to end-of-input.
+ if (m_start == i_end) {
+ AppendCopy(table, source_table, c_start, m_start, ch.size);
+ }
+ }
+
+ static void AppendCopy(SegmentMap *table,
+ const SegmentMap *source_table,
+ xoff_t copy_offset,
+ xoff_t append_offset,
+ xoff_t length) {
+ ConstSegmentMapIterator pos(source_table->upper_bound(copy_offset));
+ --pos;
+ xoff_t got = 0;
+
+ while (got < length) {
+ size_t seg_offset = copy_offset - pos->first;
+ size_t advance = min(pos->second.Size() - seg_offset,
+ (size_t)(length - got));
+
+ table->insert(table->end(),
+ make_pair(append_offset,
+ pos->second.Subseg(seg_offset,
+ advance)));
+
+ got += advance;
+ copy_offset += advance;
+ append_offset += advance;
+ ++pos;
+ }
+ }
+
+ ChangeList* Changes() {
+ return &cl_;
+ }
+
+ const ChangeList* Changes() const {
+ return &cl_;
+ }
+
+private:
+ ChangeList cl_;
+};
+
+class Modify1stByte : public Mutator {
+public:
+ void Mutate(SegmentMap *table,
+ const SegmentMap *source_table,
+ MTRandom *rand) const {
+ ChangeListMutator::Mutate(Change(Change::MODIFY, 1, 0),
+ table, source_table, rand);
+ }
+};
diff --git a/testing/random.h b/testing/random.h
new file mode 100644
index 0000000..8ff64db
--- /dev/null
+++ b/testing/random.h
@@ -0,0 +1,143 @@
+/* -*- Mode: C++ -*- */
+/* This is public-domain Mersenne Twister code,
+ * attributed to Michael Brundage. Thanks!
+ * http://www.qbrundage.com/michaelb/pubs/essays/random_number_generation.html
+ */
+#undef MT_LEN
+#undef MT_IA
+class MTRandom {
+ public:
+ enum Constants {
+ MT_LEN = 624,
+ MT_IA = 397
+ };
+
+ static const uint32_t TEST_SEED1;
+ static const uint32_t UPPER_MASK;
+ static const uint32_t LOWER_MASK;
+ static const uint32_t MATRIX_A;
+
+ MTRandom() {
+ Init(TEST_SEED1);
+ }
+
+ explicit MTRandom(uint32_t seed) {
+ Init(seed);
+ }
+
+ uint32_t Rand32 () {
+ uint32_t y;
+ static unsigned long mag01[2] = {
+ 0 , MATRIX_A
+ };
+
+ if (mt_index_ >= MT_LEN) {
+ int kk;
+
+ for (kk = 0; kk < MT_LEN - MT_IA; kk++) {
+ y = (mt_buffer_[kk] & UPPER_MASK) | (mt_buffer_[kk + 1] & LOWER_MASK);
+ mt_buffer_[kk] = mt_buffer_[kk + MT_IA] ^ (y >> 1) ^ mag01[y & 0x1UL];
+ }
+ for (;kk < MT_LEN - 1; kk++) {
+ y = (mt_buffer_[kk] & UPPER_MASK) | (mt_buffer_[kk + 1] & LOWER_MASK);
+ mt_buffer_[kk] = mt_buffer_[kk + (MT_IA - MT_LEN)] ^ (y >> 1) ^ mag01[y & 0x1UL];
+ }
+ y = (mt_buffer_[MT_LEN - 1] & UPPER_MASK) | (mt_buffer_[0] & LOWER_MASK);
+ mt_buffer_[MT_LEN - 1] = mt_buffer_[MT_IA - 1] ^ (y >> 1) ^ mag01[y & 0x1UL];
+
+ mt_index_ = 0;
+ }
+
+ y = mt_buffer_[mt_index_++];
+
+ y ^= (y >> 11);
+ y ^= (y << 7) & 0x9d2c5680UL;
+ y ^= (y << 15) & 0xefc60000UL;
+ y ^= (y >> 18);
+
+ return y;
+ }
+
+ uint32_t ExpRand32(uint32_t mean) {
+ double mean_d = mean;
+ double erand = log (1.0 / (Rand32() / (double)UINT32_MAX));
+ uint32_t x = (uint32_t) (mean_d * erand + 0.5);
+ return x;
+ }
+
+ uint64_t Rand64() {
+ return ((uint64_t)Rand32() << 32) | Rand32();
+ }
+
+  uint64_t ExpRand64(uint64_t mean) {
+    double mean_d = mean;
+    double erand = log (1.0 / (Rand64() / (double)UINT64_MAX));
+    uint64_t x = (uint64_t) (mean_d * erand + 0.5);
+    return x;
+  }
+
+ template <typename T>
+ T Rand() {
+ switch (sizeof(T)) {
+ case sizeof(uint32_t):
+ return Rand32();
+ case sizeof(uint64_t):
+ return Rand64();
+ default:
+ cerr << "Invalid sizeof T" << endl;
+ abort();
+ }
+ }
+
+ template <typename T>
+ T ExpRand(T mean) {
+ switch (sizeof(T)) {
+ case sizeof(uint32_t):
+ return ExpRand32(mean);
+ case sizeof(uint64_t):
+ return ExpRand64(mean);
+ default:
+ cerr << "Invalid sizeof T" << endl;
+ abort();
+ }
+ }
+
+ private:
+ void Init(uint32_t seed) {
+ mt_buffer_[0] = seed;
+ mt_index_ = MT_LEN;
+ for (int i = 1; i < MT_LEN; i++) {
+ /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
+ /* In the previous versions, MSBs of the seed affect */
+ /* only MSBs of the array mt[]. */
+ /* 2002/01/09 modified by Makoto Matsumoto */
+ mt_buffer_[i] =
+ (1812433253UL * (mt_buffer_[i-1] ^ (mt_buffer_[i-1] >> 30)) + i);
+ }
+ }
+
+ int mt_index_;
+ uint32_t mt_buffer_[MT_LEN];
+};
+
+const uint32_t MTRandom::TEST_SEED1 = 5489UL;
+const uint32_t MTRandom::UPPER_MASK = 0x80000000;
+const uint32_t MTRandom::LOWER_MASK = 0x7FFFFFFF;
+const uint32_t MTRandom::MATRIX_A = 0x9908B0DF;
+
+class MTRandom8 {
+public:
+ MTRandom8(MTRandom *rand)
+ : rand_(rand) {
+ }
+
+ uint8_t Rand8() {
+ uint32_t r = rand_->Rand32();
+
+ // TODO: make this use a single byte at a time?
+ return (r & 0xff) ^ (r >> 7) ^ (r >> 15) ^ (r >> 21);
+ }
+
+private:
+ MTRandom *rand_;
+};
diff --git a/testing/regtest.cc b/testing/regtest.cc
new file mode 100644
index 0000000..dd2ab8d
--- /dev/null
+++ b/testing/regtest.cc
@@ -0,0 +1,1306 @@
+/* -*- Mode: C++ -*- */
+#include "test.h"
+#include "random.h"
+#include "sizes.h"
+
+template <typename Constants>
+class Regtest {
+public:
+ typedef typename Constants::Sizes Sizes;
+
+ struct Options {
+ Options()
+ : encode_srcwin_maxsz(1<<20),
+ block_size(Constants::BLOCK_SIZE),
+ window_size(Constants::WINDOW_SIZE),
+ size_known(false),
+ iopt_size(XD3_DEFAULT_IOPT_SIZE),
+ smatch_cfg(XD3_SMATCH_DEFAULT) { }
+
+ xoff_t encode_srcwin_maxsz;
+ size_t block_size;
+ xoff_t window_size;
+ bool size_known;
+ usize_t iopt_size;
+ xd3_smatch_cfg smatch_cfg;
+ };
+
+#include "segment.h"
+#include "modify.h"
+#include "file.h"
+#include "cmp.h"
+#include "delta.h"
+
+ void InMemoryEncodeDecode(const FileSpec &source_file,
+ const FileSpec &target_file,
+ Block *coded_data,
+ const Options &options) {
+ xd3_stream encode_stream;
+ xd3_config encode_config;
+ xd3_source encode_source;
+
+ xd3_stream decode_stream;
+ xd3_config decode_config;
+ xd3_source decode_source;
+ xoff_t verified_bytes = 0;
+ xoff_t encoded_bytes = 0;
+
+ if (coded_data) {
+ coded_data->Reset();
+ }
+
+ memset(&encode_stream, 0, sizeof (encode_stream));
+ memset(&encode_source, 0, sizeof (encode_source));
+
+ memset(&decode_stream, 0, sizeof (decode_stream));
+ memset(&decode_source, 0, sizeof (decode_source));
+
+ xd3_init_config(&encode_config, XD3_ADLER32);
+ xd3_init_config(&decode_config, XD3_ADLER32);
+
+ encode_config.winsize = options.window_size;
+ encode_config.iopt_size = options.iopt_size;
+ encode_config.smatch_cfg = options.smatch_cfg;
+
+ CHECK_EQ(0, xd3_config_stream (&encode_stream, &encode_config));
+ CHECK_EQ(0, xd3_config_stream (&decode_stream, &decode_config));
+
+ encode_source.blksize = options.block_size;
+ decode_source.blksize = options.block_size;
+
+ encode_source.max_winsize = options.encode_srcwin_maxsz;
+ decode_source.max_winsize = options.encode_srcwin_maxsz;
+
+ if (!options.size_known)
+ {
+ xd3_set_source (&encode_stream, &encode_source);
+ xd3_set_source (&decode_stream, &decode_source);
+ }
+ else
+ {
+ xd3_set_source_and_size (&encode_stream, &encode_source,
+ source_file.Size());
+ xd3_set_source_and_size (&decode_stream, &decode_source,
+ source_file.Size());
+ }
+
+ BlockIterator source_iterator(source_file, options.block_size);
+ BlockIterator target_iterator(target_file, Constants::WINDOW_SIZE);
+ Block encode_source_block, decode_source_block;
+ Block decoded_block, target_block;
+ bool encoding = true;
+ bool done = false;
+ bool done_after_input = false;
+
+ IF_DEBUG1 (XPR(NTR "source %" Q "u[%" Z "u] target %" Q "u winsize %" Z "u\n",
+ source_file.Size(), options.block_size,
+ target_file.Size(),
+ Constants::WINDOW_SIZE));
+
+ while (!done) {
+ target_iterator.Get(&target_block);
+
+ xoff_t blks = target_iterator.Blocks();
+
+ IF_DEBUG2(XPR(NTR "target in %s: %" Q "u[%" Z "u] %" Q "u(%" Q "u) "
+ "verified %" Q "u\n",
+ encoding ? "encoding" : "decoding",
+ target_iterator.Offset(),
+ target_block.Size(),
+ target_iterator.Blkno(),
+ blks,
+ verified_bytes));
+
+ if (blks == 0 || target_iterator.Blkno() == (blks - 1)) {
+ xd3_set_flags(&encode_stream, XD3_FLUSH | encode_stream.flags);
+ }
+
+ xd3_avail_input(&encode_stream, target_block.Data(), target_block.Size());
+ encoded_bytes += target_block.Size();
+
+ process:
+ int ret;
+ const char *msg;
+ if (encoding) {
+ ret = xd3_encode_input(&encode_stream);
+ msg = encode_stream.msg;
+ } else {
+ ret = xd3_decode_input(&decode_stream);
+ msg = decode_stream.msg;
+ }
+ (void) msg;
+
+ switch (ret) {
+ case XD3_OUTPUT:
+ if (encoding) {
+ if (coded_data != NULL) {
+ // Optional encoded-output to the caller
+ coded_data->Append(encode_stream.next_out,
+ encode_stream.avail_out);
+ }
+ // Feed this data to the decoder.
+ xd3_avail_input(&decode_stream,
+ encode_stream.next_out,
+ encode_stream.avail_out);
+ xd3_consume_output(&encode_stream);
+ encoding = false;
+ } else {
+ decoded_block.Append(decode_stream.next_out,
+ decode_stream.avail_out);
+ xd3_consume_output(&decode_stream);
+ }
+ goto process;
+
+ case XD3_GETSRCBLK: {
+ xd3_source *src = (encoding ? &encode_source : &decode_source);
+ Block *block = (encoding ? &encode_source_block : &decode_source_block);
+ if (encoding) {
+ IF_DEBUG2(XPR(NTR "[srcblock] %" Q "u last srcpos %" Q "u "
+ "encodepos %" Q "u\n",
+ encode_source.getblkno,
+ encode_stream.match_last_srcpos,
+ encode_stream.input_position + encode_stream.total_in));
+ }
+
+ source_iterator.SetBlock(src->getblkno);
+ source_iterator.Get(block);
+ src->curblkno = src->getblkno;
+ src->onblk = block->Size();
+ src->curblk = block->Data();
+
+ goto process;
+ }
+
+ case XD3_INPUT:
+ if (!encoding) {
+ encoding = true;
+ goto process;
+ } else {
+ if (done_after_input) {
+ done = true;
+ continue;
+ }
+
+ if (target_block.Size() < target_iterator.BlockSize()) {
+ encoding = false;
+ } else {
+ target_iterator.Next();
+ }
+ continue;
+ }
+
+ case XD3_WINFINISH:
+ if (encoding) {
+ if (encode_stream.flags & XD3_FLUSH) {
+ done_after_input = true;
+ }
+ encoding = false;
+ } else {
+ CHECK_EQ(0, CmpDifferentBlockBytesAtOffset(decoded_block,
+ target_file,
+ verified_bytes));
+ verified_bytes += decoded_block.Size();
+ decoded_block.Reset();
+ encoding = true;
+ }
+ goto process;
+
+ case XD3_WINSTART:
+ case XD3_GOTHEADER:
+ goto process;
+
+ default:
+ XPR(NTR "%s = %s %s\n", encoding ? "E " : " D",
+ xd3_strerror(ret),
+ msg == NULL ? "" : msg);
+
+ CHECK_EQ(0, ret);
+ CHECK_EQ(-1, ret);
+ }
+ }
+
+ CHECK_EQ(target_file.Size(), encoded_bytes);
+ CHECK_EQ(target_file.Size(), verified_bytes);
+ CHECK_EQ(0, xd3_close_stream(&decode_stream));
+ CHECK_EQ(0, xd3_close_stream(&encode_stream));
+ xd3_free_stream(&encode_stream);
+ xd3_free_stream(&decode_stream);
+ }
+
+  void MainEncodeDecode(const TmpFile &source_file,
+			const TmpFile &target_file,
+			ExtFile *coded_data,
+			const Options &options) {
+    vector<const char*> ecmd;
+    char bbuf[16];
+    snprintf(bbuf, sizeof(bbuf), "-B%" Q "u", options.encode_srcwin_maxsz);
+    ecmd.push_back("xdelta3");
+    ecmd.push_back(bbuf);
+    ecmd.push_back("-s");
+    ecmd.push_back(source_file.Name());
+    ecmd.push_back(target_file.Name());
+    ecmd.push_back(coded_data->Name());
+    ecmd.push_back(NULL);
+
+    CHECK_EQ(0, xd3_main_cmdline(ecmd.size() - 1,
+				 const_cast<char**>(&ecmd[0])));
+
+    vector<const char*> dcmd;
+    ExtFile recon_file;
+    dcmd.push_back("xdelta3");
+    dcmd.push_back(bbuf);
+    dcmd.push_back("-d");
+    dcmd.push_back("-s");
+    dcmd.push_back(source_file.Name());
+    dcmd.push_back(coded_data->Name());
+    dcmd.push_back(recon_file.Name());
+    dcmd.push_back(NULL);
+
+    CHECK_EQ(0, xd3_main_cmdline(dcmd.size() - 1,
+				 const_cast<char**>(&dcmd[0])));
+
+    CHECK_EQ(0, test_compare_files(recon_file.Name(),
+				   target_file.Name()));
+  }
+
+ // Similar to xd3_process_memory, with support for test Options.
+ // Exercises xd3_process_stream.
+ int TestProcessMemory (int is_encode,
+ int (*func) (xd3_stream *),
+ const uint8_t *input,
+ usize_t input_size,
+ const uint8_t *source,
+ usize_t source_size,
+ uint8_t *output,
+ usize_t *output_size,
+ usize_t output_size_max,
+ const Options &options) {
+ xd3_stream stream;
+ xd3_config config;
+ xd3_source src;
+ int ret;
+
+ memset (& stream, 0, sizeof (stream));
+ memset (& config, 0, sizeof (config));
+
+ if (is_encode)
+ {
+ config.winsize = input_size;
+ config.iopt_size = options.iopt_size;
+ config.sprevsz = xd3_pow2_roundup (config.winsize);
+ }
+
+ if ((ret = xd3_config_stream (&stream, &config)) != 0)
+ {
+ goto exit;
+ }
+
+ if (source != NULL)
+ {
+ memset (& src, 0, sizeof (src));
+
+ src.blksize = source_size;
+ src.onblk = source_size;
+ src.curblk = source;
+ src.curblkno = 0;
+ src.max_winsize = source_size;
+
+ if ((ret = xd3_set_source_and_size (&stream, &src, source_size)) != 0)
+ {
+ goto exit;
+ }
+ }
+
+ if ((ret = xd3_process_stream (is_encode,
+ & stream,
+ func, 1,
+ input, input_size,
+ output,
+ output_size,
+ output_size_max)) != 0)
+ {
+ goto exit;
+ }
+
+ exit:
+ if (ret != 0)
+ {
+ IF_DEBUG2 (DP(RINT "test_process_memory: %d: %s\n", ret, stream.msg));
+ }
+ xd3_free_stream(&stream);
+ return ret;
+ }
+
+ void EncodeDecodeAPI(const FileSpec &spec0, const FileSpec &spec1,
+ Block *delta, const Options &options) {
+ Block from;
+ Block to;
+ spec0.Get(&from, 0, spec0.Size());
+ spec1.Get(&to, 0, spec1.Size());
+
+ delta->SetSize(to.Size() * 1.5);
+ usize_t out_size;
+ int enc_ret = TestProcessMemory(true,
+ &xd3_encode_input,
+ to.Data(),
+ to.Size(),
+ from.Data(),
+ from.Size(),
+ delta->Data(),
+ &out_size,
+ delta->Size(),
+ options);
+ CHECK_EQ(0, enc_ret);
+ delta->SetSize(out_size);
+
+ Block recon;
+ recon.SetSize(to.Size());
+ usize_t recon_size;
+ int dec_ret = xd3_decode_memory(delta->Data(),
+ delta->Size(),
+ from.Data(),
+ from.Size(),
+ recon.Data(),
+ &recon_size,
+ recon.Size(),
+ 0);
+ CHECK_EQ(0, dec_ret);
+ CHECK_EQ(0, CmpDifferentBlockBytes(to, recon));
+ }
+
+//////////////////////////////////////////////////////////////////////
+
+void TestPrintf() {
+ char buf[64];
+ xoff_t x = XOFF_T_MAX;
+ snprintf_func (buf, sizeof(buf), "%" Q "u", x);
+ const char *expect = XD3_USE_LARGEFILE64 ?
+ "18446744073709551615" : "4294967295";
+ XD3_ASSERT(strcmp (buf, expect) == 0);
+}
+
+void TestRandomNumbers() {
+ MTRandom rand;
+ int rounds = 1<<20;
+ uint64_t usum = 0;
+ uint64_t esum = 0;
+
+ for (int i = 0; i < rounds; i++) {
+ usum += rand.Rand32();
+ esum += rand.ExpRand32(1024);
+ }
+
+ double allowed_error = 0.01;
+
+ uint32_t umean = usum / rounds;
+ uint32_t emean = esum / rounds;
+
+ uint32_t uexpect = UINT32_MAX / 2;
+ uint32_t eexpect = 1024;
+
+ if (umean < uexpect * (1.0 - allowed_error) ||
+ umean > uexpect * (1.0 + allowed_error)) {
+ XPR(NT "uniform mean error: %u != %u\n", umean, uexpect);
+ abort();
+ }
+
+ if (emean < eexpect * (1.0 - allowed_error) ||
+ emean > eexpect * (1.0 + allowed_error)) {
+ XPR(NT "exponential mean error: %u != %u\n", emean, eexpect);
+ abort();
+ }
+}
+
+void TestRandomFile() {
+ MTRandom rand1;
+ FileSpec spec1(&rand1);
+ BlockIterator bi(spec1);
+
+ spec1.GenerateFixedSize(0);
+ CHECK_EQ(0, spec1.Size());
+ CHECK_EQ(0, spec1.Segments());
+ CHECK_EQ(0, spec1.Blocks());
+ bi.SetBlock(0);
+ CHECK_EQ(0, bi.BytesOnBlock());
+
+ spec1.GenerateFixedSize(1);
+ CHECK_EQ(1, spec1.Size());
+ CHECK_EQ(1, spec1.Segments());
+ CHECK_EQ(1, spec1.Blocks());
+ bi.SetBlock(0);
+ CHECK_EQ(1, bi.BytesOnBlock());
+
+ spec1.GenerateFixedSize(Constants::BLOCK_SIZE);
+ CHECK_EQ(Constants::BLOCK_SIZE, spec1.Size());
+ CHECK_EQ(1, spec1.Segments());
+ CHECK_EQ(1, spec1.Blocks());
+ bi.SetBlock(0);
+ CHECK_EQ(Constants::BLOCK_SIZE, bi.BytesOnBlock());
+ bi.SetBlock(1);
+ CHECK_EQ(0, bi.BytesOnBlock());
+
+ spec1.GenerateFixedSize(Constants::BLOCK_SIZE + 1);
+ CHECK_EQ(Constants::BLOCK_SIZE + 1, spec1.Size());
+ CHECK_EQ(2, spec1.Segments());
+ CHECK_EQ(2, spec1.Blocks());
+ bi.SetBlock(0);
+ CHECK_EQ(Constants::BLOCK_SIZE, bi.BytesOnBlock());
+ bi.SetBlock(1);
+ CHECK_EQ(1, bi.BytesOnBlock());
+
+ spec1.GenerateFixedSize(Constants::BLOCK_SIZE * 2);
+ CHECK_EQ(Constants::BLOCK_SIZE * 2, spec1.Size());
+ CHECK_EQ(2, spec1.Segments());
+ CHECK_EQ(2, spec1.Blocks());
+ bi.SetBlock(0);
+ CHECK_EQ(Constants::BLOCK_SIZE, bi.BytesOnBlock());
+ bi.SetBlock(1);
+ CHECK_EQ(Constants::BLOCK_SIZE, bi.BytesOnBlock());
+}
+
+void TestFirstByte() {
+ MTRandom rand;
+ FileSpec spec0(&rand);
+ FileSpec spec1(&rand);
+
+ spec0.GenerateFixedSize(0);
+ spec1.GenerateFixedSize(1);
+ CHECK_EQ(0, CmpDifferentBytes(spec0, spec0));
+ CHECK_EQ(0, CmpDifferentBytes(spec1, spec1));
+ CHECK_EQ(1, CmpDifferentBytes(spec0, spec1));
+ CHECK_EQ(1, CmpDifferentBytes(spec1, spec0));
+
+ spec0.GenerateFixedSize(1);
+ spec0.ModifyTo(Modify1stByte(), &spec1);
+ CHECK_EQ(1, CmpDifferentBytes(spec0, spec1));
+
+ spec0.GenerateFixedSize(Constants::BLOCK_SIZE + 1);
+ spec0.ModifyTo(Modify1stByte(), &spec1);
+ CHECK_EQ(1, CmpDifferentBytes(spec0, spec1));
+
+ SizeIterator<size_t, Sizes> si(&rand, Constants::TEST_ROUNDS);
+
+ for (; !si.Done(); si.Next()) {
+ size_t size = si.Get();
+ if (size == 0) {
+ continue;
+ }
+ spec0.GenerateFixedSize(size);
+ spec0.ModifyTo(Modify1stByte(), &spec1);
+ InMemoryEncodeDecode(spec0, spec1, NULL, Options());
+ }
+}
+
+void TestModifyMutator() {
+ MTRandom rand;
+ FileSpec spec0(&rand);
+ FileSpec spec1(&rand);
+
+ spec0.GenerateFixedSize(Constants::BLOCK_SIZE * 3);
+
+ struct {
+ size_t size;
+ size_t addr;
+ } test_cases[] = {
+ { Constants::BLOCK_SIZE, 0 },
+ { Constants::BLOCK_SIZE / 2, 1 },
+ { Constants::BLOCK_SIZE, 1 },
+ { Constants::BLOCK_SIZE * 2, 1 },
+ };
+
+ for (size_t i = 0; i < SIZEOF_ARRAY(test_cases); i++) {
+ ChangeList cl1;
+ cl1.push_back(Change(Change::MODIFY, test_cases[i].size,
+ test_cases[i].addr));
+ spec0.ModifyTo(ChangeListMutator(cl1), &spec1);
+ CHECK_EQ(spec0.Size(), spec1.Size());
+
+ size_t diff = CmpDifferentBytes(spec0, spec1);
+ CHECK_LE(diff, test_cases[i].size);
+
+ // There is a 1/256 probability of the changed byte matching the
+ // original value. The following allows double the probability to
+ // pass.
+ CHECK_GE(diff, test_cases[i].size - (2 * test_cases[i].size / 256));
+
+ InMemoryEncodeDecode(spec0, spec1, NULL, Options());
+ }
+}
+
+void TestAddMutator() {
+ MTRandom rand;
+ FileSpec spec0(&rand);
+ FileSpec spec1(&rand);
+
+ spec0.GenerateFixedSize(Constants::BLOCK_SIZE * 2);
+ // TODO: fix this test (for all block sizes)! it's broken because
+ // the same byte could be added?
+
+ struct {
+ size_t size;
+ size_t addr;
+ size_t expected_adds;
+ } test_cases[] = {
+ { 1, 0, 2 /* 1st byte, last byte (short block) */ },
+ { 1, 1, 3 /* 1st 2 bytes, last byte */ },
+ { 1, Constants::BLOCK_SIZE - 1, 2 /* changed, last */ },
+ { 1, Constants::BLOCK_SIZE, 2 /* changed, last */ },
+ { 1, Constants::BLOCK_SIZE + 1, 3 /* changed + 1st of 2nd block, last */ },
+ { 1, 2 * Constants::BLOCK_SIZE, 1 /* last byte */ },
+ };
+
+ for (size_t i = 0; i < SIZEOF_ARRAY(test_cases); i++) {
+ ChangeList cl1;
+ cl1.push_back(Change(Change::ADD, test_cases[i].size, test_cases[i].addr));
+ spec0.ModifyTo(ChangeListMutator(cl1), &spec1);
+ CHECK_EQ(spec0.Size() + test_cases[i].size, spec1.Size());
+
+ Block coded;
+ InMemoryEncodeDecode(spec0, spec1, &coded, Options());
+
+ Delta delta(coded);
+ CHECK_EQ(test_cases[i].expected_adds,
+ delta.AddedBytes());
+ }
+}
+
+void TestDeleteMutator() {
+ MTRandom rand;
+ FileSpec spec0(&rand);
+ FileSpec spec1(&rand);
+
+ spec0.GenerateFixedSize(Constants::BLOCK_SIZE * 4);
+
+ struct {
+ size_t size;
+ size_t addr;
+ } test_cases[] = {
+ // Note: an entry { Constants::BLOCK_SIZE, 0 },
+ // does not work because the xd3_srcwin_move_point logic won't
+ // find a copy if it occurs >= double its size into the file.
+ { Constants::BLOCK_SIZE / 2, 0 },
+ { Constants::BLOCK_SIZE / 2, Constants::BLOCK_SIZE / 2 },
+ { Constants::BLOCK_SIZE, Constants::BLOCK_SIZE / 2 },
+ { Constants::BLOCK_SIZE * 2, Constants::BLOCK_SIZE * 3 / 2 },
+ { Constants::BLOCK_SIZE, Constants::BLOCK_SIZE * 2 },
+ };
+
+ for (size_t i = 0; i < SIZEOF_ARRAY(test_cases); i++) {
+ ChangeList cl1;
+ cl1.push_back(Change(Change::DELRANGE, test_cases[i].size,
+ test_cases[i].addr));
+ spec0.ModifyTo(ChangeListMutator(cl1), &spec1);
+ CHECK_EQ(spec0.Size() - test_cases[i].size, spec1.Size());
+
+ Block coded;
+ InMemoryEncodeDecode(spec0, spec1, &coded, Options());
+
+ Delta delta(coded);
+ CHECK_EQ(0, delta.AddedBytes());
+ }
+}
+
+void TestCopyMutator() {
+ MTRandom rand;
+ FileSpec spec0(&rand);
+ FileSpec spec1(&rand);
+
+ spec0.GenerateFixedSize(Constants::BLOCK_SIZE * 3);
+
+ struct {
+ size_t size;
+ size_t from;
+ size_t to;
+ } test_cases[] = {
+ // Copy is difficult to write tests for because where Xdelta finds
+ // copies, it does not enter checksums. So these tests copy data from
+ // later to earlier so that checksumming will start.
+ { Constants::BLOCK_SIZE / 2, Constants::BLOCK_SIZE / 2, 0 },
+ { Constants::BLOCK_SIZE, 2 * Constants::BLOCK_SIZE,
+ Constants::BLOCK_SIZE, },
+ };
+
+ for (size_t i = 0; i < SIZEOF_ARRAY(test_cases); i++) {
+ ChangeList cl1;
+ cl1.push_back(Change(Change::COPY, test_cases[i].size,
+ test_cases[i].from, test_cases[i].to));
+ spec0.ModifyTo(ChangeListMutator(cl1), &spec1);
+ CHECK_EQ(spec0.Size() + test_cases[i].size, spec1.Size());
+
+ Block coded;
+ InMemoryEncodeDecode(spec0, spec1, &coded, Options());
+
+ Delta delta(coded);
+ CHECK_EQ(0, delta.AddedBytes());
+ }
+}
+
+void TestMoveMutator() {
+ MTRandom rand;
+ FileSpec spec0(&rand);
+ FileSpec spec1(&rand);
+
+ spec0.GenerateFixedSize(Constants::BLOCK_SIZE * 3);
+
+ struct {
+ size_t size;
+ size_t from;
+ size_t to;
+ } test_cases[] = {
+ // This is easier to test than Copy but has the same trouble as Delete.
+ { Constants::BLOCK_SIZE / 2, Constants::BLOCK_SIZE / 2, 0 },
+ { Constants::BLOCK_SIZE / 2, 0, Constants::BLOCK_SIZE / 2 },
+ { Constants::BLOCK_SIZE, Constants::BLOCK_SIZE, 2 *
+ Constants::BLOCK_SIZE },
+ { Constants::BLOCK_SIZE, 2 * Constants::BLOCK_SIZE,
+ Constants::BLOCK_SIZE },
+ { Constants::BLOCK_SIZE * 3 / 2, Constants::BLOCK_SIZE,
+ Constants::BLOCK_SIZE * 3 / 2 },
+
+ // This is a no-op
+ { Constants::BLOCK_SIZE, Constants::BLOCK_SIZE * 2,
+ 3 * Constants::BLOCK_SIZE },
+ };
+
+ for (size_t i = 0; i < SIZEOF_ARRAY(test_cases); i++) {
+ ChangeList cl1;
+ cl1.push_back(Change(Change::MOVE, test_cases[i].size,
+ test_cases[i].from, test_cases[i].to));
+ spec0.ModifyTo(ChangeListMutator(cl1), &spec1);
+ CHECK_EQ(spec0.Size(), spec1.Size());
+
+ Block coded;
+ InMemoryEncodeDecode(spec0, spec1, &coded, Options());
+
+ Delta delta(coded);
+ CHECK_EQ(0, delta.AddedBytes());
+ }
+}
+
+void TestOverwriteMutator() {
+ MTRandom rand;
+ FileSpec spec0(&rand);
+ FileSpec spec1(&rand);
+
+ spec0.GenerateFixedSize(Constants::BLOCK_SIZE);
+
+ ChangeList cl1;
+ cl1.push_back(Change(Change::COPYOVER, 10, 0, 20));
+ spec0.ModifyTo(ChangeListMutator(cl1), &spec1);
+ CHECK_EQ(spec0.Size(), spec1.Size());
+
+ Block b0, b1;
+ BlockIterator(spec0).Get(&b0);
+ BlockIterator(spec1).Get(&b1);
+
+ CHECK(memcmp(b0.Data(), b1.Data() + 20, 10) == 0);
+ CHECK(memcmp(b0.Data(), b1.Data(), 20) == 0);
+ CHECK(memcmp(b0.Data() + 30, b1.Data() + 30,
+ Constants::BLOCK_SIZE - 30) == 0);
+
+ xoff_t zero = 0;
+ cl1.clear();
+ cl1.push_back(Change(Change::COPYOVER, 10, 20, zero));
+ spec0.ModifyTo(ChangeListMutator(cl1), &spec1);
+ CHECK_EQ(spec0.Size(), spec1.Size());
+
+ BlockIterator(spec0).Get(&b0);
+ BlockIterator(spec1).Get(&b1);
+
+ CHECK(memcmp(b0.Data() + 20, b1.Data(), 10) == 0);
+ CHECK(memcmp(b0.Data() + 10, b1.Data() + 10,
+ Constants::BLOCK_SIZE - 10) == 0);
+}
+
+// Note: this test is written to expose a problem, but the problem was
+// only exposed with BLOCK_SIZE = 128.
+void TestNonBlocking() {
+ // Regression test: encodes spec1 against spec2, where both derive
+ // from spec0 via overlapping change lists.  The scenario places a
+ // short source match just after a block boundary, competing with a
+ // longer lazy target match.  Historically failed with BLOCK_SIZE=128.
+ MTRandom rand;
+ FileSpec spec0(&rand);
+ FileSpec spec1(&rand);
+ FileSpec spec2(&rand);
+
+ spec0.GenerateFixedSize(Constants::BLOCK_SIZE * 3);
+
+ // This is a lazy target match
+ Change ct(Change::COPYOVER, 22,
+ Constants::BLOCK_SIZE + 50,
+ Constants::BLOCK_SIZE + 20);
+
+ // This is a source match just after the block boundary, shorter
+ // than the lazy target match.
+ Change cs1(Change::COPYOVER, 16,
+ Constants::BLOCK_SIZE + 51,
+ Constants::BLOCK_SIZE - 1);
+
+ // This overwrites the original source bytes.
+ Change cs2(Change::MODIFY, 108,
+ Constants::BLOCK_SIZE + 20);
+
+ // This changes the first blocks
+ Change c1st(Change::MODIFY, Constants::BLOCK_SIZE - 2, 0);
+
+ ChangeList csl;
+ csl.push_back(cs1);
+ csl.push_back(cs2);
+ csl.push_back(c1st);
+
+ spec0.ModifyTo(ChangeListMutator(csl), &spec1);
+
+ ChangeList ctl;
+ ctl.push_back(ct);
+ ctl.push_back(c1st);
+
+ spec0.ModifyTo(ChangeListMutator(ctl), &spec2);
+
+ // Round-trip only; the test passes if encode/decode verify cleanly.
+ InMemoryEncodeDecode(spec1, spec2, NULL, Options());
+}
+
+void TestEmptyInMemory() {
+ // Encoding one empty input against another still yields a
+ // non-empty delta containing exactly one window.
+ MTRandom rand;
+ FileSpec source_spec(&rand);
+ FileSpec target_spec(&rand);
+ Block delta_bytes;
+
+ source_spec.GenerateFixedSize(0);
+ target_spec.GenerateFixedSize(0);
+
+ InMemoryEncodeDecode(source_spec, target_spec, &delta_bytes, Options());
+
+ Delta parsed(delta_bytes);
+ CHECK_LT(0, delta_bytes.Size());
+ CHECK_EQ(1, parsed.Windows());
+}
+
+void TestBlockInMemory() {
+ // A single-block target produces one delta window per encoder
+ // window over the target.
+ MTRandom rand;
+ FileSpec source_spec(&rand);
+ FileSpec target_spec(&rand);
+ Block delta_bytes;
+
+ source_spec.GenerateFixedSize(Constants::BLOCK_SIZE);
+ target_spec.GenerateFixedSize(Constants::BLOCK_SIZE);
+
+ InMemoryEncodeDecode(source_spec, target_spec, &delta_bytes, Options());
+
+ Delta parsed(delta_bytes);
+ CHECK_EQ(target_spec.Blocks(Constants::WINDOW_SIZE), parsed.Windows());
+}
+
+void TestSmallStride() {
+ // Modifies one byte every `s` bytes and checks that the encoder's
+ // added-bytes cost stays within a small multiple of the number of
+ // changed bytes, for strides 15..100.
+ MTRandom rand;
+ FileSpec spec0(&rand);
+ usize_t size = Constants::BLOCK_SIZE * 4;
+ spec0.GenerateFixedSize(size);
+
+ // Note: Not very good performance due to hash collisions, note 3x
+ // multiplier below.
+ for (int s = 15; s < 101; s++) {
+ usize_t changes = 0;
+ ChangeList cl;
+ for (usize_t j = s; j < size; j += s, ++changes)
+ {
+ cl.push_back(Change(Change::MODIFY, 1, j));
+ }
+
+ FileSpec spec1(&rand);
+ spec0.ModifyTo(ChangeListMutator(cl), &spec1);
+
+ // Slow matcher, small iopt buffer, unknown input size: stresses
+ // the small-match path.
+ Options options;
+ options.encode_srcwin_maxsz = size;
+ options.iopt_size = 128;
+ options.smatch_cfg = XD3_SMATCH_SLOW;
+ options.size_known = false;
+
+ Block block;
+ InMemoryEncodeDecode(spec0, spec1, &block, options);
+ Delta delta(block);
+
+ IF_DEBUG1(DP(RINT "[stride=%d] changes=%" W "u adds=%" Q "u\n",
+ s, changes, delta.AddedBytes()));
+ // Looser bound (3x) for small blocks and small strides, where
+ // hash collisions hurt more; otherwise allow 10% overhead.
+ double allowance = Constants::BLOCK_SIZE < 8192 || s < 30 ? 3.0 : 1.1;
+ CHECK_GE(allowance * changes, (double)delta.AddedBytes());
+ }
+}
+
+void TestCopyWindow() {
+ // Construct an input that has many copies, to fill the IOPT buffer
+ // and force a source window decision. "srclen" may be set to a
+ // value that goes beyond the end-of-source.
+ const int clen = 16;
+ const int size = 4096;
+ const int nmov = size / clen;
+ const int iters = 16;
+ uint32_t added_01 = 0;
+ uint32_t added_10 = 0;
+ for (int i = 1; i <= iters; i++) {
+ MTRandom rand(MTRandom::TEST_SEED1 * i);
+ FileSpec spec0(&rand);
+ ChangeList cl;
+
+ spec0.GenerateFixedSize(size);
+
+ // Swap adjacent clen-sized chunks across the whole file.
+ for (int j = 0; j < nmov; j += 2)
+ {
+ cl.push_back(Change(Change::MOVE,
+ clen, (j + 1) * clen, j * clen));
+ }
+
+ FileSpec spec1(&rand);
+ spec0.ModifyTo(ChangeListMutator(cl), &spec1);
+
+ Options options;
+ options.encode_srcwin_maxsz = size;
+ options.iopt_size = 128;
+ options.smatch_cfg = XD3_SMATCH_SLOW;
+
+ // Accumulate added bytes over both encode directions.
+ Block block1;
+ InMemoryEncodeDecode(spec0, spec1, &block1, options);
+ Delta delta1(block1);
+ // Allow one missed window (e.g., hash collisions)
+ added_01 += delta1.AddedBytes();
+
+ Block block2;
+ InMemoryEncodeDecode(spec1, spec0, &block2, options);
+ Delta delta2(block2);
+ // Allow one missed window (e.g., hash collisions)
+ added_10 += delta2.AddedBytes();
+
+ // Also exercise the public API round trip (results unchecked here;
+ // EncodeDecodeAPI verifies internally).
+ Block block3;
+ Block block4;
+ EncodeDecodeAPI(spec0, spec1, &block3, options);
+ EncodeDecodeAPI(spec1, spec0, &block4, options);
+ }
+ // Average less than 0.5 misses (of length clen) per iteration.
+ CHECK_GE(clen * iters / 2, added_01);
+ CHECK_GE(clen * iters / 2, added_10);
+}
+
+void TestCopyFromEnd() {
+ // Copies from the end of the source buffer, which reach a block
+ // boundary end-of-file.
+ const int size = 4096;
+ const int clen = 16;
+ const int nmov = (size / 2) / clen;
+ const int iters = 16;
+ uint32_t added_01 = 0;
+ uint32_t added_10 = 0;
+ for (int i = 1; i <= iters; i++) {
+ MTRandom rand(MTRandom::TEST_SEED1 * i);
+ FileSpec spec0(&rand);
+ ChangeList cl;
+
+ spec0.GenerateFixedSize(size);
+
+ // Rewrite ~2KB in the middle so the encoder must add data.
+ cl.push_back(Change(Change::MODIFY, 2012, 2048));
+
+ // Swap adjacent chunks in the first half of the file.
+ for (int j = 0; j < nmov; j += 2)
+ {
+ cl.push_back(Change(Change::MOVE,
+ clen, (j + 1) * clen, j * clen));
+ }
+
+ // Copies whose source range runs up to (and ends at) offset 4096,
+ // the end-of-source block boundary.
+ cl.push_back(Change(Change::COPYOVER, 28, 4068, 3000));
+ cl.push_back(Change(Change::COPYOVER, 30, 4066, 3100));
+ cl.push_back(Change(Change::COPYOVER, 32, 4064, 3200));
+
+ FileSpec spec1(&rand);
+ spec0.ModifyTo(ChangeListMutator(cl), &spec1);
+
+ Options options;
+ options.encode_srcwin_maxsz = size;
+ options.iopt_size = 128;
+ options.smatch_cfg = XD3_SMATCH_SLOW;
+
+ Block block1;
+ InMemoryEncodeDecode(spec0, spec1, &block1, options);
+ Delta delta1(block1);
+ added_01 += delta1.AddedBytes();
+
+ Block block2;
+ InMemoryEncodeDecode(spec1, spec0, &block2, options);
+ Delta delta2(block2);
+ added_10 += delta2.AddedBytes();
+
+ Block block3;
+ Block block4;
+ EncodeDecodeAPI(spec0, spec1, &block3, options);
+ EncodeDecodeAPI(spec1, spec0, &block4, options);
+ }
+ // NOTE(review): the bound is an upper limit (GE) for the forward
+ // direction but a lower limit (LE) for the reverse direction —
+ // presumably the reverse encode cannot reconstruct the MODIFY'd
+ // region cheaply; confirm the asymmetry is intended.
+ CHECK_GE(2000 * iters, added_01);
+ CHECK_LE(2000 * iters, added_10);
+}
+
+void TestHalfBlockCopy() {
+ // Create a half-block copy, 7.5 blocks apart, in a pair of files:
+ // 0 1 ... 6 7
+ // spec0 [bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb][ccccc][bbbb_]
+ // spec1 [aaaaa][ccccc][aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa_]
+ // where stage=
+ // 0: the final block is full
+ // a. (source)spec1->(target)spec0 copies block C: reads 8 source
+ // blocks during target block 0.
+ // b. (source)spec0->(target)spec1 does not copy block C b/c attempt
+ // to read past EOF empties block 0 from (virtual) block cache
+ // 1: the final block is less than full.
+ // a. (same) copies block C
+ // b. (same) copies block C, unlike 0a, no attempt to read past EOF
+ //
+ // "virtual" above refers to XD3_TOOFARBACK, since there is no caching
+ // in the API, there is simply a promise not to request blocks that are
+ // beyond source->max_winsize from the last known source file position.
+ for (int stage = 0; stage < 2; stage++)
+ {
+ IF_DEBUG1 (DP(RINT "half_block_copy stage %d\n", stage));
+
+ MTRandom rand;
+ FileSpec spec0(&rand);
+ FileSpec spec1(&rand);
+
+ // stage==1 shortens the file by one byte, making the last block
+ // partial.
+ spec0.GenerateFixedSize(Constants::BLOCK_SIZE * 8 - stage);
+
+ ChangeList cl1;
+ cl1.push_back(Change(Change::MODIFY,
+ Constants::BLOCK_SIZE / 2, // size
+ 0));
+ cl1.push_back(Change(Change::COPYOVER,
+ Constants::BLOCK_SIZE / 2, // size
+ Constants::BLOCK_SIZE * 7, // offset
+ Constants::BLOCK_SIZE / 2));
+ cl1.push_back(Change(Change::MODIFY,
+ Constants::BLOCK_SIZE * 7,
+ Constants::BLOCK_SIZE - stage));
+ spec0.ModifyTo(ChangeListMutator(cl1), &spec1);
+
+ Options options;
+ options.encode_srcwin_maxsz = Constants::BLOCK_SIZE * 8;
+
+ Block block0;
+ Block block1;
+ InMemoryEncodeDecode(spec0, spec1, &block0, options);
+ InMemoryEncodeDecode(spec1, spec0, &block1, options);
+
+ Delta delta0(block0);
+ Delta delta1(block1);
+
+ // NOTE(review): `yes` and `no` evaluate to the same value here
+ // (8 blocks minus half a block), so the stage==0 expectations are
+ // indistinguishable from stage==1 — confirm the intended value of
+ // `no` against the 0b scenario described above.
+ const int yes =
+ Constants::BLOCK_SIZE * 8 - Constants::BLOCK_SIZE / 2;
+ const int no =
+ Constants::BLOCK_SIZE * 8 - Constants::BLOCK_SIZE / 2;
+
+ if (stage == 0)
+ {
+ CHECK_EQ(yes, delta0.AddedBytes());
+ CHECK_EQ(no, delta1.AddedBytes());
+ }
+ else
+ {
+ CHECK_EQ(yes, delta0.AddedBytes());
+ CHECK_EQ(yes, delta1.AddedBytes());
+ }
+ }
+}
+
+// Produces three deltas (0->1, 1->2, 2->3) via the command line, then
+// exercises "xdelta3 merge": a 2-delta merge must reconstruct spec2
+// from spec0, and a 3-delta merge must reconstruct spec3 from spec0.
+void FourWayMergeTest(const FileSpec &spec0,
+ const FileSpec &spec1,
+ const FileSpec &spec2,
+ const FileSpec &spec3) {
+ TmpFile f0, f1, f2, f3;
+ ExtFile d01, d12, d23;
+ Options options;
+ options.encode_srcwin_maxsz =
+ std::max(spec0.Size(), options.encode_srcwin_maxsz);
+
+ spec0.WriteTmpFile(&f0);
+ spec1.WriteTmpFile(&f1);
+ spec2.WriteTmpFile(&f2);
+ spec3.WriteTmpFile(&f3);
+
+ // Pairwise deltas between consecutive versions.
+ MainEncodeDecode(f0, f1, &d01, options);
+ MainEncodeDecode(f1, f2, &d12, options);
+ MainEncodeDecode(f2, f3, &d23, options);
+
+ // Merge 2
+ ExtFile out;
+ vector<const char*> mcmd;
+ mcmd.push_back("xdelta3");
+ mcmd.push_back("merge");
+ mcmd.push_back("-m");
+ mcmd.push_back(d01.Name());
+ mcmd.push_back(d12.Name());
+ mcmd.push_back(out.Name());
+ mcmd.push_back(NULL);
+
+ // XPR(NTR "Running one merge: %s\n", CommandToString(mcmd).c_str());
+ CHECK_EQ(0, xd3_main_cmdline(mcmd.size() - 1,
+ const_cast<char**>(&mcmd[0])));
+
+ // Apply the merged delta to f0; the result must equal spec2.
+ ExtFile recon;
+ vector<const char*> tcmd;
+ tcmd.push_back("xdelta3");
+ tcmd.push_back("-d");
+ tcmd.push_back("-s");
+ tcmd.push_back(f0.Name());
+ tcmd.push_back(out.Name());
+ tcmd.push_back(recon.Name());
+ tcmd.push_back(NULL);
+
+ // XPR(NTR "Running one recon! %s\n", CommandToString(tcmd).c_str());
+ CHECK_EQ(0, xd3_main_cmdline(tcmd.size() - 1,
+ const_cast<char**>(&tcmd[0])));
+ // XPR(NTR "Should equal! %s\n", f2.Name());
+
+ CHECK(recon.EqualsSpec(spec2));
+
+ // Merge 3
+ ExtFile out3;
+ vector<const char*> mcmd3;
+ mcmd3.push_back("xdelta3");
+ mcmd3.push_back("merge");
+ mcmd3.push_back("-m");
+ mcmd3.push_back(d01.Name());
+ mcmd3.push_back("-m");
+ mcmd3.push_back(d12.Name());
+ mcmd3.push_back(d23.Name());
+ mcmd3.push_back(out3.Name());
+ mcmd3.push_back(NULL);
+
+ // XPR(NTR "Running one 3-merge: %s\n", CommandToString(mcmd3).c_str());
+ CHECK_EQ(0, xd3_main_cmdline(mcmd3.size() - 1,
+ const_cast<char**>(&mcmd3[0])));
+
+ // Apply the 3-way merged delta to f0; the result must equal spec3.
+ ExtFile recon3;
+ vector<const char*> tcmd3;
+ tcmd3.push_back("xdelta3");
+ tcmd3.push_back("-d");
+ tcmd3.push_back("-s");
+ tcmd3.push_back(f0.Name());
+ tcmd3.push_back(out3.Name());
+ tcmd3.push_back(recon3.Name());
+ tcmd3.push_back(NULL);
+
+ // XPR(NTR "Running one 3-recon %s\n", CommandToString(tcmd3).c_str());
+ CHECK_EQ(0, xd3_main_cmdline(tcmd3.size() - 1,
+ const_cast<char**>(&tcmd3[0])));
+ // XPR(NTR "Should equal %s\n", f3.Name());
+
+ CHECK(recon3.EqualsSpec(spec3));
+}
+
+void TestMergeCommand1() {
+ /* Repeat random-input testing for a number of iterations.
+ * Test 2, 3, and 4-file scenarios (i.e., 1, 2, and 3-delta merges). */
+ MTRandom rand;
+ FileSpec spec0(&rand);
+ FileSpec spec1(&rand);
+ FileSpec spec2(&rand);
+ FileSpec spec3(&rand);
+
+ // Outer loop over base-file sizes, inner loop over change sizes.
+ SizeIterator<size_t, Sizes> si0(&rand, 10);
+
+ for (; !si0.Done(); si0.Next()) {
+ size_t size0 = si0.Get();
+
+ SizeIterator<size_t, Sizes> si1(&rand, 10);
+ for (; !si1.Done(); si1.Next()) {
+ size_t change1 = si1.Get();
+
+ if (change1 == 0) {
+ continue;
+ }
+
+ // XPR(NTR "S0 = %lu\n", size0);
+ // XPR(NTR "C1 = %lu\n", change1);
+ // XPR(NTR ".");
+
+ // Random positions; guard against modulo-by-zero for empty files.
+ size_t add1_pos = size0 ? rand.Rand32() % size0 : 0;
+ size_t del2_pos = size0 ? rand.Rand32() % size0 : 0;
+
+ spec0.GenerateFixedSize(size0);
+
+ ChangeList cl1, cl2, cl3;
+
+ // Clamp the MODIFY so it fits inside the file.
+ size_t change3 = change1;
+ size_t change3_pos;
+
+ if (change3 >= size0) {
+ change3 = size0;
+ change3_pos = 0;
+ } else {
+ change3_pos = rand.Rand32() % (size0 - change3);
+ }
+
+ // Chain: spec1 = spec0 + ADD, spec2 = spec1 + DELRANGE,
+ // spec3 = spec2 + MODIFY.
+ cl1.push_back(Change(Change::ADD, change1, add1_pos));
+ cl2.push_back(Change(Change::DELRANGE, change1, del2_pos));
+ cl3.push_back(Change(Change::MODIFY, change3, change3_pos));
+
+ spec0.ModifyTo(ChangeListMutator(cl1), &spec1);
+ spec1.ModifyTo(ChangeListMutator(cl2), &spec2);
+ spec2.ModifyTo(ChangeListMutator(cl3), &spec3);
+
+ // Merge both forward and backward through the chain.
+ FourWayMergeTest(spec0, spec1, spec2, spec3);
+ FourWayMergeTest(spec3, spec2, spec1, spec0);
+ }
+ }
+}
+
+void TestMergeCommand2() {
+ /* Same as above, different mutation pattern. */
+ /* TODO: run this with large sizes too */
+ /* TODO: run this with small sizes too */
+ MTRandom rand;
+ FileSpec spec0(&rand);
+ FileSpec spec1(&rand);
+ FileSpec spec2(&rand);
+ FileSpec spec3(&rand);
+
+ // Four nested size iterators; only strictly-decreasing size
+ // quadruples are used, so each spec is a prefix-deletion of spec0.
+ SizeIterator<size_t, Sizes> si0(&rand, 10);
+ for (; !si0.Done(); si0.Next()) {
+ size_t size0 = si0.Get();
+
+ SizeIterator<size_t, Sizes> si1(&rand, 10);
+ for (; !si1.Done(); si1.Next()) {
+ size_t size1 = si1.Get();
+
+ SizeIterator<size_t, Sizes> si2(&rand, 10);
+ for (; !si2.Done(); si2.Next()) {
+ size_t size2 = si2.Get();
+
+ SizeIterator<size_t, Sizes> si3(&rand, 10);
+ for (; !si3.Done(); si3.Next()) {
+ size_t size3 = si3.Get();
+
+ // We're only interested in three sizes, strictly decreasing. */
+ if (size3 >= size2 || size2 >= size1 || size1 >= size0) {
+ continue;
+ }
+
+ // XPR(NTR "S0 = %lu\n", size0);
+ // XPR(NTR "S1 = %lu\n", size1);
+ // XPR(NTR "S2 = %lu\n", size2);
+ // XPR(NTR "S3 = %lu\n", size3);
+ // XPR(NTR ".");
+
+ spec0.GenerateFixedSize(size0);
+
+ ChangeList cl1, cl2, cl3;
+
+ // Each spec deletes a prefix of spec0 down to its size.
+ cl1.push_back(Change(Change::DELRANGE, size0 - size1, 0));
+ cl2.push_back(Change(Change::DELRANGE, size0 - size2, 0));
+ cl3.push_back(Change(Change::DELRANGE, size0 - size3, 0));
+
+ spec0.ModifyTo(ChangeListMutator(cl1), &spec1);
+ spec0.ModifyTo(ChangeListMutator(cl2), &spec2);
+ spec0.ModifyTo(ChangeListMutator(cl3), &spec3);
+
+ // Merge both forward and backward through the chain.
+ FourWayMergeTest(spec0, spec1, spec2, spec3);
+ FourWayMergeTest(spec3, spec2, spec1, spec0);
+ }
+ }
+ }
+ }
+}
+
+void TestLastFrontierBlock() {
+ // This test constructs an input that can expose
+ // https://github.com/jmacd/xdelta/issues/188
+ // when run through the command-line with source via a FIFO.
+ // That is not tested here, but the test stays.
+ if (Constants::WINDOW_SIZE < XD3_ALLOCSIZE)
+ {
+ return;
+ }
+
+ MTRandom rand;
+ FileSpec spec0(&rand);
+ FileSpec spec1(&rand);
+ const xoff_t size = XD3_ALLOCSIZE * 64; // == XD3_MINSRCWINSZ * 2
+ const xoff_t edit = XD3_ALLOCSIZE;
+
+ // Minimal source window and block sizes, with unknown input size,
+ // to drive the encoder to its last source-window frontier.
+ Options options;
+ options.encode_srcwin_maxsz = XD3_MINSRCWINSZ;
+ options.block_size = XD3_ALLOCSIZE;
+ options.window_size = XD3_MINSRCWINSZ;
+ options.size_known = false;
+
+ spec0.GenerateFixedSize(size);
+
+ ChangeList cl;
+
+ // Modify the 0th byte in order to induce indexing of subsequent
+ // bytes, but allow copying most of the file to keep the test fast.
+ cl.push_back(Change(Change::MODIFY, 1, edit * 31));
+ cl.push_back(Change(Change::COPYOVER, edit, edit * 31, edit * 63));
+
+ spec0.ModifyTo(ChangeListMutator(cl), &spec1);
+
+ // Round-trip in both directions; delta contents are not inspected.
+ Block noblock;
+ InMemoryEncodeDecode(spec0, spec1, &noblock, options);
+ InMemoryEncodeDecode(spec1, spec0, &noblock, options);
+}
+
+}; // class Regtest<Constants>
+
+#define TEST(x) XPR(NTR #x "...\n"); regtest.x()
+
+// These tests are primarily tests of the testing framework itself.
+template <class T>
+void UnitTest() {
+ // Self-tests of the testing framework (random generation, file
+ // specs, and each Change mutator), run via the TEST macro above.
+ Regtest<T> regtest;
+ TEST(TestPrintf);
+ TEST(TestRandomNumbers);
+ TEST(TestRandomFile);
+ TEST(TestFirstByte);
+ TEST(TestModifyMutator);
+ TEST(TestAddMutator);
+ TEST(TestDeleteMutator);
+ TEST(TestCopyMutator);
+ TEST(TestMoveMutator);
+ TEST(TestOverwriteMutator);
+}
+
+// These are Xdelta tests.
+template <class T>
+void MainTest() {
+ // Xdelta encode/decode/merge tests, parameterized by the block and
+ // window sizes in T (see sizes.h).
+ XPR(NT "Blocksize %" Q "u windowsize %" Z "u\n",
+ T::BLOCK_SIZE, T::WINDOW_SIZE);
+ Regtest<T> regtest;
+ TEST(TestEmptyInMemory);
+ TEST(TestBlockInMemory);
+ TEST(TestSmallStride);
+ TEST(TestCopyWindow);
+ TEST(TestCopyFromEnd);
+ TEST(TestNonBlocking);
+ TEST(TestHalfBlockCopy);
+ TEST(TestLastFrontierBlock);
+ TEST(TestMergeCommand1);
+ TEST(TestMergeCommand2);
+}
+
+#undef TEST
+
+int main(int argc, char **argv)
+{
+ // Locate the xdelta3 binary next to this test binary, run the
+ // regression suites over all size configurations, then finish with
+ // the built-in "xdelta3 test" self-test.
+ string pn;
+ const char *slash = strrchr(argv[0], '/');
+ if (slash != NULL) {
+ pn.append(argv[0], slash - argv[0] + 1);
+ }
+ pn.append("xdelta3");
+
+ vector<const char*> mcmd;
+ mcmd.push_back(pn.c_str());
+ mcmd.push_back("test");
+ mcmd.push_back(NULL);
+
+ UnitTest<SmallBlock>();
+ MainTest<SmallBlock>();
+ MainTest<MixedBlock>();
+ MainTest<OversizeBlock>();
+ MainTest<LargeBlock>();
+
+ CHECK_EQ(0, xd3_main_cmdline(mcmd.size() - 1,
+ const_cast<char**>(&mcmd[0])));
+
+ return 0;
+}
+
diff --git a/testing/regtest_c.c b/testing/regtest_c.c
new file mode 100644
index 0000000..e2845ee
--- /dev/null
+++ b/testing/regtest_c.c
@@ -0,0 +1,2 @@
+/* -*- Mode: C++ -*- */
+#include "../xdelta3.c"
diff --git a/testing/run_release.sh b/testing/run_release.sh
new file mode 100755
index 0000000..85ed1f7
--- /dev/null
+++ b/testing/run_release.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+(cd .. && ./run_release.sh)
diff --git a/testing/segment.h b/testing/segment.h
new file mode 100644
index 0000000..ea3dcee
--- /dev/null
+++ b/testing/segment.h
@@ -0,0 +1,98 @@
+// -*- Mode: C++ -*-
+
+// A Segment is a span of bytes, either backed by caller-owned literal
+// data or generated deterministically from a 32-bit seed (with an
+// offset into the generated stream).
+class Segment {
+ public:
+  // Random-content segment; draws its seed from `rand`.
+  Segment(size_t size, MTRandom *rand)
+    : size_(size),
+      seed_(rand->Rand32()),
+      seed_offset_(0),
+      data_(NULL) {
+    CHECK_GT(size_, 0);
+  }
+
+  // Random-content segment with an explicit seed.
+  Segment(size_t size, uint32_t seed)
+    : size_(size),
+      seed_(seed),
+      seed_offset_(0),
+      data_(NULL) {
+    CHECK_GT(size_, 0);
+  }
+
+  // Literal segment; `data` is borrowed, not owned.
+  Segment(size_t size, uint8_t *data)
+    : size_(size),
+      seed_(0),
+      seed_offset_(0),
+      data_(data) {
+    CHECK_GT(size_, 0);
+  }
+
+  size_t Size() const {
+    return size_;
+  }
+
+  // Returns the sub-segment [start, start+size); for seeded segments
+  // this shifts seed_offset_ so the generated bytes line up.
+  Segment Subseg(size_t start, size_t size) const {
+    CHECK_LE(start + size, size_);
+    if (data_) {
+      return Segment(size, data_ + start);
+    } else {
+      return Segment(size, seed_, seed_offset_ + start);
+    }
+  }
+
+  // Copies `size` bytes starting at `seg_offset` into `data`.  For
+  // seeded segments the generator is replayed from the start, which is
+  // O(seed_offset_ + seg_offset) but keeps generation stateless.
+  void Fill(size_t seg_offset, size_t size, uint8_t *data) const {
+    CHECK_LE(seg_offset + size, size_);
+    if (data_) {
+      memcpy(data, data_ + seg_offset, size);
+    } else {
+      size_t skip = seg_offset + seed_offset_;
+      MTRandom gen(seed_);
+      MTRandom8 gen8(&gen);
+      while (skip--) {
+        gen8.Rand8();
+      }
+      for (size_t i = 0; i < size; i++) {
+        data[i] = gen8.Rand8();
+      }
+    }
+  }
+
+  // Debug rendering: hex bytes for literal segments, parameters for
+  // seeded ones.
+  string ToString() const {
+    string r;
+    if (data_) {
+      for (size_t i = 0; i < size_; i++) {
+        char buf[10];
+        // snprintf bounds the write; "%02x" via integer promotion.
+        snprintf(buf, sizeof(buf), "%02x ", data_[i]);
+        r.append(buf);
+      }
+    } else {
+      char buf[256];
+      // Bug fix: was "%ld...%ud...%ld" — "%ud" printed the seed then a
+      // literal 'd', and "%ld" mismatches size_t on LP32; use %zu/%u.
+      snprintf(buf, sizeof(buf), "size=%zu,seed=%u,skip=%zu",
+               size_, seed_, seed_offset_);
+      r.append(buf);
+    }
+    return r;
+  }
+
+private:
+  // Used by Subseg()
+  Segment(size_t size, uint32_t seed, size_t seed_offset)
+    : size_(size),
+      seed_(seed),
+      seed_offset_(seed_offset),
+      data_(NULL) {
+    CHECK_GT(size_, 0);
+  }
+
+  size_t size_;         // Size of this segment
+
+  // For random segments
+  uint32_t seed_;       // Seed used for generating byte sequence
+  size_t seed_offset_;  // Seed positions the sequence this many bytes
+                        // before its beginning.
+
+  // For literal segments (data is not owned)
+  uint8_t *data_;
+};
+
+typedef map<xoff_t, Segment> SegmentMap;
+typedef typename SegmentMap::const_iterator ConstSegmentMapIterator;
+typedef typename SegmentMap::iterator SegmentMapIterator;
diff --git a/testing/sizes.h b/testing/sizes.h
new file mode 100644
index 0000000..223d359
--- /dev/null
+++ b/testing/sizes.h
@@ -0,0 +1,111 @@
+// -*- Mode: C++ -*-
+// Iterates over test sizes: first every fixed size in U::sizes, then
+// random values below U::max_value until at least `howmany` steps
+// have been produced.
+template <typename T, typename U>
+class SizeIterator {
+ public:
+  SizeIterator(MTRandom *rand, size_t howmany)
+    : rand_(rand),
+      count_(0),
+      fixed_(U::sizes),
+      fixed_size_(SIZEOF_ARRAY(U::sizes)),
+      howmany_(howmany) { }
+
+  // Current size: a fixed entry while any remain, random afterwards.
+  T Get() {
+    return (count_ < fixed_size_)
+      ? fixed_[count_]
+      : rand_->Rand<T>() % U::max_value;
+  }
+
+  // Done once both the fixed list and the requested count are
+  // exhausted (De Morgan of: more-fixed-left || more-requested).
+  bool Done() {
+    return !(count_ < fixed_size_ || count_ < howmany_);
+  }
+
+  void Next() {
+    ++count_;
+  }
+
+ private:
+  MTRandom *rand_;
+  size_t count_;
+  T* fixed_;
+  size_t fixed_size_;
+  size_t howmany_;
+};
+
+// Small sizes
+// Fixed test sizes for the small (128-byte) block configurations,
+// clustered around 1x and 2x the block size.
+class SmallSizes {
+public:
+ static size_t sizes[];
+ static size_t max_value;
+};
+
+size_t SmallSizes::sizes[] = {
+ // NOTE(review): 3333 is far above max_value (384) — presumably a
+ // deliberate out-of-range probe; confirm.
+ 0, 1, 128 / 4, 3333,
+ 128 - (128 / 3),
+ 128,
+ 128 + (128 / 3),
+ 2 * 128 - (128 / 3),
+ 2 * 128,
+ 2 * 128 + (128 / 3),
+};
+
+// Upper bound (exclusive) for randomly generated sizes.
+size_t SmallSizes::max_value = 128 * 3;
+
+// Large sizes
+// Fixed test sizes for the large-block configuration: 64KB to 1MB.
+class LargeSizes {
+public:
+ static size_t sizes[];
+ static size_t max_value;
+};
+
+size_t LargeSizes::sizes[] = {
+ 1 << 20,
+ 1 << 18,
+ 1 << 16,
+};
+
+// Upper bound (exclusive) for randomly generated sizes.
+size_t LargeSizes::max_value = 1<<20;
+
+// Base constants
+struct BaseConstants {
+ static const size_t TEST_ROUNDS;
+};
+
+const size_t BaseConstants::TEST_ROUNDS = 10;
+
+// Regtest<> arguments
+// Regtest<> arguments: each struct pairs a source block size with an
+// encoder window size and a size list.
+
+// Block size == window size (128 bytes).
+struct SmallBlock : public BaseConstants {
+ static const xoff_t BLOCK_SIZE;
+ static const size_t WINDOW_SIZE;
+ typedef SmallSizes Sizes;
+};
+
+const xoff_t SmallBlock::BLOCK_SIZE = 1<<7;
+const size_t SmallBlock::WINDOW_SIZE = 1<<7;
+
+// Block size == window size (8KB), with large test sizes.
+struct LargeBlock : public BaseConstants {
+ static const xoff_t BLOCK_SIZE;
+ static const size_t WINDOW_SIZE;
+ typedef LargeSizes Sizes;
+};
+
+const xoff_t LargeBlock::BLOCK_SIZE = (1 << 13);
+const size_t LargeBlock::WINDOW_SIZE = (1 << 13);
+
+// Window larger than block: windows span block boundaries.
+struct MixedBlock : public BaseConstants {
+ static const xoff_t BLOCK_SIZE;
+ static const size_t WINDOW_SIZE;
+ typedef SmallSizes Sizes;
+};
+
+const xoff_t MixedBlock::BLOCK_SIZE = 1<<7;
+const size_t MixedBlock::WINDOW_SIZE = 1<<8;
+
+// Block larger than window: several windows per block.
+struct OversizeBlock : public BaseConstants {
+ static const xoff_t BLOCK_SIZE;
+ static const size_t WINDOW_SIZE;
+ typedef SmallSizes Sizes;
+};
+
+const xoff_t OversizeBlock::BLOCK_SIZE = 1<<8;
+const size_t OversizeBlock::WINDOW_SIZE = 1<<7;
diff --git a/testing/test.h b/testing/test.h
new file mode 100644
index 0000000..7de24fb
--- /dev/null
+++ b/testing/test.h
@@ -0,0 +1,70 @@
+// -*- Mode: C++ -*-
+
+extern "C" {
+#include "../xdelta3.h"
+#include "../xdelta3-internal.h"
+}
+
+#include <unistd.h>
+#include <math.h>
+#include <string>
+
+// Comparison assertions: evaluate x and y once each; on failure print
+// the expression and both values, then abort().
+#define CHECK_EQ(x,y) CHECK_OP(x,y,==)
+#define CHECK_NE(x,y) CHECK_OP(x,y,!=)
+#define CHECK_LT(x,y) CHECK_OP(x,y,<)
+#define CHECK_GT(x,y) CHECK_OP(x,y,>)
+#define CHECK_LE(x,y) CHECK_OP(x,y,<=)
+#define CHECK_GE(x,y) CHECK_OP(x,y,>=)
+
+// NOTE(review): _y is declared with __typeof__(x), so y is converted
+// to x's type before comparison — presumably intentional to avoid
+// mixed signed/unsigned comparison warnings; confirm.
+#define CHECK_OP(x,y,OP) \
+ do { \
+ __typeof__(x) _x(x); \
+ __typeof__(x) _y(y); \
+ if (!(_x OP _y)) { \
+ cerr << __FILE__ << ":" << __LINE__ << " Check failed: " << #x " " #OP " " #y << endl; \
+ cerr << __FILE__ << ":" << __LINE__ << " {0} " << _x << endl; \
+ cerr << __FILE__ << ":" << __LINE__ << " {1} " << _y << endl; \
+ abort(); \
+ } } while (false)
+// Replace any CHECK inherited from the xdelta headers.
+#undef CHECK
+// Boolean assertion: prints the failed expression and aborts.
+#define CHECK(x) \
+ do {if (!(x)) { \
+ cerr << __FILE__ << ":" << __LINE__ << " Check failed: " << #x << endl; \
+ abort(); \
+ } } while (false)
+
+// Debug-only check: compiled out in this build.
+#define DCHECK(x)
+
+using std::string;
+
+#include <vector>
+using std::vector;
+
+// Joins a NULL-terminated argv-style vector into a single
+// space-separated command line (for logging).
+inline string CommandToString(const vector<const char*> &v) {
+  string result(v[0]);
+  for (size_t i = 1; i < v.size(); i++) {
+    if (v[i] == NULL) {
+      break;
+    }
+    result += ' ';
+    result += v[i];
+  }
+  return result;
+}
+
+#include <iostream>
+using std::cerr;
+using std::endl;
+using std::ostream;
+
+#include <map>
+using std::map;
+using std::pair;
+
+#include <list>
+using std::list;
+
+// Local make_pair shim; constructs the pair explicitly rather than
+// relying on std::make_pair.
+template <typename T, typename U>
+pair<T, U> make_pair(const T& t, const U& u) {
+  pair<T, U> result(t, u);
+  return result;
+}
+
+using std::min;
+using std::max;
diff --git a/testing/xdelta3-regtest.py b/testing/xdelta3-regtest.py
new file mode 100755
index 0000000..cb07198
--- /dev/null
+++ b/testing/xdelta3-regtest.py
@@ -0,0 +1,1273 @@
+#!/usr/bin/python2.6
+# xdelta 3 - delta compression tools and library
+# Copyright (C) 2003, 2006, 2007, 2008. Joshua P. MacDonald
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+# TODO: test 1.5 vs. greedy
+
+import os, sys, math, re, time, types, array, random
+import xdelta3
+
+#RCSDIR = '/mnt/polaroid/Polaroid/orbit_linux/home/jmacd/PRCS'
+#RCSDIR = '/tmp/PRCS_read_copy'
+#SAMPLEDIR = "/tmp/WESNOTH_tmp/diff"
+
+#RCSDIR = 'G:/jmacd/PRCS_copy'
+#SAMPLEDIR = "C:/sample_data/Wesnoth/tar"
+
+RCSDIR = '/Users/jmacd/src/ftp.kernel.org'
+SAMPLEDIR = '/Users/jmacd/src/xdelta3/linux'
+
+#
+MIN_SIZE = 0
+
+TIME_TOO_SHORT = 0.050
+
+SKIP_TRIALS = 2
+MIN_TRIALS = 3
+MAX_TRIALS = 15
+
+# 10 = fast 1.5 = slow
+MIN_STDDEV_PCT = 1.5
+
+# How many results per round
+MAX_RESULTS = 500
+TEST_ROUNDS = 10
+KEEP_P = (0.5)
+
+# For RCS testing, what percent to select
+FILE_P = (0.50)
+
+# For run-speed tests
+MIN_RUN = 1000 * 1000 * 1
+MAX_RUN = 1000 * 1000 * 10
+
+# Testwide defaults
+ALL_ARGS = [
+ '-q' # '-vv'
+ ]
+
+# The first 7 args go to -C
+SOFT_CONFIG_CNT = 7
+
+CONFIG_ORDER = [ 'large_look',
+ 'large_step',
+ 'small_look',
+ 'small_chain',
+ 'small_lchain',
+ 'max_lazy',
+ 'long_enough',
+
+ # > SOFT_CONFIG_CNT
+ 'nocompress',
+ 'winsize',
+ 'srcwinsize',
+ 'sprevsz',
+ 'iopt',
+ 'djw',
+ 'altcode',
+ ]
+
+CONFIG_ARGMAP = {
+ 'winsize' : '-W',
+ 'srcwinsize' : '-B',
+ 'sprevsz' : '-P',
+ 'iopt' : '-I',
+ 'nocompress' : '-N',
+ 'djw' : '-Sdjw',
+ 'altcode' : '-T',
+ }
+
+def INPUT_SPEC(rand):
+ """Return a dict mapping each CONFIG_ORDER parameter name to a
+ zero-arg-ish lambda that draws that parameter's value using `rand`.
+ Each lambda ignores its argument `d`."""
+ return {
+
+ # Time/space costs:
+
+ # -C 1,2,3,4,5,6,7
+ 'large_look' : lambda d: rand.choice([9, 10, 11, 12]),
+ 'large_step' : lambda d: rand.choice([25, 26, 27, 28, 29, 30]),
+ 'small_look' : lambda d: rand.choice([4]),
+ 'small_chain' : lambda d: rand.choice([1]),
+ 'small_lchain' : lambda d: rand.choice([1]),
+ 'max_lazy' : lambda d: rand.choice([4, 5, 6, 7, 8, 9, 10 ]),
+
+ # Note: long_enough only refers to small matching and has no effect if
+ # small_chain == 1.
+ 'long_enough' : lambda d: rand.choice([4]),
+
+ # -N
+ 'nocompress' : lambda d: rand.choice(['false']),
+
+ # -T
+ 'altcode' : lambda d: rand.choice(['false']),
+
+ # -S djw
+ 'djw' : lambda d: rand.choice(['false']),
+
+ # Memory costs:
+
+ # -W
+ 'winsize' : lambda d: 8 * (1<<20),
+
+ # -B
+ 'srcwinsize' : lambda d: 64 * (1<<20),
+
+ # -I 0 is unlimited
+ 'iopt' : lambda d: 0,
+
+ # -P only powers of two
+ 'sprevsz' : lambda d: rand.choice([x * (1<<16) for x in [4]]),
+ }
+#end
+
+#
+TMPDIR = '/tmp/xd3regtest.%d' % os.getpid()
+
+RUNFILE = os.path.join(TMPDIR, 'run')
+DFILE = os.path.join(TMPDIR, 'output')
+RFILE = os.path.join(TMPDIR, 'recon')
+CMPTMP1 = os.path.join(TMPDIR, 'cmptmp1')
+CMPTMP2 = os.path.join(TMPDIR, 'cmptmp2')
+
+HEAD_STATE = 0
+BAR_STATE = 1
+REV_STATE = 2
+DATE_STATE = 3
+
+#
+IGNORE_FILENAME = re.compile('.*\\.(gif|jpg).*')
+
+# rcs output
+RE_TOTREV = re.compile('total revisions: (\\d+)')
+RE_BAR = re.compile('----------------------------')
+RE_REV = re.compile('revision (.+)')
+RE_DATE = re.compile('date: ([^;]+);.*')
+# xdelta output
+RE_HDRSZ = re.compile('VCDIFF header size: +(\\d+)')
+RE_EXTCOMP = re.compile('XDELTA ext comp.*')
+
+def c2str(c):
+ return ' '.join(['%s' % x for x in c])
+#end
+
+def SumList(l):
+ return reduce(lambda x,y: x+y, l)
+#end
+
+# returns (total, mean, stddev, q2 (median),
+# (q3-q1)/2 ("semi-interquartile range"), max-min (spread))
+class StatList:
+ def __init__(self,l,desc):
+ cnt = len(l)
+ assert(cnt > 1)
+ l.sort()
+ self.cnt = cnt
+ self.l = l
+ self.total = SumList(l)
+ self.mean = self.total / float(self.cnt)
+ self.s = math.sqrt(SumList([(x-self.mean) *
+ (x - self.mean) for x in l]) /
+ float(self.cnt-1))
+ self.q0 = l[0]
+ self.q1 = l[int(self.cnt/4.0+0.5)]
+ self.q2 = l[int(self.cnt/2.0+0.5)]
+ self.q3 = l[min(self.cnt-1,int((3.0*self.cnt)/4.0+0.5))]
+ self.q4 = l[self.cnt-1]
+ self.siqr = (self.q3-self.q1)/2.0;
+ self.spread = (self.q4-self.q0)
+ if len(l) == 1:
+ self.str = '%s %s' % (desc, l[0])
+ else:
+ self.str = '%s mean %.1f: 25%-ile %d %d %d %d %d' % \
+ (desc, self.mean, self.q0, self.q1, self.q2, self.q3, self.q4)
+ #end
+#end
+
+def RunCommand(args, ok = [0]):
+ #print 'run command %s' % (' '.join(args))
+ p = os.spawnvp(os.P_WAIT, args[0], args)
+ if p not in ok:
+ raise CommandError(args, 'exited %d' % p)
+ #end
+#end
+
+def RunCommandIO(args,infn,outfn):
+ p = os.fork()
+ if p == 0:
+ os.dup2(os.open(infn,os.O_RDONLY),0)
+ os.dup2(os.open(outfn,os.O_CREAT|os.O_TRUNC|os.O_WRONLY),1)
+ os.execvp(args[0], args)
+ else:
+ s = os.waitpid(p,0)
+ o = os.WEXITSTATUS(s[1])
+ if not os.WIFEXITED(s[1]) or o != 0:
+ raise CommandError(args, 'exited %d' % o)
+ #end
+ #end
+#end
+
+class TimedTest:
+ # Runs `runnable`'s Encode and then Decode repeatedly, timing each
+ # run until the measured times stabilize (or limits are reached),
+ # then verifies the decoded output.  Results are StatList objects in
+ # .encode_time and .decode_time; .encode_size is the delta size.
+ def __init__(self, target, source, runnable,
+ skip_trials = SKIP_TRIALS,
+ min_trials = MIN_TRIALS,
+ max_trials = MAX_TRIALS,
+ min_stddev_pct = MIN_STDDEV_PCT):
+ self.target = target
+ self.source = source
+ self.runnable = runnable
+
+ self.skip_trials = skip_trials
+ self.min_trials = min(min_trials, max_trials)
+ self.max_trials = max_trials
+ self.min_stddev_pct = min_stddev_pct
+
+ self.encode_time = self.DoTest(DFILE,
+ lambda x: x.Encode(self.target,
+ self.source, DFILE))
+ self.encode_size = runnable.EncodeSize(DFILE)
+
+ self.decode_time = self.DoTest(RFILE,
+ lambda x: x.Decode(DFILE,
+ self.source, RFILE),
+ )
+ runnable.Verify(self.target, RFILE)
+ #end
+
+ def DoTest(self, fname, func):
+ # Time func(runnable) repeatedly; the first skip_trials runs are
+ # warm-up and discarded.  Stops when the relative stddev of the
+ # wall-clock times drops below min_stddev_pct, or max_trials
+ # measured runs have been taken.  Returns a StatList of times.
+ trials = 0
+ measured = []
+
+ while 1:
+ try:
+ os.remove(fname)
+ except OSError:
+ pass
+
+ start_time = time.time()
+ start_clock = time.clock()
+
+ func(self.runnable)
+
+ total_clock = (time.clock() - start_clock)
+ total_time = (time.time() - start_time)
+
+ # Clamp to avoid division by zero for immeasurably fast runs.
+ elap_time = max(total_time, 0.0000001)
+ elap_clock = max(total_clock, 0.0000001)
+
+ trials = trials + 1
+
+ # skip some of the first trials
+ if trials > self.skip_trials:
+ measured.append((elap_clock, elap_time))
+ #print 'measurement total: %.1f ms' % (total_time * 1000.0)
+
+ # at least so many
+ if trials < (self.skip_trials + self.min_trials):
+ #print 'continue: need more trials: %d' % trials
+ continue
+
+ # compute %variance
+ done = 0
+ if self.skip_trials + self.min_trials <= 2:
+ # Too few samples for a meaningful stddev: duplicate the
+ # measurements (StatList requires len > 1) and finish.
+ measured = measured + measured;
+ done = 1
+ #end
+
+ time_stat = StatList([x[1] for x in measured], 'elap time')
+ sp = float(time_stat.s) / float(time_stat.mean)
+
+ # what if MAX_TRIALS is exceeded?
+ too_many = (trials - self.skip_trials) >= self.max_trials
+ good = (100.0 * sp) < self.min_stddev_pct
+ if done or too_many or good:
+ trials = trials - self.skip_trials
+ if not done and not good:
+ #print 'too many trials: %d' % trials
+ pass
+ #clock = StatList([x[0] for x in measured], 'elap clock')
+ return time_stat
+ #end
+ #end
+ #end
+#end
+
+def Decimals(start, end):
+ l = []
+ step = start
+ while 1:
+ r = range(step, step * 10, step)
+ l = l + r
+ if step * 10 >= end:
+ l.append(step * 10)
+ break
+ step = step * 10
+ return l
+#end
+
+# This tests the raw speed of 0-byte inputs
+def RunSpeedTest():
+ # Measures raw encode/decode speed over zero-filled files of
+ # increasing length, comparing xdelta3 at several window sizes
+ # against the SWIG in-memory binding and gzip.
+ for L in Decimals(MIN_RUN, MAX_RUN):
+ SetFileSize(RUNFILE, L)
+
+ trx = TimedTest(RUNFILE, None, Xdelta3Runner(['-W', str(1<<20)]))
+ ReportSpeed(L, trx, '1MB ')
+
+ trx = TimedTest(RUNFILE, None, Xdelta3Runner(['-W', str(1<<19)]))
+ ReportSpeed(L, trx, '512k')
+
+ trx = TimedTest(RUNFILE, None, Xdelta3Runner(['-W', str(1<<18)]))
+ ReportSpeed(L, trx, '256k')
+
+ trm = TimedTest(RUNFILE, None, Xdelta3Mod1(RUNFILE))
+ ReportSpeed(L, trm, 'swig')
+
+ trg = TimedTest(RUNFILE, None, GzipRun1())
+ ReportSpeed(L,trg,'gzip')
+ #end
+#end
+
+def SetFileSize(F,L):
+ fd = os.open(F, os.O_CREAT | os.O_WRONLY)
+ os.ftruncate(fd,L)
+ assert os.fstat(fd).st_size == L
+ os.close(fd)
+#end
+
+def ReportSpeed(L,tr,desc):
+ # Print one line of speed results for a TimedTest `tr` over a run of
+ # length L, labeled with `desc` (Python 2 print statement).
+ print '%s run length %u: size %u: time %.3f ms: decode %.3f ms' % \
+ (desc, L,
+ tr.encode_size,
+ tr.encode_time.mean * 1000.0,
+ tr.decode_time.mean * 1000.0)
+#end
+
+class Xdelta3RunClass:
+ def __init__(self, extra):
+ self.extra = extra
+ #end
+
+ def __str__(self):
+ return ' '.join(self.extra)
+ #end
+
+ def New(self):
+ return Xdelta3Runner(self.extra)
+ #end
+#end
+
+class Xdelta3Runner:
+ # Use "forkexec" to get special command-line only features like
+ # external compression support.
+ def __init__(self, extra, forkexec=False):
+ # extra: additional command-line arguments (e.g. -W size).
+ self.forkexec = forkexec
+ self.extra = extra
+ #end
+
+ def Encode(self, target, source, output):
+ # Encode `target` (optionally against `source`) into `output`.
+ args = (ALL_ARGS +
+ self.extra +
+ ['-e'])
+ if source:
+ args.append('-s')
+ args.append(source)
+ #end
+ args = args + [target, output]
+ self.Main(args)
+ #end
+
+ def Decode(self, input, source, output):
+ # Decode delta `input` (optionally against `source`) into `output`.
+ args = (ALL_ARGS +
+ ['-d'])
+ if source:
+ args.append('-s')
+ args.append(source)
+ #end
+ args = args + [input, output]
+ self.Main(args)
+ #end
+
+ def Verify(self, target, recon):
+ # Compare reconstruction with the target; gzipped targets are
+ # decompressed first so the comparison is content-level.
+ if target[-3:] == ".gz":
+ RunCommandIO(('gzip', '-dc'), target, CMPTMP1)
+ RunCommandIO(('gzip', '-dc'), recon, CMPTMP2)
+ RunCommand(('cmp', CMPTMP1, CMPTMP2))
+ else:
+ RunCommand(('cmp', target, recon))
+ #end
+
+ def EncodeSize(self, output):
+ return os.stat(output).st_size
+ #end
+
+ def Main(self, args):
+ # Dispatch either through a forked ../xdelta3 binary or through the
+ # SWIG binding's command-line entry point (Python 2 except syntax).
+ try:
+ if self.forkexec:
+ RunCommand(['../xdelta3'] + args)
+ else:
+ xdelta3.xd3_main_cmdline(args)
+ except Exception, e:
+ raise CommandError(args, "xdelta3.main exception: %s" % e)
+ #end
+ #end
+#end
+
+class Xdelta3Mod1:
+ def __init__(self, file):
+ self.target_data = open(file, 'r').read()
+ #end
+
+ def Encode(self, ignore1, ignore2, ignore3):
+ r1, encoded = xdelta3.xd3_encode_memory(self.target_data, None, 1000000, 1<<10)
+ if r1 != 0:
+ raise CommandError('memory', 'encode failed: %s' % r1)
+ #end
+ self.encoded = encoded
+ #end
+
+ def Decode(self, ignore1, ignore2, ignore3):
+ r2, data1 = xdelta3.xd3_decode_memory(self.encoded, None, len(self.target_data))
+ if r2 != 0:
+ raise CommandError('memory', 'decode failed: %s' % r1)
+ #end
+ self.decoded = data1
+ #end
+
+ def Verify(self, ignore1, ignore2):
+ if self.target_data != self.decoded:
+ raise CommandError('memory', 'bad decode')
+ #end
+ #end
+
+ def EncodeSize(self, ignore1):
+ return len(self.encoded)
+ #end
+#end
+
+class GzipRun1:
+ def Encode(self, target, source, output):
+ assert source == None
+ RunCommandIO(['gzip', '-cf'], target, output)
+ #end
+
+ def Decode(self, input, source, output):
+ assert source == None
+ RunCommandIO(['gzip', '-dcf'], input, output)
+ #end
+
+ def Verify(self, target, recon):
+ RunCommand(('cmp', target, recon))
+ #end
+
+ def EncodeSize(self, output):
+ return os.stat(output).st_size
+ #end
+#end
+
+class Xdelta1RunClass:
+ def __str__(self):
+ return 'xdelta1'
+ #end
+
+ def New(self):
+ return Xdelta1Runner()
+ #end
+#end
+
+class Xdelta1Runner:
+  # Drives the legacy xdelta1 binary; a source file is mandatory.
+  def Encode(self, target, source, output):
+    assert source != None
+    args = ['xdelta1', 'delta', '-q', source, target, output]
+    # Exit statuses 0 and 1 are both accepted as success here.
+    RunCommand(args, [0, 1])
+  #end
+
+  def Decode(self, input, source, output):
+    assert source != None
+    args = ['xdelta1', 'patch', '-q', input, source, output]
+    # Note: for dumb historical reasons, xdelta1 returns 1 or 0
+    # NOTE(review): despite the note above, no allowed-status list is
+    # passed here (unlike Encode) -- confirm whether 'patch' really only
+    # returns 0.
+    RunCommand(args)
+  #end
+
+  def Verify(self, target, recon):
+    RunCommand(('cmp', target, recon))
+  #end
+
+  def EncodeSize(self, output):
+    return os.stat(output).st_size
+  #end
+#end
+
+# exceptions
+class SkipRcsException:
+ def __init__(self,reason):
+ self.reason = reason
+ #end
+#end
+
+class NotEnoughVersions:
+ def __init__(self):
+ pass
+ #end
+#end
+
+class CommandError:
+  # Failure of an external command or library call; prints diagnostics
+  # at construction time.  (Old-style class: raisable under Python 2.)
+  # NOTE(review): the parameter 'str' shadows the builtin of the same
+  # name inside this method.
+  def __init__(self,cmd,str):
+    # Flatten list/tuple command vectors into a single printable string.
+    if type(cmd) is types.TupleType or \
+       type(cmd) is types.ListType:
+      cmd = reduce(lambda x,y: '%s %s' % (x,y),cmd)
+    #end
+    print 'command was: ',cmd
+    print 'command failed: ',str
+    print 'have fun debugging'
+  #end
+#end
+
+class RcsVersion:
+  # One RCS revision.  'date' is assigned later by RcsFile.Date, and
+  # ordering (Python 2 __cmp__) is by that date, not the version string.
+  def __init__(self,vstr):
+    self.vstr = vstr
+  #end
+  def __cmp__(self,other):
+    return cmp(self.date, other.date)
+  #end
+  def __str__(self):
+    return str(self.vstr)
+  #end
+#end
+
+class RcsFile:
+
+ def __init__(self, fname):
+ self.fname = fname
+ self.versions = []
+ self.state = HEAD_STATE
+ #end
+
+ def SetTotRev(self,s):
+ self.totrev = int(s)
+ #end
+
+ def Rev(self,s):
+ self.rev = RcsVersion(s)
+ if len(self.versions) >= self.totrev:
+ raise SkipRcsException('too many versions (in log messages)')
+ #end
+ self.versions.append(self.rev)
+ #end
+
+ def Date(self,s):
+ self.rev.date = s
+ #end
+
+ def Match(self, line, state, rx, gp, newstate, f):
+ if state == self.state:
+ m = rx.match(line)
+ if m:
+ if f:
+ f(m.group(gp))
+ #end
+ self.state = newstate
+ return 1
+ #end
+ #end
+ return None
+ #end
+
+ def Sum1Rlog(self):
+ f = os.popen('rlog '+self.fname, "r")
+ l = f.readline()
+ while l:
+ if self.Match(l, HEAD_STATE, RE_TOTREV, 1, BAR_STATE, self.SetTotRev):
+ pass
+ elif self.Match(l, BAR_STATE, RE_BAR, 1, REV_STATE, None):
+ pass
+ elif self.Match(l, REV_STATE, RE_REV, 1, DATE_STATE, self.Rev):
+ pass
+ elif self.Match(l, DATE_STATE, RE_DATE, 1, BAR_STATE, self.Date):
+ pass
+ #end
+ l = f.readline()
+ #end
+ c = f.close()
+ if c != None:
+ raise c
+ #end
+ #end
+
+ def Sum1(self):
+ st = os.stat(self.fname)
+ self.rcssize = st.st_size
+ self.Sum1Rlog()
+ if self.totrev != len(self.versions):
+ raise SkipRcsException('wrong version count')
+ #end
+ self.versions.sort()
+ #end
+
+ def Checkout(self,n):
+ v = self.versions[n]
+ out = open(self.Verf(n), "w")
+ cmd = 'co -ko -p%s %s' % (v.vstr, self.fname)
+ total = 0
+ (inf,
+ stream,
+ err) = os.popen3(cmd, "r")
+ inf.close()
+ buf = stream.read()
+ while buf:
+ total = total + len(buf)
+ out.write(buf)
+ buf = stream.read()
+ #end
+ v.vsize = total
+ estr = ''
+ buf = err.read()
+ while buf:
+ estr = estr + buf
+ buf = err.read()
+ #end
+ if stream.close():
+ raise CommandError(cmd, 'checkout failed: %s\n%s\n%s' % (v.vstr, self.fname, estr))
+ #end
+ out.close()
+ err.close()
+ #end
+
+ def Vdate(self,n):
+ return self.versions[n].date
+ #end
+
+ def Vstr(self,n):
+ return self.versions[n].vstr
+ #end
+
+ def Verf(self,n):
+ return os.path.join(TMPDIR, 'input.%d' % n)
+ #end
+
+ def FilePairsByDate(self, runclass):
+ if self.totrev < 2:
+ raise NotEnoughVersions()
+ #end
+ self.Checkout(0)
+ ntrials = []
+ if self.totrev < 2:
+ return vtrials
+ #end
+ for v in range(0,self.totrev-1):
+ if v > 1:
+ os.remove(self.Verf(v-1))
+ #end
+ self.Checkout(v+1)
+ if os.stat(self.Verf(v)).st_size < MIN_SIZE or \
+ os.stat(self.Verf(v+1)).st_size < MIN_SIZE:
+ continue
+ #end
+
+ result = TimedTest(self.Verf(v+1),
+ self.Verf(v),
+ runclass.New())
+
+ target_size = os.stat(self.Verf(v+1)).st_size
+
+ ntrials.append(result)
+ #end
+
+ os.remove(self.Verf(self.totrev-1))
+ os.remove(self.Verf(self.totrev-2))
+ return ntrials
+ #end
+
+ def AppendVersion(self, f, n):
+ self.Checkout(n)
+ rf = open(self.Verf(n), "r")
+ data = rf.read()
+ f.write(data)
+ rf.close()
+ return len(data)
+ #end
+
+class RcsFinder:
+  # Crawls a directory tree for RCS ,v files and drives the test
+  # variants over them (pairwise trials, synthetic big-file inputs).
+  def __init__(self):
+    self.subdirs = []
+    self.rcsfiles = []
+    self.others = []
+    self.skipped = []
+    self.biground = 0
+  #end
+
+  def Scan1(self,dir):
+    # Scan one directory level; returns its subdirectories so Crawl can
+    # continue breadth-first.
+    dents = os.listdir(dir)
+    subdirs = []
+    rcsfiles = []
+    others = []
+    for dent in dents:
+      full = os.path.join(dir, dent)
+      if os.path.isdir(full):
+        subdirs.append(full)
+      elif dent[len(dent)-2:] == ",v":
+        rcsfiles.append(RcsFile(full))
+      else:
+        others.append(full)
+      #end
+    #end
+    self.subdirs = self.subdirs + subdirs
+    self.rcsfiles = self.rcsfiles + rcsfiles
+    self.others = self.others + others
+    return subdirs
+  #end
+
+  def Crawl(self, dir):
+    # Breadth-first traversal rooted at dir.
+    subdirs = [dir]
+    while subdirs:
+      s1 = self.Scan1(subdirs[0])
+      subdirs = subdirs[1:] + s1
+    #end
+  #end
+
+  def Summarize(self):
+    # Parse every found file; keep only those with >= 2 clean versions.
+    good = []
+    for rf in self.rcsfiles:
+      try:
+        rf.Sum1()
+        if rf.totrev < 2:
+          raise SkipRcsException('too few versions (< 2)')
+        #end
+      except SkipRcsException, e:
+        #print 'skipping file %s: %s' % (rf.fname, e.reason)
+        self.skipped.append(rf)
+      else:
+        good.append(rf)
+      #end
+    self.rcsfiles = good
+  #end
+
+  def AllPairsByDate(self, runclass):
+    # Run pairwise trials over every file; drops files that fail.
+    results = []
+    good = []
+    for rf in self.rcsfiles:
+      try:
+        results = results + rf.FilePairsByDate(runclass)
+      except SkipRcsException:
+        print 'file %s has compressed versions: skipping' % (rf.fname)
+      except NotEnoughVersions:
+        print 'testing %s on %s: not enough versions' % (runclass, rf.fname)
+      else:
+        good.append(rf)
+      #end
+    self.rcsfiles = good
+    self.ReportPairs(runclass, results)
+    return results
+  #end
+
+  def ReportPairs(self, name, results):
+    # Aggregate totals over all pairwise trials.
+    encode_time = 0
+    decode_time = 0
+    encode_size = 0
+    for r in results:
+      encode_time += r.encode_time.mean
+      decode_time += r.decode_time.mean
+      encode_size += r.encode_size
+    #end
+    print '%s rcs: encode %.2f s: decode %.2f s: size %d' % \
+          (name, encode_time, decode_time, encode_size)
+  #end
+
+  def MakeBigFiles(self, rand):
+    # Build a synthetic source/target pair by concatenating two randomly
+    # chosen versions of a random FILE_P fraction of the files.
+    f1 = open(TMPDIR + "/big.1", "w")
+    f2 = open(TMPDIR + "/big.2", "w")
+    population = []
+    for file in self.rcsfiles:
+      if len(file.versions) < 2:
+        continue
+      population.append(file)
+    #end
+    f1sz = 0
+    f2sz = 0
+    fcount = int(len(population) * FILE_P)
+    assert fcount > 0
+    for file in rand.sample(population, fcount):
+      # Files matching IGNORE_FILENAME still count toward fcount but are
+      # not appended.
+      m = IGNORE_FILENAME.match(file.fname)
+      if m != None:
+        continue
+      #end
+      r1, r2 = rand.sample(xrange(0, len(file.versions)), 2)
+      f1sz += file.AppendVersion(f1, r1)
+      f2sz += file.AppendVersion(f2, r2)
+      #m.update('%s,%s,%s ' % (file.fname[len(RCSDIR):],
+      #file.Vstr(r1), file.Vstr(r2)))
+    #end
+    testkey = 'rcs%d' % self.biground
+    self.biground = self.biground + 1
+
+    print '%s; source %u bytes; target %u bytes' % (testkey, f1sz, f2sz)
+    f1.close()
+    f2.close()
+    return (TMPDIR + "/big.1",
+            TMPDIR + "/big.2",
+            testkey)
+  #end
+
+  def Generator(self):
+    # A (source, target, key) generator suitable for the optimizer loop.
+    return lambda rand: self.MakeBigFiles(rand)
+  #end
+#end
+
+# find a set of RCS files for testing
+def GetTestRcsFiles():
+ rcsf = RcsFinder()
+ rcsf.Crawl(RCSDIR)
+ if len(rcsf.rcsfiles) == 0:
+ raise CommandError('', 'no RCS files')
+ #end
+ rcsf.Summarize()
+ print "rcsfiles: rcsfiles %d; subdirs %d; others %d; skipped %d" % (
+ len(rcsf.rcsfiles),
+ len(rcsf.subdirs),
+ len(rcsf.others),
+ len(rcsf.skipped))
+ print StatList([x.rcssize for x in rcsf.rcsfiles], "rcssize").str
+ print StatList([x.totrev for x in rcsf.rcsfiles], "totrev").str
+ return rcsf
+#end
+
+class SampleDataTest:
+  # Builds (source, target, key) pairs from ordinary sample files found
+  # by recursively listing the given directories.
+  def __init__(self, dirs):
+    dirs_in = dirs
+    self.pairs = []
+    while dirs:
+      d = dirs[0]
+      dirs = dirs[1:]
+      l = os.listdir(d)
+      files = []
+      for e in l:
+        p = os.path.join(d, e)
+        if os.path.isdir(p):
+          dirs.append(p)
+        else:
+          files.append(p)
+        #end
+      #end
+      if len(files) > 1:
+        files.sort()
+        # NOTE(review): the double loop includes both orderings AND the
+        # x == y self-pairs -- presumably intentional (identity deltas);
+        # confirm.
+        for x in xrange(len(files)):
+          for y in xrange(len(files)):
+            self.pairs.append((files[x], files[y],
+                               '%s-%s' % (files[x], files[y])))
+          #end
+        #end
+      #end
+    #end
+    print "Sample data test using %d file pairs in %s" % (
+      len(self.pairs), dirs_in)
+  #end
+
+  def Generator(self):
+    # Uniformly sample one (source, target, key) tuple per round.
+    return lambda rand: rand.choice(self.pairs)
+  #end
+#end
+
+# configs are represented as a list of values,
+# program takes a list of strings:
+def ConfigToArgs(config):
+ args = [ '-C',
+ ','.join([str(x) for x in config[0:SOFT_CONFIG_CNT]])]
+ for i in range(SOFT_CONFIG_CNT, len(CONFIG_ORDER)):
+ key = CONFIG_ARGMAP[CONFIG_ORDER[i]]
+ val = config[i]
+ if val == 'true' or val == 'false':
+ if val == 'true':
+ args.append('%s' % key)
+ #end
+ else:
+ args.append('%s=%s' % (key, val))
+ #end
+ #end
+ return args
+#end
+
+#
+class RandomTest:
+  # One (input, config) measurement.  When 'syntuple' is given the
+  # runtime/size are synthesized (summed from prior results) instead of
+  # measured.
+  def __init__(self, tnum, tinput, config, syntuple = None):
+    self.mytinput = tinput[2]
+    self.myconfig = config
+    self.tnum = tnum
+
+    if syntuple != None:
+      self.runtime = syntuple[0]
+      self.compsize = syntuple[1]
+      self.decodetime = None
+    else:
+      args = ConfigToArgs(config)
+      result = TimedTest(tinput[1], tinput[0], Xdelta3Runner(args))
+
+      self.runtime = result.encode_time.mean
+      self.compsize = result.encode_size
+      self.decodetime = result.decode_time.mean
+    #end
+
+    # NOTE(review): assigning self.score here shadows the score() method
+    # below; ScoreTests later stores a float into this attribute.
+    self.score = None
+    self.time_pos = None
+    self.size_pos = None
+    self.score_pos = None
+  #end
+
+  def __str__(self):
+    decodestr = ' %s' % self.decodetime
+    return 'time %.6f%s size %d%s << %s >>%s' % (
+      self.time(), ((self.time_pos != None) and
+                    (" (%s)" % self.time_pos) or ""),
+      self.size(), ((self.size_pos != None) and
+                    (" (%s)" % self.size_pos) or ""),
+      c2str(self.config()),
+      decodestr)
+  #end
+
+  def time(self):
+    return self.runtime
+  #end
+
+  def size(self):
+    return self.compsize
+  #end
+
+  def config(self):
+    return self.myconfig
+  #end
+
+  def score(self):
+    # NOTE(review): dead/broken -- the instance attribute 'score' set in
+    # __init__ shadows this method, so instance.score() raises TypeError;
+    # callers read the attribute directly instead.
+    return self.score
+  #end
+
+  def tinput(self):
+    return self.mytinput
+  #end
+#end
+
+def PosInAlist(l, e):
+ for i in range(0, len(l)):
+ if l[i][1] == e:
+ return i;
+ #end
+ #end
+ return -1
+#end
+
+# Generates a set of num_results test configurations, given the list of
+# retest-configs.
+# Generates a set of num_results test configurations, given the list of
+# retest-configs.
+def RandomTestConfigs(rand, input_configs, num_results):
+
+  # Start from the retained configs and pad with fresh random ones,
+  # de-duplicating via have_set.
+  outputs = input_configs[:]
+  have_set = dict([(c,c) for c in input_configs])
+
+  # Compute a random configuration
+  def RandomConfig():
+    config = []
+    cmap = {}
+    # Each key's generator may consult values chosen so far (cmap).
+    for key in CONFIG_ORDER:
+      val = cmap[key] = (INPUT_SPEC(rand)[key])(cmap)
+      config.append(val)
+    #end
+    return tuple(config)
+  #end
+
+  while len(outputs) < num_results:
+    newc = None
+    # Up to 100 attempts to draw an unseen config before giving up.
+    for i in xrange(100):
+      c = RandomConfig()
+      if have_set.has_key(c):
+        continue
+      #end
+      have_set[c] = c
+      newc = c
+      break
+    if newc is None:
+      print 'stopped looking for configs at %d' % len(outputs)
+      break
+    #end
+    outputs.append(c)
+  #end
+  outputs.sort()
+  return outputs
+#end
+
+def RunOptimizationLoop(rand, generator, rounds):
+  # Iteratively measure random configs on generated inputs, score them,
+  # and carry the best KEEP_P fraction into the next round.
+  configs = []
+  for rnum in xrange(rounds):
+    configs = RandomTestConfigs(rand, configs, MAX_RESULTS)
+    tinput = generator(rand)
+    tests = []
+    for x in xrange(len(configs)):
+      t = RandomTest(x, tinput, configs[x])
+      print 'Round %d test %d: %s' % (rnum, x, t)
+      tests.append(t)
+    #end
+    results = ScoreTests(tests)
+
+    # Accumulate every scored result into the global per-config history
+    # consumed by GraphSummary.
+    for r in results:
+      c = r.config()
+      if not test_all_config_results.has_key(c):
+        test_all_config_results[c] = [r]
+      else:
+        test_all_config_results[c].append(r)
+      #end
+    #end
+
+    #GraphResults('expt%d' % rnum, results)
+    #GraphSummary('sum%d' % rnum, results)
+
+    # re-test some fraction
+    configs = [r.config() for r in results[0:int(MAX_RESULTS * KEEP_P)]]
+  #end
+#end
+
+# TODO: cleanup
+# Global map: config tuple -> list of RandomTest results, accumulated by
+# RunOptimizationLoop across rounds and consumed by GraphSummary.
+test_all_config_results = {}
+
+def ScoreTests(results):
+ scored = []
+ timed = []
+ sized = []
+
+ t_min = float(min([test.time() for test in results]))
+ #t_max = float(max([test.time() for test in results]))
+ s_min = float(min([test.size() for test in results]))
+ #s_max = float(max([test.size() for test in results]))
+
+ for test in results:
+
+ # Hyperbolic function. Smaller scores still better
+ red = 0.999 # minimum factors for each dimension are 1/1000
+ test.score = ((test.size() - s_min * red) *
+ (test.time() - t_min * red))
+
+ scored.append((test.score, test))
+ timed.append((test.time(), test))
+ sized.append((test.size(), test))
+ #end
+
+ scored.sort()
+ timed.sort()
+ sized.sort()
+
+ best_by_size = []
+ best_by_time = []
+
+ pos = 0
+ for (score, test) in scored:
+ pos += 1
+ test.score_pos = pos
+ #end
+
+ scored = [x[1] for x in scored]
+
+ for test in scored:
+ test.size_pos = PosInAlist(sized, test)
+ test.time_pos = PosInAlist(timed, test)
+ #end
+
+ for test in scored:
+ c = test.config()
+ s = 0.0
+ print 'H-Score: %0.9f %s' % (test.score, test)
+ #end
+
+ return scored
+#end
+
+def GraphResults(desc, results):
+ f = open("data-%s.csv" % desc, "w")
+ for r in results:
+ f.write("%0.9f\t%d\t# %s\n" % (r.time(), r.size(), r))
+ #end
+ f.close()
+ os.system("./plot.sh data-%s.csv plot-%s.jpg" % (desc, desc))
+#end
+
+def GraphSummary(desc, results_ignore):
+ test_population = 0
+ config_ordered = []
+
+ # drops duplicate test/config pairs (TODO: don't retest them)
+ for config, cresults in test_all_config_results.items():
+ input_config_map = {}
+ uniq = []
+ for test in cresults:
+ assert test.config() == config
+ test_population += 1
+ key = test.tinput()
+ if not input_config_map.has_key(key):
+ input_config_map[key] = {}
+ #end
+ if input_config_map[key].has_key(config):
+ print 'skipping repeat test %s vs. %s' % (input_config_map[key][config], test)
+ continue
+ #end
+ input_config_map[key][config] = test
+ uniq.append(test)
+ #end
+ config_ordered.append(uniq)
+ #end
+
+ # sort configs descending by number of tests
+ config_ordered.sort(lambda x, y: len(y) - len(x))
+
+ print 'population %d: %d configs %d results' % \
+ (test_population,
+ len(config_ordered),
+ len(config_ordered[0]))
+
+ if config_ordered[0] == 1:
+ return
+ #end
+
+ # a map from test-key to test-list w/ various configs
+ input_set = {}
+ osize = len(config_ordered)
+
+ for i in xrange(len(config_ordered)):
+ config = config_ordered[i][0].config()
+ config_tests = config_ordered[i]
+
+ #print '%s has %d tested inputs' % (config, len(config_tests))
+
+ if len(input_set) == 0:
+ input_set = dict([(t.tinput(), [t]) for t in config_tests])
+ continue
+ #end
+
+ # a map from test-key to test-list w/ various configs
+ update_set = {}
+ for r in config_tests:
+ t = r.tinput()
+ if input_set.has_key(t):
+ update_set[t] = input_set[t] + [r]
+ else:
+ #print 'config %s does not have test %s' % (config, t)
+ pass
+ #end
+ #end
+
+ if len(update_set) <= 1:
+ break
+ #end
+
+ input_set = update_set
+
+ # continue if there are more w/ the same number of inputs
+ if i < (len(config_ordered) - 1) and \
+ len(config_ordered[i + 1]) == len(config_tests):
+ continue
+ #end
+
+ # synthesize results for multi-test inputs
+ config_num = None
+
+ # map of config to sum(various test-keys)
+ smap = {}
+ for (key, tests) in input_set.items():
+ if config_num == None:
+ # config_num should be the same in all elements
+ config_num = len(tests)
+ smap = dict([(r.config(),
+ (r.time(),
+ r.size()))
+ for r in tests])
+ else:
+ # compuate the per-config sum of time/size
+ assert config_num == len(tests)
+ smap = dict([(r.config(),
+ (smap[r.config()][0] + r.time(),
+ smap[r.config()][1] + r.size()))
+ for r in tests])
+ #end
+ #end
+
+ if config_num == 1:
+ continue
+ #end
+
+ if len(input_set) == osize:
+ break
+ #end
+
+ summary = '%s-%d' % (desc, len(input_set))
+ osize = len(input_set)
+
+ print 'generate %s w/ %d configs' % (summary, config_num)
+ syn = [RandomTest(0, (None, None, summary), config,
+ syntuple = (smap[config][0], smap[config][1]))
+ for config in smap.keys()]
+ syn = ScoreTests(syn)
+ #print 'smap is %s' % (smap,)
+ #print 'syn is %s' % (' and '.join([str(x) for x in syn]))
+ #GraphResults(summary, syn)
+ #end
+#end
+
+def RunRegressionTest(pairs, rounds):
+  # Runs the fork/exec xdelta3 over every file pair with a few fixed
+  # argument variations (single trial each, no warmup).
+  # NOTE(review): the 'rounds' parameter is currently unused.
+  for args in [
+      [],
+      ['-S=djw'],
+      ['-B=412907520'],
+      # The space form deliberately exercises "-B value" as one argv
+      # element -- presumably testing command-line parsing; confirm.
+      ['-B 412907520', ],
+
+      ]:
+    print "Args %s" % (args)
+    for (file1, file2, testkey) in pairs:
+      ttest = TimedTest(file1, file2, Xdelta3Runner(args, forkexec=True),
+                        skip_trials = 0,
+                        min_trials = 1,
+                        max_trials = 1)
+      print "Source %s\nTarget %s\nEncode %s\nDecode %s\nSize %s\n\n" % (
+        file1, file2,
+        ttest.encode_time.str,
+        ttest.decode_time.str,
+        ttest.encode_size)
+    #end
+#end
+
+if __name__ == "__main__":
+ try:
+ RunCommand(['rm', '-rf', TMPDIR])
+ os.mkdir(TMPDIR)
+
+ #rcsf = GetTestRcsFiles()
+ #generator = rcsf.Generator()
+
+ sample = SampleDataTest([SAMPLEDIR])
+ generator = sample.Generator()
+
+ rand = random.Random(135135135135135)
+
+ RunRegressionTest(sample.pairs, TEST_ROUNDS)
+
+ #RunSpeedTest()
+
+ # the idea below is to add the default configurations and
+ # xdelta1 to the optimization loop:
+ #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-1', '-3', '-6']))
+ #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9']))
+ #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9', '-S', 'djw']))
+ #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-1', '-S', 'djw']))
+ #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9', '-T']))
+ #x1r = rcsf.AllPairsByDate(Xdelta1RunClass())
+
+ except CommandError:
+ pass
+ else:
+ RunCommand(['rm', '-rf', TMPDIR])
+ pass
+ #end
+#end
diff --git a/testing/xdelta3-test.py b/testing/xdelta3-test.py
new file mode 100755
index 0000000..5f1a263
--- /dev/null
+++ b/testing/xdelta3-test.py
@@ -0,0 +1,155 @@
+#!/usr/bin/python2.7
+# xdelta 3 - delta compression tools and library
+# Copyright (C) 2003, 2006, 2007. Joshua P. MacDonald
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+import xdelta3
+
+# Smoke tests for the SWIG xdelta3 binding: encode/decode happy paths,
+# buffer-exhaustion errors, corruption detection, and compression levels.
+
+# the test data section is expected to be len('target')
+source = 'source source input0 source source'
+target = 'source source target source source'
+
+#
+#
+
+print 'encode: basic ...'
+result, patch = xdelta3.xd3_encode_memory(target, source, 50)
+
+assert result == 0
+assert len(patch) < len(source)
+
+print 'encode: adler32 ...'
+# Adding the checksum must grow the delta but still beat the input size.
+result, patch_adler32 = xdelta3.xd3_encode_memory(target, source, 50,
+                                                  xdelta3.XD3_ADLER32)
+
+assert result == 0
+assert len(patch_adler32) < len(source)
+assert len(patch_adler32) > len(patch)
+
+print 'encode: secondary ...'
+result, patch_djw = xdelta3.xd3_encode_memory(target, source, 50,
+                                              xdelta3.XD3_SEC_DJW)
+
+assert result == 0
+# secondary compression doesn't help
+assert len(patch_djw) > len(patch)
+
+print 'encode: exact ...'
+# Output buffer sized exactly to the known delta size must succeed.
+result, ignore = xdelta3.xd3_encode_memory(target, source, len(patch))
+
+assert result == 0
+assert len(ignore) < len(source)
+
+print 'encode: out of space ...'
+# One byte short must fail; 28 is the expected error code (presumably
+# ENOSPC -- confirm against the binding).
+result, ignore = xdelta3.xd3_encode_memory(target, source, len(patch) - 1)
+
+assert result == 28
+assert ignore == None
+
+print 'encode: zero space ...'
+result, ignore = xdelta3.xd3_encode_memory(target, source, 0)
+
+assert result == 28
+assert ignore == None
+
+print 'encode: no source ...'
+result, zdata = xdelta3.xd3_encode_memory(target, None, 50)
+
+assert result == 0
+assert len(zdata) > len(patch)
+
+print 'encode: no input ...'
+result, ignore = xdelta3.xd3_encode_memory(None, None, 50)
+
+assert result != 0
+
+print 'decode: basic ...'
+result, target1 = xdelta3.xd3_decode_memory(patch, source, len(target))
+
+assert result == 0
+assert len(target1) == len(target)
+assert target1 == target
+
+print 'decode: out of space ...'
+result, ignore = xdelta3.xd3_decode_memory(patch, source, len(target) - 1)
+
+assert result == 28
+assert ignore == None
+
+print 'decode: zero space ...'
+result, ignore = xdelta3.xd3_decode_memory(patch, source, 0)
+
+assert result == 28
+assert ignore == None
+
+print 'decode: single byte error ...'
+# a few expected single-byte errors, e.g., unused address cache bits, see
+# xdelta3-test.h's single-bit error tests
+extra_count = 4
+noverify_count = 0
+# Corrupt each byte of the adler32-protected delta in turn; with
+# verification on, every corruption must be detected (-17712).
+for corrupt_pos in range(len(patch_adler32)):
+  input = ''.join([j == corrupt_pos and '\xff' or patch_adler32[j]
+                   for j in range(len(patch_adler32))])
+
+  result, ignore = xdelta3.xd3_decode_memory(input, source, len(target), 0)
+  assert result == -17712
+  assert ignore == None
+
+  # without adler32 verification, the error may be in the data section which
+  # in this case is 6 bytes 'target'
+  result, corrupt = xdelta3.xd3_decode_memory(input, source, len(target),
+                                              xdelta3.XD3_ADLER32_NOVER)
+  if result == 0:
+    noverify_count = noverify_count + 1
+    #print "got %s" % corrupt
+  #end
+#end
+assert noverify_count == len('target') + extra_count
+
+print 'decode: no source ...'
+result, target2 = xdelta3.xd3_decode_memory(zdata, None, len(target))
+
+assert result == 0
+assert target == target2
+
+# Test compression level setting via flags. assumes a 9 byte checksum
+# and that level 9 steps 2, level 1 steps 15:
+#  01234567890123456789012345678901
+# level 1 only indexes 2 checksums "abcdefghi" and "ABCDEFGHI"
+# outputs 43 vs. 23 bytes
+print 'encode: compression level ...'
+
+source = '_la_la_abcdefghi_la_la_ABCDEFGHI'
+target = 'la_la_ABCDEFGH__la_la_abcdefgh__'
+
+result1, level1 = xdelta3.xd3_encode_memory(target, source, 50, xdelta3.XD3_COMPLEVEL_1)
+result9, level9 = xdelta3.xd3_encode_memory(target, source, 50, xdelta3.XD3_COMPLEVEL_9)
+
+assert result1 == 0 and result9 == 0
+assert len(level1) > len(level9)
+
+#
+# Issue 65
+print 'encode: 65 ...'
+source = 'Hello World'
+target = 'Hello everyone'
+# An output limit equal to the target length is too small here...
+result, patch = xdelta3.xd3_encode_memory(target, source, len(target))
+assert result != 0
+
+# ...but twice the target length suffices.
+result, patch = xdelta3.xd3_encode_memory(target, source, 2 * len(target))
+assert result == 0
+
+print 'PASS'