Diffstat (limited to 'testing')
-rw-r--r-- | testing/Makefile | 8
-rw-r--r-- | testing/checksum_test.cc | 756
-rw-r--r-- | testing/checksum_test_c.c | 174
-rw-r--r-- | testing/cmp.h | 53
-rw-r--r-- | testing/delta.h | 73
-rw-r--r-- | testing/file.h | 385
-rw-r--r-- | testing/modify.h | 385
-rw-r--r-- | testing/random.h | 143
-rw-r--r-- | testing/regtest.cc | 1306
-rw-r--r-- | testing/regtest_c.c | 2
-rwxr-xr-x | testing/run_release.sh | 2
-rw-r--r-- | testing/segment.h | 98
-rw-r--r-- | testing/sizes.h | 111
-rw-r--r-- | testing/test.h | 70
-rwxr-xr-x | testing/xdelta3-regtest.py | 1273
-rwxr-xr-x | testing/xdelta3-test.py | 155
16 files changed, 4994 insertions, 0 deletions
diff --git a/testing/Makefile b/testing/Makefile new file mode 100644 index 0000000..d0b9c9e --- /dev/null +++ b/testing/Makefile @@ -0,0 +1,8 @@ +all: + (cd .. && make all) + +xdelta3regtest: + (cd .. && make xdelta3regtest) + +xdelta3checksum: + (cd .. && make xdelta3checksum) diff --git a/testing/checksum_test.cc b/testing/checksum_test.cc new file mode 100644 index 0000000..9418d24 --- /dev/null +++ b/testing/checksum_test.cc @@ -0,0 +1,756 @@ +/* Copyright (C) 2007 Josh MacDonald */ + +#include "test.h" +#include <assert.h> +#include <list> +#include <vector> +#include <algorithm> + +#include "../cpp-btree/btree_map.h" + +extern "C" { +uint32_t xd3_large32_cksum_old (xd3_hash_cfg *cfg, const uint8_t *base, const usize_t look); +uint32_t xd3_large32_cksum_update_old (xd3_hash_cfg *cfg, uint32_t cksum, + const uint8_t *base, const usize_t look); + +uint64_t xd3_large64_cksum_old (xd3_hash_cfg *cfg, const uint8_t *base, const usize_t look); +uint64_t xd3_large64_cksum_update_old (xd3_hash_cfg *cfg, uint64_t cksum, + const uint8_t *base, const usize_t look); +} + +using btree::btree_map; +using std::list; +using std::vector; + +// MLCG parameters +// a, a* +uint32_t good_32bit_values[] = { + 1597334677U, // ... + 741103597U, 887987685U, +}; + +// a, a* +uint64_t good_64bit_values[] = { + 1181783497276652981ULL, 4292484099903637661ULL, + 7664345821815920749ULL, // ... +}; + +void print_header() { + static int hdr_cnt = 0; + if (hdr_cnt++ % 20 == 0) { + printf("%-32sConf\t\tCount\tUniq\tFull\tCover\tColls" + "\tMB/s\tIters\t#Colls\n", "Name"); + } +} + +struct true_type { }; +struct false_type { }; + +template <typename Word> +usize_t bitsof(); + +template<> +usize_t bitsof<unsigned int>() { + return sizeof(unsigned int) * 8; +} + +template<> +usize_t bitsof<unsigned long>() { + return sizeof(unsigned long) * 8; +} + +template<> +usize_t bitsof<unsigned long long>() { + return sizeof(unsigned long long) * 8; +} + +template <typename Word> +struct hhash { // shift "s" bits leaving the high bits as a hash value for + // this checksum, which are the most "distant" in terms of the + // spectral test for the rabin_karp MLCG. For short windows, + // the high bits aren't enough, XOR "mask" worth of these in. + Word operator()(const Word t, const Word s, const Word mask) { + return (t >> s) ^ (t & mask); + } +}; + +template <typename Word> +Word good_word(); + +template<> +uint32_t good_word<uint32_t>() { + return good_32bit_values[0]; +} + +template<> +uint64_t good_word<uint64_t>() { + return good_64bit_values[0]; +} + +// CLASSES + +#define SELF Word, CksumSize, CksumSkip, Hash, Compaction +#define MEMBER template <typename Word, \ + int CksumSize, \ + int CksumSkip, \ + typename Hash, \ + int Compaction> + +MEMBER +struct cksum_params { + typedef Word word_type; + typedef Hash hash_type; + + static const int cksum_size = CksumSize; + static const int cksum_skip = CksumSkip; + static const int compaction = Compaction; +}; + +MEMBER +struct rabin_karp : public cksum_params<SELF> { + // (a^cksum_size-1 c_0) + (a^cksum_size-2 c_1) ... 
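For intuition, here is a minimal standalone sketch of the polynomial rolling hash that rabin_karp models; it is illustrative only, not part of the patch, and kMult/kWin are placeholder names rather than identifiers from the code:

#include <cassert>
#include <cstdint>
#include <cstdio>

// Placeholder constants: kMult plays the role of good_word<Word>(),
// kWin the role of CksumSize.  All arithmetic wraps mod 2^64.
static const uint64_t kMult = 1181783497276652981ULL;
static const int kWin = 8;

// Full recompute: sum of kMult^(kWin-1-i) * p[i], written in Horner form.
static uint64_t full_hash(const uint8_t *p) {
  uint64_t h = 0;
  for (int i = 0; i < kWin; i++) {
    h = h * kMult + p[i];
  }
  return h;
}

int main() {
  uint8_t buf[64];
  for (int i = 0; i < 64; i++) buf[i] = (uint8_t)(i * 37 + 11);

  // kMult^kWin is the weight of the byte that leaves the window.
  uint64_t mult_to_win = 1;
  for (int i = 0; i < kWin; i++) mult_to_win *= kMult;

  uint64_t h = full_hash(buf);
  for (int i = 1; i + kWin <= 64; i++) {
    // Slide one byte: h' = kMult*h - kMult^kWin*outgoing + incoming,
    // the same update rabin_karp::incr() performs.
    h = kMult * h - mult_to_win * buf[i - 1] + buf[i + kWin - 1];
    assert(h == full_hash(buf + i));
  }
  printf("rolling update matches full recompute\n");
  return 0;
}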
+ rabin_karp() + : powers(make_powers()), + product(powers[0] * good_word<Word>()), + incr_state(0) { } + + static Word* make_powers() { + Word *p = new Word[CksumSize]; + p[CksumSize - 1] = 1; + for (int i = CksumSize - 2; i >= 0; i--) { + p[i] = p[i + 1] * good_word<Word>(); + } + return p; + } + + ~rabin_karp() { + delete [] powers; + } + + Word step(const uint8_t *ptr) { + Word h = 0; + for (int i = 0; i < CksumSize; i++) { + h += (ptr[i]) * powers[i]; + } + return h; + } + + Word state0(const uint8_t *ptr) { + incr_state = step(ptr); + return incr_state; + } + + Word incr(const uint8_t *ptr) { + incr_state = good_word<Word>() * incr_state - + product * (ptr[-1]) + (ptr[CksumSize - 1]); + return incr_state; + } + + const Word *const powers; + const Word product; + Word incr_state; +}; + +MEMBER +struct with_stream : public cksum_params<SELF> { + xd3_stream stream; + + with_stream() + { + xd3_config cfg; + memset (&stream, 0, sizeof (stream)); + xd3_init_config (&cfg, 0); + cfg.smatch_cfg = XD3_SMATCH_SOFT; + cfg.smatcher_soft.large_look = CksumSize; + cfg.smatcher_soft.large_step = CksumSkip; + cfg.smatcher_soft.small_look = 4; + cfg.smatcher_soft.small_chain = 4; + cfg.smatcher_soft.small_lchain = 4; + cfg.smatcher_soft.max_lazy = 4; + cfg.smatcher_soft.long_enough = 4; + CHECK_EQ(0, xd3_config_stream (&stream, &cfg)); + + CHECK_EQ(0, xd3_size_hashtable (&stream, + 1<<10 /* ignored */, + stream.smatcher.large_look, + & stream.large_hash)); + } + ~with_stream() + { + xd3_free_stream (&stream); + } +}; + +MEMBER +struct large_cksum : public with_stream<SELF> { + Word step(const uint8_t *ptr) { + return xd3_large_cksum (&this->stream.large_hash, ptr, CksumSize); + } + + Word state0(const uint8_t *ptr) { + incr_state = step(ptr); + return incr_state; + } + + Word incr(const uint8_t *ptr) { + incr_state = xd3_large_cksum_update (&this->stream.large_hash, + incr_state, ptr - 1, CksumSize); + return incr_state; + } + + Word incr_state; +}; + +#if SIZEOF_USIZE_T == 4 +#define xd3_large_cksum_old xd3_large32_cksum_old +#define xd3_large_cksum_update_old xd3_large32_cksum_update_old +#elif SIZEOF_USIZE_T == 8 +#define xd3_large_cksum_old xd3_large64_cksum_old +#define xd3_large_cksum_update_old xd3_large64_cksum_update_old +#endif + +MEMBER +struct large_cksum_old : public with_stream<SELF> { + Word step(const uint8_t *ptr) { + return xd3_large_cksum_old (&this->stream.large_hash, ptr, CksumSize); + } + + Word state0(const uint8_t *ptr) { + incr_state = step(ptr); + return incr_state; + } + + Word incr(const uint8_t *ptr) { + incr_state = xd3_large_cksum_update_old (&this->stream.large_hash, + incr_state, ptr - 1, CksumSize); + return incr_state; + } + + Word incr_state; +}; + +// TESTS + +template <typename Word> +struct file_stats { + typedef const uint8_t* ptr_type; + typedef Word word_type; + typedef btree::btree_multimap<word_type, ptr_type> table_type; + typedef typename table_type::iterator table_iterator; + + usize_t cksum_size; + usize_t cksum_skip; + usize_t unique; + usize_t unique_values; + usize_t count; + table_type table; + + file_stats(usize_t size, usize_t skip) + : cksum_size(size), + cksum_skip(skip), + unique(0), + unique_values(0), + count(0) { + } + + void reset() { + unique = 0; + unique_values = 0; + count = 0; + table.clear(); + } + + void update(word_type word, ptr_type ptr) { + table_iterator t_i = table.find(word); + + count++; + if (t_i != table.end()) { + int collisions = 0; + for (table_iterator p_i = t_i; + p_i != table.end() && p_i->first == word; + ++p_i) { + 
if (memcmp(p_i->second, ptr, cksum_size) == 0) { + return; + } + collisions++; + } + if (collisions >= 1000) { + fprintf(stderr, "Something is not right, lots of collisions=%d\n", + collisions); + abort(); + } + } else { + unique_values++; + } + unique++; + table.insert(std::make_pair(word, ptr)); + return; + } + + void freeze() { + table.clear(); + } +}; + +struct test_result_base; + +static vector<test_result_base*> all_tests; + +struct test_result_base { + virtual ~test_result_base() { + } + virtual void reset() = 0; + virtual void print() = 0; + virtual void get(const uint8_t* buf, const size_t buf_size, + usize_t iters) = 0; + virtual void stat() = 0; + virtual usize_t count() = 0; + virtual usize_t dups() = 0; + virtual double uniqueness() = 0; + virtual double fullness() = 0; + virtual double collisions() = 0; + virtual double coverage() = 0; + virtual double compression() = 0; + virtual double time() = 0; + virtual double total_time() = 0; + virtual usize_t total_count() = 0; + virtual usize_t total_dups() = 0; +}; + +template <typename Checksum> +struct test_result : public test_result_base { + Checksum cksum; + const char *test_name; + file_stats<typename Checksum::word_type> fstats; + usize_t test_size; + usize_t n_steps; + usize_t n_incrs; + typename Checksum::word_type s_bits; + typename Checksum::word_type s_mask; + usize_t t_entries; + usize_t h_bits; + usize_t h_buckets_full; + char *hash_table; + long accum_millis; + usize_t accum_iters; + + // These are not reset + double accum_time; + usize_t accum_count; + usize_t accum_dups; + usize_t accum_colls; + size_t accum_size; + + test_result(const char *name) + : test_name(name), + fstats(Checksum::cksum_size, Checksum::cksum_skip), + hash_table(NULL), + accum_millis(0), + accum_iters(0), + accum_time(0.0), + accum_count(0), + accum_dups(0), + accum_colls(0), + accum_size(0) { + all_tests.push_back(this); + } + + ~test_result() { + reset(); + } + + void reset() { + // size of file + test_size = 0; + + // count + n_steps = 0; + n_incrs = 0; + + // four values used by new_table()/summarize_table() + s_bits = 0; + s_mask = 0; + t_entries = 0; + h_bits = 0; + h_buckets_full = 0; + + accum_millis = 0; + accum_iters = 0; + + fstats.reset(); + + // temporary + if (hash_table) { + delete(hash_table); + hash_table = NULL; + } + } + + usize_t count() { + if (Checksum::cksum_skip == 1) { + return n_incrs; + } else { + return n_steps; + } + } + + usize_t dups() { + return fstats.count - fstats.unique; + } + + /* Fraction of distinct strings of length cksum_size which are not + * represented in the hash table. 
*/ + double collisions() { + return (fstats.unique - fstats.unique_values) / (double) fstats.unique; + } + usize_t colls() { + return (fstats.unique - fstats.unique_values); + } + + double uniqueness() { + return 1.0 - (double) dups() / count(); + } + + double fullness() { + return (double) h_buckets_full / (1 << h_bits); + } + + double coverage() { + return (double) h_buckets_full / uniqueness() / count(); + } + + double compression() { + return 1.0 - coverage(); + } + + double time() { + return (double) accum_millis / accum_iters; + } + + double total_time() { + return accum_time; + } + + usize_t total_count() { + return accum_count; + } + + usize_t total_dups() { + return accum_dups; + } + + usize_t total_colls() { + return accum_dups; + } + + void stat() { + accum_time += time(); + accum_count += count(); + accum_dups += dups(); + accum_colls += colls(); + accum_size += test_size; + } + + void print() { + if (fstats.count != count()) { + fprintf(stderr, "internal error: %" W "d != %" W "d\n", fstats.count, count()); + abort(); + } + print_header(); + printf("%-32s%d/%d 2^%" W "u\t%" W "u\t%0.4f\t%.4f\t%.4f\t%.1e\t%.2f\t" + "%" W "u\t%" W "u\n", + test_name, + Checksum::cksum_size, + Checksum::cksum_skip, + h_bits, + count(), + uniqueness(), + fullness(), + coverage(), + collisions(), + 0.001 * accum_iters * test_size / accum_millis, + accum_iters, + colls()); + } + + usize_t size_log2 (usize_t slots) { + usize_t bits = bitsof<typename Checksum::word_type>() - 1; + usize_t i; + + for (i = 3; i <= bits; i += 1) { + if (slots <= (1U << i)) { + return i - Checksum::compaction; + } + } + + return bits; + } + + void new_table(usize_t entries) { + t_entries = entries; + h_bits = size_log2(entries); + + usize_t n = 1 << h_bits; + + s_bits = bitsof<typename Checksum::word_type>() - h_bits; + s_mask = n - 1U; + + hash_table = new char[n / 8]; + memset(hash_table, 0, n / 8); + } + + int get_table_bit(usize_t i) { + return hash_table[i/8] & (1 << i%8); + } + + int set_table_bit(usize_t i) { + return hash_table[i/8] |= (1 << i%8); + } + + void summarize_table() { + usize_t n = 1 << h_bits; + usize_t f = 0; + for (usize_t i = 0; i < n; i++) { + if (get_table_bit(i)) { + f++; + } + } + h_buckets_full = f; + } + + void get(const uint8_t* buf, const size_t buf_size, usize_t test_iters) { + typename Checksum::hash_type hash; + const uint8_t *ptr; + const uint8_t *end; + usize_t periods; + int64_t last_offset; + int64_t stop; + + test_size = buf_size; + last_offset = buf_size - Checksum::cksum_size; + + if (last_offset < 0) { + periods = 0; + n_steps = 0; + n_incrs = 0; + stop = -Checksum::cksum_size; + } else { + periods = last_offset / Checksum::cksum_skip; + n_steps = periods + 1; + n_incrs = last_offset + 1; + stop = last_offset - (periods + 1) * Checksum::cksum_skip; + } + + // Compute file stats once. 
+ if (fstats.unique_values == 0) { + if (Checksum::cksum_skip == 1) { + for (size_t i = 0; i <= buf_size - Checksum::cksum_size; i++) { + fstats.update(hash(cksum.step(buf + i), s_bits, s_mask), buf + i); + } + } else { + ptr = buf + last_offset; + end = buf + stop; + + for (; ptr != end; ptr -= Checksum::cksum_skip) { + fstats.update(hash(cksum.step(ptr), s_bits, s_mask), ptr); + } + } + fstats.freeze(); + } + + long start_test = get_millisecs_now(); + + if (Checksum::cksum_skip != 1) { + new_table(n_steps); + + for (usize_t i = 0; i < test_iters; i++) { + ptr = buf + last_offset; + end = buf + stop; + + for (; ptr != end; ptr -= Checksum::cksum_skip) { + set_table_bit(hash(cksum.step(ptr), s_bits, s_mask)); + } + } + + summarize_table(); + } + + stop = buf_size - Checksum::cksum_size + 1; + if (stop < 0) { + stop = 0; + } + + if (Checksum::cksum_skip == 1) { + new_table(n_incrs); + + for (usize_t i = 0; i < test_iters; i++) { + ptr = buf; + end = buf + stop; + + if (ptr != end) { + set_table_bit(hash(cksum.state0(ptr++), s_bits, s_mask)); + } + + for (; ptr != end; ptr++) { + typename Checksum::word_type w = cksum.incr(ptr); + CHECK_EQ(w, cksum.step(ptr)); + set_table_bit(hash(w, s_bits, s_mask)); + } + } + + summarize_table(); + } + + accum_iters += test_iters; + accum_millis += get_millisecs_now() - start_test; + } +}; + +static int read_whole_file(const char *name, + uint8_t **buf_ptr, + size_t *buf_len) { + main_file file; + int ret; + xoff_t len; + size_t nread; + main_file_init(&file); + file.filename = name; + ret = main_file_open(&file, name, XO_READ); + if (ret != 0) { + fprintf(stderr, "open failed\n"); + goto exit; + } + ret = main_file_stat(&file, &len); + if (ret != 0) { + fprintf(stderr, "stat failed\n"); + goto exit; + } + + (*buf_len) = (size_t)len; + (*buf_ptr) = (uint8_t*) main_malloc(*buf_len); + ret = main_file_read(&file, *buf_ptr, *buf_len, &nread, + "read failed"); + if (ret == 0 && *buf_len == nread) { + ret = 0; + } else { + fprintf(stderr, "invalid read\n"); + ret = XD3_INTERNAL; + } + exit: + main_file_cleanup(&file); + return ret; +} + +int main(int argc, char** argv) { + int i; + uint8_t *buf = NULL; + size_t buf_len = 0; + int ret; + + if (argc <= 1) { + fprintf(stderr, "usage: %s file ...\n", argv[0]); + return 1; + } + +// TODO: The xdelta3-hash.h code is identical now; add sameness test. +// using rabin_karp<> template. 
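For reference, each TESTS(SIZE, SKIP) line defined just below expands, via the TEST macro, into four self-registering test_result instances (new and old checksum, compaction 1 and 2); for example, TESTS(9, 1) produces:

test_result<large_cksum<usize_t, 9, 1, hhash<usize_t>, 1>>
    _xck_usize_t_9_1_1("xck_usize_t_9_1_1");
test_result<large_cksum_old<usize_t, 9, 1, hhash<usize_t>, 1>>
    _old_usize_t_9_1_1("old_usize_t_9_1_1");
test_result<large_cksum<usize_t, 9, 1, hhash<usize_t>, 2>>
    _xck_usize_t_9_1_2("xck_usize_t_9_1_2");
test_result<large_cksum_old<usize_t, 9, 1, hhash<usize_t>, 2>>
    _old_usize_t_9_1_2("old_usize_t_9_1_2");

Each test_result constructor pushes the instance onto all_tests, which is the vector the main() loop below iterates over.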
+#define TEST(T,Z,S,C) \ + test_result<large_cksum<T,Z,S,hhash<T>,C>> \ + _xck_ ## T ## _ ## Z ## _ ## S ## _ ## C \ + ("xck_" #T "_" #Z "_" #S "_" #C); \ + test_result<large_cksum_old<T,Z,S,hhash<T>,C>> \ + _old_ ## T ## _ ## Z ## _ ## S ## _ ## C \ + ("old_" #T "_" #Z "_" #S "_" #C) + +#define TESTS(SIZE, SKIP) \ + TEST(usize_t, SIZE, SKIP, 1); \ + TEST(usize_t, SIZE, SKIP, 2) + + TESTS(5, 1); + TESTS(6, 1); + TESTS(7, 1); + TESTS(8, 1); + TESTS(9, 1); + TESTS(10, 1); + TESTS(11, 1); + TESTS(12, 1); + TESTS(13, 1); + TESTS(14, 1); + TESTS(15, 1); + TESTS(16, 1); + TESTS(17, 1); + TESTS(18, 1); + TESTS(19, 1); + TESTS(20, 1); + TESTS(21, 1); + TESTS(22, 1); + TESTS(23, 1); + TESTS(24, 1); + TESTS(25, 1); + TESTS(26, 1); + TESTS(27, 1); + TESTS(28, 1); + TESTS(29, 1); + TESTS(30, 1); + TESTS(31, 1); + TESTS(32, 1); + TESTS(33, 1); + TESTS(34, 1); + TESTS(35, 1); + TESTS(36, 1); + TESTS(37, 1); + TESTS(38, 1); + TESTS(39, 1); + + + for (i = 1; i < argc; i++) { + if ((ret = read_whole_file(argv[i], + & buf, + & buf_len))) { + return 1; + } + + fprintf(stderr, "file %s is %zu bytes\n", + argv[i], buf_len); + + double min_time = -1.0; + double min_compression = 0.0; + + for (vector<test_result_base*>::iterator iter = all_tests.begin(); + iter != all_tests.end(); ++iter) { + test_result_base *test = *iter; + test->reset(); + + usize_t iters = 1; + long start_test = get_millisecs_now(); + + do { + test->get(buf, buf_len, iters); + iters *= 3; + iters /= 2; + } while (get_millisecs_now() - start_test < 2000); + + test->stat(); + + if (min_time < 0.0) { + min_compression = test->compression(); + min_time = test->time(); + } + + if (min_time > test->time()) { + min_time = test->time(); + } + + if (min_compression > test->compression()) { + min_compression = test->compression(); + } + + test->print(); + } + + main_free(buf); + buf = NULL; + } + + return 0; +} diff --git a/testing/checksum_test_c.c b/testing/checksum_test_c.c new file mode 100644 index 0000000..8f0507a --- /dev/null +++ b/testing/checksum_test_c.c @@ -0,0 +1,174 @@ +#include "../xdelta3.c" + +// OLD CHECKSUM CODE + +#define PERMUTE32(x) (__single_hash32[x]) +#define PERMUTE64(x) (__single_hash64[x]) + +const uint16_t __single_hash32[256] = +{ + /* This hashes the input alphabet (Scheme SLIB pseudo-random). 
*/ + 0xbcd1, 0xbb65, 0x42c2, 0xdffe, 0x9666, 0x431b, 0x8504, 0xeb46, + 0x6379, 0xd460, 0xcf14, 0x53cf, 0xdb51, 0xdb08, 0x12c8, 0xf602, + 0xe766, 0x2394, 0x250d, 0xdcbb, 0xa678, 0x02af, 0xa5c6, 0x7ea6, + 0xb645, 0xcb4d, 0xc44b, 0xe5dc, 0x9fe6, 0x5b5c, 0x35f5, 0x701a, + 0x220f, 0x6c38, 0x1a56, 0x4ca3, 0xffc6, 0xb152, 0x8d61, 0x7a58, + 0x9025, 0x8b3d, 0xbf0f, 0x95a3, 0xe5f4, 0xc127, 0x3bed, 0x320b, + 0xb7f3, 0x6054, 0x333c, 0xd383, 0x8154, 0x5242, 0x4e0d, 0x0a94, + 0x7028, 0x8689, 0x3a22, 0x0980, 0x1847, 0xb0f1, 0x9b5c, 0x4176, + 0xb858, 0xd542, 0x1f6c, 0x2497, 0x6a5a, 0x9fa9, 0x8c5a, 0x7743, + 0xa8a9, 0x9a02, 0x4918, 0x438c, 0xc388, 0x9e2b, 0x4cad, 0x01b6, + 0xab19, 0xf777, 0x365f, 0x1eb2, 0x091e, 0x7bf8, 0x7a8e, 0x5227, + 0xeab1, 0x2074, 0x4523, 0xe781, 0x01a3, 0x163d, 0x3b2e, 0x287d, + 0x5e7f, 0xa063, 0xb134, 0x8fae, 0x5e8e, 0xb7b7, 0x4548, 0x1f5a, + 0xfa56, 0x7a24, 0x900f, 0x42dc, 0xcc69, 0x02a0, 0x0b22, 0xdb31, + 0x71fe, 0x0c7d, 0x1732, 0x1159, 0xcb09, 0xe1d2, 0x1351, 0x52e9, + 0xf536, 0x5a4f, 0xc316, 0x6bf9, 0x8994, 0xb774, 0x5f3e, 0xf6d6, + 0x3a61, 0xf82c, 0xcc22, 0x9d06, 0x299c, 0x09e5, 0x1eec, 0x514f, + 0x8d53, 0xa650, 0x5c6e, 0xc577, 0x7958, 0x71ac, 0x8916, 0x9b4f, + 0x2c09, 0x5211, 0xf6d8, 0xcaaa, 0xf7ef, 0x287f, 0x7a94, 0xab49, + 0xfa2c, 0x7222, 0xe457, 0xd71a, 0x00c3, 0x1a76, 0xe98c, 0xc037, + 0x8208, 0x5c2d, 0xdfda, 0xe5f5, 0x0b45, 0x15ce, 0x8a7e, 0xfcad, + 0xaa2d, 0x4b5c, 0xd42e, 0xb251, 0x907e, 0x9a47, 0xc9a6, 0xd93f, + 0x085e, 0x35ce, 0xa153, 0x7e7b, 0x9f0b, 0x25aa, 0x5d9f, 0xc04d, + 0x8a0e, 0x2875, 0x4a1c, 0x295f, 0x1393, 0xf760, 0x9178, 0x0f5b, + 0xfa7d, 0x83b4, 0x2082, 0x721d, 0x6462, 0x0368, 0x67e2, 0x8624, + 0x194d, 0x22f6, 0x78fb, 0x6791, 0xb238, 0xb332, 0x7276, 0xf272, + 0x47ec, 0x4504, 0xa961, 0x9fc8, 0x3fdc, 0xb413, 0x007a, 0x0806, + 0x7458, 0x95c6, 0xccaa, 0x18d6, 0xe2ae, 0x1b06, 0xf3f6, 0x5050, + 0xc8e8, 0xf4ac, 0xc04c, 0xf41c, 0x992f, 0xae44, 0x5f1b, 0x1113, + 0x1738, 0xd9a8, 0x19ea, 0x2d33, 0x9698, 0x2fe9, 0x323f, 0xcde2, + 0x6d71, 0xe37d, 0xb697, 0x2c4f, 0x4373, 0x9102, 0x075d, 0x8e25, + 0x1672, 0xec28, 0x6acb, 0x86cc, 0x186e, 0x9414, 0xd674, 0xd1a5 +}; + +const uint32_t __single_hash64[256] = +{ + /* http://random.org 2014.10.24 */ + 0xd25e9f0a, 0xb1af9d5e, 0xb753dfa2, 0x157050f7, /* 0 */ + 0xc84b072c, 0xdd14fe7c, 0xf92208c3, 0xdf08a0c0, + 0x63a5c118, 0x76f5d90f, 0xa2f8b93e, 0xb6c12d22, + 0xaf074957, 0x966fb7d9, 0x62f7b785, 0xb40e8a09, + 0x0a811d5d, 0x323a6daa, 0xb62f7c5b, 0xfdcb9a53, + 0xf25a9067, 0x4506bc7a, 0xff58a74b, 0x5ae62817, + 0x74097675, 0x722c0fd9, 0x116a2a66, 0x65f76728, + 0x72c79651, 0xe043cf9d, 0x64b867c7, 0x6604834f, + 0xcdca58a6, 0x0f164e2d, 0x24515f05, 0x632cdbf8, + 0x18091d4a, 0x3eff4128, 0x673d1c33, 0xd8e10c71, + 0x1a3edf11, 0xba52892f, 0xa56949e0, 0xf3e1dd77, /* 10 */ + 0x86fcbe3e, 0x138d66d0, 0x4fc98359, 0xc22e5dd6, + 0xc59f2267, 0x6c6dd739, 0xe03da190, 0x07e8469c, + 0xadcfb02c, 0x00d3b0d9, 0xa1f44918, 0x8bd84d87, + 0x08ec9ec1, 0xbbcd156f, 0xb57718e3, 0x3177e752, + 0xf52a4d70, 0xde7aaad9, 0x075f1da0, 0x21ba00c6, + 0xb9469a5c, 0xcf08d5ba, 0x91ac9edc, 0xc6167b63, + 0xc1974919, 0xc8c8d195, 0x4b1996dd, 0xeff8991c, + 0xf7f66c6b, 0x25b012e2, 0x59d12a98, 0xea40d3cc, + 0x41f9970b, 0xec48101a, 0xa3bdcf90, 0x99f16905, + 0x27af6c97, 0xc849af37, 0x49cad89b, 0xf48c2278, /* 20 */ + 0x5529c3d8, 0x9e7d6dce, 0x16feb52d, 0xf1b0aca1, + 0xaf28fccb, 0x48e4ce3c, 0xc4436617, 0x64524e3e, + 0x61806681, 0x6384f2d7, 0x1172880f, 0x34a5ef5f, + 0xcc8cc0a8, 0x66e8f100, 0x2866085f, 0xba9b1b2d, + 0x51285949, 0x2be4b574, 0x889b1ef5, 0x3dbe920d, + 0x9277a62f, 0x0584a9f6, 
0x085d8fc4, 0x4b5d403d, + 0x4e46ca78, 0x3294c2f9, 0x29313e70, 0xe4f09b24, + 0xe73b331c, 0x072f5552, 0x2e390b78, 0xea0021ca, + 0xd8f40320, 0xed0e16fd, 0x7de9cf7a, 0xf17e3d6c, + 0x8df1bd85, 0x052cae67, 0x3486e512, 0x3a1c09b8, /* 30 */ + 0x6c2a7b4e, 0x83455753, 0xbc0353ac, 0x0ffe20b6, + 0x5fdcef85, 0x010f506c, 0x595ce972, 0xe28680d0, + 0xa7e216b2, 0xa392ee0f, 0x25b73faa, 0x2b1f4983, + 0xeeaefe98, 0x1d3d9cbc, 0x6aebe97b, 0x8b7b3584, + 0x9e6a9a07, 0xd37f1e99, 0x4ac2a441, 0x8ae9a213, + 0x7d0e27d7, 0x5de54b9a, 0x8621de1f, 0xf0f2f866, + 0xcb08d275, 0x49c3f87e, 0xd5ee68c1, 0x9802fc77, + 0x68be6c5e, 0x65aa8c27, 0xf423d5f7, 0x10ec5502, + 0x9909bce1, 0x509cdf1b, 0x338fea72, 0x2733e9bf, + 0xf92f4fd7, 0x87738ea2, 0x931a8bbc, 0x0a5c9155, /* 40 */ + 0xbe5edd9b, 0xadbf5838, 0x0338f8d2, 0x290da210, + 0x390c37d8, 0xe7cffae8, 0x20617ebe, 0x464322dd, + 0x7b3c4e78, 0xac142dcb, 0x2d5cef76, 0xd8fe49fc, + 0x60f4e9a9, 0x7473816f, 0x0dc35f39, 0x5eed80c1, + 0x0cb55ab6, 0x1d3ac541, 0x13c7f529, 0x7bffdf4a, + 0xe334785b, 0x85263ec1, 0xd132ae56, 0x7c868b9e, + 0x47f60638, 0x1012b979, 0x81c31dd3, 0x1af868c8, + 0x0c5d0742, 0xd1b3e1a2, 0x5873200a, 0xf848465c, + 0x0fc4d596, 0x609c18af, 0xc9f5a480, 0xd1a94a84, + 0xa1431a3f, 0x7de8bb1a, 0x25f1256b, 0x1dcc732c, /* 50 */ + 0x6aa1549a, 0xa2367281, 0x32f2a77e, 0x82e62a0f, + 0x045cbb56, 0x74b2027c, 0xd71a32d9, 0x022e7cb5, + 0xe99be177, 0x60222fdf, 0xd69681ca, 0x9008ee2c, + 0x32923db4, 0xcf82bf97, 0x38960a5b, 0xb3503d5b, + 0x9bd4c7f2, 0x33c029c8, 0x1ef504a3, 0xdb249d3b, + 0x91e89676, 0x4ca43b36, 0x9191433c, 0x465d5dc4, + 0xf4dcb118, 0x9d11dd00, 0xb592f058, 0xdbe5ce30, + 0x74790d92, 0x779850a8, 0x7180d25b, 0xfa951d99, + 0x5990935a, 0x921cb022, 0x3b7c39bc, 0x6a38a7c7, + 0xdc22703b, 0x142bab3b, 0x4e3d9479, 0x44bb8482, /* 60 */ + 0x8043abce, 0xfebe832a, 0x8e6a2f98, 0x4d43c4fe, + 0xd192a70a, 0x802f3c3a, 0x5d11bbab, 0x2665d241, + 0xb3f3a680, 0x3a8d223f, 0xcf82cdb4, 0x4ed28743, +}; + +uint64_t +xd3_large64_cksum_old (xd3_hash_cfg *ignore, const uint8_t *base, const usize_t look) +{ + static const uint64_t kBits = 32; + static const uint64_t kMask = 0xffffffff; + usize_t i = 0; + uint64_t low = 0; + uint64_t high = 0; + + for (; i < look; i += 1) + { + low += PERMUTE64(*base++); + high += low; + } + + return ((high & kMask) << kBits) | (low & kMask); +} + +uint64_t +xd3_large64_cksum_update_old (xd3_hash_cfg *ignore, const uint64_t cksum, + const uint8_t *base, const usize_t look) +{ + static const uint64_t kBits = 32; + static const uint64_t kMask = 0xffffffff; + uint64_t old_c = PERMUTE64(base[0]); + uint64_t new_c = PERMUTE64(base[look]); + uint64_t low = ((cksum & kMask) - old_c + new_c) & kMask; + uint64_t high = ((cksum >> kBits) - (old_c * look) + low) & kMask; + return (high << kBits) | low; +} + +uint32_t +xd3_large32_cksum_old (xd3_hash_cfg *ignore, const uint8_t *base, const usize_t look) +{ + static const uint32_t kBits = 16; + static const uint32_t kMask = 0xffff; + usize_t i = 0; + uint32_t low = 0; + uint32_t high = 0; + + for (; i < look; i += 1) + { + low += PERMUTE32(*base++); + high += low; + } + + return ((high & kMask) << kBits) | (low & kMask); +} + +uint32_t +xd3_large32_cksum_update_old (xd3_hash_cfg *ignore, const uint32_t cksum, + const uint8_t *base, const usize_t look) +{ + static const uint32_t kBits = 16; + static const uint32_t kMask = 0xffff; + uint32_t old_c = PERMUTE32(base[0]); + uint32_t new_c = PERMUTE32(base[look]); + uint32_t low = ((cksum & kMask) - old_c + new_c) & kMask; + uint32_t high = ((cksum >> kBits) - (old_c * look) + low) & kMask; + return 
(high << kBits) | low; +} diff --git a/testing/cmp.h b/testing/cmp.h new file mode 100644 index 0000000..53ae671 --- /dev/null +++ b/testing/cmp.h @@ -0,0 +1,53 @@ +/* -*- Mode: C++ -*- */ +static size_t CmpDifferentBlockBytes(const Block &a, const Block &b) { + size_t total = 0; + size_t i = 0; + size_t m = min(a.Size(), b.Size()); + + for (; i < m; i++) { + if (a[i] != b[i]) { + total++; + } + } + + total += a.Size() - i; + total += b.Size() - i; + + return total; +} + +static xoff_t CmpDifferentBytes(const FileSpec &a, const FileSpec &b) { + Block block_a, block_b; + xoff_t total = 0; + typename FileSpec::iterator a_i(a), b_i(b); + + for (; !a_i.Done() && !b_i.Done(); a_i.Next(), b_i.Next()) { + + a_i.Get(&block_a); + b_i.Get(&block_b); + + total += CmpDifferentBlockBytes(block_a, block_b); + } + + for (; !a_i.Done(); a_i.Next()) { + total += a_i.BytesOnBlock(); + } + for (; !b_i.Done(); b_i.Next()) { + total += b_i.BytesOnBlock(); + } + + return total; +} + +static size_t CmpDifferentBlockBytesAtOffset(const Block &a, + const FileSpec &b_spec, + xoff_t offset) { + Block b; + size_t size = a.Size(); + CHECK_LE(offset, b_spec.Size()); + if (b_spec.Size() < offset + size) { + size = b_spec.Size() - offset; + } + b_spec.Get(&b, offset, size); + return CmpDifferentBlockBytes(a, b); +} diff --git a/testing/delta.h b/testing/delta.h new file mode 100644 index 0000000..deac217 --- /dev/null +++ b/testing/delta.h @@ -0,0 +1,73 @@ +// Mode: -*- C++ -*- + +class Delta { +public: + Delta(const Block &block) { + int ret; + xd3_config config; + memset(&stream_, 0, sizeof (stream_)); + memset(&config, 0, sizeof (config)); + + xd3_init_config(&config, XD3_SKIP_EMIT | XD3_ADLER32_NOVER); + + CHECK_EQ(0, xd3_config_stream (&stream_, &config)); + + xd3_avail_input (&stream_, block.Data(), block.Size()); + + bool done = false; + while (!done) { + ret = xd3_decode_input(&stream_); + + switch (ret) { + case XD3_INPUT: + done = true; + break; + case XD3_OUTPUT: + CHECK_EQ(0, xd3_whole_append_window (&stream_)); + break; + case XD3_GOTHEADER: + case XD3_WINSTART: + case XD3_WINFINISH: + break; + default: + cerr << "decode: " << done; + abort(); + } + } + } + + ~Delta() { + xd3_free_stream(&stream_); + } + + xoff_t AddedBytes() const { + return stream_.whole_target.addslen; + } + + xoff_t Windows() const { + return stream_.whole_target.wininfolen; + } + +// Note: This does not benefit from -Wformat= checking, due to the +// enclosing template. Further, it was not used. 
+// void Print() const { +// for (size_t i = 0; i < stream_.whole_target.instlen; i++) { +// xd3_winst &winst = stream_.whole_target.inst[i]; +// switch (winst.type) { +// case XD3_RUN: +// DP(RINT, "%" Q "u run %" W "u\n", winst.position, winst.size); +// break; +// case XD3_ADD: +// DP(RINT "%" Q "u add %" W "u\n", winst.position, winst.size); +// break; +// default: +// DP(RINT "%" Q "u copy %" W "u @ %" Q "u (mode %u)\n", +// winst.position, winst.size, winst.addr, winst.mode); +// break; +// } +// } +// } + +private: + xd3_stream stream_; +}; diff --git a/testing/file.h b/testing/file.h new file mode 100644 index 0000000..2188f7a --- /dev/null +++ b/testing/file.h @@ -0,0 +1,385 @@ +/* -*- Mode: C++ -*- */ +class Block; +class BlockIterator; +class TmpFile; + +class Block { +public: + Block() + : data_(NULL), + data_size_(0), + size_(0) { } + + ~Block() { + if (data_) { + delete [] data_; + } + } + + size_t Size() const { + return size_; + } + + uint8_t operator[](size_t i) const { + CHECK_LT(i, size_); + return data_[i]; + } + + uint8_t* Data() const { + if (data_ == NULL) { + CHECK_EQ(0, size_); + data_size_ = 1; + data_ = new uint8_t[1]; + } + return data_; + } + + // For writing to blocks + void Append(const uint8_t *data, size_t size) { + if (data_ == NULL) { + CHECK_EQ(0, size_); + CHECK_EQ(0, data_size_); + data_ = new uint8_t[Constants::BLOCK_SIZE]; + data_size_ = Constants::BLOCK_SIZE; + } + + if (size_ + size > data_size_) { + uint8_t *tmp = data_; + while (size_ + size > data_size_) { + data_size_ *= 2; + } + data_ = new uint8_t[data_size_]; + memcpy(data_, tmp, size_); + delete [] tmp; + } + + memcpy(data_ + size_, data, size); + size_ += size; + } + + // For cleaing a block + void Reset() { + size_ = 0; + } + + // Note: This does not benefit from -Wformat= checking, due to the + // enclosing template. Further, it was not used. 
+ // void Print() const { + // xoff_t pos = 0; + // for (size_t i = 0; i < Size(); i++) { + // if (pos % 16 == 0) { + // DP(RINT "%5" Q "x: ", pos); + // } + // DP(RINT "%02x ", (*this)[i]); + // if (pos % 16 == 15) { + // DP(RINT "\n"); + // } + // pos++; + // } + // DP(RINT "\n"); + // } + + void WriteTmpFile(TmpFile *f) const { + f->Append(this); + } + + void SetSize(size_t size) { + uint8_t *t = NULL; + if (data_size_ < size) { + if (data_) { + t = data_; + } + data_ = new uint8_t[size]; + data_size_ = size; + } + if (t && size < size_) { + memcpy(data_, t, size); + } + delete [] t; + size_ = size; + } + +private: + friend class BlockIterator; + + mutable uint8_t *data_; + mutable size_t data_size_; + size_t size_; +}; + +class FileSpec { + public: + FileSpec(MTRandom *rand) + : rand_(rand) { + } + + // Generates a file with a known size + void GenerateFixedSize(xoff_t size) { + Reset(); + + for (xoff_t p = 0; p < size; ) { + xoff_t t = min(Constants::BLOCK_SIZE, size - p); + table_.insert(make_pair(p, Segment(t, rand_))); + p += t; + } + } + + // Generates a file with exponential-random distributed size + void GenerateRandomSize(xoff_t mean) { + GenerateFixedSize(rand_->ExpRand(mean)); + } + + // Returns the size of the file + xoff_t Size() const { + if (table_.empty()) { + return 0; + } + ConstSegmentMapIterator i = --table_.end(); + return i->first + i->second.Size(); + } + + // Returns the number of blocks + xoff_t Blocks(size_t blksize = Constants::BLOCK_SIZE) const { + if (table_.empty()) { + return 0; + } + return ((Size() - 1) / blksize) + 1; + } + + // Returns the number of segments + xoff_t Segments() const { + return table_.size(); + } + + // Create a mutation according to "what". + void ModifyTo(const Mutator &mutator, + FileSpec *modify) const { + modify->Reset(); + mutator.Mutate(&modify->table_, &table_, rand_); + modify->CheckSegments(); + } + + void CheckSegments() const { + for (ConstSegmentMapIterator iter(table_.begin()); + iter != table_.end(); ) { + ConstSegmentMapIterator iter0(iter++); + if (iter == table_.end()) { + break; + } + CHECK_EQ(iter0->first + iter0->second.Size(), iter->first); + } + } + + void Reset() { + table_.clear(); + } + + void Print() const { + for (ConstSegmentMapIterator iter(table_.begin()); + iter != table_.end(); + ++iter) { + const Segment &seg = iter->second; + cerr << "Segment at " << iter->first + << " (" << seg.ToString() << ")" << endl; + } + } + + void PrintData() const { + Block block; + for (BlockIterator iter(*this); !iter.Done(); iter.Next()) { + iter.Get(&block); + block.Print(); + } + } + + void WriteTmpFile(TmpFile *f) const { + Block block; + for (BlockIterator iter(*this); !iter.Done(); iter.Next()) { + iter.Get(&block); + f->Append(&block); + } + } + + void Get(Block *block, xoff_t offset, size_t size) const { + size_t got = 0; + block->SetSize(size); + + ConstSegmentMapIterator pos = table_.upper_bound(offset); + if (pos == table_.begin()) { + CHECK_EQ(0, Size()); + return; + } + --pos; + + while (got < size) { + CHECK(pos != table_.end()); + CHECK_GE(offset, pos->first); + + const Segment &seg = pos->second; + + // The position of this segment may start before this block starts, + // and then the position of the data may be offset from the seeding + // position. 
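// Illustrative numbers (not from the patch): if this segment begins at
// pos->first == 1000, the request is at offset == 1500 for size == 4096,
// and the segment is 2000 bytes long, then seg_offset == 500 and
// advance == min(2000 - 500, 4096 - 0) == 1500, so 1500 bytes are filled
// before stepping to the next segment.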
+ size_t seg_offset = offset - pos->first; + size_t advance = min(seg.Size() - seg_offset, + size - got); + + seg.Fill(seg_offset, advance, block->Data() + got); + + got += advance; + offset += advance; + ++pos; + } + } + + typedef BlockIterator iterator; + + private: + friend class BlockIterator; + + MTRandom *rand_; + SegmentMap table_; +}; + +class BlockIterator { +public: + explicit BlockIterator(const FileSpec& spec) + : spec_(spec), + blkno_(0), + blksize_(Constants::BLOCK_SIZE) { } + + BlockIterator(const FileSpec& spec, + size_t blksize) + : spec_(spec), + blkno_(0), + blksize_(blksize) { } + + bool Done() const { + return blkno_ >= spec_.Blocks(blksize_); + } + + void Next() { + blkno_++; + } + + xoff_t Blkno() const { + return blkno_; + } + + xoff_t Blocks() const { + return spec_.Blocks(blksize_); + } + + xoff_t Offset() const { + return blkno_ * blksize_; + } + + void SetBlock(xoff_t blkno) { + CHECK_LE(blkno, Blocks()); + blkno_ = blkno; + } + + void Get(Block *block) const { + spec_.Get(block, blkno_ * blksize_, BytesOnBlock()); + } + + size_t BytesOnBlock() const { + xoff_t blocks = spec_.Blocks(blksize_); + xoff_t size = spec_.Size(); + + DCHECK((blkno_ < blocks) || + (blkno_ == blocks && size % blksize_ == 0)); + + if (blkno_ == blocks) { + return 0; + } + if (blkno_ + 1 == blocks) { + return ((size - 1) % blksize_) + 1; + } + return blksize_; + } + + size_t BlockSize() const { + return blksize_; + } + +private: + const FileSpec& spec_; + xoff_t blkno_; + size_t blksize_; +}; + +class ExtFile { +public: + ExtFile() { + static int static_counter = 0; + pid_t pid = getpid(); + char buf[64]; + xoff_t xpid = pid; + snprintf(buf, 64, "/tmp/regtest.%" Q "u.%d", xpid, static_counter++); + filename_.append(buf); + unlink(filename_.c_str()); + } + + ~ExtFile() { + unlink(filename_.c_str()); + } + + const char* Name() const { + return filename_.c_str(); + } + + // Check whether a real file matches a file spec. 
+ bool EqualsSpec(const FileSpec &spec) const { + main_file t; + main_file_init(&t); + CHECK_EQ(0, main_file_open(&t, Name(), XO_READ)); + + Block tblock; + Block sblock; + for (BlockIterator iter(spec); !iter.Done(); iter.Next()) { + iter.Get(&sblock); + tblock.SetSize(sblock.Size()); + size_t tread; + CHECK_EQ(0, main_file_read(&t, + tblock.Data(), + tblock.Size(), &tread, "read failed")); + CHECK_EQ(0, CmpDifferentBlockBytes(tblock, sblock)); + } + + CHECK_EQ(0, main_file_close(&t)); + main_file_cleanup(&t); + return true; + } + +protected: + string filename_; +}; + +class TmpFile : public ExtFile { +public: + TmpFile() { + main_file_init(&file_); + CHECK_EQ(0, main_file_open(&file_, Name(), XO_WRITE)); + } + + ~TmpFile() { + main_file_cleanup(&file_); + } + + void Append(const Block *block) { + CHECK_EQ(0, main_file_write(&file_, + block->Data(), block->Size(), + "tmpfile write failed")); + } + + const char* Name() const { + if (main_file_isopen(&file_)) { + CHECK_EQ(0, main_file_close(&file_)); + } + return ExtFile::Name(); + } + +private: + mutable main_file file_; +}; diff --git a/testing/modify.h b/testing/modify.h new file mode 100644 index 0000000..93f7bde --- /dev/null +++ b/testing/modify.h @@ -0,0 +1,385 @@ +// -*- Mode: C++ -*- +class Mutator { +public: + virtual ~Mutator() { } + virtual void Mutate(SegmentMap *table, + const SegmentMap *source_table, + MTRandom *rand) const = 0; +}; + +class Change { +public: + enum Kind { + MODIFY = 1, // Mutate a certain range w/ random or supplied data + ADD = 2, // Insert random or supplied data + DELRANGE = 3, // Delete a specified range of data + COPY = 4, // Copy from one region, inserting elsewhere + MOVE = 5, // Copy then delete copied-from range + COPYOVER = 6 // Copy then delete copied-to range + + // ADD, DELRANGE, and COPY change the file size + // MODIFY, MOVE, COPYOVER preserve the file size + }; + + // Constructor for modify, add, delete. + Change(Kind kind0, xoff_t size0, xoff_t addr1_0) + : kind(kind0), + size(size0), + addr1(addr1_0), + addr2(0), + insert(NULL) { + CHECK(kind != MOVE && kind != COPY && kind != COPYOVER); + } + + // Constructor for modify, add w/ provided data. + Change(Kind kind0, xoff_t size0, xoff_t addr1_0, Segment *insert0) + : kind(kind0), + size(size0), + addr1(addr1_0), + addr2(0), + insert(insert0) { + CHECK(kind != MOVE && kind != COPY && kind != COPYOVER); + } + + // Constructor for move, copy, overwrite + Change(Kind kind0, xoff_t size0, xoff_t addr1_0, xoff_t addr2_0) + : kind(kind0), + size(size0), + addr1(addr1_0), + addr2(addr2_0), + insert(NULL) { + CHECK(kind == MOVE || kind == COPY || kind == COPYOVER); + } + + Kind kind; + xoff_t size; + xoff_t addr1; + xoff_t addr2; + Segment *insert; // For modify and/or add +}; + +typedef list<Change> ChangeList; +typedef typename ChangeList::const_iterator ConstChangeListIterator; +typedef typename ChangeList::iterator ChangeListIterator; + +class ChangeListMutator : public Mutator { +public: + ChangeListMutator(const ChangeList &cl) + : cl_(cl) { } + + ChangeListMutator() { } + + void Mutate(SegmentMap *table, + const SegmentMap *source_table, + MTRandom *rand) const { + // The speed of processing gigabytes of data is so slow compared with + // these table-copy operations, no attempt to make this fast. 
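// Illustration (not from the patch): with cl_ = { ADD 10 bytes @ 0,
// DELRANGE 4 bytes @ 100 }, the ADD pass writes its result into tmp and
// swaps it into *table, and source_table is then pointed at that result,
// so the DELRANGE's address 100 refers to the already-lengthened file.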
+ SegmentMap tmp; + + for (ConstChangeListIterator iter(cl_.begin()); + iter != cl_.end(); ++iter) { + const Change &ch = *iter; + tmp.clear(); + Mutate(ch, &tmp, source_table, rand); + tmp.swap(*table); + source_table = table; + } + } + + static void Mutate(const Change &ch, + SegmentMap *table, + const SegmentMap *source_table, + MTRandom *rand) { + switch (ch.kind) { + case Change::ADD: + AddChange(ch, table, source_table, rand); + break; + case Change::MODIFY: + ModifyChange(ch, table, source_table, rand); + break; + case Change::DELRANGE: + DeleteChange(ch, table, source_table, rand); + break; + case Change::COPY: + CopyChange(ch, table, source_table, rand); + break; + case Change::MOVE: + MoveChange(ch, table, source_table, rand); + break; + case Change::COPYOVER: + OverwriteChange(ch, table, source_table, rand); + break; + } + } + + static void ModifyChange(const Change &ch, + SegmentMap *table, + const SegmentMap *source_table, + MTRandom *rand) { + xoff_t m_start = ch.addr1; + xoff_t m_end = m_start + ch.size; + xoff_t i_start = 0; + xoff_t i_end = 0; + + for (ConstSegmentMapIterator iter(source_table->begin()); + iter != source_table->end(); + ++iter) { + const Segment &seg = iter->second; + i_start = iter->first; + i_end = i_start + seg.Size(); + + if (i_end <= m_start || i_start >= m_end) { + table->insert(table->end(), make_pair(i_start, seg)); + continue; + } + + if (i_start < m_start) { + table->insert(table->end(), + make_pair(i_start, + seg.Subseg(0, m_start - i_start))); + } + + // Insert the entire segment, even though it may extend into later + // segments. This condition avoids inserting it during later + // segments. + if (m_start >= i_start) { + if (ch.insert != NULL) { + table->insert(table->end(), make_pair(m_start, *ch.insert)); + } else { + Segment part(m_end - m_start, rand); + table->insert(table->end(), make_pair(m_start, part)); + } + } + + if (i_end > m_end) { + table->insert(table->end(), + make_pair(m_end, + seg.Subseg(m_end - i_start, i_end - m_end))); + } + } + + // This check verifies that the modify does not extend past the + // source_table EOF. + CHECK_LE(m_end, i_end); + } + + static void AddChange(const Change &ch, + SegmentMap *table, + const SegmentMap *source_table, + MTRandom *rand) { + xoff_t m_start = ch.addr1; + xoff_t i_start = 0; + xoff_t i_end = 0; + + for (ConstSegmentMapIterator iter(source_table->begin()); + iter != source_table->end(); + ++iter) { + const Segment &seg = iter->second; + i_start = iter->first; + i_end = i_start + seg.Size(); + + if (i_end <= m_start) { + table->insert(table->end(), make_pair(i_start, seg)); + continue; + } + + if (i_start > m_start) { + table->insert(table->end(), make_pair(i_start + ch.size, seg)); + continue; + } + + if (i_start < m_start) { + table->insert(table->end(), + make_pair(i_start, + seg.Subseg(0, m_start - i_start))); + } + + if (ch.insert != NULL) { + table->insert(table->end(), make_pair(m_start, *ch.insert)); + } else { + Segment addseg(ch.size, rand); + table->insert(table->end(), make_pair(m_start, addseg)); + } + + if (m_start < i_end) { + table->insert(table->end(), + make_pair(m_start + ch.size, + seg.Subseg(m_start - i_start, + i_end - m_start))); + } + } + + CHECK_LE(m_start, i_end); + + // Special case for add at end-of-input. 
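// Illustration (not from the patch): for a 100-byte source and
// Change(ADD, 5, 100), no segment overlaps m_start == 100, so the loop
// above copies every segment unchanged and leaves i_end == 100; the
// branch below then appends the new 5-byte segment at offset 100.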
+ if (m_start == i_end) { + Segment addseg(ch.size, rand); + table->insert(table->end(), make_pair(m_start, addseg)); + } + } + + static void DeleteChange(const Change &ch, + SegmentMap *table, + const SegmentMap *source_table, + MTRandom *rand) { + xoff_t m_start = ch.addr1; + xoff_t m_end = m_start + ch.size; + xoff_t i_start = 0; + xoff_t i_end = 0; + + for (ConstSegmentMapIterator iter(source_table->begin()); + iter != source_table->end(); + ++iter) { + const Segment &seg = iter->second; + i_start = iter->first; + i_end = i_start + seg.Size(); + + if (i_end <= m_start) { + table->insert(table->end(), make_pair(i_start, seg)); + continue; + } + + if (i_start >= m_end) { + table->insert(table->end(), make_pair(i_start - ch.size, seg)); + continue; + } + + if (i_start < m_start) { + table->insert(table->end(), + make_pair(i_start, + seg.Subseg(0, m_start - i_start))); + } + + if (i_end > m_end) { + table->insert(table->end(), + make_pair(m_end - ch.size, + seg.Subseg(m_end - i_start, i_end - m_end))); + } + } + + CHECK_LT(m_start, i_end); + CHECK_LE(m_end, i_end); + } + + // A move is a copy followed by delete of the copied-from range. + static void MoveChange(const Change &ch, + SegmentMap *table, + const SegmentMap *source_table, + MTRandom *rand) { + SegmentMap tmp; + CHECK_NE(ch.addr1, ch.addr2); + CopyChange(ch, &tmp, source_table, rand); + Change d(Change::DELRANGE, ch.size, + ch.addr1 < ch.addr2 ? ch.addr1 : ch.addr1 + ch.size); + DeleteChange(d, table, &tmp, rand); + } + + // An overwrite is a copy followed by a delete of the copied-to range. + static void OverwriteChange(const Change &ch, + SegmentMap *table, + const SegmentMap *source_table, + MTRandom *rand) { + SegmentMap tmp; + CHECK_NE(ch.addr1, ch.addr2); + CopyChange(ch, &tmp, source_table, rand); + Change d(Change::DELRANGE, ch.size, ch.addr2 + ch.size); + DeleteChange(d, table, &tmp, rand); + } + + static void CopyChange(const Change &ch, + SegmentMap *table, + const SegmentMap *source_table, + MTRandom *ignore) { + xoff_t m_start = ch.addr2; + xoff_t c_start = ch.addr1; + xoff_t i_start = 0; + xoff_t i_end = 0; + + // Like AddChange() with AppendCopy instead of a random segment. + for (ConstSegmentMapIterator iter(source_table->begin()); + iter != source_table->end(); + ++iter) { + const Segment &seg = iter->second; + i_start = iter->first; + i_end = i_start + seg.Size(); + + if (i_end <= m_start) { + table->insert(table->end(), make_pair(i_start, seg)); + continue; + } + + if (i_start > m_start) { + table->insert(table->end(), make_pair(i_start + ch.size, seg)); + continue; + } + + if (i_start < m_start) { + table->insert(table->end(), + make_pair(i_start, + seg.Subseg(0, m_start - i_start))); + } + + AppendCopy(table, source_table, c_start, m_start, ch.size); + + if (m_start < i_end) { + table->insert(table->end(), + make_pair(m_start + ch.size, + seg.Subseg(m_start - i_start, i_end - m_start))); + } + } + + CHECK_LE(m_start, i_end); + + // Special case for copy to end-of-input. 
+ if (m_start == i_end) { + AppendCopy(table, source_table, c_start, m_start, ch.size); + } + } + + static void AppendCopy(SegmentMap *table, + const SegmentMap *source_table, + xoff_t copy_offset, + xoff_t append_offset, + xoff_t length) { + ConstSegmentMapIterator pos(source_table->upper_bound(copy_offset)); + --pos; + xoff_t got = 0; + + while (got < length) { + size_t seg_offset = copy_offset - pos->first; + size_t advance = min(pos->second.Size() - seg_offset, + (size_t)(length - got)); + + table->insert(table->end(), + make_pair(append_offset, + pos->second.Subseg(seg_offset, + advance))); + + got += advance; + copy_offset += advance; + append_offset += advance; + ++pos; + } + } + + ChangeList* Changes() { + return &cl_; + } + + const ChangeList* Changes() const { + return &cl_; + } + +private: + ChangeList cl_; +}; + +class Modify1stByte : public Mutator { +public: + void Mutate(SegmentMap *table, + const SegmentMap *source_table, + MTRandom *rand) const { + ChangeListMutator::Mutate(Change(Change::MODIFY, 1, 0), + table, source_table, rand); + } +}; diff --git a/testing/random.h b/testing/random.h new file mode 100644 index 0000000..8ff64db --- /dev/null +++ b/testing/random.h @@ -0,0 +1,143 @@ +/* -*- Mode: C++ -*- */ +/* This is public-domain Mersenne Twister code, + * attributed to Michael Brundage. Thanks! + * http://www.qbrundage.com/michaelb/pubs/essays/random_number_generation.html + */ +#undef MT_LEN +#undef MT_IA +class MTRandom { + public: + enum Constants { + MT_LEN = 624, + MT_IA = 397 + }; + + static const uint32_t TEST_SEED1; + static const uint32_t UPPER_MASK; + static const uint32_t LOWER_MASK; + static const uint32_t MATRIX_A; + + MTRandom() { + Init(TEST_SEED1); + } + + explicit MTRandom(uint32_t seed) { + Init(seed); + } + + uint32_t Rand32 () { + uint32_t y; + static unsigned long mag01[2] = { + 0 , MATRIX_A + }; + + if (mt_index_ >= MT_LEN) { + int kk; + + for (kk = 0; kk < MT_LEN - MT_IA; kk++) { + y = (mt_buffer_[kk] & UPPER_MASK) | (mt_buffer_[kk + 1] & LOWER_MASK); + mt_buffer_[kk] = mt_buffer_[kk + MT_IA] ^ (y >> 1) ^ mag01[y & 0x1UL]; + } + for (;kk < MT_LEN - 1; kk++) { + y = (mt_buffer_[kk] & UPPER_MASK) | (mt_buffer_[kk + 1] & LOWER_MASK); + mt_buffer_[kk] = mt_buffer_[kk + (MT_IA - MT_LEN)] ^ (y >> 1) ^ mag01[y & 0x1UL]; + } + y = (mt_buffer_[MT_LEN - 1] & UPPER_MASK) | (mt_buffer_[0] & LOWER_MASK); + mt_buffer_[MT_LEN - 1] = mt_buffer_[MT_IA - 1] ^ (y >> 1) ^ mag01[y & 0x1UL]; + + mt_index_ = 0; + } + + y = mt_buffer_[mt_index_++]; + + y ^= (y >> 11); + y ^= (y << 7) & 0x9d2c5680UL; + y ^= (y << 15) & 0xefc60000UL; + y ^= (y >> 18); + + return y; + } + + uint32_t ExpRand32(uint32_t mean) { + double mean_d = mean; + double erand = log (1.0 / (Rand32() / (double)UINT32_MAX)); + uint32_t x = (uint32_t) (mean_d * erand + 0.5); + return x; + } + + uint64_t Rand64() { + return ((uint64_t)Rand32() << 32) | Rand32(); + } + + uint64_t ExpRand64(uint64_t mean) { + double mean_d = mean; + double erand = log (1.0 / (Rand64() / (double)UINT32_MAX)); + uint64_t x = (uint64_t) (mean_d * erand + 0.5); + return x; + } + + template <typename T> + T Rand() { + switch (sizeof(T)) { + case sizeof(uint32_t): + return Rand32(); + case sizeof(uint64_t): + return Rand64(); + default: + cerr << "Invalid sizeof T" << endl; + abort(); + } + } + + template <typename T> + T ExpRand(T mean) { + switch (sizeof(T)) { + case sizeof(uint32_t): + return ExpRand32(mean); + case sizeof(uint64_t): + return ExpRand64(mean); + default: + cerr << "Invalid sizeof T" << endl; + abort(); + } + 
} + + private: + void Init(uint32_t seed) { + mt_buffer_[0] = seed; + mt_index_ = MT_LEN; + for (int i = 1; i < MT_LEN; i++) { + /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */ + /* In the previous versions, MSBs of the seed affect */ + /* only MSBs of the array mt[]. */ + /* 2002/01/09 modified by Makoto Matsumoto */ + mt_buffer_[i] = + (1812433253UL * (mt_buffer_[i-1] ^ (mt_buffer_[i-1] >> 30)) + i); + } + } + + int mt_index_; + uint32_t mt_buffer_[MT_LEN]; +}; + +const uint32_t MTRandom::TEST_SEED1 = 5489UL; +const uint32_t MTRandom::UPPER_MASK = 0x80000000; +const uint32_t MTRandom::LOWER_MASK = 0x7FFFFFFF; +const uint32_t MTRandom::MATRIX_A = 0x9908B0DF; + +class MTRandom8 { +public: + MTRandom8(MTRandom *rand) + : rand_(rand) { + } + + uint8_t Rand8() { + uint32_t r = rand_->Rand32(); + + // TODO: make this use a single byte at a time? + return (r & 0xff) ^ (r >> 7) ^ (r >> 15) ^ (r >> 21); + } + +private: + MTRandom *rand_; +}; diff --git a/testing/regtest.cc b/testing/regtest.cc new file mode 100644 index 0000000..dd2ab8d --- /dev/null +++ b/testing/regtest.cc @@ -0,0 +1,1306 @@ +/* -*- Mode: C++ -*- */ +#include "test.h" +#include "random.h" +#include "sizes.h" + +template <typename Constants> +class Regtest { +public: + typedef typename Constants::Sizes Sizes; + + struct Options { + Options() + : encode_srcwin_maxsz(1<<20), + block_size(Constants::BLOCK_SIZE), + window_size(Constants::WINDOW_SIZE), + size_known(false), + iopt_size(XD3_DEFAULT_IOPT_SIZE), + smatch_cfg(XD3_SMATCH_DEFAULT) { } + + xoff_t encode_srcwin_maxsz; + size_t block_size; + xoff_t window_size; + bool size_known; + usize_t iopt_size; + xd3_smatch_cfg smatch_cfg; + }; + +#include "segment.h" +#include "modify.h" +#include "file.h" +#include "cmp.h" +#include "delta.h" + + void InMemoryEncodeDecode(const FileSpec &source_file, + const FileSpec &target_file, + Block *coded_data, + const Options &options) { + xd3_stream encode_stream; + xd3_config encode_config; + xd3_source encode_source; + + xd3_stream decode_stream; + xd3_config decode_config; + xd3_source decode_source; + xoff_t verified_bytes = 0; + xoff_t encoded_bytes = 0; + + if (coded_data) { + coded_data->Reset(); + } + + memset(&encode_stream, 0, sizeof (encode_stream)); + memset(&encode_source, 0, sizeof (encode_source)); + + memset(&decode_stream, 0, sizeof (decode_stream)); + memset(&decode_source, 0, sizeof (decode_source)); + + xd3_init_config(&encode_config, XD3_ADLER32); + xd3_init_config(&decode_config, XD3_ADLER32); + + encode_config.winsize = options.window_size; + encode_config.iopt_size = options.iopt_size; + encode_config.smatch_cfg = options.smatch_cfg; + + CHECK_EQ(0, xd3_config_stream (&encode_stream, &encode_config)); + CHECK_EQ(0, xd3_config_stream (&decode_stream, &decode_config)); + + encode_source.blksize = options.block_size; + decode_source.blksize = options.block_size; + + encode_source.max_winsize = options.encode_srcwin_maxsz; + decode_source.max_winsize = options.encode_srcwin_maxsz; + + if (!options.size_known) + { + xd3_set_source (&encode_stream, &encode_source); + xd3_set_source (&decode_stream, &decode_source); + } + else + { + xd3_set_source_and_size (&encode_stream, &encode_source, + source_file.Size()); + xd3_set_source_and_size (&decode_stream, &decode_source, + source_file.Size()); + } + + BlockIterator source_iterator(source_file, options.block_size); + BlockIterator target_iterator(target_file, Constants::WINDOW_SIZE); + Block encode_source_block, decode_source_block; + Block decoded_block, target_block; 
+ bool encoding = true; + bool done = false; + bool done_after_input = false; + + IF_DEBUG1 (XPR(NTR "source %" Q "u[%" Z "u] target %" Q "u winsize %" Z "u\n", + source_file.Size(), options.block_size, + target_file.Size(), + Constants::WINDOW_SIZE)); + + while (!done) { + target_iterator.Get(&target_block); + + xoff_t blks = target_iterator.Blocks(); + + IF_DEBUG2(XPR(NTR "target in %s: %" Q "u[%" Z "u] %" Q "u(%" Q "u) " + "verified %" Q "u\n", + encoding ? "encoding" : "decoding", + target_iterator.Offset(), + target_block.Size(), + target_iterator.Blkno(), + blks, + verified_bytes)); + + if (blks == 0 || target_iterator.Blkno() == (blks - 1)) { + xd3_set_flags(&encode_stream, XD3_FLUSH | encode_stream.flags); + } + + xd3_avail_input(&encode_stream, target_block.Data(), target_block.Size()); + encoded_bytes += target_block.Size(); + + process: + int ret; + const char *msg; + if (encoding) { + ret = xd3_encode_input(&encode_stream); + msg = encode_stream.msg; + } else { + ret = xd3_decode_input(&decode_stream); + msg = decode_stream.msg; + } + (void) msg; + + switch (ret) { + case XD3_OUTPUT: + if (encoding) { + if (coded_data != NULL) { + // Optional encoded-output to the caller + coded_data->Append(encode_stream.next_out, + encode_stream.avail_out); + } + // Feed this data to the decoder. + xd3_avail_input(&decode_stream, + encode_stream.next_out, + encode_stream.avail_out); + xd3_consume_output(&encode_stream); + encoding = false; + } else { + decoded_block.Append(decode_stream.next_out, + decode_stream.avail_out); + xd3_consume_output(&decode_stream); + } + goto process; + + case XD3_GETSRCBLK: { + xd3_source *src = (encoding ? &encode_source : &decode_source); + Block *block = (encoding ? &encode_source_block : &decode_source_block); + if (encoding) { + IF_DEBUG2(XPR(NTR "[srcblock] %" Q "u last srcpos %" Q "u " + "encodepos %" Q "u\n", + encode_source.getblkno, + encode_stream.match_last_srcpos, + encode_stream.input_position + encode_stream.total_in)); + } + + source_iterator.SetBlock(src->getblkno); + source_iterator.Get(block); + src->curblkno = src->getblkno; + src->onblk = block->Size(); + src->curblk = block->Data(); + + goto process; + } + + case XD3_INPUT: + if (!encoding) { + encoding = true; + goto process; + } else { + if (done_after_input) { + done = true; + continue; + } + + if (target_block.Size() < target_iterator.BlockSize()) { + encoding = false; + } else { + target_iterator.Next(); + } + continue; + } + + case XD3_WINFINISH: + if (encoding) { + if (encode_stream.flags & XD3_FLUSH) { + done_after_input = true; + } + encoding = false; + } else { + CHECK_EQ(0, CmpDifferentBlockBytesAtOffset(decoded_block, + target_file, + verified_bytes)); + verified_bytes += decoded_block.Size(); + decoded_block.Reset(); + encoding = true; + } + goto process; + + case XD3_WINSTART: + case XD3_GOTHEADER: + goto process; + + default: + XPR(NTR "%s = %s %s\n", encoding ? "E " : " D", + xd3_strerror(ret), + msg == NULL ? 
"" : msg); + + CHECK_EQ(0, ret); + CHECK_EQ(-1, ret); + } + } + + CHECK_EQ(target_file.Size(), encoded_bytes); + CHECK_EQ(target_file.Size(), verified_bytes); + CHECK_EQ(0, xd3_close_stream(&decode_stream)); + CHECK_EQ(0, xd3_close_stream(&encode_stream)); + xd3_free_stream(&encode_stream); + xd3_free_stream(&decode_stream); + } + + void MainEncodeDecode(const TmpFile &source_file, + const TmpFile &target_file, + ExtFile *coded_data, + const Options &options) { + vector<const char*> ecmd; + char bbuf[16]; + snprintf(bbuf, sizeof(bbuf), "-B%" Q "u", options.encode_srcwin_maxsz); + ecmd.push_back("xdelta3"); + ecmd.push_back(bbuf); + ecmd.push_back("-s"); + ecmd.push_back(source_file.Name()); + ecmd.push_back(target_file.Name()); + ecmd.push_back(coded_data->Name()); + ecmd.push_back(NULL); + + CHECK_EQ(0, xd3_main_cmdline(ecmd.size() - 1, + const_cast<char**>(&ecmd[0]))); + + vector<const char*> dcmd; + ExtFile recon_file; + dcmd.push_back("xdelta3"); + ecmd.push_back(bbuf); + dcmd.push_back("-d"); + dcmd.push_back("-s"); + dcmd.push_back(source_file.Name()); + dcmd.push_back(coded_data->Name()); + dcmd.push_back(recon_file.Name()); + dcmd.push_back(NULL); + + CHECK_EQ(0, xd3_main_cmdline(dcmd.size() - 1, + const_cast<char**>(&dcmd[0]))); + + CHECK_EQ(0, test_compare_files(recon_file.Name(), + target_file.Name())); + } + + // Similar to xd3_process_memory, with support for test Options. + // Exercises xd3_process_stream. + int TestProcessMemory (int is_encode, + int (*func) (xd3_stream *), + const uint8_t *input, + usize_t input_size, + const uint8_t *source, + usize_t source_size, + uint8_t *output, + usize_t *output_size, + usize_t output_size_max, + const Options &options) { + xd3_stream stream; + xd3_config config; + xd3_source src; + int ret; + + memset (& stream, 0, sizeof (stream)); + memset (& config, 0, sizeof (config)); + + if (is_encode) + { + config.winsize = input_size; + config.iopt_size = options.iopt_size; + config.sprevsz = xd3_pow2_roundup (config.winsize); + } + + if ((ret = xd3_config_stream (&stream, &config)) != 0) + { + goto exit; + } + + if (source != NULL) + { + memset (& src, 0, sizeof (src)); + + src.blksize = source_size; + src.onblk = source_size; + src.curblk = source; + src.curblkno = 0; + src.max_winsize = source_size; + + if ((ret = xd3_set_source_and_size (&stream, &src, source_size)) != 0) + { + goto exit; + } + } + + if ((ret = xd3_process_stream (is_encode, + & stream, + func, 1, + input, input_size, + output, + output_size, + output_size_max)) != 0) + { + goto exit; + } + + exit: + if (ret != 0) + { + IF_DEBUG2 (DP(RINT "test_process_memory: %d: %s\n", ret, stream.msg)); + } + xd3_free_stream(&stream); + return ret; + } + + void EncodeDecodeAPI(const FileSpec &spec0, const FileSpec &spec1, + Block *delta, const Options &options) { + Block from; + Block to; + spec0.Get(&from, 0, spec0.Size()); + spec1.Get(&to, 0, spec1.Size()); + + delta->SetSize(to.Size() * 1.5); + usize_t out_size; + int enc_ret = TestProcessMemory(true, + &xd3_encode_input, + to.Data(), + to.Size(), + from.Data(), + from.Size(), + delta->Data(), + &out_size, + delta->Size(), + options); + CHECK_EQ(0, enc_ret); + delta->SetSize(out_size); + + Block recon; + recon.SetSize(to.Size()); + usize_t recon_size; + int dec_ret = xd3_decode_memory(delta->Data(), + delta->Size(), + from.Data(), + from.Size(), + recon.Data(), + &recon_size, + recon.Size(), + 0); + CHECK_EQ(0, dec_ret); + CHECK_EQ(0, CmpDifferentBlockBytes(to, recon)); + } + 
+////////////////////////////////////////////////////////////////////// + +void TestPrintf() { + char buf[64]; + xoff_t x = XOFF_T_MAX; + snprintf_func (buf, sizeof(buf), "%" Q "u", x); + const char *expect = XD3_USE_LARGEFILE64 ? + "18446744073709551615" : "4294967295"; + XD3_ASSERT(strcmp (buf, expect) == 0); +} + +void TestRandomNumbers() { + MTRandom rand; + int rounds = 1<<20; + uint64_t usum = 0; + uint64_t esum = 0; + + for (int i = 0; i < rounds; i++) { + usum += rand.Rand32(); + esum += rand.ExpRand32(1024); + } + + double allowed_error = 0.01; + + uint32_t umean = usum / rounds; + uint32_t emean = esum / rounds; + + uint32_t uexpect = UINT32_MAX / 2; + uint32_t eexpect = 1024; + + if (umean < uexpect * (1.0 - allowed_error) || + umean > uexpect * (1.0 + allowed_error)) { + XPR(NT "uniform mean error: %u != %u\n", umean, uexpect); + abort(); + } + + if (emean < eexpect * (1.0 - allowed_error) || + emean > eexpect * (1.0 + allowed_error)) { + XPR(NT "exponential mean error: %u != %u\n", emean, eexpect); + abort(); + } +} + +void TestRandomFile() { + MTRandom rand1; + FileSpec spec1(&rand1); + BlockIterator bi(spec1); + + spec1.GenerateFixedSize(0); + CHECK_EQ(0, spec1.Size()); + CHECK_EQ(0, spec1.Segments()); + CHECK_EQ(0, spec1.Blocks()); + bi.SetBlock(0); + CHECK_EQ(0, bi.BytesOnBlock()); + + spec1.GenerateFixedSize(1); + CHECK_EQ(1, spec1.Size()); + CHECK_EQ(1, spec1.Segments()); + CHECK_EQ(1, spec1.Blocks()); + bi.SetBlock(0); + CHECK_EQ(1, bi.BytesOnBlock()); + + spec1.GenerateFixedSize(Constants::BLOCK_SIZE); + CHECK_EQ(Constants::BLOCK_SIZE, spec1.Size()); + CHECK_EQ(1, spec1.Segments()); + CHECK_EQ(1, spec1.Blocks()); + bi.SetBlock(0); + CHECK_EQ(Constants::BLOCK_SIZE, bi.BytesOnBlock()); + bi.SetBlock(1); + CHECK_EQ(0, bi.BytesOnBlock()); + + spec1.GenerateFixedSize(Constants::BLOCK_SIZE + 1); + CHECK_EQ(Constants::BLOCK_SIZE + 1, spec1.Size()); + CHECK_EQ(2, spec1.Segments()); + CHECK_EQ(2, spec1.Blocks()); + bi.SetBlock(0); + CHECK_EQ(Constants::BLOCK_SIZE, bi.BytesOnBlock()); + bi.SetBlock(1); + CHECK_EQ(1, bi.BytesOnBlock()); + + spec1.GenerateFixedSize(Constants::BLOCK_SIZE * 2); + CHECK_EQ(Constants::BLOCK_SIZE * 2, spec1.Size()); + CHECK_EQ(2, spec1.Segments()); + CHECK_EQ(2, spec1.Blocks()); + bi.SetBlock(0); + CHECK_EQ(Constants::BLOCK_SIZE, bi.BytesOnBlock()); + bi.SetBlock(1); + CHECK_EQ(Constants::BLOCK_SIZE, bi.BytesOnBlock()); +} + +void TestFirstByte() { + MTRandom rand; + FileSpec spec0(&rand); + FileSpec spec1(&rand); + + spec0.GenerateFixedSize(0); + spec1.GenerateFixedSize(1); + CHECK_EQ(0, CmpDifferentBytes(spec0, spec0)); + CHECK_EQ(0, CmpDifferentBytes(spec1, spec1)); + CHECK_EQ(1, CmpDifferentBytes(spec0, spec1)); + CHECK_EQ(1, CmpDifferentBytes(spec1, spec0)); + + spec0.GenerateFixedSize(1); + spec0.ModifyTo(Modify1stByte(), &spec1); + CHECK_EQ(1, CmpDifferentBytes(spec0, spec1)); + + spec0.GenerateFixedSize(Constants::BLOCK_SIZE + 1); + spec0.ModifyTo(Modify1stByte(), &spec1); + CHECK_EQ(1, CmpDifferentBytes(spec0, spec1)); + + SizeIterator<size_t, Sizes> si(&rand, Constants::TEST_ROUNDS); + + for (; !si.Done(); si.Next()) { + size_t size = si.Get(); + if (size == 0) { + continue; + } + spec0.GenerateFixedSize(size); + spec0.ModifyTo(Modify1stByte(), &spec1); + InMemoryEncodeDecode(spec0, spec1, NULL, Options()); + } +} + +void TestModifyMutator() { + MTRandom rand; + FileSpec spec0(&rand); + FileSpec spec1(&rand); + + spec0.GenerateFixedSize(Constants::BLOCK_SIZE * 3); + + struct { + size_t size; + size_t addr; + } test_cases[] = { + { 
Constants::BLOCK_SIZE, 0 }, + { Constants::BLOCK_SIZE / 2, 1 }, + { Constants::BLOCK_SIZE, 1 }, + { Constants::BLOCK_SIZE * 2, 1 }, + }; + + for (size_t i = 0; i < SIZEOF_ARRAY(test_cases); i++) { + ChangeList cl1; + cl1.push_back(Change(Change::MODIFY, test_cases[i].size, + test_cases[i].addr)); + spec0.ModifyTo(ChangeListMutator(cl1), &spec1); + CHECK_EQ(spec0.Size(), spec1.Size()); + + size_t diff = CmpDifferentBytes(spec0, spec1); + CHECK_LE(diff, test_cases[i].size); + + // There is a 1/256 probability of the changed byte matching the + // original value. The following allows double the probability to + // pass. + CHECK_GE(diff, test_cases[i].size - (2 * test_cases[i].size / 256)); + + InMemoryEncodeDecode(spec0, spec1, NULL, Options()); + } +} + +void TestAddMutator() { + MTRandom rand; + FileSpec spec0(&rand); + FileSpec spec1(&rand); + + spec0.GenerateFixedSize(Constants::BLOCK_SIZE * 2); + // TODO: fix this test (for all block sizes)! it's broken because + // the same byte could be added? + + struct { + size_t size; + size_t addr; + size_t expected_adds; + } test_cases[] = { + { 1, 0, 2 /* 1st byte, last byte (short block) */ }, + { 1, 1, 3 /* 1st 2 bytes, last byte */ }, + { 1, Constants::BLOCK_SIZE - 1, 2 /* changed, last */ }, + { 1, Constants::BLOCK_SIZE, 2 /* changed, last */ }, + { 1, Constants::BLOCK_SIZE + 1, 3 /* changed + 1st of 2nd block, last */ }, + { 1, 2 * Constants::BLOCK_SIZE, 1 /* last byte */ }, + }; + + for (size_t i = 0; i < SIZEOF_ARRAY(test_cases); i++) { + ChangeList cl1; + cl1.push_back(Change(Change::ADD, test_cases[i].size, test_cases[i].addr)); + spec0.ModifyTo(ChangeListMutator(cl1), &spec1); + CHECK_EQ(spec0.Size() + test_cases[i].size, spec1.Size()); + + Block coded; + InMemoryEncodeDecode(spec0, spec1, &coded, Options()); + + Delta delta(coded); + CHECK_EQ(test_cases[i].expected_adds, + delta.AddedBytes()); + } +} + +void TestDeleteMutator() { + MTRandom rand; + FileSpec spec0(&rand); + FileSpec spec1(&rand); + + spec0.GenerateFixedSize(Constants::BLOCK_SIZE * 4); + + struct { + size_t size; + size_t addr; + } test_cases[] = { + // Note: an entry { Constants::BLOCK_SIZE, 0 }, + // does not work because the xd3_srcwin_move_point logic won't + // find a copy if it occurs >= double its size into the file. + { Constants::BLOCK_SIZE / 2, 0 }, + { Constants::BLOCK_SIZE / 2, Constants::BLOCK_SIZE / 2 }, + { Constants::BLOCK_SIZE, Constants::BLOCK_SIZE / 2 }, + { Constants::BLOCK_SIZE * 2, Constants::BLOCK_SIZE * 3 / 2 }, + { Constants::BLOCK_SIZE, Constants::BLOCK_SIZE * 2 }, + }; + + for (size_t i = 0; i < SIZEOF_ARRAY(test_cases); i++) { + ChangeList cl1; + cl1.push_back(Change(Change::DELRANGE, test_cases[i].size, + test_cases[i].addr)); + spec0.ModifyTo(ChangeListMutator(cl1), &spec1); + CHECK_EQ(spec0.Size() - test_cases[i].size, spec1.Size()); + + Block coded; + InMemoryEncodeDecode(spec0, spec1, &coded, Options()); + + Delta delta(coded); + CHECK_EQ(0, delta.AddedBytes()); + } +} + +void TestCopyMutator() { + MTRandom rand; + FileSpec spec0(&rand); + FileSpec spec1(&rand); + + spec0.GenerateFixedSize(Constants::BLOCK_SIZE * 3); + + struct { + size_t size; + size_t from; + size_t to; + } test_cases[] = { + // Copy is difficult to write tests for because where Xdelta finds + // copies, it does not enter checksums. So these tests copy data from + // later to earlier so that checksumming will start. 
+ { Constants::BLOCK_SIZE / 2, Constants::BLOCK_SIZE / 2, 0 }, + { Constants::BLOCK_SIZE, 2 * Constants::BLOCK_SIZE, + Constants::BLOCK_SIZE, }, + }; + + for (size_t i = 0; i < SIZEOF_ARRAY(test_cases); i++) { + ChangeList cl1; + cl1.push_back(Change(Change::COPY, test_cases[i].size, + test_cases[i].from, test_cases[i].to)); + spec0.ModifyTo(ChangeListMutator(cl1), &spec1); + CHECK_EQ(spec0.Size() + test_cases[i].size, spec1.Size()); + + Block coded; + InMemoryEncodeDecode(spec0, spec1, &coded, Options()); + + Delta delta(coded); + CHECK_EQ(0, delta.AddedBytes()); + } +} + +void TestMoveMutator() { + MTRandom rand; + FileSpec spec0(&rand); + FileSpec spec1(&rand); + + spec0.GenerateFixedSize(Constants::BLOCK_SIZE * 3); + + struct { + size_t size; + size_t from; + size_t to; + } test_cases[] = { + // This is easier to test than Copy but has the same trouble as Delete. + { Constants::BLOCK_SIZE / 2, Constants::BLOCK_SIZE / 2, 0 }, + { Constants::BLOCK_SIZE / 2, 0, Constants::BLOCK_SIZE / 2 }, + { Constants::BLOCK_SIZE, Constants::BLOCK_SIZE, 2 * + Constants::BLOCK_SIZE }, + { Constants::BLOCK_SIZE, 2 * Constants::BLOCK_SIZE, + Constants::BLOCK_SIZE }, + { Constants::BLOCK_SIZE * 3 / 2, Constants::BLOCK_SIZE, + Constants::BLOCK_SIZE * 3 / 2 }, + + // This is a no-op + { Constants::BLOCK_SIZE, Constants::BLOCK_SIZE * 2, + 3 * Constants::BLOCK_SIZE }, + }; + + for (size_t i = 0; i < SIZEOF_ARRAY(test_cases); i++) { + ChangeList cl1; + cl1.push_back(Change(Change::MOVE, test_cases[i].size, + test_cases[i].from, test_cases[i].to)); + spec0.ModifyTo(ChangeListMutator(cl1), &spec1); + CHECK_EQ(spec0.Size(), spec1.Size()); + + Block coded; + InMemoryEncodeDecode(spec0, spec1, &coded, Options()); + + Delta delta(coded); + CHECK_EQ(0, delta.AddedBytes()); + } +} + +void TestOverwriteMutator() { + MTRandom rand; + FileSpec spec0(&rand); + FileSpec spec1(&rand); + + spec0.GenerateFixedSize(Constants::BLOCK_SIZE); + + ChangeList cl1; + cl1.push_back(Change(Change::COPYOVER, 10, 0, 20)); + spec0.ModifyTo(ChangeListMutator(cl1), &spec1); + CHECK_EQ(spec0.Size(), spec1.Size()); + + Block b0, b1; + BlockIterator(spec0).Get(&b0); + BlockIterator(spec1).Get(&b1); + + CHECK(memcmp(b0.Data(), b1.Data() + 20, 10) == 0); + CHECK(memcmp(b0.Data(), b1.Data(), 20) == 0); + CHECK(memcmp(b0.Data() + 30, b1.Data() + 30, + Constants::BLOCK_SIZE - 30) == 0); + + xoff_t zero = 0; + cl1.clear(); + cl1.push_back(Change(Change::COPYOVER, 10, 20, zero)); + spec0.ModifyTo(ChangeListMutator(cl1), &spec1); + CHECK_EQ(spec0.Size(), spec1.Size()); + + BlockIterator(spec0).Get(&b0); + BlockIterator(spec1).Get(&b1); + + CHECK(memcmp(b0.Data() + 20, b1.Data(), 10) == 0); + CHECK(memcmp(b0.Data() + 10, b1.Data() + 10, + Constants::BLOCK_SIZE - 10) == 0); +} + +// Note: this test is written to expose a problem, but the problem was +// only exposed with BLOCK_SIZE = 128. +void TestNonBlocking() { + MTRandom rand; + FileSpec spec0(&rand); + FileSpec spec1(&rand); + FileSpec spec2(&rand); + + spec0.GenerateFixedSize(Constants::BLOCK_SIZE * 3); + + // This is a lazy target match + Change ct(Change::COPYOVER, 22, + Constants::BLOCK_SIZE + 50, + Constants::BLOCK_SIZE + 20); + + // This is a source match just after the block boundary, shorter + // than the lazy target match. + Change cs1(Change::COPYOVER, 16, + Constants::BLOCK_SIZE + 51, + Constants::BLOCK_SIZE - 1); + + // This overwrites the original source bytes. 
+ Change cs2(Change::MODIFY, 108, + Constants::BLOCK_SIZE + 20); + + // This changes the first blocks + Change c1st(Change::MODIFY, Constants::BLOCK_SIZE - 2, 0); + + ChangeList csl; + csl.push_back(cs1); + csl.push_back(cs2); + csl.push_back(c1st); + + spec0.ModifyTo(ChangeListMutator(csl), &spec1); + + ChangeList ctl; + ctl.push_back(ct); + ctl.push_back(c1st); + + spec0.ModifyTo(ChangeListMutator(ctl), &spec2); + + InMemoryEncodeDecode(spec1, spec2, NULL, Options()); +} + +void TestEmptyInMemory() { + MTRandom rand; + FileSpec spec0(&rand); + FileSpec spec1(&rand); + Block block; + + spec0.GenerateFixedSize(0); + spec1.GenerateFixedSize(0); + + InMemoryEncodeDecode(spec0, spec1, &block, Options()); + + Delta delta(block); + CHECK_LT(0, block.Size()); + CHECK_EQ(1, delta.Windows()); +} + +void TestBlockInMemory() { + MTRandom rand; + FileSpec spec0(&rand); + FileSpec spec1(&rand); + Block block; + + spec0.GenerateFixedSize(Constants::BLOCK_SIZE); + spec1.GenerateFixedSize(Constants::BLOCK_SIZE); + + InMemoryEncodeDecode(spec0, spec1, &block, Options()); + + Delta delta(block); + CHECK_EQ(spec1.Blocks(Constants::WINDOW_SIZE), delta.Windows()); +} + +void TestSmallStride() { + MTRandom rand; + FileSpec spec0(&rand); + usize_t size = Constants::BLOCK_SIZE * 4; + spec0.GenerateFixedSize(size); + + // Note: Not very good performance due to hash collisions, note 3x + // multiplier below. + for (int s = 15; s < 101; s++) { + usize_t changes = 0; + ChangeList cl; + for (usize_t j = s; j < size; j += s, ++changes) + { + cl.push_back(Change(Change::MODIFY, 1, j)); + } + + FileSpec spec1(&rand); + spec0.ModifyTo(ChangeListMutator(cl), &spec1); + + Options options; + options.encode_srcwin_maxsz = size; + options.iopt_size = 128; + options.smatch_cfg = XD3_SMATCH_SLOW; + options.size_known = false; + + Block block; + InMemoryEncodeDecode(spec0, spec1, &block, options); + Delta delta(block); + + IF_DEBUG1(DP(RINT "[stride=%d] changes=%" W "u adds=%" Q "u\n", + s, changes, delta.AddedBytes())); + double allowance = Constants::BLOCK_SIZE < 8192 || s < 30 ? 3.0 : 1.1; + CHECK_GE(allowance * changes, (double)delta.AddedBytes()); + } +} + +void TestCopyWindow() { + // Construct an input that has many copies, to fill the IOPT buffer + // and force a source window decision. "srclen" may be set to a + // value that goes beyond the end-of-source. 
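+  // With size = 4096 and clen = 16 the loop below queues size/clen/2 = 128
+  // separate MOVE edits, which matches the 128-entry instruction buffer
+  // configured via options.iopt_size further down.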
+ const int clen = 16; + const int size = 4096; + const int nmov = size / clen; + const int iters = 16; + uint32_t added_01 = 0; + uint32_t added_10 = 0; + for (int i = 1; i <= iters; i++) { + MTRandom rand(MTRandom::TEST_SEED1 * i); + FileSpec spec0(&rand); + ChangeList cl; + + spec0.GenerateFixedSize(size); + + for (int j = 0; j < nmov; j += 2) + { + cl.push_back(Change(Change::MOVE, + clen, (j + 1) * clen, j * clen)); + } + + FileSpec spec1(&rand); + spec0.ModifyTo(ChangeListMutator(cl), &spec1); + + Options options; + options.encode_srcwin_maxsz = size; + options.iopt_size = 128; + options.smatch_cfg = XD3_SMATCH_SLOW; + + Block block1; + InMemoryEncodeDecode(spec0, spec1, &block1, options); + Delta delta1(block1); + // Allow one missed window (e.g., hash collisions) + added_01 += delta1.AddedBytes(); + + Block block2; + InMemoryEncodeDecode(spec1, spec0, &block2, options); + Delta delta2(block2); + // Allow one missed window (e.g., hash collisions) + added_10 += delta2.AddedBytes(); + + Block block3; + Block block4; + EncodeDecodeAPI(spec0, spec1, &block3, options); + EncodeDecodeAPI(spec1, spec0, &block4, options); + } + // Average less than 0.5 misses (of length clen) per iteration. + CHECK_GE(clen * iters / 2, added_01); + CHECK_GE(clen * iters / 2, added_10); +} + +void TestCopyFromEnd() { + // Copies from the end of the source buffer, which reach a block + // boundary end-of-file. + const int size = 4096; + const int clen = 16; + const int nmov = (size / 2) / clen; + const int iters = 16; + uint32_t added_01 = 0; + uint32_t added_10 = 0; + for (int i = 1; i <= iters; i++) { + MTRandom rand(MTRandom::TEST_SEED1 * i); + FileSpec spec0(&rand); + ChangeList cl; + + spec0.GenerateFixedSize(size); + + cl.push_back(Change(Change::MODIFY, 2012, 2048)); + + for (int j = 0; j < nmov; j += 2) + { + cl.push_back(Change(Change::MOVE, + clen, (j + 1) * clen, j * clen)); + } + + cl.push_back(Change(Change::COPYOVER, 28, 4068, 3000)); + cl.push_back(Change(Change::COPYOVER, 30, 4066, 3100)); + cl.push_back(Change(Change::COPYOVER, 32, 4064, 3200)); + + FileSpec spec1(&rand); + spec0.ModifyTo(ChangeListMutator(cl), &spec1); + + Options options; + options.encode_srcwin_maxsz = size; + options.iopt_size = 128; + options.smatch_cfg = XD3_SMATCH_SLOW; + + Block block1; + InMemoryEncodeDecode(spec0, spec1, &block1, options); + Delta delta1(block1); + added_01 += delta1.AddedBytes(); + + Block block2; + InMemoryEncodeDecode(spec1, spec0, &block2, options); + Delta delta2(block2); + added_10 += delta2.AddedBytes(); + + Block block3; + Block block4; + EncodeDecodeAPI(spec0, spec1, &block3, options); + EncodeDecodeAPI(spec1, spec0, &block4, options); + } + CHECK_GE(2000 * iters, added_01); + CHECK_LE(2000 * iters, added_10); +} + +void TestHalfBlockCopy() { + // Create a half-block copy, 7.5 blocks apart, in a pair of files: + // 0 1 ... 6 7 + // spec0 [bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb][ccccc][bbbb_] + // spec1 [aaaaa][ccccc][aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa_] + // where stage= + // 0: the final block is full + // a. (source)spec1->(target)spec0 copies block C: reads 8 source + // blocks during target block 0. + // b. (source)spec0->(target)spec1 does not copy block C b/c attempt + // to read past EOF empties block 0 from (virtual) block cache + // 1: the final block is less than full. + // a. (same) copies block C + // b. 
(same) copies block C, unlike 0a, no attempt to read past EOF + // + // "virtual" above refers to XD3_TOOFARBACK, since there is no caching + // in the API, there is simply a promise not to request blocks that are + // beyond source->max_winsize from the last known source file position. + for (int stage = 0; stage < 2; stage++) + { + IF_DEBUG1 (DP(RINT "half_block_copy stage %d\n", stage)); + + MTRandom rand; + FileSpec spec0(&rand); + FileSpec spec1(&rand); + + spec0.GenerateFixedSize(Constants::BLOCK_SIZE * 8 - stage); + + ChangeList cl1; + cl1.push_back(Change(Change::MODIFY, + Constants::BLOCK_SIZE / 2, // size + 0)); + cl1.push_back(Change(Change::COPYOVER, + Constants::BLOCK_SIZE / 2, // size + Constants::BLOCK_SIZE * 7, // offset + Constants::BLOCK_SIZE / 2)); + cl1.push_back(Change(Change::MODIFY, + Constants::BLOCK_SIZE * 7, + Constants::BLOCK_SIZE - stage)); + spec0.ModifyTo(ChangeListMutator(cl1), &spec1); + + Options options; + options.encode_srcwin_maxsz = Constants::BLOCK_SIZE * 8; + + Block block0; + Block block1; + InMemoryEncodeDecode(spec0, spec1, &block0, options); + InMemoryEncodeDecode(spec1, spec0, &block1, options); + + Delta delta0(block0); + Delta delta1(block1); + + const int yes = + Constants::BLOCK_SIZE * 8 - Constants::BLOCK_SIZE / 2; + const int no = + Constants::BLOCK_SIZE * 8 - Constants::BLOCK_SIZE / 2; + + if (stage == 0) + { + CHECK_EQ(yes, delta0.AddedBytes()); + CHECK_EQ(no, delta1.AddedBytes()); + } + else + { + CHECK_EQ(yes, delta0.AddedBytes()); + CHECK_EQ(yes, delta1.AddedBytes()); + } + } +} + +void FourWayMergeTest(const FileSpec &spec0, + const FileSpec &spec1, + const FileSpec &spec2, + const FileSpec &spec3) { + TmpFile f0, f1, f2, f3; + ExtFile d01, d12, d23; + Options options; + options.encode_srcwin_maxsz = + std::max(spec0.Size(), options.encode_srcwin_maxsz); + + spec0.WriteTmpFile(&f0); + spec1.WriteTmpFile(&f1); + spec2.WriteTmpFile(&f2); + spec3.WriteTmpFile(&f3); + + MainEncodeDecode(f0, f1, &d01, options); + MainEncodeDecode(f1, f2, &d12, options); + MainEncodeDecode(f2, f3, &d23, options); + + // Merge 2 + ExtFile out; + vector<const char*> mcmd; + mcmd.push_back("xdelta3"); + mcmd.push_back("merge"); + mcmd.push_back("-m"); + mcmd.push_back(d01.Name()); + mcmd.push_back(d12.Name()); + mcmd.push_back(out.Name()); + mcmd.push_back(NULL); + + // XPR(NTR "Running one merge: %s\n", CommandToString(mcmd).c_str()); + CHECK_EQ(0, xd3_main_cmdline(mcmd.size() - 1, + const_cast<char**>(&mcmd[0]))); + + ExtFile recon; + vector<const char*> tcmd; + tcmd.push_back("xdelta3"); + tcmd.push_back("-d"); + tcmd.push_back("-s"); + tcmd.push_back(f0.Name()); + tcmd.push_back(out.Name()); + tcmd.push_back(recon.Name()); + tcmd.push_back(NULL); + + // XPR(NTR "Running one recon! %s\n", CommandToString(tcmd).c_str()); + CHECK_EQ(0, xd3_main_cmdline(tcmd.size() - 1, + const_cast<char**>(&tcmd[0]))); + // XPR(NTR "Should equal! 
%s\n", f2.Name()); + + CHECK(recon.EqualsSpec(spec2)); + + // Merge 3 + ExtFile out3; + vector<const char*> mcmd3; + mcmd3.push_back("xdelta3"); + mcmd3.push_back("merge"); + mcmd3.push_back("-m"); + mcmd3.push_back(d01.Name()); + mcmd3.push_back("-m"); + mcmd3.push_back(d12.Name()); + mcmd3.push_back(d23.Name()); + mcmd3.push_back(out3.Name()); + mcmd3.push_back(NULL); + + // XPR(NTR "Running one 3-merge: %s\n", CommandToString(mcmd3).c_str()); + CHECK_EQ(0, xd3_main_cmdline(mcmd3.size() - 1, + const_cast<char**>(&mcmd3[0]))); + + ExtFile recon3; + vector<const char*> tcmd3; + tcmd3.push_back("xdelta3"); + tcmd3.push_back("-d"); + tcmd3.push_back("-s"); + tcmd3.push_back(f0.Name()); + tcmd3.push_back(out3.Name()); + tcmd3.push_back(recon3.Name()); + tcmd3.push_back(NULL); + + // XPR(NTR "Running one 3-recon %s\n", CommandToString(tcmd3).c_str()); + CHECK_EQ(0, xd3_main_cmdline(tcmd3.size() - 1, + const_cast<char**>(&tcmd3[0]))); + // XPR(NTR "Should equal %s\n", f3.Name()); + + CHECK(recon3.EqualsSpec(spec3)); +} + +void TestMergeCommand1() { + /* Repeat random-input testing for a number of iterations. + * Test 2, 3, and 4-file scenarios (i.e., 1, 2, and 3-delta merges). */ + MTRandom rand; + FileSpec spec0(&rand); + FileSpec spec1(&rand); + FileSpec spec2(&rand); + FileSpec spec3(&rand); + + SizeIterator<size_t, Sizes> si0(&rand, 10); + + for (; !si0.Done(); si0.Next()) { + size_t size0 = si0.Get(); + + SizeIterator<size_t, Sizes> si1(&rand, 10); + for (; !si1.Done(); si1.Next()) { + size_t change1 = si1.Get(); + + if (change1 == 0) { + continue; + } + + // XPR(NTR "S0 = %lu\n", size0); + // XPR(NTR "C1 = %lu\n", change1); + // XPR(NTR "."); + + size_t add1_pos = size0 ? rand.Rand32() % size0 : 0; + size_t del2_pos = size0 ? rand.Rand32() % size0 : 0; + + spec0.GenerateFixedSize(size0); + + ChangeList cl1, cl2, cl3; + + size_t change3 = change1; + size_t change3_pos; + + if (change3 >= size0) { + change3 = size0; + change3_pos = 0; + } else { + change3_pos = rand.Rand32() % (size0 - change3); + } + + cl1.push_back(Change(Change::ADD, change1, add1_pos)); + cl2.push_back(Change(Change::DELRANGE, change1, del2_pos)); + cl3.push_back(Change(Change::MODIFY, change3, change3_pos)); + + spec0.ModifyTo(ChangeListMutator(cl1), &spec1); + spec1.ModifyTo(ChangeListMutator(cl2), &spec2); + spec2.ModifyTo(ChangeListMutator(cl3), &spec3); + + FourWayMergeTest(spec0, spec1, spec2, spec3); + FourWayMergeTest(spec3, spec2, spec1, spec0); + } + } +} + +void TestMergeCommand2() { + /* Same as above, different mutation pattern. */ + /* TODO: run this with large sizes too */ + /* TODO: run this with small sizes too */ + MTRandom rand; + FileSpec spec0(&rand); + FileSpec spec1(&rand); + FileSpec spec2(&rand); + FileSpec spec3(&rand); + + SizeIterator<size_t, Sizes> si0(&rand, 10); + for (; !si0.Done(); si0.Next()) { + size_t size0 = si0.Get(); + + SizeIterator<size_t, Sizes> si1(&rand, 10); + for (; !si1.Done(); si1.Next()) { + size_t size1 = si1.Get(); + + SizeIterator<size_t, Sizes> si2(&rand, 10); + for (; !si2.Done(); si2.Next()) { + size_t size2 = si2.Get(); + + SizeIterator<size_t, Sizes> si3(&rand, 10); + for (; !si3.Done(); si3.Next()) { + size_t size3 = si3.Get(); + + // We're only interested in three sizes, strictly decreasing. 
*/ + if (size3 >= size2 || size2 >= size1 || size1 >= size0) { + continue; + } + + // XPR(NTR "S0 = %lu\n", size0); + // XPR(NTR "S1 = %lu\n", size1); + // XPR(NTR "S2 = %lu\n", size2); + // XPR(NTR "S3 = %lu\n", size3); + // XPR(NTR "."); + + spec0.GenerateFixedSize(size0); + + ChangeList cl1, cl2, cl3; + + cl1.push_back(Change(Change::DELRANGE, size0 - size1, 0)); + cl2.push_back(Change(Change::DELRANGE, size0 - size2, 0)); + cl3.push_back(Change(Change::DELRANGE, size0 - size3, 0)); + + spec0.ModifyTo(ChangeListMutator(cl1), &spec1); + spec0.ModifyTo(ChangeListMutator(cl2), &spec2); + spec0.ModifyTo(ChangeListMutator(cl3), &spec3); + + FourWayMergeTest(spec0, spec1, spec2, spec3); + FourWayMergeTest(spec3, spec2, spec1, spec0); + } + } + } + } +} + +void TestLastFrontierBlock() { + // This test constructs an input that can expose + // https://github.com/jmacd/xdelta/issues/188 + // when run through the command-line with source via a FIFO. + // That is not tested here, but the test stays. + if (Constants::WINDOW_SIZE < XD3_ALLOCSIZE) + { + return; + } + + MTRandom rand; + FileSpec spec0(&rand); + FileSpec spec1(&rand); + const xoff_t size = XD3_ALLOCSIZE * 64; // == XD3_MINSRCWINSZ * 2 + const xoff_t edit = XD3_ALLOCSIZE; + + Options options; + options.encode_srcwin_maxsz = XD3_MINSRCWINSZ; + options.block_size = XD3_ALLOCSIZE; + options.window_size = XD3_MINSRCWINSZ; + options.size_known = false; + + spec0.GenerateFixedSize(size); + + ChangeList cl; + + // Modify the 0th byte in order to induce indexing of subsequent + // bytes, but allow copying most of the file to keep the test fast. + cl.push_back(Change(Change::MODIFY, 1, edit * 31)); + cl.push_back(Change(Change::COPYOVER, edit, edit * 31, edit * 63)); + + spec0.ModifyTo(ChangeListMutator(cl), &spec1); + + Block noblock; + InMemoryEncodeDecode(spec0, spec1, &noblock, options); + InMemoryEncodeDecode(spec1, spec0, &noblock, options); +} + +}; // class Regtest<Constants> + +#define TEST(x) XPR(NTR #x "...\n"); regtest.x() + +// These tests are primarily tests of the testing framework itself. +template <class T> +void UnitTest() { + Regtest<T> regtest; + TEST(TestPrintf); + TEST(TestRandomNumbers); + TEST(TestRandomFile); + TEST(TestFirstByte); + TEST(TestModifyMutator); + TEST(TestAddMutator); + TEST(TestDeleteMutator); + TEST(TestCopyMutator); + TEST(TestMoveMutator); + TEST(TestOverwriteMutator); +} + +// These are Xdelta tests. 
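+// Each of them exercises the full encode/decode path; main() instantiates
+// MainTest for the SmallBlock, MixedBlock, OversizeBlock and LargeBlock
+// geometries defined in sizes.h.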
+template <class T> +void MainTest() { + XPR(NT "Blocksize %" Q "u windowsize %" Z "u\n", + T::BLOCK_SIZE, T::WINDOW_SIZE); + Regtest<T> regtest; + TEST(TestEmptyInMemory); + TEST(TestBlockInMemory); + TEST(TestSmallStride); + TEST(TestCopyWindow); + TEST(TestCopyFromEnd); + TEST(TestNonBlocking); + TEST(TestHalfBlockCopy); + TEST(TestLastFrontierBlock); + TEST(TestMergeCommand1); + TEST(TestMergeCommand2); +} + +#undef TEST + +int main(int argc, char **argv) +{ + vector<const char*> mcmd; + string pn; + const char *sp = strrchr(argv[0], '/'); + if (sp != NULL) { + pn.append(argv[0], sp - argv[0] + 1); + } + pn.append("xdelta3"); + mcmd.push_back(pn.c_str()); + mcmd.push_back("test"); + mcmd.push_back(NULL); + + UnitTest<SmallBlock>(); + MainTest<SmallBlock>(); + MainTest<MixedBlock>(); + MainTest<OversizeBlock>(); + MainTest<LargeBlock>(); + + CHECK_EQ(0, xd3_main_cmdline(mcmd.size() - 1, + const_cast<char**>(&mcmd[0]))); + + return 0; +} + diff --git a/testing/regtest_c.c b/testing/regtest_c.c new file mode 100644 index 0000000..e2845ee --- /dev/null +++ b/testing/regtest_c.c @@ -0,0 +1,2 @@ +/* -*- Mode: C++ -*- */ +#include "../xdelta3.c" diff --git a/testing/run_release.sh b/testing/run_release.sh new file mode 100755 index 0000000..85ed1f7 --- /dev/null +++ b/testing/run_release.sh @@ -0,0 +1,2 @@ +#!/bin/sh +(cd .. && ./run_release.sh) diff --git a/testing/segment.h b/testing/segment.h new file mode 100644 index 0000000..ea3dcee --- /dev/null +++ b/testing/segment.h @@ -0,0 +1,98 @@ +// -*- Mode: C++ -*- + +class Segment { + public: + Segment(size_t size, MTRandom *rand) + : size_(size), + seed_(rand->Rand32()), + seed_offset_(0), + data_(NULL) { + CHECK_GT(size_, 0); + } + + Segment(size_t size, uint32_t seed) + : size_(size), + seed_(seed), + seed_offset_(0), + data_(NULL) { + CHECK_GT(size_, 0); + } + + Segment(size_t size, uint8_t *data) + : size_(size), + seed_(0), + seed_offset_(0), + data_(data) { + CHECK_GT(size_, 0); + } + + size_t Size() const { + return size_; + } + + Segment Subseg(size_t start, size_t size) const { + CHECK_LE(start + size, size_); + if (data_) { + return Segment(size, data_ + start); + } else { + return Segment(size, seed_, seed_offset_ + start); + } + } + + void Fill(size_t seg_offset, size_t size, uint8_t *data) const { + CHECK_LE(seg_offset + size, size_); + if (data_) { + memcpy(data, data_ + seg_offset, size); + } else { + size_t skip = seg_offset + seed_offset_; + MTRandom gen(seed_); + MTRandom8 gen8(&gen); + while (skip--) { + gen8.Rand8(); + } + for (size_t i = 0; i < size; i++) { + data[i] = gen8.Rand8(); + } + } + } + + string ToString() const { + string r; + if (data_) { + for (size_t i = 0; i < size_; i++) { + char buf[10]; + sprintf(buf, "%02x ", data_[i]); + r.append(buf); + } + } else { + char buf[256]; + sprintf(buf, "size=%ld,seed=%ud,skip=%ld", size_, seed_, seed_offset_); + r.append(buf); + } + return r; + } + +private: + // Used by Subseg() + Segment(size_t size, uint32_t seed, size_t seed_offset) + : size_(size), + seed_(seed), + seed_offset_(seed_offset), + data_(NULL) { + CHECK_GT(size_, 0); + } + + size_t size_; // Size of this segment + + // For random segments + uint32_t seed_; // Seed used for generating byte sequence + size_t seed_offset_; // Seed positions the sequence this many bytes + // before its beginning. 
+ + // For literal segments (data is not owned) + uint8_t *data_; +}; + +typedef map<xoff_t, Segment> SegmentMap; +typedef typename SegmentMap::const_iterator ConstSegmentMapIterator; +typedef typename SegmentMap::iterator SegmentMapIterator; diff --git a/testing/sizes.h b/testing/sizes.h new file mode 100644 index 0000000..223d359 --- /dev/null +++ b/testing/sizes.h @@ -0,0 +1,111 @@ +// -*- Mode: C++ -*- +template <typename T, typename U> +class SizeIterator { + public: + SizeIterator(MTRandom *rand, size_t howmany) + : rand_(rand), + count_(0), + fixed_(U::sizes), + fixed_size_(SIZEOF_ARRAY(U::sizes)), + howmany_(howmany) { } + + T Get() { + if (count_ < fixed_size_) { + return fixed_[count_]; + } + return rand_->Rand<T>() % U::max_value; + } + + bool Done() { + return count_ >= fixed_size_ && count_ >= howmany_; + } + + void Next() { + count_++; + } + + private: + MTRandom *rand_; + size_t count_; + T* fixed_; + size_t fixed_size_; + size_t howmany_; +}; + +// Small sizes +class SmallSizes { +public: + static size_t sizes[]; + static size_t max_value; +}; + +size_t SmallSizes::sizes[] = { + 0, 1, 128 / 4, 3333, + 128 - (128 / 3), + 128, + 128 + (128 / 3), + 2 * 128 - (128 / 3), + 2 * 128, + 2 * 128 + (128 / 3), +}; + +size_t SmallSizes::max_value = 128 * 3; + +// Large sizes +class LargeSizes { +public: + static size_t sizes[]; + static size_t max_value; +}; + +size_t LargeSizes::sizes[] = { + 1 << 20, + 1 << 18, + 1 << 16, +}; + +size_t LargeSizes::max_value = 1<<20; + +// Base constants +struct BaseConstants { + static const size_t TEST_ROUNDS; +}; + +const size_t BaseConstants::TEST_ROUNDS = 10; + +// Regtest<> arguments +struct SmallBlock : public BaseConstants { + static const xoff_t BLOCK_SIZE; + static const size_t WINDOW_SIZE; + typedef SmallSizes Sizes; +}; + +const xoff_t SmallBlock::BLOCK_SIZE = 1<<7; +const size_t SmallBlock::WINDOW_SIZE = 1<<7; + +struct LargeBlock : public BaseConstants { + static const xoff_t BLOCK_SIZE; + static const size_t WINDOW_SIZE; + typedef LargeSizes Sizes; +}; + +const xoff_t LargeBlock::BLOCK_SIZE = (1 << 13); +const size_t LargeBlock::WINDOW_SIZE = (1 << 13); + +struct MixedBlock : public BaseConstants { + static const xoff_t BLOCK_SIZE; + static const size_t WINDOW_SIZE; + typedef SmallSizes Sizes; +}; + +const xoff_t MixedBlock::BLOCK_SIZE = 1<<7; +const size_t MixedBlock::WINDOW_SIZE = 1<<8; + +struct OversizeBlock : public BaseConstants { + static const xoff_t BLOCK_SIZE; + static const size_t WINDOW_SIZE; + typedef SmallSizes Sizes; +}; + +const xoff_t OversizeBlock::BLOCK_SIZE = 1<<8; +const size_t OversizeBlock::WINDOW_SIZE = 1<<7; diff --git a/testing/test.h b/testing/test.h new file mode 100644 index 0000000..7de24fb --- /dev/null +++ b/testing/test.h @@ -0,0 +1,70 @@ +// -*- Mode: C++ -*- + +extern "C" { +#include "../xdelta3.h" +#include "../xdelta3-internal.h" +} + +#include <unistd.h> +#include <math.h> +#include <string> + +#define CHECK_EQ(x,y) CHECK_OP(x,y,==) +#define CHECK_NE(x,y) CHECK_OP(x,y,!=) +#define CHECK_LT(x,y) CHECK_OP(x,y,<) +#define CHECK_GT(x,y) CHECK_OP(x,y,>) +#define CHECK_LE(x,y) CHECK_OP(x,y,<=) +#define CHECK_GE(x,y) CHECK_OP(x,y,>=) + +#define CHECK_OP(x,y,OP) \ + do { \ + __typeof__(x) _x(x); \ + __typeof__(x) _y(y); \ + if (!(_x OP _y)) { \ + cerr << __FILE__ << ":" << __LINE__ << " Check failed: " << #x " " #OP " " #y << endl; \ + cerr << __FILE__ << ":" << __LINE__ << " {0} " << _x << endl; \ + cerr << __FILE__ << ":" << __LINE__ << " {1} " << _y << endl; \ + abort(); \ + } } while (false) +#undef 
CHECK +#define CHECK(x) \ + do {if (!(x)) { \ + cerr << __FILE__ << ":" << __LINE__ << " Check failed: " << #x << endl; \ + abort(); \ + } } while (false) + +#define DCHECK(x) + +using std::string; + +#include <vector> +using std::vector; + +inline string CommandToString(const vector<const char*> &v) { + string s(v[0]); + for (size_t i = 1; i < v.size() && v[i] != NULL; i++) { + s.append(" "); + s.append(v[i]); + } + return s; +} + +#include <iostream> +using std::cerr; +using std::endl; +using std::ostream; + +#include <map> +using std::map; +using std::pair; + +#include <list> +using std::list; + +template <typename T, typename U> +pair<T, U> make_pair(const T& t, const U& u) { + return pair<T, U>(t, u); +} + +using std::min; +using std::max; diff --git a/testing/xdelta3-regtest.py b/testing/xdelta3-regtest.py new file mode 100755 index 0000000..cb07198 --- /dev/null +++ b/testing/xdelta3-regtest.py @@ -0,0 +1,1273 @@ +#!/usr/bin/python2.6 +# xdelta 3 - delta compression tools and library +# Copyright (C) 2003, 2006, 2007, 2008. Joshua P. MacDonald +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +# TODO: test 1.5 vs. 
greedy + +import os, sys, math, re, time, types, array, random +import xdelta3 + +#RCSDIR = '/mnt/polaroid/Polaroid/orbit_linux/home/jmacd/PRCS' +#RCSDIR = '/tmp/PRCS_read_copy' +#SAMPLEDIR = "/tmp/WESNOTH_tmp/diff" + +#RCSDIR = 'G:/jmacd/PRCS_copy' +#SAMPLEDIR = "C:/sample_data/Wesnoth/tar" + +RCSDIR = '/Users/jmacd/src/ftp.kernel.org' +SAMPLEDIR = '/Users/jmacd/src/xdelta3/linux' + +# +MIN_SIZE = 0 + +TIME_TOO_SHORT = 0.050 + +SKIP_TRIALS = 2 +MIN_TRIALS = 3 +MAX_TRIALS = 15 + +# 10 = fast 1.5 = slow +MIN_STDDEV_PCT = 1.5 + +# How many results per round +MAX_RESULTS = 500 +TEST_ROUNDS = 10 +KEEP_P = (0.5) + +# For RCS testing, what percent to select +FILE_P = (0.50) + +# For run-speed tests +MIN_RUN = 1000 * 1000 * 1 +MAX_RUN = 1000 * 1000 * 10 + +# Testwide defaults +ALL_ARGS = [ + '-q' # '-vv' + ] + +# The first 7 args go to -C +SOFT_CONFIG_CNT = 7 + +CONFIG_ORDER = [ 'large_look', + 'large_step', + 'small_look', + 'small_chain', + 'small_lchain', + 'max_lazy', + 'long_enough', + + # > SOFT_CONFIG_CNT + 'nocompress', + 'winsize', + 'srcwinsize', + 'sprevsz', + 'iopt', + 'djw', + 'altcode', + ] + +CONFIG_ARGMAP = { + 'winsize' : '-W', + 'srcwinsize' : '-B', + 'sprevsz' : '-P', + 'iopt' : '-I', + 'nocompress' : '-N', + 'djw' : '-Sdjw', + 'altcode' : '-T', + } + +def INPUT_SPEC(rand): + return { + + # Time/space costs: + + # -C 1,2,3,4,5,6,7 + 'large_look' : lambda d: rand.choice([9, 10, 11, 12]), + 'large_step' : lambda d: rand.choice([25, 26, 27, 28, 29, 30]), + 'small_look' : lambda d: rand.choice([4]), + 'small_chain' : lambda d: rand.choice([1]), + 'small_lchain' : lambda d: rand.choice([1]), + 'max_lazy' : lambda d: rand.choice([4, 5, 6, 7, 8, 9, 10 ]), + + # Note: long_enough only refers to small matching and has no effect if + # small_chain == 1. 
+ 'long_enough' : lambda d: rand.choice([4]), + + # -N + 'nocompress' : lambda d: rand.choice(['false']), + + # -T + 'altcode' : lambda d: rand.choice(['false']), + + # -S djw + 'djw' : lambda d: rand.choice(['false']), + + # Memory costs: + + # -W + 'winsize' : lambda d: 8 * (1<<20), + + # -B + 'srcwinsize' : lambda d: 64 * (1<<20), + + # -I 0 is unlimited + 'iopt' : lambda d: 0, + + # -P only powers of two + 'sprevsz' : lambda d: rand.choice([x * (1<<16) for x in [4]]), + } +#end + +# +TMPDIR = '/tmp/xd3regtest.%d' % os.getpid() + +RUNFILE = os.path.join(TMPDIR, 'run') +DFILE = os.path.join(TMPDIR, 'output') +RFILE = os.path.join(TMPDIR, 'recon') +CMPTMP1 = os.path.join(TMPDIR, 'cmptmp1') +CMPTMP2 = os.path.join(TMPDIR, 'cmptmp2') + +HEAD_STATE = 0 +BAR_STATE = 1 +REV_STATE = 2 +DATE_STATE = 3 + +# +IGNORE_FILENAME = re.compile('.*\\.(gif|jpg).*') + +# rcs output +RE_TOTREV = re.compile('total revisions: (\\d+)') +RE_BAR = re.compile('----------------------------') +RE_REV = re.compile('revision (.+)') +RE_DATE = re.compile('date: ([^;]+);.*') +# xdelta output +RE_HDRSZ = re.compile('VCDIFF header size: +(\\d+)') +RE_EXTCOMP = re.compile('XDELTA ext comp.*') + +def c2str(c): + return ' '.join(['%s' % x for x in c]) +#end + +def SumList(l): + return reduce(lambda x,y: x+y, l) +#end + +# returns (total, mean, stddev, q2 (median), +# (q3-q1)/2 ("semi-interquartile range"), max-min (spread)) +class StatList: + def __init__(self,l,desc): + cnt = len(l) + assert(cnt > 1) + l.sort() + self.cnt = cnt + self.l = l + self.total = SumList(l) + self.mean = self.total / float(self.cnt) + self.s = math.sqrt(SumList([(x-self.mean) * + (x - self.mean) for x in l]) / + float(self.cnt-1)) + self.q0 = l[0] + self.q1 = l[int(self.cnt/4.0+0.5)] + self.q2 = l[int(self.cnt/2.0+0.5)] + self.q3 = l[min(self.cnt-1,int((3.0*self.cnt)/4.0+0.5))] + self.q4 = l[self.cnt-1] + self.siqr = (self.q3-self.q1)/2.0; + self.spread = (self.q4-self.q0) + if len(l) == 1: + self.str = '%s %s' % (desc, l[0]) + else: + self.str = '%s mean %.1f: 25%-ile %d %d %d %d %d' % \ + (desc, self.mean, self.q0, self.q1, self.q2, self.q3, self.q4) + #end +#end + +def RunCommand(args, ok = [0]): + #print 'run command %s' % (' '.join(args)) + p = os.spawnvp(os.P_WAIT, args[0], args) + if p not in ok: + raise CommandError(args, 'exited %d' % p) + #end +#end + +def RunCommandIO(args,infn,outfn): + p = os.fork() + if p == 0: + os.dup2(os.open(infn,os.O_RDONLY),0) + os.dup2(os.open(outfn,os.O_CREAT|os.O_TRUNC|os.O_WRONLY),1) + os.execvp(args[0], args) + else: + s = os.waitpid(p,0) + o = os.WEXITSTATUS(s[1]) + if not os.WIFEXITED(s[1]) or o != 0: + raise CommandError(args, 'exited %d' % o) + #end + #end +#end + +class TimedTest: + def __init__(self, target, source, runnable, + skip_trials = SKIP_TRIALS, + min_trials = MIN_TRIALS, + max_trials = MAX_TRIALS, + min_stddev_pct = MIN_STDDEV_PCT): + self.target = target + self.source = source + self.runnable = runnable + + self.skip_trials = skip_trials + self.min_trials = min(min_trials, max_trials) + self.max_trials = max_trials + self.min_stddev_pct = min_stddev_pct + + self.encode_time = self.DoTest(DFILE, + lambda x: x.Encode(self.target, + self.source, DFILE)) + self.encode_size = runnable.EncodeSize(DFILE) + + self.decode_time = self.DoTest(RFILE, + lambda x: x.Decode(DFILE, + self.source, RFILE), + ) + runnable.Verify(self.target, RFILE) + #end + + def DoTest(self, fname, func): + trials = 0 + measured = [] + + while 1: + try: + os.remove(fname) + except OSError: + pass + + start_time = 
time.time() + start_clock = time.clock() + + func(self.runnable) + + total_clock = (time.clock() - start_clock) + total_time = (time.time() - start_time) + + elap_time = max(total_time, 0.0000001) + elap_clock = max(total_clock, 0.0000001) + + trials = trials + 1 + + # skip some of the first trials + if trials > self.skip_trials: + measured.append((elap_clock, elap_time)) + #print 'measurement total: %.1f ms' % (total_time * 1000.0) + + # at least so many + if trials < (self.skip_trials + self.min_trials): + #print 'continue: need more trials: %d' % trials + continue + + # compute %variance + done = 0 + if self.skip_trials + self.min_trials <= 2: + measured = measured + measured; + done = 1 + #end + + time_stat = StatList([x[1] for x in measured], 'elap time') + sp = float(time_stat.s) / float(time_stat.mean) + + # what if MAX_TRIALS is exceeded? + too_many = (trials - self.skip_trials) >= self.max_trials + good = (100.0 * sp) < self.min_stddev_pct + if done or too_many or good: + trials = trials - self.skip_trials + if not done and not good: + #print 'too many trials: %d' % trials + pass + #clock = StatList([x[0] for x in measured], 'elap clock') + return time_stat + #end + #end + #end +#end + +def Decimals(start, end): + l = [] + step = start + while 1: + r = range(step, step * 10, step) + l = l + r + if step * 10 >= end: + l.append(step * 10) + break + step = step * 10 + return l +#end + +# This tests the raw speed of 0-byte inputs +def RunSpeedTest(): + for L in Decimals(MIN_RUN, MAX_RUN): + SetFileSize(RUNFILE, L) + + trx = TimedTest(RUNFILE, None, Xdelta3Runner(['-W', str(1<<20)])) + ReportSpeed(L, trx, '1MB ') + + trx = TimedTest(RUNFILE, None, Xdelta3Runner(['-W', str(1<<19)])) + ReportSpeed(L, trx, '512k') + + trx = TimedTest(RUNFILE, None, Xdelta3Runner(['-W', str(1<<18)])) + ReportSpeed(L, trx, '256k') + + trm = TimedTest(RUNFILE, None, Xdelta3Mod1(RUNFILE)) + ReportSpeed(L, trm, 'swig') + + trg = TimedTest(RUNFILE, None, GzipRun1()) + ReportSpeed(L,trg,'gzip') + #end +#end + +def SetFileSize(F,L): + fd = os.open(F, os.O_CREAT | os.O_WRONLY) + os.ftruncate(fd,L) + assert os.fstat(fd).st_size == L + os.close(fd) +#end + +def ReportSpeed(L,tr,desc): + print '%s run length %u: size %u: time %.3f ms: decode %.3f ms' % \ + (desc, L, + tr.encode_size, + tr.encode_time.mean * 1000.0, + tr.decode_time.mean * 1000.0) +#end + +class Xdelta3RunClass: + def __init__(self, extra): + self.extra = extra + #end + + def __str__(self): + return ' '.join(self.extra) + #end + + def New(self): + return Xdelta3Runner(self.extra) + #end +#end + +class Xdelta3Runner: + # Use "forkexec" to get special command-line only features like + # external compression support. 
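+  # With forkexec=False (the default) the same argument vector is passed
+  # in-process to the SWIG binding's xdelta3.xd3_main_cmdline; see Main()
+  # below.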
+ def __init__(self, extra, forkexec=False): + self.forkexec = forkexec + self.extra = extra + #end + + def Encode(self, target, source, output): + args = (ALL_ARGS + + self.extra + + ['-e']) + if source: + args.append('-s') + args.append(source) + #end + args = args + [target, output] + self.Main(args) + #end + + def Decode(self, input, source, output): + args = (ALL_ARGS + + ['-d']) + if source: + args.append('-s') + args.append(source) + #end + args = args + [input, output] + self.Main(args) + #end + + def Verify(self, target, recon): + if target[-3:] == ".gz": + RunCommandIO(('gzip', '-dc'), target, CMPTMP1) + RunCommandIO(('gzip', '-dc'), recon, CMPTMP2) + RunCommand(('cmp', CMPTMP1, CMPTMP2)) + else: + RunCommand(('cmp', target, recon)) + #end + + def EncodeSize(self, output): + return os.stat(output).st_size + #end + + def Main(self, args): + try: + if self.forkexec: + RunCommand(['../xdelta3'] + args) + else: + xdelta3.xd3_main_cmdline(args) + except Exception, e: + raise CommandError(args, "xdelta3.main exception: %s" % e) + #end + #end +#end + +class Xdelta3Mod1: + def __init__(self, file): + self.target_data = open(file, 'r').read() + #end + + def Encode(self, ignore1, ignore2, ignore3): + r1, encoded = xdelta3.xd3_encode_memory(self.target_data, None, 1000000, 1<<10) + if r1 != 0: + raise CommandError('memory', 'encode failed: %s' % r1) + #end + self.encoded = encoded + #end + + def Decode(self, ignore1, ignore2, ignore3): + r2, data1 = xdelta3.xd3_decode_memory(self.encoded, None, len(self.target_data)) + if r2 != 0: + raise CommandError('memory', 'decode failed: %s' % r1) + #end + self.decoded = data1 + #end + + def Verify(self, ignore1, ignore2): + if self.target_data != self.decoded: + raise CommandError('memory', 'bad decode') + #end + #end + + def EncodeSize(self, ignore1): + return len(self.encoded) + #end +#end + +class GzipRun1: + def Encode(self, target, source, output): + assert source == None + RunCommandIO(['gzip', '-cf'], target, output) + #end + + def Decode(self, input, source, output): + assert source == None + RunCommandIO(['gzip', '-dcf'], input, output) + #end + + def Verify(self, target, recon): + RunCommand(('cmp', target, recon)) + #end + + def EncodeSize(self, output): + return os.stat(output).st_size + #end +#end + +class Xdelta1RunClass: + def __str__(self): + return 'xdelta1' + #end + + def New(self): + return Xdelta1Runner() + #end +#end + +class Xdelta1Runner: + def Encode(self, target, source, output): + assert source != None + args = ['xdelta1', 'delta', '-q', source, target, output] + RunCommand(args, [0, 1]) + #end + + def Decode(self, input, source, output): + assert source != None + args = ['xdelta1', 'patch', '-q', input, source, output] + # Note: for dumb historical reasons, xdelta1 returns 1 or 0 + RunCommand(args) + #end + + def Verify(self, target, recon): + RunCommand(('cmp', target, recon)) + #end + + def EncodeSize(self, output): + return os.stat(output).st_size + #end +#end + +# exceptions +class SkipRcsException: + def __init__(self,reason): + self.reason = reason + #end +#end + +class NotEnoughVersions: + def __init__(self): + pass + #end +#end + +class CommandError: + def __init__(self,cmd,str): + if type(cmd) is types.TupleType or \ + type(cmd) is types.ListType: + cmd = reduce(lambda x,y: '%s %s' % (x,y),cmd) + #end + print 'command was: ',cmd + print 'command failed: ',str + print 'have fun debugging' + #end +#end + +class RcsVersion: + def __init__(self,vstr): + self.vstr = vstr + #end + def __cmp__(self,other): + return 
cmp(self.date, other.date) + #end + def __str__(self): + return str(self.vstr) + #end +#end + +class RcsFile: + + def __init__(self, fname): + self.fname = fname + self.versions = [] + self.state = HEAD_STATE + #end + + def SetTotRev(self,s): + self.totrev = int(s) + #end + + def Rev(self,s): + self.rev = RcsVersion(s) + if len(self.versions) >= self.totrev: + raise SkipRcsException('too many versions (in log messages)') + #end + self.versions.append(self.rev) + #end + + def Date(self,s): + self.rev.date = s + #end + + def Match(self, line, state, rx, gp, newstate, f): + if state == self.state: + m = rx.match(line) + if m: + if f: + f(m.group(gp)) + #end + self.state = newstate + return 1 + #end + #end + return None + #end + + def Sum1Rlog(self): + f = os.popen('rlog '+self.fname, "r") + l = f.readline() + while l: + if self.Match(l, HEAD_STATE, RE_TOTREV, 1, BAR_STATE, self.SetTotRev): + pass + elif self.Match(l, BAR_STATE, RE_BAR, 1, REV_STATE, None): + pass + elif self.Match(l, REV_STATE, RE_REV, 1, DATE_STATE, self.Rev): + pass + elif self.Match(l, DATE_STATE, RE_DATE, 1, BAR_STATE, self.Date): + pass + #end + l = f.readline() + #end + c = f.close() + if c != None: + raise c + #end + #end + + def Sum1(self): + st = os.stat(self.fname) + self.rcssize = st.st_size + self.Sum1Rlog() + if self.totrev != len(self.versions): + raise SkipRcsException('wrong version count') + #end + self.versions.sort() + #end + + def Checkout(self,n): + v = self.versions[n] + out = open(self.Verf(n), "w") + cmd = 'co -ko -p%s %s' % (v.vstr, self.fname) + total = 0 + (inf, + stream, + err) = os.popen3(cmd, "r") + inf.close() + buf = stream.read() + while buf: + total = total + len(buf) + out.write(buf) + buf = stream.read() + #end + v.vsize = total + estr = '' + buf = err.read() + while buf: + estr = estr + buf + buf = err.read() + #end + if stream.close(): + raise CommandError(cmd, 'checkout failed: %s\n%s\n%s' % (v.vstr, self.fname, estr)) + #end + out.close() + err.close() + #end + + def Vdate(self,n): + return self.versions[n].date + #end + + def Vstr(self,n): + return self.versions[n].vstr + #end + + def Verf(self,n): + return os.path.join(TMPDIR, 'input.%d' % n) + #end + + def FilePairsByDate(self, runclass): + if self.totrev < 2: + raise NotEnoughVersions() + #end + self.Checkout(0) + ntrials = [] + if self.totrev < 2: + return vtrials + #end + for v in range(0,self.totrev-1): + if v > 1: + os.remove(self.Verf(v-1)) + #end + self.Checkout(v+1) + if os.stat(self.Verf(v)).st_size < MIN_SIZE or \ + os.stat(self.Verf(v+1)).st_size < MIN_SIZE: + continue + #end + + result = TimedTest(self.Verf(v+1), + self.Verf(v), + runclass.New()) + + target_size = os.stat(self.Verf(v+1)).st_size + + ntrials.append(result) + #end + + os.remove(self.Verf(self.totrev-1)) + os.remove(self.Verf(self.totrev-2)) + return ntrials + #end + + def AppendVersion(self, f, n): + self.Checkout(n) + rf = open(self.Verf(n), "r") + data = rf.read() + f.write(data) + rf.close() + return len(data) + #end + +class RcsFinder: + def __init__(self): + self.subdirs = [] + self.rcsfiles = [] + self.others = [] + self.skipped = [] + self.biground = 0 + #end + + def Scan1(self,dir): + dents = os.listdir(dir) + subdirs = [] + rcsfiles = [] + others = [] + for dent in dents: + full = os.path.join(dir, dent) + if os.path.isdir(full): + subdirs.append(full) + elif dent[len(dent)-2:] == ",v": + rcsfiles.append(RcsFile(full)) + else: + others.append(full) + #end + #end + self.subdirs = self.subdirs + subdirs + self.rcsfiles = self.rcsfiles + rcsfiles + 
self.others = self.others + others + return subdirs + #end + + def Crawl(self, dir): + subdirs = [dir] + while subdirs: + s1 = self.Scan1(subdirs[0]) + subdirs = subdirs[1:] + s1 + #end + #end + + def Summarize(self): + good = [] + for rf in self.rcsfiles: + try: + rf.Sum1() + if rf.totrev < 2: + raise SkipRcsException('too few versions (< 2)') + #end + except SkipRcsException, e: + #print 'skipping file %s: %s' % (rf.fname, e.reason) + self.skipped.append(rf) + else: + good.append(rf) + #end + self.rcsfiles = good + #end + + def AllPairsByDate(self, runclass): + results = [] + good = [] + for rf in self.rcsfiles: + try: + results = results + rf.FilePairsByDate(runclass) + except SkipRcsException: + print 'file %s has compressed versions: skipping' % (rf.fname) + except NotEnoughVersions: + print 'testing %s on %s: not enough versions' % (runclass, rf.fname) + else: + good.append(rf) + #end + self.rcsfiles = good + self.ReportPairs(runclass, results) + return results + #end + + def ReportPairs(self, name, results): + encode_time = 0 + decode_time = 0 + encode_size = 0 + for r in results: + encode_time += r.encode_time.mean + decode_time += r.decode_time.mean + encode_size += r.encode_size + #end + print '%s rcs: encode %.2f s: decode %.2f s: size %d' % \ + (name, encode_time, decode_time, encode_size) + #end + + def MakeBigFiles(self, rand): + f1 = open(TMPDIR + "/big.1", "w") + f2 = open(TMPDIR + "/big.2", "w") + population = [] + for file in self.rcsfiles: + if len(file.versions) < 2: + continue + population.append(file) + #end + f1sz = 0 + f2sz = 0 + fcount = int(len(population) * FILE_P) + assert fcount > 0 + for file in rand.sample(population, fcount): + m = IGNORE_FILENAME.match(file.fname) + if m != None: + continue + #end + r1, r2 = rand.sample(xrange(0, len(file.versions)), 2) + f1sz += file.AppendVersion(f1, r1) + f2sz += file.AppendVersion(f2, r2) + #m.update('%s,%s,%s ' % (file.fname[len(RCSDIR):], + #file.Vstr(r1), file.Vstr(r2))) + #end + testkey = 'rcs%d' % self.biground + self.biground = self.biground + 1 + + print '%s; source %u bytes; target %u bytes' % (testkey, f1sz, f2sz) + f1.close() + f2.close() + return (TMPDIR + "/big.1", + TMPDIR + "/big.2", + testkey) + #end + + def Generator(self): + return lambda rand: self.MakeBigFiles(rand) + #end +#end + +# find a set of RCS files for testing +def GetTestRcsFiles(): + rcsf = RcsFinder() + rcsf.Crawl(RCSDIR) + if len(rcsf.rcsfiles) == 0: + raise CommandError('', 'no RCS files') + #end + rcsf.Summarize() + print "rcsfiles: rcsfiles %d; subdirs %d; others %d; skipped %d" % ( + len(rcsf.rcsfiles), + len(rcsf.subdirs), + len(rcsf.others), + len(rcsf.skipped)) + print StatList([x.rcssize for x in rcsf.rcsfiles], "rcssize").str + print StatList([x.totrev for x in rcsf.rcsfiles], "totrev").str + return rcsf +#end + +class SampleDataTest: + def __init__(self, dirs): + dirs_in = dirs + self.pairs = [] + while dirs: + d = dirs[0] + dirs = dirs[1:] + l = os.listdir(d) + files = [] + for e in l: + p = os.path.join(d, e) + if os.path.isdir(p): + dirs.append(p) + else: + files.append(p) + #end + #end + if len(files) > 1: + files.sort() + for x in xrange(len(files)): + for y in xrange(len(files)): + self.pairs.append((files[x], files[y], + '%s-%s' % (files[x], files[y]))) + #end + #end + #end + #end + print "Sample data test using %d file pairs in %s" % ( + len(self.pairs), dirs_in) + #end + + def Generator(self): + return lambda rand: rand.choice(self.pairs) + #end +#end + +# configs are represented as a list of values, +# program takes a 
list of strings: +def ConfigToArgs(config): + args = [ '-C', + ','.join([str(x) for x in config[0:SOFT_CONFIG_CNT]])] + for i in range(SOFT_CONFIG_CNT, len(CONFIG_ORDER)): + key = CONFIG_ARGMAP[CONFIG_ORDER[i]] + val = config[i] + if val == 'true' or val == 'false': + if val == 'true': + args.append('%s' % key) + #end + else: + args.append('%s=%s' % (key, val)) + #end + #end + return args +#end + +# +class RandomTest: + def __init__(self, tnum, tinput, config, syntuple = None): + self.mytinput = tinput[2] + self.myconfig = config + self.tnum = tnum + + if syntuple != None: + self.runtime = syntuple[0] + self.compsize = syntuple[1] + self.decodetime = None + else: + args = ConfigToArgs(config) + result = TimedTest(tinput[1], tinput[0], Xdelta3Runner(args)) + + self.runtime = result.encode_time.mean + self.compsize = result.encode_size + self.decodetime = result.decode_time.mean + #end + + self.score = None + self.time_pos = None + self.size_pos = None + self.score_pos = None + #end + + def __str__(self): + decodestr = ' %s' % self.decodetime + return 'time %.6f%s size %d%s << %s >>%s' % ( + self.time(), ((self.time_pos != None) and + (" (%s)" % self.time_pos) or ""), + self.size(), ((self.size_pos != None) and + (" (%s)" % self.size_pos) or ""), + c2str(self.config()), + decodestr) + #end + + def time(self): + return self.runtime + #end + + def size(self): + return self.compsize + #end + + def config(self): + return self.myconfig + #end + + def score(self): + return self.score + #end + + def tinput(self): + return self.mytinput + #end +#end + +def PosInAlist(l, e): + for i in range(0, len(l)): + if l[i][1] == e: + return i; + #end + #end + return -1 +#end + +# Generates a set of num_results test configurations, given the list of +# retest-configs. +def RandomTestConfigs(rand, input_configs, num_results): + + outputs = input_configs[:] + have_set = dict([(c,c) for c in input_configs]) + + # Compute a random configuration + def RandomConfig(): + config = [] + cmap = {} + for key in CONFIG_ORDER: + val = cmap[key] = (INPUT_SPEC(rand)[key])(cmap) + config.append(val) + #end + return tuple(config) + #end + + while len(outputs) < num_results: + newc = None + for i in xrange(100): + c = RandomConfig() + if have_set.has_key(c): + continue + #end + have_set[c] = c + newc = c + break + if newc is None: + print 'stopped looking for configs at %d' % len(outputs) + break + #end + outputs.append(c) + #end + outputs.sort() + return outputs +#end + +def RunOptimizationLoop(rand, generator, rounds): + configs = [] + for rnum in xrange(rounds): + configs = RandomTestConfigs(rand, configs, MAX_RESULTS) + tinput = generator(rand) + tests = [] + for x in xrange(len(configs)): + t = RandomTest(x, tinput, configs[x]) + print 'Round %d test %d: %s' % (rnum, x, t) + tests.append(t) + #end + results = ScoreTests(tests) + + for r in results: + c = r.config() + if not test_all_config_results.has_key(c): + test_all_config_results[c] = [r] + else: + test_all_config_results[c].append(r) + #end + #end + + #GraphResults('expt%d' % rnum, results) + #GraphSummary('sum%d' % rnum, results) + + # re-test some fraction + configs = [r.config() for r in results[0:int(MAX_RESULTS * KEEP_P)]] + #end +#end + +# TODO: cleanup +test_all_config_results = {} + +def ScoreTests(results): + scored = [] + timed = [] + sized = [] + + t_min = float(min([test.time() for test in results])) + #t_max = float(max([test.time() for test in results])) + s_min = float(min([test.size() for test in results])) + #s_max = float(max([test.size() for test in 
results])) + + for test in results: + + # Hyperbolic function. Smaller scores still better + red = 0.999 # minimum factors for each dimension are 1/1000 + test.score = ((test.size() - s_min * red) * + (test.time() - t_min * red)) + + scored.append((test.score, test)) + timed.append((test.time(), test)) + sized.append((test.size(), test)) + #end + + scored.sort() + timed.sort() + sized.sort() + + best_by_size = [] + best_by_time = [] + + pos = 0 + for (score, test) in scored: + pos += 1 + test.score_pos = pos + #end + + scored = [x[1] for x in scored] + + for test in scored: + test.size_pos = PosInAlist(sized, test) + test.time_pos = PosInAlist(timed, test) + #end + + for test in scored: + c = test.config() + s = 0.0 + print 'H-Score: %0.9f %s' % (test.score, test) + #end + + return scored +#end + +def GraphResults(desc, results): + f = open("data-%s.csv" % desc, "w") + for r in results: + f.write("%0.9f\t%d\t# %s\n" % (r.time(), r.size(), r)) + #end + f.close() + os.system("./plot.sh data-%s.csv plot-%s.jpg" % (desc, desc)) +#end + +def GraphSummary(desc, results_ignore): + test_population = 0 + config_ordered = [] + + # drops duplicate test/config pairs (TODO: don't retest them) + for config, cresults in test_all_config_results.items(): + input_config_map = {} + uniq = [] + for test in cresults: + assert test.config() == config + test_population += 1 + key = test.tinput() + if not input_config_map.has_key(key): + input_config_map[key] = {} + #end + if input_config_map[key].has_key(config): + print 'skipping repeat test %s vs. %s' % (input_config_map[key][config], test) + continue + #end + input_config_map[key][config] = test + uniq.append(test) + #end + config_ordered.append(uniq) + #end + + # sort configs descending by number of tests + config_ordered.sort(lambda x, y: len(y) - len(x)) + + print 'population %d: %d configs %d results' % \ + (test_population, + len(config_ordered), + len(config_ordered[0])) + + if config_ordered[0] == 1: + return + #end + + # a map from test-key to test-list w/ various configs + input_set = {} + osize = len(config_ordered) + + for i in xrange(len(config_ordered)): + config = config_ordered[i][0].config() + config_tests = config_ordered[i] + + #print '%s has %d tested inputs' % (config, len(config_tests)) + + if len(input_set) == 0: + input_set = dict([(t.tinput(), [t]) for t in config_tests]) + continue + #end + + # a map from test-key to test-list w/ various configs + update_set = {} + for r in config_tests: + t = r.tinput() + if input_set.has_key(t): + update_set[t] = input_set[t] + [r] + else: + #print 'config %s does not have test %s' % (config, t) + pass + #end + #end + + if len(update_set) <= 1: + break + #end + + input_set = update_set + + # continue if there are more w/ the same number of inputs + if i < (len(config_ordered) - 1) and \ + len(config_ordered[i + 1]) == len(config_tests): + continue + #end + + # synthesize results for multi-test inputs + config_num = None + + # map of config to sum(various test-keys) + smap = {} + for (key, tests) in input_set.items(): + if config_num == None: + # config_num should be the same in all elements + config_num = len(tests) + smap = dict([(r.config(), + (r.time(), + r.size())) + for r in tests]) + else: + # compuate the per-config sum of time/size + assert config_num == len(tests) + smap = dict([(r.config(), + (smap[r.config()][0] + r.time(), + smap[r.config()][1] + r.size())) + for r in tests]) + #end + #end + + if config_num == 1: + continue + #end + + if len(input_set) == osize: + break + #end + + 
summary = '%s-%d' % (desc, len(input_set)) + osize = len(input_set) + + print 'generate %s w/ %d configs' % (summary, config_num) + syn = [RandomTest(0, (None, None, summary), config, + syntuple = (smap[config][0], smap[config][1])) + for config in smap.keys()] + syn = ScoreTests(syn) + #print 'smap is %s' % (smap,) + #print 'syn is %s' % (' and '.join([str(x) for x in syn])) + #GraphResults(summary, syn) + #end +#end + +def RunRegressionTest(pairs, rounds): + for args in [ + [], + ['-S=djw'], + ['-B=412907520'], + ['-B 412907520', ], + + ]: + print "Args %s" % (args) + for (file1, file2, testkey) in pairs: + ttest = TimedTest(file1, file2, Xdelta3Runner(args, forkexec=True), + skip_trials = 0, + min_trials = 1, + max_trials = 1) + print "Source %s\nTarget %s\nEncode %s\nDecode %s\nSize %s\n\n" % ( + file1, file2, + ttest.encode_time.str, + ttest.decode_time.str, + ttest.encode_size) + #end +#end + +if __name__ == "__main__": + try: + RunCommand(['rm', '-rf', TMPDIR]) + os.mkdir(TMPDIR) + + #rcsf = GetTestRcsFiles() + #generator = rcsf.Generator() + + sample = SampleDataTest([SAMPLEDIR]) + generator = sample.Generator() + + rand = random.Random(135135135135135) + + RunRegressionTest(sample.pairs, TEST_ROUNDS) + + #RunSpeedTest() + + # the idea below is to add the default configurations and + # xdelta1 to the optimization loop: + #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-1', '-3', '-6'])) + #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9'])) + #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9', '-S', 'djw'])) + #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-1', '-S', 'djw'])) + #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9', '-T'])) + #x1r = rcsf.AllPairsByDate(Xdelta1RunClass()) + + except CommandError: + pass + else: + RunCommand(['rm', '-rf', TMPDIR]) + pass + #end +#end diff --git a/testing/xdelta3-test.py b/testing/xdelta3-test.py new file mode 100755 index 0000000..5f1a263 --- /dev/null +++ b/testing/xdelta3-test.py @@ -0,0 +1,155 @@ +#!/usr/bin/python2.7 +# xdelta 3 - delta compression tools and library +# Copyright (C) 2003, 2006, 2007. Joshua P. MacDonald +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +import xdelta3 + +# the test data section is expected to be len('target') +source = 'source source input0 source source' +target = 'source source target source source' + +# +# + +print 'encode: basic ...' +result, patch = xdelta3.xd3_encode_memory(target, source, 50) + +assert result == 0 +assert len(patch) < len(source) + +print 'encode: adler32 ...' +result, patch_adler32 = xdelta3.xd3_encode_memory(target, source, 50, + xdelta3.XD3_ADLER32) + +assert result == 0 +assert len(patch_adler32) < len(source) +assert len(patch_adler32) > len(patch) + +print 'encode: secondary ...' 
+result, patch_djw = xdelta3.xd3_encode_memory(target, source, 50, + xdelta3.XD3_SEC_DJW) + +assert result == 0 +# secondary compression doesn't help +assert len(patch_djw) > len(patch) + +print 'encode: exact ...' +result, ignore = xdelta3.xd3_encode_memory(target, source, len(patch)) + +assert result == 0 +assert len(ignore) < len(source) + +print 'encode: out of space ...' +result, ignore = xdelta3.xd3_encode_memory(target, source, len(patch) - 1) + +assert result == 28 +assert ignore == None + +print 'encode: zero space ...' +result, ignore = xdelta3.xd3_encode_memory(target, source, 0) + +assert result == 28 +assert ignore == None + +print 'encode: no source ...' +result, zdata = xdelta3.xd3_encode_memory(target, None, 50) + +assert result == 0 +assert len(zdata) > len(patch) + +print 'encode: no input ...' +result, ignore = xdelta3.xd3_encode_memory(None, None, 50) + +assert result != 0 + +print 'decode: basic ...' +result, target1 = xdelta3.xd3_decode_memory(patch, source, len(target)) + +assert result == 0 +assert len(target1) == len(target) +assert target1 == target + +print 'decode: out of space ...' +result, ignore = xdelta3.xd3_decode_memory(patch, source, len(target) - 1) + +assert result == 28 +assert ignore == None + +print 'decode: zero space ...' +result, ignore = xdelta3.xd3_decode_memory(patch, source, 0) + +assert result == 28 +assert ignore == None + +print 'decode: single byte error ...' +# a few expected single-byte errors, e.g., unused address cache bits, see +# xdelta3-test.h's single-bit error tests +extra_count = 4 +noverify_count = 0 +for corrupt_pos in range(len(patch_adler32)): + input = ''.join([j == corrupt_pos and '\xff' or patch_adler32[j] + for j in range(len(patch_adler32))]) + + result, ignore = xdelta3.xd3_decode_memory(input, source, len(target), 0) + assert result == -17712 + assert ignore == None + + # without adler32 verification, the error may be in the data section which + # in this case is 6 bytes 'target' + result, corrupt = xdelta3.xd3_decode_memory(input, source, len(target), + xdelta3.XD3_ADLER32_NOVER) + if result == 0: + noverify_count = noverify_count + 1 + #print "got %s" % corrupt + #end +#end +assert noverify_count == len('target') + extra_count + +print 'decode: no source ...' +result, target2 = xdelta3.xd3_decode_memory(zdata, None, len(target)) + +assert result == 0 +assert target == target2 + +# Test compression level setting via flags. assumes a 9 byte checksum +# and that level 9 steps 2, level 1 steps 15: +# 01234567890123456789012345678901 +# level 1 only indexes 2 checksums "abcdefghi" and "ABCDEFGHI" +# outputs 43 vs. 23 bytes +print 'encode: compression level ...' + +source = '_la_la_abcdefghi_la_la_ABCDEFGHI' +target = 'la_la_ABCDEFGH__la_la_abcdefgh__' + +result1, level1 = xdelta3.xd3_encode_memory(target, source, 50, xdelta3.XD3_COMPLEVEL_1) +result9, level9 = xdelta3.xd3_encode_memory(target, source, 50, xdelta3.XD3_COMPLEVEL_9) + +assert result1 == 0 and result9 == 0 +assert len(level1) > len(level9) + +# +# Issue 65 +print 'encode: 65 ...' +source = 'Hello World' +target = 'Hello everyone' +result, patch = xdelta3.xd3_encode_memory(target, source, len(target)) +assert result != 0 + +result, patch = xdelta3.xd3_encode_memory(target, source, 2 * len(target)) +assert result == 0 + +print 'PASS' |
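
For readers who want to exercise the Python bindings shown in xdelta3-test.py directly, below is a minimal round-trip sketch. It uses only calls and flags that appear in the test above (xd3_encode_memory, xd3_decode_memory, XD3_ADLER32) and assumes the xdelta3 extension module built from this tree is importable; the sample strings and the output-size limit are arbitrary choices, not values taken from the test suite.

    import xdelta3

    source = 'source source input0 source source'
    target = 'source source target source source'

    # Encode: returns (result, patch); result 0 means success.  The third
    # argument is the largest output the caller is willing to accept.
    result, patch = xdelta3.xd3_encode_memory(target, source, len(target) * 2,
                                              xdelta3.XD3_ADLER32)
    assert result == 0

    # Decode: the caller supplies the expected output size.  The checksum
    # added by XD3_ADLER32 is checked here unless XD3_ADLER32_NOVER is
    # passed, which is what the single-byte corruption test above relies on.
    result, restored = xdelta3.xd3_decode_memory(patch, source, len(target))
    assert result == 0 and restored == target

    print('patch is %d bytes for a %d byte target' % (len(patch), len(target)))

As in the tests, a too-small output limit makes either call fail with result 28 and a None buffer, so callers should check the result code rather than the buffer alone.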
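
Stepping back to xdelta3-regtest.py: ScoreTests ranks each configuration by the product of two slacks, (size - s_min * red) * (time - t_min * red), measured against a point just below the smallest size and the fastest time seen in that round; red = 0.999 keeps either factor from reaching exactly zero. The iso-score curves are hyperbolas, so a result that is essentially best in one dimension gets a tiny factor there and a low (good) score even if it is mediocre in the other. The snippet below replays the formula on purely invented (time, size) pairs to make that behavior concrete; none of the numbers come from a real run.

    # Illustrative only: replays the ScoreTests formula on invented numbers.
    red = 0.999   # leaves a small nonzero factor even for the best observed value

    # hypothetical (encode time in seconds, compressed size in bytes) results
    results = [(0.80, 52000),   # fastest, largest
               (1.60, 40000),   # smallest, slowest
               (0.95, 43000)]   # middling in both

    t_min = float(min([t for (t, s) in results]))
    s_min = float(min([s for (t, s) in results]))

    scored = sorted([((s - s_min * red) * (t - t_min * red), t, s)
                     for (t, s) in results])
    for (score, t, s) in scored:
        print('score %10.3f  time %.2f  size %d' % (score, t, s))

    # Resulting order: (0.80, 52000) scores lowest (best) because its time
    # factor is only 0.001 * t_min; (1.60, 40000) is next for the same reason
    # on the size axis; the middling (0.95, 43000) result scores worst here.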