1 files changed, 269 insertions, 190 deletions
diff --git a/backend/large.c b/backend/large.c
index ff44ecd..6f7d7af 100644
--- a/backend/large.c
+++ b/backend/large.c
@@ -2,7 +2,7 @@
 
 /*
     libzint - the open source barcode library
-    Copyright (C) 2008 Robin Stuart <robin@zint.org.uk>
+    Copyright (C) 2008 - 2020 Robin Stuart <rstuart114@gmail.com>
 
     Redistribution and use in source and binary forms, with or without
     modification, are permitted provided that the following conditions
@@ -28,211 +28,290 @@
     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     SUCH DAMAGE.
-*/
+ */
+/* vim: set ts=4 sw=4 et : */
 
+/* `large_mul_u64()` and `large_div_u64()` are adapted from articles by F. W. Jacob
+ *   https://www.codeproject.com/Tips/618570/UInt-Multiplication-Squaring
+ *   "This article, along with any associated source code and files, is licensed under The BSD License"
+ *   http://www.codeproject.com/Tips/785014/UInt-Division-Modulus
+ *   "This article, along with any associated source code and files, is licensed under The BSD License"
+ *
+ * These in turn are based on Hacker's Delight (2nd Edition, 2012) by Henry S. Warren, Jr.
+ *   "You are free to use, copy, and distribute any of the code on this web site, whether modified by you or not."
+ *   https://web.archive.org/web/20190716204559/http://www.hackersdelight.org/permissions.htm
+ *
+ * `clz_u64()` and other bits and pieces are adapted from r128.h by Alan Hickman (fahickman)
+ *   https://github.com/fahickman/r128/blob/master/r128.h
+ *   "R128 is released into the public domain. See LICENSE for details." LICENSE is The Unlicense.
+ */
 #include <stdio.h>
-#include <string.h>
+#ifdef _MSC_VER
+#include <malloc.h>
+#endif
 #include "common.h"
 #include "large.h"
 
-static const short int BCD[40] = {
-	0, 0, 0, 0,
-	1, 0, 0, 0,
-	0, 1, 0, 0,
-	1, 1, 0, 0,
-	0, 0, 1, 0,
-	1, 0, 1, 0,
-	0, 1, 1, 0,
-	1, 1, 1, 0,
-	0, 0, 0, 1,
-	1, 0, 0, 1 };
-
-void binary_add(short int accumulator[], short int input_buffer[])
-{ /* Binary addition */
-	int i, carry, done;
-	carry = 0;
-
-	for(i = 0; i < 112; i++) {
-		done = 0;
-		if(((input_buffer[i] == 0) && (accumulator[i] == 0)) && ((carry == 0) && (done == 0))) {
-			accumulator[i] = 0;
-			carry = 0;
-			done = 1;
-		}
-		if(((input_buffer[i] == 0) && (accumulator[i] == 0)) && ((carry == 1) && (done == 0))) {
-			accumulator[i] = 1;
-			carry = 0;
-			done = 1;
-		}
-		if(((input_buffer[i] == 0) && (accumulator[i] == 1)) && ((carry == 0) && (done == 0))) {
-			accumulator[i] = 1;
-			carry = 0;
-			done = 1;
-		}
-		if(((input_buffer[i] == 0) && (accumulator[i] == 1)) && ((carry == 1) && (done == 0))) {
-			accumulator[i] = 0;
-			carry = 1;
-			done = 1;
-		}
-		if(((input_buffer[i] == 1) && (accumulator[i] == 0)) && ((carry == 0) && (done == 0))) {
-			accumulator[i] = 1;
-			carry = 0;
-			done = 1;
-		}
-		if(((input_buffer[i] == 1) && (accumulator[i] == 0)) && ((carry == 1) && (done == 0))) {
-			accumulator[i] = 0;
-			carry = 1;
-			done = 1;
-		}
-		if(((input_buffer[i] == 1) && (accumulator[i] == 1)) && ((carry == 0) && (done == 0))) {
-			accumulator[i] = 0;
-			carry = 1;
-			done = 1;
-		}
-		if(((input_buffer[i] == 1) && (accumulator[i] == 1)) && ((carry == 1) && (done == 0))) {
-			accumulator[i] = 1;
-			carry = 1;
-			done = 1;
-		}
-	}
+#define MASK32  0xFFFFFFFF
+
+/* Convert decimal string `s` of (at most) length `length` to 64-bit and place in 128-bit `t` */
+INTERNAL void large_load_str_u64(large_int *t, const unsigned char *s, int length) {
+    uint64_t val = 0;
+    const unsigned char *se = s + length;
+    for (; s < se && *s >= '0' && *s <= '9'; s++) {
+        val *= 10;
+        val += *s - '0';
+    }
+    t->lo = val;
+    t->hi = 0;
 }
 
-void binary_subtract(short int accumulator[], short int input_buffer[])
-{ 	/* 2's compliment subtraction */
-	/* take input_buffer from accumulator and put answer in accumulator */
-	int i;
-	short int sub_buffer[112];
-	
-	for(i = 0; i < 112; i++) {
-		if(input_buffer[i] == 0) {
-			sub_buffer[i] = 1;
-		} else {
-			sub_buffer[i] = 0;
-		}
-	}
-	binary_add(accumulator, sub_buffer);
-	
-	sub_buffer[0] = 1;
-	
-	for(i = 1; i < 112; i++) {
-		sub_buffer[i] = 0;
-	}
-	binary_add(accumulator, sub_buffer);
+/* Add 128-bit `s` to 128-bit `t` */
+INTERNAL void large_add(large_int *t, const large_int *s) {
+    t->lo += s->lo;
+    t->hi += s->hi + (t->lo < s->lo);
 }
 
-void shiftdown(short int buffer[])
-{
-	int i;
-	
-	buffer[102] = 0;
-	buffer[103] = 0;
+/* Add 64-bit `s` to 128-bit `t` */
+INTERNAL void large_add_u64(large_int *t, uint64_t s) {
+    t->lo += s;
+    if (t->lo < s) {
+        t->hi++;
+    }
+}
 
-	for(i = 0; i < 102; i++) {
-		buffer[i] = buffer[i + 1];
-	}
+/* Subtract 64-bit `s` from 128-bit `t` */
+INTERNAL void large_sub_u64(large_int *t, uint64_t s) {
+    uint64_t r = t->lo - s;
+    if (r > t->lo) {
+        t->hi--;
+    }
+    t->lo = r;
 }
 
-void shiftup(short int buffer[])
-{
-	int i;
-	
-	for(i = 102; i > 0; i--) {
-		buffer[i] = buffer[i - 1];
-	}
-	
-	buffer[0] = 0;
+/* Multiply 128-bit `t` by 64-bit `s`
+ * See Jacob `mult64to128()` and Warren Section 8-2
+ * Note '0' denotes low 32-bits, '1' high 32-bits
+ * if   p00 == s0 * tlo0
+ *      k00 == carry of p00
+ *      p01 == s0 * tlo1
+ *      k01 == carry of (p01 + k00)
+ *      p10 == s1 * tlo0
+ *      k10 == carry of p10
+ *      p11 == s1 * tlo1 (unmasked, i.e. including unshifted carry if any)
+ * then t->lo == (p01 + p10 + k00) << 32 + p00
+ * and  t->hi == p11 + k10 + k01 + thi * s
+ *
+ *      (thi)      tlo1      tlo0
+ * x                 s1        s0
+ *      -------------------------
+ *                            p00
+ *      k01        p01 + k00
+ *                 p10
+ *      p11 + k10
+ */
+INTERNAL void large_mul_u64(large_int *t, uint64_t s) {
+    uint64_t thi = t->hi;
+    uint64_t tlo0 = t->lo & MASK32;
+    uint64_t tlo1 = t->lo >> 32;
+
+    uint64_t s0 = s & MASK32;
+    uint64_t s1 = s >> 32;
+
+    uint64_t tmp = s0 * tlo0; /* p00 (unmasked) */
+    uint64_t p00 = tmp & MASK32;
+    uint64_t k10;
+
+    tmp = (s1 * tlo0) + (tmp >> 32); /* (p10 + k00) (p10 unmasked) */
+    k10 = tmp >> 32;
+
+    tmp = (s0 * tlo1) + (tmp & MASK32); /* (p01 + p10 + k00) (p01 unmasked) */
+
+    t->lo = (tmp << 32) + p00; /* (p01 + p10 + k00) << 32 + p00 (note any carry from unmasked p01 shifted out) */
+    t->hi = (s1 * tlo1) + k10 + (tmp >> 32) + thi * s; /* p11 + k10 + k01 + thi * s */
 }
 
-short int islarger(short int accum[], short int reg[])
-{
-	/* Returns 1 if accum[] is larger than reg[], else 0 */
-	int i, latch, larger;
-	latch = 0;
-	i = 103;
-	larger = 0;
-	
-	
-	do {
-		if((accum[i] == 1) && (reg[i] == 0)) {
-			latch = 1;
-			larger = 1;
-		}
-		if((accum[i] == 0) && (reg[i] == 1)) {
-			latch = 1;
-		}
-		i--;
-	} while ((latch == 0) && (i >= -1));
-	
-	return larger;
+/* Count leading zeroes. See Hickman `r128__clz64()` */
+STATIC_UNLESS_ZINT_TEST int clz_u64(uint64_t x) {
+   uint64_t n = 64, y;
+   y = x >> 32; if (y) { n -= 32; x = y; }
+   y = x >> 16; if (y) { n -= 16; x = y; }
+   y = x >>  8; if (y) { n -=  8; x = y; }
+   y = x >>  4; if (y) { n -=  4; x = y; }
+   y = x >>  2; if (y) { n -=  2; x = y; }
+   y = x >>  1; if (y) { n -=  1; x = y; }
+   return (int) (n - x);
 }
 
-void binary_load(short int reg[], char data[], const unsigned int src_len)
-{
-	int read, i;
-	short int temp[112] = { 0 };
-	
-	for(i = 0; i < 112; i++) {
-		reg[i] = 0;
-	}
-	
-	for(read = 0; read < src_len; read++) {
-
-		for(i = 0; i < 112; i++) {
-			temp[i] = reg[i];
-		}
-		
-		for(i = 0; i < 9; i++) {
-			binary_add(reg, temp);
-		}
-		
-		temp[0] = BCD[ctoi(data[read]) * 4];
-		temp[1] = BCD[(ctoi(data[read]) * 4) + 1];
-		temp[2] = BCD[(ctoi(data[read]) * 4) + 2];
-		temp[3] = BCD[(ctoi(data[read]) * 4) + 3];
-		for(i = 4; i < 112; i++) {
-			temp[i] = 0;
-		}
-		
-		binary_add(reg, temp);
-	}
+/* Divide 128-bit dividend `t` by 64-bit divisor `v`
+ * See Jacob `divmod128by128/64()` and Warren Section 9–2 (divmu64.c.txt)
+ * Note digits are 32-bit parts */
+INTERNAL uint64_t large_div_u64(large_int *t, uint64_t v) {
+    const uint64_t b = 0x100000000; /* Number base (2**32) */
+    uint64_t qhi = 0; /* High digit of returned quotient */
+
+    uint64_t tnhi, tnlo, tnlo1, tnlo0, vn1, vn0; /* Normalized forms of (parts of) t and v */
+    uint64_t rnhilo1; /* Remainder after dividing 1st 3 digits of t by v */
+    uint64_t qhat1, qhat0; /* Estimated quotient digits */
+    uint64_t rhat; /* Remainder of estimated quotient digit */
+    uint64_t tmp;
+    int norm_shift;
+
+    /* Deal with single-digit (i.e. 32-bit) divisor here */
+    if (v < b) {
+        qhi = t->hi / v;
+        tmp = ((t->hi - qhi * v) << 32) + (t->lo >> 32); /* k * b + tlo1 */
+        qhat1 = tmp / v;
+        tmp = ((tmp - qhat1 * v) << 32) + (t->lo & MASK32); /* k * b + tlo0 */
+        qhat0 = tmp / v;
+        t->lo = (qhat1 << 32) | qhat0;
+        t->hi = qhi;
+        return tmp - qhat0 * v;
+    }
+
+    /* Main algorithm requires t->hi < v */
+    if (t->hi >= v) {
+        qhi = t->hi / v;
+        t->hi %= v;
+    }
+
+    /* Normalize by shifting v left just enough so that its high-order
+     * bit is on, and shift t left the same amount. Note don't need extra
+     * high-end digit for dividend as t->hi < v */
+
+    norm_shift = clz_u64(v);
+    v <<= norm_shift;
+    vn1 = v >> 32;
+    vn0 = v & MASK32;
+
+    if (norm_shift > 0) {
+        tnhi = (t->hi << norm_shift) | (t->lo >> (64 - norm_shift));
+        tnlo = t->lo << norm_shift;
+    } else {
+        tnhi = t->hi;
+        tnlo = t->lo;
+    }
+
+    tnlo1 = tnlo >> 32;
+    tnlo0 = tnlo & MASK32;
+
+    /* Compute qhat1 estimate */
+
+    qhat1 = tnhi / vn1; /* Divide first digit of v into first 2 digits of t */
+    rhat = tnhi % vn1;
+
+    /* Loop until qhat1 one digit and <= (rhat * b + 3rd digit of t) / vn0 */
+    for (tmp = qhat1 * vn0; qhat1 >= b || tmp > (rhat << 32) + tnlo1; tmp -= vn0) {
+        --qhat1;
+        rhat += vn1;
+        if (rhat >= b) { /* Must check here as (rhat << 32) would overflow */
+            break; /* qhat1 * vn0 < b * b (since vn0 < b) */
+        }
+    }
+    /* Note qhat1 will be exact as have fully divided by 2-digit divisor
+     * (can only be too high by 1 (and require "add back" step) if divisor at least 3 digits) */
+
+    rnhilo1 = (tnhi << 32) + tnlo1 - (qhat1 * v); /* Note high digit (if any) of both tnhi and (qhat1 * v) shifted out */
+
+    /* Compute qhat0 estimate */
+
+    qhat0 = rnhilo1 / vn1; /* Divide first digit of v into 2-digit remains of first 3 digits of t */
+    rhat = rnhilo1 % vn1;
+
+    /* Loop until qhat0 one digit and <= (rhat * b + 4th digit of t) / vn0 */
+    for (tmp = qhat0 * vn0; qhat0 >= b || tmp > (rhat << 32) + tnlo0; tmp -= vn0) {
+        --qhat0;
+        rhat += vn1;
+        if (rhat >= b) {
+            break;
+        }
+    }
+    /* Similarly qhat0 will be exact */
+
+    t->lo = (qhat1 << 32) | qhat0;
+    t->hi = qhi;
+
+    /* Unnormalize remainder */
+    return ((rnhilo1 << 32) + tnlo0 - (qhat0 * v)) >> norm_shift;
+}
+
+/* Unset a bit (zero-based) */
+INTERNAL void large_unset_bit(large_int *t, int bit) {
+    if (bit < 64) {
+        t->lo &= ~(((uint64_t) 1) << bit);
+    } else if (bit < 128) {
+        t->hi &= ~(((uint64_t) 1) << (bit - 64));
+    }
+}
+
+/* Ouput large_int into an unsigned int array of size `size`, each element containing `bits` bits */
+INTERNAL void large_uint_array(const large_int *t, unsigned int *uint_array, int size, int bits) {
+    int i, j;
+    uint64_t mask;
+    if (bits <= 0) {
+        bits = 8;
+    } else if (bits > 32) {
+        bits = 32;
+    }
+    mask = ~(((uint64_t) -1) << bits);
+    for (i = 0, j = 0; i < size && j < 64; i++, j += bits) {
+        uint_array[size - 1 - i] = (t->lo >> j) & mask; /* Little-endian order */
+    }
+    if (i < size) {
+        if (j != 64) {
+            j -= 64;
+            /* (first j bits of t->hi) << (bits - j) | (last (bits - j) bits of t->lo) */
+            uint_array[size - i] = ((t->hi & ~((((uint64_t) -1) << j))) << (bits - j)) | (t->lo >> (64 - (bits - j)) & mask);
+        } else {
+            j = 0;
+        }
+        for (; i < size && j < 64; i++, j += bits) {
+            uint_array[size - 1 - i] = (t->hi >> j) & mask;
+        }
+        if (i < size && j != 128) {
+            uint_array[size - 1 - i] = t->hi >> (j - bits) & mask;
+        }
+    }
 }
 
-void hex_dump(short int input_buffer[]) 
-{
-	int i, digit, byte_space;
-
-	byte_space = 1;
-	for(i = 100; i >= 0; i-=4) {
-		digit = 0;
-		digit += 1 * input_buffer[i];
-		digit += 2 * input_buffer[i + 1];
-		digit += 4 * input_buffer[i + 2];
-		digit += 8 * input_buffer[i + 3];
-
-		switch(digit) {
-			case 0: printf("0"); break;
-			case 1: printf("1"); break;
-			case 2: printf("2"); break;
-			case 3: printf("3"); break;
-			case 4: printf("4"); break;
-			case 5: printf("5"); break;
-			case 6: printf("6"); break;
-			case 7: printf("7"); break;
-			case 8: printf("8"); break;
-			case 9: printf("9"); break;
-			case 10: printf("A"); break;
-			case 11: printf("B"); break;
-			case 12: printf("C"); break;
-			case 13: printf("D"); break;
-			case 14: printf("E"); break;
-			case 15: printf("F"); break;
-		}
-		if(byte_space == 1) {
-			byte_space = 0;
-		} else {
-			byte_space = 1;
-			printf(" ");
-		}
-	}
-	printf("\n");
+/* As `large_uint_array()` above, except output to unsigned char array */
+INTERNAL void large_uchar_array(const large_int *t, unsigned char *uchar_array, int size, int bits) {
+    int i;
+#ifndef _MSC_VER
+    unsigned int uint_array[size ? size : 1]; /* Avoid run-time warning if size is 0 */
+#else
+    unsigned int *uint_array = (unsigned int *) _alloca((size ? size : 1) * sizeof(unsigned int));
+#endif
+
+    large_uint_array(t, uint_array, size, bits);
+
+    for (i = 0; i < size; i++) {
+        uchar_array[i] = uint_array[i];
+    }
+}
+
+/* Output formatted large_int to stdout */
+INTERNAL void large_print(large_int *t) {
+    char buf[35]; /* 2 (0x) + 32 (hex) + 1 */
+
+    puts(large_dump(t, buf));
+}
+
+/* Format large_int into buffer, which should be at least 35 chars in size */
+INTERNAL char *large_dump(large_int *t, char *buf) {
+    unsigned int tlo1 = large_lo(t) >> 32;
+    unsigned int tlo0 = large_lo(t) & MASK32;
+    unsigned int thi1 = large_hi(t) >> 32;
+    unsigned int thi0 = large_hi(t) & MASK32;
+
+    if (thi1) {
+        sprintf(buf, "0x%X%08X%08X%08X", thi1, thi0, tlo1, tlo0);
+    } else if (thi0) {
+        sprintf(buf, "0x%X%08X%08X", thi0, tlo1, tlo0);
+    } else if (tlo1) {
+        sprintf(buf, "0x%X%08X", tlo1, tlo0);
+    } else {
+        sprintf(buf, "0x%X", tlo0);
+    }
+    return buf;
 }