backend/large.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317

/* large.c - Handles binary manipulation of large numbers */

/*
    libzint - the open source barcode library
    Copyright (C) 2008 - 2020 Robin Stuart <rstuart114@gmail.com>

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
    are met:

    1. Redistributions of source code must retain the above copyright
       notice, this list of conditions and the following disclaimer.
    2. Redistributions in binary form must reproduce the above copyright
       notice, this list of conditions and the following disclaimer in the
       documentation and/or other materials provided with the distribution.
    3. Neither the name of the project nor the names of its contributors
       may be used to endorse or promote products derived from this software
       without specific prior written permission.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
    ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
    DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
    OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
    OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    SUCH DAMAGE.
 */
/* vim: set ts=4 sw=4 et : */

/* `large_mul_u64()` and `large_div_u64()` are adapted from articles by F. W. Jacob
 *   https://www.codeproject.com/Tips/618570/UInt-Multiplication-Squaring
 *   "This article, along with any associated source code and files, is licensed under The BSD License"
 *   http://www.codeproject.com/Tips/785014/UInt-Division-Modulus
 *   "This article, along with any associated source code and files, is licensed under The BSD License"
 *
 * These in turn are based on Hacker's Delight (2nd Edition, 2012) by Henry S. Warren, Jr.
 *   "You are free to use, copy, and distribute any of the code on this web site, whether modified by you or not."
 *   https://web.archive.org/web/20190716204559/http://www.hackersdelight.org/permissions.htm
 *
 * `clz_u64()` and other bits and pieces are adapted from r128.h by Alan Hickman (fahickman)
 *   https://github.com/fahickman/r128/blob/master/r128.h
 *   "R128 is released into the public domain. See LICENSE for details." LICENSE is The Unlicense.
 */
#include <stdio.h>
#ifdef _MSC_VER
#include <malloc.h>
#endif
#include "common.h"
#include "large.h"

#define MASK32  0xFFFFFFFF

/* Convert decimal string `s` of (at most) length `length` to 64-bit and place in 128-bit `t` */
INTERNAL void large_load_str_u64(large_int *t, const unsigned char *s, int length) {
    uint64_t val = 0;
    const unsigned char *se = s + length;
    for (; s < se && *s >= '0' && *s <= '9'; s++) {
        val *= 10;
        val += *s - '0';
    }
    t->lo = val;
    t->hi = 0;
}

/* Add 128-bit `s` to 128-bit `t` */
INTERNAL void large_add(large_int *t, const large_int *s) {
    t->lo += s->lo;
    t->hi += s->hi + (t->lo < s->lo);
}

/* Add 64-bit `s` to 128-bit `t` */
INTERNAL void large_add_u64(large_int *t, uint64_t s) {
    t->lo += s;
    if (t->lo < s) {
        t->hi++;
    }
}

/* Subtract 64-bit `s` from 128-bit `t` */
INTERNAL void large_sub_u64(large_int *t, uint64_t s) {
    uint64_t r = t->lo - s;
    if (r > t->lo) {
        t->hi--;
    }
    t->lo = r;
}

/* Multiply 128-bit `t` by 64-bit `s`
 * See Jacob `mult64to128()` and Warren Section 8-2
 * Note '0' denotes low 32-bits, '1' high 32-bits
 * if   p00 == s0 * tlo0
 *      k00 == carry of p00
 *      p01 == s0 * tlo1
 *      k01 == carry of (p01 + k00)
 *      p10 == s1 * tlo0
 *      k10 == carry of p10
 *      p11 == s1 * tlo1 (unmasked, i.e. including unshifted carry if any)
 * then t->lo == (p01 + p10 + k00) << 32 + p00
 * and  t->hi == p11 + k10 + k01 + thi * s
 *
 *      (thi)      tlo1      tlo0
 * x                 s1        s0
 *      -------------------------
 *                            p00
 *      k01        p01 + k00
 *                 p10
 *      p11 + k10
 */
INTERNAL void large_mul_u64(large_int *t, uint64_t s) {
    uint64_t thi = t->hi;
    uint64_t tlo0 = t->lo & MASK32;
    uint64_t tlo1 = t->lo >> 32;

    uint64_t s0 = s & MASK32;
    uint64_t s1 = s >> 32;

    uint64_t tmp = s0 * tlo0; /* p00 (unmasked) */
    uint64_t p00 = tmp & MASK32;
    uint64_t k10;

    tmp = (s1 * tlo0) + (tmp >> 32); /* (p10 + k00) (p10 unmasked) */
    k10 = tmp >> 32;

    tmp = (s0 * tlo1) + (tmp & MASK32); /* (p01 + p10 + k00) (p01 unmasked) */

    t->lo = (tmp << 32) + p00; /* (p01 + p10 + k00) << 32 + p00 (note any carry from unmasked p01 shifted out) */
    t->hi = (s1 * tlo1) + k10 + (tmp >> 32) + thi * s; /* p11 + k10 + k01 + thi * s */
}

/* Count leading zeroes. See Hickman `r128__clz64()` */
STATIC_UNLESS_ZINT_TEST int clz_u64(uint64_t x) {
   uint64_t n = 64, y;
   y = x >> 32; if (y) { n -= 32; x = y; }
   y = x >> 16; if (y) { n -= 16; x = y; }
   y = x >>  8; if (y) { n -=  8; x = y; }
   y = x >>  4; if (y) { n -=  4; x = y; }
   y = x >>  2; if (y) { n -=  2; x = y; }
   y = x >>  1; if (y) { n -=  1; x = y; }
   return (int) (n - x);
}

/* Divide 128-bit dividend `t` by 64-bit divisor `v`
 * See Jacob `divmod128by128/64()` and Warren Section 9–2 (divmu64.c.txt)
 * Note digits are 32-bit parts */
INTERNAL uint64_t large_div_u64(large_int *t, uint64_t v) {
    const uint64_t b = 0x100000000; /* Number base (2**32) */
    uint64_t qhi = 0; /* High digit of returned quotient */

    uint64_t tnhi, tnlo, tnlo1, tnlo0, vn1, vn0; /* Normalized forms of (parts of) t and v */
    uint64_t rnhilo1; /* Remainder after dividing 1st 3 digits of t by v */
    uint64_t qhat1, qhat0; /* Estimated quotient digits */
    uint64_t rhat; /* Remainder of estimated quotient digit */
    uint64_t tmp;
    int norm_shift;

    /* Deal with single-digit (i.e. 32-bit) divisor here */
    if (v < b) {
        qhi = t->hi / v;
        tmp = ((t->hi - qhi * v) << 32) + (t->lo >> 32); /* k * b + tlo1 */
        qhat1 = tmp / v;
        tmp = ((tmp - qhat1 * v) << 32) + (t->lo & MASK32); /* k * b + tlo0 */
        qhat0 = tmp / v;
        t->lo = (qhat1 << 32) | qhat0;
        t->hi = qhi;
        return tmp - qhat0 * v;
    }

    /* Main algorithm requires t->hi < v */
    if (t->hi >= v) {
        qhi = t->hi / v;
        t->hi %= v;
    }

    /* Normalize by shifting v left just enough so that its high-order
     * bit is on, and shift t left the same amount. Note don't need extra
     * high-end digit for dividend as t->hi < v */

    norm_shift = clz_u64(v);
    v <<= norm_shift;
    vn1 = v >> 32;
    vn0 = v & MASK32;

    if (norm_shift > 0) {
        tnhi = (t->hi << norm_shift) | (t->lo >> (64 - norm_shift));
        tnlo = t->lo << norm_shift;
    } else {
        tnhi = t->hi;
        tnlo = t->lo;
    }

    tnlo1 = tnlo >> 32;
    tnlo0 = tnlo & MASK32;

    /* Compute qhat1 estimate */

    qhat1 = tnhi / vn1; /* Divide first digit of v into first 2 digits of t */
    rhat = tnhi % vn1;

    /* Loop until qhat1 one digit and <= (rhat * b + 3rd digit of t) / vn0 */
    for (tmp = qhat1 * vn0; qhat1 >= b || tmp > (rhat << 32) + tnlo1; tmp -= vn0) {
        --qhat1;
        rhat += vn1;
        if (rhat >= b) { /* Must check here as (rhat << 32) would overflow */
            break; /* qhat1 * vn0 < b * b (since vn0 < b) */
        }
    }
    /* Note qhat1 will be exact as have fully divided by 2-digit divisor
     * (can only be too high by 1 (and require "add back" step) if divisor at least 3 digits) */

    rnhilo1 = (tnhi << 32) + tnlo1 - (qhat1 * v); /* Note high digit (if any) of both tnhi and (qhat1 * v) shifted out */

    /* Compute qhat0 estimate */

    qhat0 = rnhilo1 / vn1; /* Divide first digit of v into 2-digit remains of first 3 digits of t */
    rhat = rnhilo1 % vn1;

    /* Loop until qhat0 one digit and <= (rhat * b + 4th digit of t) / vn0 */
    for (tmp = qhat0 * vn0; qhat0 >= b || tmp > (rhat << 32) + tnlo0; tmp -= vn0) {
        --qhat0;
        rhat += vn1;
        if (rhat >= b) {
            break;
        }
    }
    /* Similarly qhat0 will be exact */

    t->lo = (qhat1 << 32) | qhat0;
    t->hi = qhi;

    /* Unnormalize remainder */
    return ((rnhilo1 << 32) + tnlo0 - (qhat0 * v)) >> norm_shift;
}

/* Unset a bit (zero-based) */
INTERNAL void large_unset_bit(large_int *t, int bit) {
    if (bit < 64) {
        t->lo &= ~(((uint64_t) 1) << bit);
    } else if (bit < 128) {
        t->hi &= ~(((uint64_t) 1) << (bit - 64));
    }
}

/* Ouput large_int into an unsigned int array of size `size`, each element containing `bits` bits */
INTERNAL void large_uint_array(const large_int *t, unsigned int *uint_array, int size, int bits) {
    int i, j;
    uint64_t mask;
    if (bits <= 0) {
        bits = 8;
    } else if (bits > 32) {
        bits = 32;
    }
    mask = ~(((uint64_t) -1) << bits);
    for (i = 0, j = 0; i < size && j < 64; i++, j += bits) {
        uint_array[size - 1 - i] = (t->lo >> j) & mask; /* Little-endian order */
    }
    if (i < size) {
        if (j != 64) {
            j -= 64;
            /* (first j bits of t->hi) << (bits - j) | (last (bits - j) bits of t->lo) */
            uint_array[size - i] = ((t->hi & ~((((uint64_t) -1) << j))) << (bits - j)) | (t->lo >> (64 - (bits - j)) & mask);
        } else {
            j = 0;
        }
        for (; i < size && j < 64; i++, j += bits) {
            uint_array[size - 1 - i] = (t->hi >> j) & mask;
        }
        if (i < size && j != 128) {
            uint_array[size - 1 - i] = t->hi >> (j - bits) & mask;
        }
    }
}

/* As `large_uint_array()` above, except output to unsigned char array */
INTERNAL void large_uchar_array(const large_int *t, unsigned char *uchar_array, int size, int bits) {
    int i;
#ifndef _MSC_VER
    unsigned int uint_array[size ? size : 1]; /* Avoid run-time warning if size is 0 */
#else
    unsigned int *uint_array = (unsigned int *) _alloca((size ? size : 1) * sizeof(unsigned int));
#endif

    large_uint_array(t, uint_array, size, bits);

    for (i = 0; i < size; i++) {
        uchar_array[i] = uint_array[i];
    }
}

/* Output formatted large_int to stdout */
INTERNAL void large_print(large_int *t) {
    char buf[35]; /* 2 (0x) + 32 (hex) + 1 */

    puts(large_dump(t, buf));
}

/* Format large_int into buffer, which should be at least 35 chars in size */
INTERNAL char *large_dump(large_int *t, char *buf) {
    unsigned int tlo1 = large_lo(t) >> 32;
    unsigned int tlo0 = large_lo(t) & MASK32;
    unsigned int thi1 = large_hi(t) >> 32;
    unsigned int thi0 = large_hi(t) & MASK32;

    if (thi1) {
        sprintf(buf, "0x%X%08X%08X%08X", thi1, thi0, tlo1, tlo0);
    } else if (thi0) {
        sprintf(buf, "0x%X%08X%08X", thi0, tlo1, tlo0);
    } else if (tlo1) {
        sprintf(buf, "0x%X%08X", tlo1, tlo0);
    } else {
        sprintf(buf, "0x%X", tlo0);
    }
    return buf;
}