csecp256k1

Haskell FFI bindings to bitcoin-core/secp256k1 (docs.ppad.tech/csecp256k1).
git clone git://git.ppad.tech/csecp256k1.git
Log | Files | Refs | README | LICENSE

field_10x26_impl.h (53716B)


      1 /***********************************************************************
      2  * Copyright (c) 2013, 2014 Pieter Wuille                              *
      3  * Distributed under the MIT software license, see the accompanying    *
      4  * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
      5  ***********************************************************************/
      6 
      7 #ifndef SECP256K1_FIELD_REPR_IMPL_H
      8 #define SECP256K1_FIELD_REPR_IMPL_H
      9 
     10 #include "checkmem.h"
     11 #include "util.h"
     12 #include "field.h"
     13 #include "modinv32_impl.h"
     14 
     15 #ifdef VERIFY
     16 static void haskellsecp256k1_v0_1_0_fe_impl_verify(const haskellsecp256k1_v0_1_0_fe *a) {
     17     const uint32_t *d = a->n;
     18     int m = a->normalized ? 1 : 2 * a->magnitude;
     19     VERIFY_CHECK(d[0] <= 0x3FFFFFFUL * m);
     20     VERIFY_CHECK(d[1] <= 0x3FFFFFFUL * m);
     21     VERIFY_CHECK(d[2] <= 0x3FFFFFFUL * m);
     22     VERIFY_CHECK(d[3] <= 0x3FFFFFFUL * m);
     23     VERIFY_CHECK(d[4] <= 0x3FFFFFFUL * m);
     24     VERIFY_CHECK(d[5] <= 0x3FFFFFFUL * m);
     25     VERIFY_CHECK(d[6] <= 0x3FFFFFFUL * m);
     26     VERIFY_CHECK(d[7] <= 0x3FFFFFFUL * m);
     27     VERIFY_CHECK(d[8] <= 0x3FFFFFFUL * m);
     28     VERIFY_CHECK(d[9] <= 0x03FFFFFUL * m);
     29     if (a->normalized) {
     30         if (d[9] == 0x03FFFFFUL) {
     31             uint32_t mid = d[8] & d[7] & d[6] & d[5] & d[4] & d[3] & d[2];
     32             if (mid == 0x3FFFFFFUL) {
     33                 VERIFY_CHECK((d[1] + 0x40UL + ((d[0] + 0x3D1UL) >> 26)) <= 0x3FFFFFFUL);
     34             }
     35         }
     36     }
     37 }
     38 #endif
     39 
     40 static void haskellsecp256k1_v0_1_0_fe_impl_get_bounds(haskellsecp256k1_v0_1_0_fe *r, int m) {
     41     r->n[0] = 0x3FFFFFFUL * 2 * m;
     42     r->n[1] = 0x3FFFFFFUL * 2 * m;
     43     r->n[2] = 0x3FFFFFFUL * 2 * m;
     44     r->n[3] = 0x3FFFFFFUL * 2 * m;
     45     r->n[4] = 0x3FFFFFFUL * 2 * m;
     46     r->n[5] = 0x3FFFFFFUL * 2 * m;
     47     r->n[6] = 0x3FFFFFFUL * 2 * m;
     48     r->n[7] = 0x3FFFFFFUL * 2 * m;
     49     r->n[8] = 0x3FFFFFFUL * 2 * m;
     50     r->n[9] = 0x03FFFFFUL * 2 * m;
     51 }
     52 
     53 static void haskellsecp256k1_v0_1_0_fe_impl_normalize(haskellsecp256k1_v0_1_0_fe *r) {
     54     uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
     55              t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
     56 
     57     /* Reduce t9 at the start so there will be at most a single carry from the first pass */
     58     uint32_t m;
     59     uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
     60 
     61     /* The first pass ensures the magnitude is 1, ... */
     62     t0 += x * 0x3D1UL; t1 += (x << 6);
     63     t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
     64     t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
     65     t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; m = t2;
     66     t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; m &= t3;
     67     t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; m &= t4;
     68     t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; m &= t5;
     69     t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; m &= t6;
     70     t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; m &= t7;
     71     t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; m &= t8;
     72 
     73     /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
     74     VERIFY_CHECK(t9 >> 23 == 0);
     75 
     76     /* At most a single final reduction is needed; check if the value is >= the field characteristic */
     77     x = (t9 >> 22) | ((t9 == 0x03FFFFFUL) & (m == 0x3FFFFFFUL)
     78         & ((t1 + 0x40UL + ((t0 + 0x3D1UL) >> 26)) > 0x3FFFFFFUL));
     79 
     80     /* Apply the final reduction (for constant-time behaviour, we do it always) */
     81     t0 += x * 0x3D1UL; t1 += (x << 6);
     82     t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
     83     t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
     84     t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL;
     85     t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL;
     86     t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL;
     87     t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL;
     88     t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL;
     89     t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL;
     90     t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL;
     91 
     92     /* If t9 didn't carry to bit 22 already, then it should have after any final reduction */
     93     VERIFY_CHECK(t9 >> 22 == x);
     94 
     95     /* Mask off the possible multiple of 2^256 from the final reduction */
     96     t9 &= 0x03FFFFFUL;
     97 
     98     r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
     99     r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9;
    100 }
    101 
    102 static void haskellsecp256k1_v0_1_0_fe_impl_normalize_weak(haskellsecp256k1_v0_1_0_fe *r) {
    103     uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
    104              t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
    105 
    106     /* Reduce t9 at the start so there will be at most a single carry from the first pass */
    107     uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
    108 
    109     /* The first pass ensures the magnitude is 1, ... */
    110     t0 += x * 0x3D1UL; t1 += (x << 6);
    111     t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
    112     t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
    113     t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL;
    114     t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL;
    115     t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL;
    116     t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL;
    117     t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL;
    118     t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL;
    119     t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL;
    120 
    121     /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
    122     VERIFY_CHECK(t9 >> 23 == 0);
    123 
    124     r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
    125     r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9;
    126 }
    127 
    128 static void haskellsecp256k1_v0_1_0_fe_impl_normalize_var(haskellsecp256k1_v0_1_0_fe *r) {
    129     uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
    130              t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
    131 
    132     /* Reduce t9 at the start so there will be at most a single carry from the first pass */
    133     uint32_t m;
    134     uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
    135 
    136     /* The first pass ensures the magnitude is 1, ... */
    137     t0 += x * 0x3D1UL; t1 += (x << 6);
    138     t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
    139     t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
    140     t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; m = t2;
    141     t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; m &= t3;
    142     t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; m &= t4;
    143     t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; m &= t5;
    144     t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; m &= t6;
    145     t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; m &= t7;
    146     t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; m &= t8;
    147 
    148     /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
    149     VERIFY_CHECK(t9 >> 23 == 0);
    150 
    151     /* At most a single final reduction is needed; check if the value is >= the field characteristic */
    152     x = (t9 >> 22) | ((t9 == 0x03FFFFFUL) & (m == 0x3FFFFFFUL)
    153         & ((t1 + 0x40UL + ((t0 + 0x3D1UL) >> 26)) > 0x3FFFFFFUL));
    154 
    155     if (x) {
    156         t0 += 0x3D1UL; t1 += (x << 6);
    157         t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
    158         t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
    159         t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL;
    160         t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL;
    161         t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL;
    162         t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL;
    163         t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL;
    164         t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL;
    165         t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL;
    166 
    167         /* If t9 didn't carry to bit 22 already, then it should have after any final reduction */
    168         VERIFY_CHECK(t9 >> 22 == x);
    169 
    170         /* Mask off the possible multiple of 2^256 from the final reduction */
    171         t9 &= 0x03FFFFFUL;
    172     }
    173 
    174     r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
    175     r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9;
    176 }
    177 
    178 static int haskellsecp256k1_v0_1_0_fe_impl_normalizes_to_zero(const haskellsecp256k1_v0_1_0_fe *r) {
    179     uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
    180              t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
    181 
    182     /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */
    183     uint32_t z0, z1;
    184 
    185     /* Reduce t9 at the start so there will be at most a single carry from the first pass */
    186     uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
    187 
    188     /* The first pass ensures the magnitude is 1, ... */
    189     t0 += x * 0x3D1UL; t1 += (x << 6);
    190     t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; z0  = t0; z1  = t0 ^ 0x3D0UL;
    191     t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; z0 |= t1; z1 &= t1 ^ 0x40UL;
    192     t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 |= t2; z1 &= t2;
    193     t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3; z1 &= t3;
    194     t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4; z1 &= t4;
    195     t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5; z1 &= t5;
    196     t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6; z1 &= t6;
    197     t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7; z1 &= t7;
    198     t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8; z1 &= t8;
    199                                          z0 |= t9; z1 &= t9 ^ 0x3C00000UL;
    200 
    201     /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
    202     VERIFY_CHECK(t9 >> 23 == 0);
    203 
    204     return (z0 == 0) | (z1 == 0x3FFFFFFUL);
    205 }
    206 
    207 static int haskellsecp256k1_v0_1_0_fe_impl_normalizes_to_zero_var(const haskellsecp256k1_v0_1_0_fe *r) {
    208     uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;
    209     uint32_t z0, z1;
    210     uint32_t x;
    211 
    212     t0 = r->n[0];
    213     t9 = r->n[9];
    214 
    215     /* Reduce t9 at the start so there will be at most a single carry from the first pass */
    216     x = t9 >> 22;
    217 
    218     /* The first pass ensures the magnitude is 1, ... */
    219     t0 += x * 0x3D1UL;
    220 
    221     /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */
    222     z0 = t0 & 0x3FFFFFFUL;
    223     z1 = z0 ^ 0x3D0UL;
    224 
    225     /* Fast return path should catch the majority of cases */
    226     if ((z0 != 0UL) & (z1 != 0x3FFFFFFUL)) {
    227         return 0;
    228     }
    229 
    230     t1 = r->n[1];
    231     t2 = r->n[2];
    232     t3 = r->n[3];
    233     t4 = r->n[4];
    234     t5 = r->n[5];
    235     t6 = r->n[6];
    236     t7 = r->n[7];
    237     t8 = r->n[8];
    238 
    239     t9 &= 0x03FFFFFUL;
    240     t1 += (x << 6);
    241 
    242     t1 += (t0 >> 26);
    243     t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; z0 |= t1; z1 &= t1 ^ 0x40UL;
    244     t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 |= t2; z1 &= t2;
    245     t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3; z1 &= t3;
    246     t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4; z1 &= t4;
    247     t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5; z1 &= t5;
    248     t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6; z1 &= t6;
    249     t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7; z1 &= t7;
    250     t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8; z1 &= t8;
    251                                          z0 |= t9; z1 &= t9 ^ 0x3C00000UL;
    252 
    253     /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
    254     VERIFY_CHECK(t9 >> 23 == 0);
    255 
    256     return (z0 == 0) | (z1 == 0x3FFFFFFUL);
    257 }
    258 
    259 SECP256K1_INLINE static void haskellsecp256k1_v0_1_0_fe_impl_set_int(haskellsecp256k1_v0_1_0_fe *r, int a) {
    260     r->n[0] = a;
    261     r->n[1] = r->n[2] = r->n[3] = r->n[4] = r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0;
    262 }
    263 
    264 SECP256K1_INLINE static int haskellsecp256k1_v0_1_0_fe_impl_is_zero(const haskellsecp256k1_v0_1_0_fe *a) {
    265     const uint32_t *t = a->n;
    266     return (t[0] | t[1] | t[2] | t[3] | t[4] | t[5] | t[6] | t[7] | t[8] | t[9]) == 0;
    267 }
    268 
    269 SECP256K1_INLINE static int haskellsecp256k1_v0_1_0_fe_impl_is_odd(const haskellsecp256k1_v0_1_0_fe *a) {
    270     return a->n[0] & 1;
    271 }
    272 
    273 SECP256K1_INLINE static void haskellsecp256k1_v0_1_0_fe_impl_clear(haskellsecp256k1_v0_1_0_fe *a) {
    274     int i;
    275     for (i=0; i<10; i++) {
    276         a->n[i] = 0;
    277     }
    278 }
    279 
    280 static int haskellsecp256k1_v0_1_0_fe_impl_cmp_var(const haskellsecp256k1_v0_1_0_fe *a, const haskellsecp256k1_v0_1_0_fe *b) {
    281     int i;
    282     for (i = 9; i >= 0; i--) {
    283         if (a->n[i] > b->n[i]) {
    284             return 1;
    285         }
    286         if (a->n[i] < b->n[i]) {
    287             return -1;
    288         }
    289     }
    290     return 0;
    291 }
    292 
    293 static void haskellsecp256k1_v0_1_0_fe_impl_set_b32_mod(haskellsecp256k1_v0_1_0_fe *r, const unsigned char *a) {
    294     r->n[0] = (uint32_t)a[31] | ((uint32_t)a[30] << 8) | ((uint32_t)a[29] << 16) | ((uint32_t)(a[28] & 0x3) << 24);
    295     r->n[1] = (uint32_t)((a[28] >> 2) & 0x3f) | ((uint32_t)a[27] << 6) | ((uint32_t)a[26] << 14) | ((uint32_t)(a[25] & 0xf) << 22);
    296     r->n[2] = (uint32_t)((a[25] >> 4) & 0xf) | ((uint32_t)a[24] << 4) | ((uint32_t)a[23] << 12) | ((uint32_t)(a[22] & 0x3f) << 20);
    297     r->n[3] = (uint32_t)((a[22] >> 6) & 0x3) | ((uint32_t)a[21] << 2) | ((uint32_t)a[20] << 10) | ((uint32_t)a[19] << 18);
    298     r->n[4] = (uint32_t)a[18] | ((uint32_t)a[17] << 8) | ((uint32_t)a[16] << 16) | ((uint32_t)(a[15] & 0x3) << 24);
    299     r->n[5] = (uint32_t)((a[15] >> 2) & 0x3f) | ((uint32_t)a[14] << 6) | ((uint32_t)a[13] << 14) | ((uint32_t)(a[12] & 0xf) << 22);
    300     r->n[6] = (uint32_t)((a[12] >> 4) & 0xf) | ((uint32_t)a[11] << 4) | ((uint32_t)a[10] << 12) | ((uint32_t)(a[9] & 0x3f) << 20);
    301     r->n[7] = (uint32_t)((a[9] >> 6) & 0x3) | ((uint32_t)a[8] << 2) | ((uint32_t)a[7] << 10) | ((uint32_t)a[6] << 18);
    302     r->n[8] = (uint32_t)a[5] | ((uint32_t)a[4] << 8) | ((uint32_t)a[3] << 16) | ((uint32_t)(a[2] & 0x3) << 24);
    303     r->n[9] = (uint32_t)((a[2] >> 2) & 0x3f) | ((uint32_t)a[1] << 6) | ((uint32_t)a[0] << 14);
    304 }
    305 
    306 static int haskellsecp256k1_v0_1_0_fe_impl_set_b32_limit(haskellsecp256k1_v0_1_0_fe *r, const unsigned char *a) {
    307     haskellsecp256k1_v0_1_0_fe_impl_set_b32_mod(r, a);
    308     return !((r->n[9] == 0x3FFFFFUL) & ((r->n[8] & r->n[7] & r->n[6] & r->n[5] & r->n[4] & r->n[3] & r->n[2]) == 0x3FFFFFFUL) & ((r->n[1] + 0x40UL + ((r->n[0] + 0x3D1UL) >> 26)) > 0x3FFFFFFUL));
    309 }
    310 
    311 /** Convert a field element to a 32-byte big endian value. Requires the input to be normalized */
    312 static void haskellsecp256k1_v0_1_0_fe_impl_get_b32(unsigned char *r, const haskellsecp256k1_v0_1_0_fe *a) {
    313     r[0] = (a->n[9] >> 14) & 0xff;
    314     r[1] = (a->n[9] >> 6) & 0xff;
    315     r[2] = ((a->n[9] & 0x3F) << 2) | ((a->n[8] >> 24) & 0x3);
    316     r[3] = (a->n[8] >> 16) & 0xff;
    317     r[4] = (a->n[8] >> 8) & 0xff;
    318     r[5] = a->n[8] & 0xff;
    319     r[6] = (a->n[7] >> 18) & 0xff;
    320     r[7] = (a->n[7] >> 10) & 0xff;
    321     r[8] = (a->n[7] >> 2) & 0xff;
    322     r[9] = ((a->n[7] & 0x3) << 6) | ((a->n[6] >> 20) & 0x3f);
    323     r[10] = (a->n[6] >> 12) & 0xff;
    324     r[11] = (a->n[6] >> 4) & 0xff;
    325     r[12] = ((a->n[6] & 0xf) << 4) | ((a->n[5] >> 22) & 0xf);
    326     r[13] = (a->n[5] >> 14) & 0xff;
    327     r[14] = (a->n[5] >> 6) & 0xff;
    328     r[15] = ((a->n[5] & 0x3f) << 2) | ((a->n[4] >> 24) & 0x3);
    329     r[16] = (a->n[4] >> 16) & 0xff;
    330     r[17] = (a->n[4] >> 8) & 0xff;
    331     r[18] = a->n[4] & 0xff;
    332     r[19] = (a->n[3] >> 18) & 0xff;
    333     r[20] = (a->n[3] >> 10) & 0xff;
    334     r[21] = (a->n[3] >> 2) & 0xff;
    335     r[22] = ((a->n[3] & 0x3) << 6) | ((a->n[2] >> 20) & 0x3f);
    336     r[23] = (a->n[2] >> 12) & 0xff;
    337     r[24] = (a->n[2] >> 4) & 0xff;
    338     r[25] = ((a->n[2] & 0xf) << 4) | ((a->n[1] >> 22) & 0xf);
    339     r[26] = (a->n[1] >> 14) & 0xff;
    340     r[27] = (a->n[1] >> 6) & 0xff;
    341     r[28] = ((a->n[1] & 0x3f) << 2) | ((a->n[0] >> 24) & 0x3);
    342     r[29] = (a->n[0] >> 16) & 0xff;
    343     r[30] = (a->n[0] >> 8) & 0xff;
    344     r[31] = a->n[0] & 0xff;
    345 }
    346 
    347 SECP256K1_INLINE static void haskellsecp256k1_v0_1_0_fe_impl_negate_unchecked(haskellsecp256k1_v0_1_0_fe *r, const haskellsecp256k1_v0_1_0_fe *a, int m) {
    348     /* For all legal values of m (0..31), the following properties hold: */
    349     VERIFY_CHECK(0x3FFFC2FUL * 2 * (m + 1) >= 0x3FFFFFFUL * 2 * m);
    350     VERIFY_CHECK(0x3FFFFBFUL * 2 * (m + 1) >= 0x3FFFFFFUL * 2 * m);
    351     VERIFY_CHECK(0x3FFFFFFUL * 2 * (m + 1) >= 0x3FFFFFFUL * 2 * m);
    352     VERIFY_CHECK(0x03FFFFFUL * 2 * (m + 1) >= 0x03FFFFFUL * 2 * m);
    353 
    354     /* Due to the properties above, the left hand in the subtractions below is never less than
    355      * the right hand. */
    356     r->n[0] = 0x3FFFC2FUL * 2 * (m + 1) - a->n[0];
    357     r->n[1] = 0x3FFFFBFUL * 2 * (m + 1) - a->n[1];
    358     r->n[2] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[2];
    359     r->n[3] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[3];
    360     r->n[4] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[4];
    361     r->n[5] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[5];
    362     r->n[6] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[6];
    363     r->n[7] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[7];
    364     r->n[8] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[8];
    365     r->n[9] = 0x03FFFFFUL * 2 * (m + 1) - a->n[9];
    366 }
    367 
    368 SECP256K1_INLINE static void haskellsecp256k1_v0_1_0_fe_impl_mul_int_unchecked(haskellsecp256k1_v0_1_0_fe *r, int a) {
    369     r->n[0] *= a;
    370     r->n[1] *= a;
    371     r->n[2] *= a;
    372     r->n[3] *= a;
    373     r->n[4] *= a;
    374     r->n[5] *= a;
    375     r->n[6] *= a;
    376     r->n[7] *= a;
    377     r->n[8] *= a;
    378     r->n[9] *= a;
    379 }
    380 
    381 SECP256K1_INLINE static void haskellsecp256k1_v0_1_0_fe_impl_add(haskellsecp256k1_v0_1_0_fe *r, const haskellsecp256k1_v0_1_0_fe *a) {
    382     r->n[0] += a->n[0];
    383     r->n[1] += a->n[1];
    384     r->n[2] += a->n[2];
    385     r->n[3] += a->n[3];
    386     r->n[4] += a->n[4];
    387     r->n[5] += a->n[5];
    388     r->n[6] += a->n[6];
    389     r->n[7] += a->n[7];
    390     r->n[8] += a->n[8];
    391     r->n[9] += a->n[9];
    392 }
    393 
    394 SECP256K1_INLINE static void haskellsecp256k1_v0_1_0_fe_impl_add_int(haskellsecp256k1_v0_1_0_fe *r, int a) {
    395     r->n[0] += a;
    396 }
    397 
    398 #if defined(USE_EXTERNAL_ASM)
    399 
    400 /* External assembler implementation */
    401 void haskellsecp256k1_v0_1_0_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b);
    402 void haskellsecp256k1_v0_1_0_fe_sqr_inner(uint32_t *r, const uint32_t *a);
    403 
    404 #else
    405 
    406 #define VERIFY_BITS(x, n) VERIFY_CHECK(((x) >> (n)) == 0)
    407 
    408 SECP256K1_INLINE static void haskellsecp256k1_v0_1_0_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b) {
    409     uint64_t c, d;
    410     uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8;
    411     uint32_t t9, t1, t0, t2, t3, t4, t5, t6, t7;
    412     const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL;
    413 
    414     VERIFY_BITS(a[0], 30);
    415     VERIFY_BITS(a[1], 30);
    416     VERIFY_BITS(a[2], 30);
    417     VERIFY_BITS(a[3], 30);
    418     VERIFY_BITS(a[4], 30);
    419     VERIFY_BITS(a[5], 30);
    420     VERIFY_BITS(a[6], 30);
    421     VERIFY_BITS(a[7], 30);
    422     VERIFY_BITS(a[8], 30);
    423     VERIFY_BITS(a[9], 26);
    424     VERIFY_BITS(b[0], 30);
    425     VERIFY_BITS(b[1], 30);
    426     VERIFY_BITS(b[2], 30);
    427     VERIFY_BITS(b[3], 30);
    428     VERIFY_BITS(b[4], 30);
    429     VERIFY_BITS(b[5], 30);
    430     VERIFY_BITS(b[6], 30);
    431     VERIFY_BITS(b[7], 30);
    432     VERIFY_BITS(b[8], 30);
    433     VERIFY_BITS(b[9], 26);
    434 
    435     /** [... a b c] is a shorthand for ... + a<<52 + b<<26 + c<<0 mod n.
    436      *  for 0 <= x <= 9, px is a shorthand for sum(a[i]*b[x-i], i=0..x).
    437      *  for 9 <= x <= 18, px is a shorthand for sum(a[i]*b[x-i], i=(x-9)..9)
    438      *  Note that [x 0 0 0 0 0 0 0 0 0 0] = [x*R1 x*R0].
    439      */
    440 
    441     d  = (uint64_t)a[0] * b[9]
    442        + (uint64_t)a[1] * b[8]
    443        + (uint64_t)a[2] * b[7]
    444        + (uint64_t)a[3] * b[6]
    445        + (uint64_t)a[4] * b[5]
    446        + (uint64_t)a[5] * b[4]
    447        + (uint64_t)a[6] * b[3]
    448        + (uint64_t)a[7] * b[2]
    449        + (uint64_t)a[8] * b[1]
    450        + (uint64_t)a[9] * b[0];
    451     /* VERIFY_BITS(d, 64); */
    452     /* [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
    453     t9 = d & M; d >>= 26;
    454     VERIFY_BITS(t9, 26);
    455     VERIFY_BITS(d, 38);
    456     /* [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
    457 
    458     c  = (uint64_t)a[0] * b[0];
    459     VERIFY_BITS(c, 60);
    460     /* [d t9 0 0 0 0 0 0 0 0 c] = [p9 0 0 0 0 0 0 0 0 p0] */
    461     d += (uint64_t)a[1] * b[9]
    462        + (uint64_t)a[2] * b[8]
    463        + (uint64_t)a[3] * b[7]
    464        + (uint64_t)a[4] * b[6]
    465        + (uint64_t)a[5] * b[5]
    466        + (uint64_t)a[6] * b[4]
    467        + (uint64_t)a[7] * b[3]
    468        + (uint64_t)a[8] * b[2]
    469        + (uint64_t)a[9] * b[1];
    470     VERIFY_BITS(d, 63);
    471     /* [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
    472     u0 = d & M; d >>= 26; c += u0 * R0;
    473     VERIFY_BITS(u0, 26);
    474     VERIFY_BITS(d, 37);
    475     VERIFY_BITS(c, 61);
    476     /* [d u0 t9 0 0 0 0 0 0 0 0 c-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
    477     t0 = c & M; c >>= 26; c += u0 * R1;
    478     VERIFY_BITS(t0, 26);
    479     VERIFY_BITS(c, 37);
    480     /* [d u0 t9 0 0 0 0 0 0 0 c-u0*R1 t0-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
    481     /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
    482 
    483     c += (uint64_t)a[0] * b[1]
    484        + (uint64_t)a[1] * b[0];
    485     VERIFY_BITS(c, 62);
    486     /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 p1 p0] */
    487     d += (uint64_t)a[2] * b[9]
    488        + (uint64_t)a[3] * b[8]
    489        + (uint64_t)a[4] * b[7]
    490        + (uint64_t)a[5] * b[6]
    491        + (uint64_t)a[6] * b[5]
    492        + (uint64_t)a[7] * b[4]
    493        + (uint64_t)a[8] * b[3]
    494        + (uint64_t)a[9] * b[2];
    495     VERIFY_BITS(d, 63);
    496     /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
    497     u1 = d & M; d >>= 26; c += u1 * R0;
    498     VERIFY_BITS(u1, 26);
    499     VERIFY_BITS(d, 37);
    500     VERIFY_BITS(c, 63);
    501     /* [d u1 0 t9 0 0 0 0 0 0 0 c-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
    502     t1 = c & M; c >>= 26; c += u1 * R1;
    503     VERIFY_BITS(t1, 26);
    504     VERIFY_BITS(c, 38);
    505     /* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
    506     /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
    507 
    508     c += (uint64_t)a[0] * b[2]
    509        + (uint64_t)a[1] * b[1]
    510        + (uint64_t)a[2] * b[0];
    511     VERIFY_BITS(c, 62);
    512     /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
    513     d += (uint64_t)a[3] * b[9]
    514        + (uint64_t)a[4] * b[8]
    515        + (uint64_t)a[5] * b[7]
    516        + (uint64_t)a[6] * b[6]
    517        + (uint64_t)a[7] * b[5]
    518        + (uint64_t)a[8] * b[4]
    519        + (uint64_t)a[9] * b[3];
    520     VERIFY_BITS(d, 63);
    521     /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
    522     u2 = d & M; d >>= 26; c += u2 * R0;
    523     VERIFY_BITS(u2, 26);
    524     VERIFY_BITS(d, 37);
    525     VERIFY_BITS(c, 63);
    526     /* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
    527     t2 = c & M; c >>= 26; c += u2 * R1;
    528     VERIFY_BITS(t2, 26);
    529     VERIFY_BITS(c, 38);
    530     /* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
    531     /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
    532 
    533     c += (uint64_t)a[0] * b[3]
    534        + (uint64_t)a[1] * b[2]
    535        + (uint64_t)a[2] * b[1]
    536        + (uint64_t)a[3] * b[0];
    537     VERIFY_BITS(c, 63);
    538     /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
    539     d += (uint64_t)a[4] * b[9]
    540        + (uint64_t)a[5] * b[8]
    541        + (uint64_t)a[6] * b[7]
    542        + (uint64_t)a[7] * b[6]
    543        + (uint64_t)a[8] * b[5]
    544        + (uint64_t)a[9] * b[4];
    545     VERIFY_BITS(d, 63);
    546     /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
    547     u3 = d & M; d >>= 26; c += u3 * R0;
    548     VERIFY_BITS(u3, 26);
    549     VERIFY_BITS(d, 37);
    550     /* VERIFY_BITS(c, 64); */
    551     /* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
    552     t3 = c & M; c >>= 26; c += u3 * R1;
    553     VERIFY_BITS(t3, 26);
    554     VERIFY_BITS(c, 39);
    555     /* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
    556     /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
    557 
    558     c += (uint64_t)a[0] * b[4]
    559        + (uint64_t)a[1] * b[3]
    560        + (uint64_t)a[2] * b[2]
    561        + (uint64_t)a[3] * b[1]
    562        + (uint64_t)a[4] * b[0];
    563     VERIFY_BITS(c, 63);
    564     /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
    565     d += (uint64_t)a[5] * b[9]
    566        + (uint64_t)a[6] * b[8]
    567        + (uint64_t)a[7] * b[7]
    568        + (uint64_t)a[8] * b[6]
    569        + (uint64_t)a[9] * b[5];
    570     VERIFY_BITS(d, 62);
    571     /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
    572     u4 = d & M; d >>= 26; c += u4 * R0;
    573     VERIFY_BITS(u4, 26);
    574     VERIFY_BITS(d, 36);
    575     /* VERIFY_BITS(c, 64); */
    576     /* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
    577     t4 = c & M; c >>= 26; c += u4 * R1;
    578     VERIFY_BITS(t4, 26);
    579     VERIFY_BITS(c, 39);
    580     /* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
    581     /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
    582 
    583     c += (uint64_t)a[0] * b[5]
    584        + (uint64_t)a[1] * b[4]
    585        + (uint64_t)a[2] * b[3]
    586        + (uint64_t)a[3] * b[2]
    587        + (uint64_t)a[4] * b[1]
    588        + (uint64_t)a[5] * b[0];
    589     VERIFY_BITS(c, 63);
    590     /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
    591     d += (uint64_t)a[6] * b[9]
    592        + (uint64_t)a[7] * b[8]
    593        + (uint64_t)a[8] * b[7]
    594        + (uint64_t)a[9] * b[6];
    595     VERIFY_BITS(d, 62);
    596     /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
    597     u5 = d & M; d >>= 26; c += u5 * R0;
    598     VERIFY_BITS(u5, 26);
    599     VERIFY_BITS(d, 36);
    600     /* VERIFY_BITS(c, 64); */
    601     /* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
    602     t5 = c & M; c >>= 26; c += u5 * R1;
    603     VERIFY_BITS(t5, 26);
    604     VERIFY_BITS(c, 39);
    605     /* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
    606     /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
    607 
    608     c += (uint64_t)a[0] * b[6]
    609        + (uint64_t)a[1] * b[5]
    610        + (uint64_t)a[2] * b[4]
    611        + (uint64_t)a[3] * b[3]
    612        + (uint64_t)a[4] * b[2]
    613        + (uint64_t)a[5] * b[1]
    614        + (uint64_t)a[6] * b[0];
    615     VERIFY_BITS(c, 63);
    616     /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
    617     d += (uint64_t)a[7] * b[9]
    618        + (uint64_t)a[8] * b[8]
    619        + (uint64_t)a[9] * b[7];
    620     VERIFY_BITS(d, 61);
    621     /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
    622     u6 = d & M; d >>= 26; c += u6 * R0;
    623     VERIFY_BITS(u6, 26);
    624     VERIFY_BITS(d, 35);
    625     /* VERIFY_BITS(c, 64); */
    626     /* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
    627     t6 = c & M; c >>= 26; c += u6 * R1;
    628     VERIFY_BITS(t6, 26);
    629     VERIFY_BITS(c, 39);
    630     /* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
    631     /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
    632 
    633     c += (uint64_t)a[0] * b[7]
    634        + (uint64_t)a[1] * b[6]
    635        + (uint64_t)a[2] * b[5]
    636        + (uint64_t)a[3] * b[4]
    637        + (uint64_t)a[4] * b[3]
    638        + (uint64_t)a[5] * b[2]
    639        + (uint64_t)a[6] * b[1]
    640        + (uint64_t)a[7] * b[0];
    641     /* VERIFY_BITS(c, 64); */
    642     VERIFY_CHECK(c <= 0x8000007C00000007ULL);
    643     /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
    644     d += (uint64_t)a[8] * b[9]
    645        + (uint64_t)a[9] * b[8];
    646     VERIFY_BITS(d, 58);
    647     /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
    648     u7 = d & M; d >>= 26; c += u7 * R0;
    649     VERIFY_BITS(u7, 26);
    650     VERIFY_BITS(d, 32);
    651     /* VERIFY_BITS(c, 64); */
    652     VERIFY_CHECK(c <= 0x800001703FFFC2F7ULL);
    653     /* [d u7 0 0 0 0 0 0 0 t9 0 c-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
    654     t7 = c & M; c >>= 26; c += u7 * R1;
    655     VERIFY_BITS(t7, 26);
    656     VERIFY_BITS(c, 38);
    657     /* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
    658     /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
    659 
    660     c += (uint64_t)a[0] * b[8]
    661        + (uint64_t)a[1] * b[7]
    662        + (uint64_t)a[2] * b[6]
    663        + (uint64_t)a[3] * b[5]
    664        + (uint64_t)a[4] * b[4]
    665        + (uint64_t)a[5] * b[3]
    666        + (uint64_t)a[6] * b[2]
    667        + (uint64_t)a[7] * b[1]
    668        + (uint64_t)a[8] * b[0];
    669     /* VERIFY_BITS(c, 64); */
    670     VERIFY_CHECK(c <= 0x9000007B80000008ULL);
    671     /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    672     d += (uint64_t)a[9] * b[9];
    673     VERIFY_BITS(d, 57);
    674     /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    675     u8 = d & M; d >>= 26; c += u8 * R0;
    676     VERIFY_BITS(u8, 26);
    677     VERIFY_BITS(d, 31);
    678     /* VERIFY_BITS(c, 64); */
    679     VERIFY_CHECK(c <= 0x9000016FBFFFC2F8ULL);
    680     /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    681 
    682     r[3] = t3;
    683     VERIFY_BITS(r[3], 26);
    684     /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    685     r[4] = t4;
    686     VERIFY_BITS(r[4], 26);
    687     /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    688     r[5] = t5;
    689     VERIFY_BITS(r[5], 26);
    690     /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    691     r[6] = t6;
    692     VERIFY_BITS(r[6], 26);
    693     /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    694     r[7] = t7;
    695     VERIFY_BITS(r[7], 26);
    696     /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    697 
    698     r[8] = c & M; c >>= 26; c += u8 * R1;
    699     VERIFY_BITS(r[8], 26);
    700     VERIFY_BITS(c, 39);
    701     /* [d u8 0 0 0 0 0 0 0 0 t9+c-u8*R1 r8-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    702     /* [d 0 0 0 0 0 0 0 0 0 t9+c r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    703     c   += d * R0 + t9;
    704     VERIFY_BITS(c, 45);
    705     /* [d 0 0 0 0 0 0 0 0 0 c-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    706     r[9] = c & (M >> 4); c >>= 22; c += d * (R1 << 4);
    707     VERIFY_BITS(r[9], 22);
    708     VERIFY_BITS(c, 46);
    709     /* [d 0 0 0 0 0 0 0 0 r9+((c-d*R1<<4)<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    710     /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    711     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    712 
    713     d    = c * (R0 >> 4) + t0;
    714     VERIFY_BITS(d, 56);
    715     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    716     r[0] = d & M; d >>= 26;
    717     VERIFY_BITS(r[0], 26);
    718     VERIFY_BITS(d, 30);
    719     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    720     d   += c * (R1 >> 4) + t1;
    721     VERIFY_BITS(d, 53);
    722     VERIFY_CHECK(d <= 0x10000003FFFFBFULL);
    723     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    724     /* [r9 r8 r7 r6 r5 r4 r3 t2 d r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    725     r[1] = d & M; d >>= 26;
    726     VERIFY_BITS(r[1], 26);
    727     VERIFY_BITS(d, 27);
    728     VERIFY_CHECK(d <= 0x4000000ULL);
    729     /* [r9 r8 r7 r6 r5 r4 r3 t2+d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    730     d   += t2;
    731     VERIFY_BITS(d, 27);
    732     /* [r9 r8 r7 r6 r5 r4 r3 d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    733     r[2] = d;
    734     VERIFY_BITS(r[2], 27);
    735     /* [r9 r8 r7 r6 r5 r4 r3 r2 r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    736 }
    737 
    738 SECP256K1_INLINE static void haskellsecp256k1_v0_1_0_fe_sqr_inner(uint32_t *r, const uint32_t *a) {
            /* Square the field element given as ten limbs a[0..9] (26 bits apart)
             * and store the ten result limbs in r[0..9].
             *
             * Input limbs need not be fully reduced: the checks below allow up to
             * 30 bits in a[0..8] and 26 bits in a[9]. The output is only partially
             * reduced: r[0], r[1] and r[3..8] fit in 26 bits, r[9] in 22 bits, and
             * r[2] may use 27 bits (see the final VERIFY_BITS checks).
             *
             * c accumulates the low product columns p0..p8 and d the high columns
             * p9..p18. Every 26-bit chunk u taken off d is folded back into c as
             * u*R0 (same limb) and u*R1 (next limb). Off-diagonal terms a[i]*a[j]
             * occur twice in a square, hence the a[i]*2 factors.
             */
    739     uint64_t c, d;
    740     uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8;
    741     uint32_t t9, t0, t1, t2, t3, t4, t5, t6, t7;
            /* M masks one 26-bit limb. R0 and R1 are the two factors from the
             * shorthand note below: a unit ten limbs up equals R1<<26 + R0 mod n. */
    742     const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL;
    743 
    744     VERIFY_BITS(a[0], 30);
    745     VERIFY_BITS(a[1], 30);
    746     VERIFY_BITS(a[2], 30);
    747     VERIFY_BITS(a[3], 30);
    748     VERIFY_BITS(a[4], 30);
    749     VERIFY_BITS(a[5], 30);
    750     VERIFY_BITS(a[6], 30);
    751     VERIFY_BITS(a[7], 30);
    752     VERIFY_BITS(a[8], 30);
    753     VERIFY_BITS(a[9], 26);
    754 
    755     /** [... a b c] is a shorthand for ... + a<<52 + b<<26 + c<<0 mod n.
    756      *  px is a shorthand for sum(a[i]*a[x-i], i=0..x).
    757      *  Note that [x 0 0 0 0 0 0 0 0 0 0] = [x*R1 x*R0].
    758      */
    759 
            /* a[i]*2 is evaluated in uint32_t before the cast to uint64_t; given the
             * 30-bit bound on a[0..8] asserted above, the doubled value fits in 31
             * bits. a[9] is never the doubled operand. */
    760     d  = (uint64_t)(a[0]*2) * a[9]
    761        + (uint64_t)(a[1]*2) * a[8]
    762        + (uint64_t)(a[2]*2) * a[7]
    763        + (uint64_t)(a[3]*2) * a[6]
    764        + (uint64_t)(a[4]*2) * a[5];
    765     /* VERIFY_BITS(d, 64); */
    766     /* [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
    767     t9 = d & M; d >>= 26;
    768     VERIFY_BITS(t9, 26);
    769     VERIFY_BITS(d, 38);
    770     /* [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */
    771 
    772     c  = (uint64_t)a[0] * a[0];
    773     VERIFY_BITS(c, 60);
    774     /* [d t9 0 0 0 0 0 0 0 0 c] = [p9 0 0 0 0 0 0 0 0 p0] */
    775     d += (uint64_t)(a[1]*2) * a[9]
    776        + (uint64_t)(a[2]*2) * a[8]
    777        + (uint64_t)(a[3]*2) * a[7]
    778        + (uint64_t)(a[4]*2) * a[6]
    779        + (uint64_t)a[5] * a[5];
    780     VERIFY_BITS(d, 63);
    781     /* [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
    782     u0 = d & M; d >>= 26; c += u0 * R0;
    783     VERIFY_BITS(u0, 26);
    784     VERIFY_BITS(d, 37);
    785     VERIFY_BITS(c, 61);
    786     /* [d u0 t9 0 0 0 0 0 0 0 0 c-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
    787     t0 = c & M; c >>= 26; c += u0 * R1;
    788     VERIFY_BITS(t0, 26);
    789     VERIFY_BITS(c, 37);
    790     /* [d u0 t9 0 0 0 0 0 0 0 c-u0*R1 t0-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
    791     /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */
    792 
    793     c += (uint64_t)(a[0]*2) * a[1];
    794     VERIFY_BITS(c, 62);
    795     /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 p1 p0] */
    796     d += (uint64_t)(a[2]*2) * a[9]
    797        + (uint64_t)(a[3]*2) * a[8]
    798        + (uint64_t)(a[4]*2) * a[7]
    799        + (uint64_t)(a[5]*2) * a[6];
    800     VERIFY_BITS(d, 63);
    801     /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
    802     u1 = d & M; d >>= 26; c += u1 * R0;
    803     VERIFY_BITS(u1, 26);
    804     VERIFY_BITS(d, 37);
    805     VERIFY_BITS(c, 63);
    806     /* [d u1 0 t9 0 0 0 0 0 0 0 c-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
    807     t1 = c & M; c >>= 26; c += u1 * R1;
    808     VERIFY_BITS(t1, 26);
    809     VERIFY_BITS(c, 38);
    810     /* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
    811     /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */
    812 
    813     c += (uint64_t)(a[0]*2) * a[2]
    814        + (uint64_t)a[1] * a[1];
    815     VERIFY_BITS(c, 62);
    816     /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
    817     d += (uint64_t)(a[3]*2) * a[9]
    818        + (uint64_t)(a[4]*2) * a[8]
    819        + (uint64_t)(a[5]*2) * a[7]
    820        + (uint64_t)a[6] * a[6];
    821     VERIFY_BITS(d, 63);
    822     /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
    823     u2 = d & M; d >>= 26; c += u2 * R0;
    824     VERIFY_BITS(u2, 26);
    825     VERIFY_BITS(d, 37);
    826     VERIFY_BITS(c, 63);
    827     /* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
    828     t2 = c & M; c >>= 26; c += u2 * R1;
    829     VERIFY_BITS(t2, 26);
    830     VERIFY_BITS(c, 38);
    831     /* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
    832     /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */
    833 
    834     c += (uint64_t)(a[0]*2) * a[3]
    835        + (uint64_t)(a[1]*2) * a[2];
    836     VERIFY_BITS(c, 63);
    837     /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
    838     d += (uint64_t)(a[4]*2) * a[9]
    839        + (uint64_t)(a[5]*2) * a[8]
    840        + (uint64_t)(a[6]*2) * a[7];
    841     VERIFY_BITS(d, 63);
    842     /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
    843     u3 = d & M; d >>= 26; c += u3 * R0;
    844     VERIFY_BITS(u3, 26);
    845     VERIFY_BITS(d, 37);
    846     /* VERIFY_BITS(c, 64); */
    847     /* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
    848     t3 = c & M; c >>= 26; c += u3 * R1;
    849     VERIFY_BITS(t3, 26);
    850     VERIFY_BITS(c, 39);
    851     /* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
    852     /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */
    853 
    854     c += (uint64_t)(a[0]*2) * a[4]
    855        + (uint64_t)(a[1]*2) * a[3]
    856        + (uint64_t)a[2] * a[2];
    857     VERIFY_BITS(c, 63);
    858     /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
    859     d += (uint64_t)(a[5]*2) * a[9]
    860        + (uint64_t)(a[6]*2) * a[8]
    861        + (uint64_t)a[7] * a[7];
    862     VERIFY_BITS(d, 62);
    863     /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
    864     u4 = d & M; d >>= 26; c += u4 * R0;
    865     VERIFY_BITS(u4, 26);
    866     VERIFY_BITS(d, 36);
    867     /* VERIFY_BITS(c, 64); */
    868     /* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
    869     t4 = c & M; c >>= 26; c += u4 * R1;
    870     VERIFY_BITS(t4, 26);
    871     VERIFY_BITS(c, 39);
    872     /* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
    873     /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */
    874 
    875     c += (uint64_t)(a[0]*2) * a[5]
    876        + (uint64_t)(a[1]*2) * a[4]
    877        + (uint64_t)(a[2]*2) * a[3];
    878     VERIFY_BITS(c, 63);
    879     /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
    880     d += (uint64_t)(a[6]*2) * a[9]
    881        + (uint64_t)(a[7]*2) * a[8];
    882     VERIFY_BITS(d, 62);
    883     /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
    884     u5 = d & M; d >>= 26; c += u5 * R0;
    885     VERIFY_BITS(u5, 26);
    886     VERIFY_BITS(d, 36);
    887     /* VERIFY_BITS(c, 64); */
    888     /* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
    889     t5 = c & M; c >>= 26; c += u5 * R1;
    890     VERIFY_BITS(t5, 26);
    891     VERIFY_BITS(c, 39);
    892     /* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
    893     /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */
    894 
    895     c += (uint64_t)(a[0]*2) * a[6]
    896        + (uint64_t)(a[1]*2) * a[5]
    897        + (uint64_t)(a[2]*2) * a[4]
    898        + (uint64_t)a[3] * a[3];
    899     VERIFY_BITS(c, 63);
    900     /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
    901     d += (uint64_t)(a[7]*2) * a[9]
    902        + (uint64_t)a[8] * a[8];
    903     VERIFY_BITS(d, 61);
    904     /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
    905     u6 = d & M; d >>= 26; c += u6 * R0;
    906     VERIFY_BITS(u6, 26);
    907     VERIFY_BITS(d, 35);
    908     /* VERIFY_BITS(c, 64); */
    909     /* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
    910     t6 = c & M; c >>= 26; c += u6 * R1;
    911     VERIFY_BITS(t6, 26);
    912     VERIFY_BITS(c, 39);
    913     /* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
    914     /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */
    915 
    916     c += (uint64_t)(a[0]*2) * a[7]
    917        + (uint64_t)(a[1]*2) * a[6]
    918        + (uint64_t)(a[2]*2) * a[5]
    919        + (uint64_t)(a[3]*2) * a[4];
    920     /* VERIFY_BITS(c, 64); */
    921     VERIFY_CHECK(c <= 0x8000007C00000007ULL);
    922     /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
    923     d += (uint64_t)(a[8]*2) * a[9];
    924     VERIFY_BITS(d, 58);
    925     /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
    926     u7 = d & M; d >>= 26; c += u7 * R0;
    927     VERIFY_BITS(u7, 26);
    928     VERIFY_BITS(d, 32);
    929     /* VERIFY_BITS(c, 64); */
    930     VERIFY_CHECK(c <= 0x800001703FFFC2F7ULL);
    931     /* [d u7 0 0 0 0 0 0 0 t9 0 c-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
    932     t7 = c & M; c >>= 26; c += u7 * R1;
    933     VERIFY_BITS(t7, 26);
    934     VERIFY_BITS(c, 38);
    935     /* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
    936     /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */
    937 
    938     c += (uint64_t)(a[0]*2) * a[8]
    939        + (uint64_t)(a[1]*2) * a[7]
    940        + (uint64_t)(a[2]*2) * a[6]
    941        + (uint64_t)(a[3]*2) * a[5]
    942        + (uint64_t)a[4] * a[4];
    943     /* VERIFY_BITS(c, 64); */
    944     VERIFY_CHECK(c <= 0x9000007B80000008ULL);
    945     /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    946     d += (uint64_t)a[9] * a[9];
    947     VERIFY_BITS(d, 57);
    948     /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    949     u8 = d & M; d >>= 26; c += u8 * R0;
    950     VERIFY_BITS(u8, 26);
    951     VERIFY_BITS(d, 31);
    952     /* VERIFY_BITS(c, 64); */
    953     VERIFY_CHECK(c <= 0x9000016FBFFFC2F8ULL);
    954     /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    955 
    956     r[3] = t3;
    957     VERIFY_BITS(r[3], 26);
    958     /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    959     r[4] = t4;
    960     VERIFY_BITS(r[4], 26);
    961     /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    962     r[5] = t5;
    963     VERIFY_BITS(r[5], 26);
    964     /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    965     r[6] = t6;
    966     VERIFY_BITS(r[6], 26);
    967     /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    968     r[7] = t7;
    969     VERIFY_BITS(r[7], 26);
    970     /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    971 
    972     r[8] = c & M; c >>= 26; c += u8 * R1;
    973     VERIFY_BITS(r[8], 26);
    974     VERIFY_BITS(c, 39);
    975     /* [d u8 0 0 0 0 0 0 0 0 t9+c-u8*R1 r8-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    976     /* [d 0 0 0 0 0 0 0 0 0 t9+c r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    977     c   += d * R0 + t9;
    978     VERIFY_BITS(c, 45);
    979     /* [d 0 0 0 0 0 0 0 0 0 c-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
            /* Limb 9 keeps only 22 bits (M >> 4), since 9*26 + 22 = 256. What is
             * left in c is folded into limbs 0 and 1 below with R0 >> 4 and
             * R1 >> 4. */
    980     r[9] = c & (M >> 4); c >>= 22; c += d * (R1 << 4);
    981     VERIFY_BITS(r[9], 22);
    982     VERIFY_BITS(c, 46);
    983     /* [d 0 0 0 0 0 0 0 0 r9+((c-d*R1<<4)<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    984     /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    985     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    986 
    987     d    = c * (R0 >> 4) + t0;
    988     VERIFY_BITS(d, 56);
    989     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    990     r[0] = d & M; d >>= 26;
    991     VERIFY_BITS(r[0], 26);
    992     VERIFY_BITS(d, 30);
    993     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    994     d   += c * (R1 >> 4) + t1;
    995     VERIFY_BITS(d, 53);
    996     VERIFY_CHECK(d <= 0x10000003FFFFBFULL);
    997     /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    998     /* [r9 r8 r7 r6 r5 r4 r3 t2 d r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
    999     r[1] = d & M; d >>= 26;
   1000     VERIFY_BITS(r[1], 26);
   1001     VERIFY_BITS(d, 27);
   1002     VERIFY_CHECK(d <= 0x4000000ULL);
   1003     /* [r9 r8 r7 r6 r5 r4 r3 t2+d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   1004     d   += t2;
   1005     VERIFY_BITS(d, 27);
   1006     /* [r9 r8 r7 r6 r5 r4 r3 d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
            /* The last carry is not propagated further, so r[2] may be 27 bits. */
   1007     r[2] = d;
   1008     VERIFY_BITS(r[2], 27);
   1009     /* [r9 r8 r7 r6 r5 r4 r3 r2 r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
   1010 }
   1011 #endif
   1012 
   1013 SECP256K1_INLINE static void haskellsecp256k1_v0_1_0_fe_impl_mul(haskellsecp256k1_v0_1_0_fe *r, const haskellsecp256k1_v0_1_0_fe *a, const haskellsecp256k1_v0_1_0_fe * SECP256K1_RESTRICT b) {
            /* Field multiplication for this limb representation: forwards the limb
             * arrays of r, a and b to haskellsecp256k1_v0_1_0_fe_mul_inner. Only the
             * n[] members are passed on; no other field of r is touched here.
             * NOTE(review): b is SECP256K1_RESTRICT-qualified, so presumably b must
             * not alias r; confirm against the macro definition. */
   1014     haskellsecp256k1_v0_1_0_fe_mul_inner(r->n, a->n, b->n);
   1015 }
   1016 
   1017 SECP256K1_INLINE static void haskellsecp256k1_v0_1_0_fe_impl_sqr(haskellsecp256k1_v0_1_0_fe *r, const haskellsecp256k1_v0_1_0_fe *a) {
            /* Field squaring for this limb representation: forwards the limb arrays
             * of r and a to haskellsecp256k1_v0_1_0_fe_sqr_inner. Only the n[]
             * members are passed on; no other field of r is touched here. */
   1018     haskellsecp256k1_v0_1_0_fe_sqr_inner(r->n, a->n);
   1019 }
   1020 
   1021 SECP256K1_INLINE static void haskellsecp256k1_v0_1_0_fe_impl_cmov(haskellsecp256k1_v0_1_0_fe *r, const haskellsecp256k1_v0_1_0_fe *a, int flag) {
            /* Conditional move on the limb array: with flag == 0, r->n is left
             * unchanged; with flag == 1, r->n becomes a copy of a->n. Both cases
             * execute the same loads, mask operations and stores on all ten limbs.
             * The masks are only all-ones/all-zero for flag values 0 and 1. */
   1022     uint32_t mask0, mask1;
            /* NOTE(review): flag is read through a volatile local, presumably so the
             * compiler cannot turn the masking into a branch on flag; confirm. */
   1023     volatile int vflag = flag;
            /* NOTE(review): macro from checkmem.h; it looks like a check that r->n
             * is initialized when run under a memory checker; confirm. */
   1024     SECP256K1_CHECKMEM_CHECK_VERIFY(r->n, sizeof(r->n));
            /* flag + 0xFFFFFFFF: all ones for flag == 0, wraps to zero for flag == 1.
             * mask0 selects the old value of r, mask1 (its complement) selects a. */
   1025     mask0 = vflag + ~((uint32_t)0);
   1026     mask1 = ~mask0;
   1027     r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1);
   1028     r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1);
   1029     r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1);
   1030     r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1);
   1031     r->n[4] = (r->n[4] & mask0) | (a->n[4] & mask1);
   1032     r->n[5] = (r->n[5] & mask0) | (a->n[5] & mask1);
   1033     r->n[6] = (r->n[6] & mask0) | (a->n[6] & mask1);
   1034     r->n[7] = (r->n[7] & mask0) | (a->n[7] & mask1);
   1035     r->n[8] = (r->n[8] & mask0) | (a->n[8] & mask1);
   1036     r->n[9] = (r->n[9] & mask0) | (a->n[9] & mask1);
   1037 }
   1038 
   1039 static SECP256K1_INLINE void haskellsecp256k1_v0_1_0_fe_impl_half(haskellsecp256k1_v0_1_0_fe *r) {
            /* Halve r in place, without branching on its value: if the value is odd
             * (bit 0 of limb 0 set), the field modulus is added first so that the
             * sum is even, then all ten limbs are shifted right by one bit as a
             * single number. Only r->n is written here. */
   1040     uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
   1041              t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
   1042     uint32_t one = (uint32_t)1;
            /* -(t0 & one) is 0xFFFFFFFF when t0 is odd and 0 otherwise, so mask is
             * 0x3FFFFFF (26 one bits) for an odd value and 0 for an even one. */
   1043     uint32_t mask = -(t0 & one) >> 6;
   1044 
   1045     /* Bounds analysis (over the rationals).
   1046      *
   1047      * Let m = r->magnitude
   1048      *     C = 0x3FFFFFFUL * 2
   1049      *     D = 0x03FFFFFUL * 2
   1050      *
   1051      * Initial bounds: t0..t8 <= C * m
   1052      *                     t9 <= D * m
   1053      */
   1054 
            /* Masked addition of the modulus 2^256 - 2^32 - 0x3D1, limb by limb:
             * 0x3FFFC2F, 0x3FFFFBF, seven limbs of 0x3FFFFFF, and a 22-bit top limb
             * of 0x3FFFFF (mask >> 4). With mask == 0 nothing is added. */
   1055     t0 += 0x3FFFC2FUL & mask;
   1056     t1 += 0x3FFFFBFUL & mask;
   1057     t2 += mask;
   1058     t3 += mask;
   1059     t4 += mask;
   1060     t5 += mask;
   1061     t6 += mask;
   1062     t7 += mask;
   1063     t8 += mask;
   1064     t9 += mask >> 4;
   1065 
            /* Limb 0 is now even in both cases (odd + odd constant, or unchanged). */
   1066     VERIFY_CHECK((t0 & one) == 0);
   1067 
   1068     /* t0..t8: added <= C/2
   1069      *     t9: added <= D/2
   1070      *
   1071      * Current bounds: t0..t8 <= C * (m + 1/2)
   1072      *                     t9 <= D * (m + 1/2)
   1073      */
   1074 
            /* Shift right by one across limbs: bit 0 of each limb becomes bit 25 of
             * the limb below it. */
   1075     r->n[0] = (t0 >> 1) + ((t1 & one) << 25);
   1076     r->n[1] = (t1 >> 1) + ((t2 & one) << 25);
   1077     r->n[2] = (t2 >> 1) + ((t3 & one) << 25);
   1078     r->n[3] = (t3 >> 1) + ((t4 & one) << 25);
   1079     r->n[4] = (t4 >> 1) + ((t5 & one) << 25);
   1080     r->n[5] = (t5 >> 1) + ((t6 & one) << 25);
   1081     r->n[6] = (t6 >> 1) + ((t7 & one) << 25);
   1082     r->n[7] = (t7 >> 1) + ((t8 & one) << 25);
   1083     r->n[8] = (t8 >> 1) + ((t9 & one) << 25);
   1084     r->n[9] = (t9 >> 1);
   1085 
   1086     /* t0..t8: shifted right and added <= C/4 + 1/2
   1087      *     t9: shifted right
   1088      *
   1089      * Current bounds: t0..t8 <= C * (m/2 + 1/2)
   1090      *                     t9 <= D * (m/2 + 1/4)
   1091      *
   1092      * Therefore the output magnitude (M) has to be set such that:
   1093      *     t0..t8: C * M >= C * (m/2 + 1/2)
   1094      *         t9: D * M >= D * (m/2 + 1/4)
   1095      *
   1096      * It suffices for all limbs that, for any input magnitude m:
   1097      *     M >= m/2 + 1/2
   1098      *
   1099      * and since we want the smallest such integer value for M:
   1100      *     M == floor(m/2) + 1
   1101      */
   1102 }
   1103 
   1104 static SECP256K1_INLINE void haskellsecp256k1_v0_1_0_fe_storage_cmov(haskellsecp256k1_v0_1_0_fe_storage *r, const haskellsecp256k1_v0_1_0_fe_storage *a, int flag) {
            /* Conditional move on the packed storage form: with flag == 0, r->n is
             * left unchanged; with flag == 1, the eight words of a->n are copied
             * into r->n. Both cases execute the same loads, mask operations and
             * stores. The masks are only all-ones/all-zero for flag values 0 and 1. */
   1105     uint32_t mask0, mask1;
            /* NOTE(review): flag is read through a volatile local, presumably so the
             * compiler cannot turn the masking into a branch on flag; confirm. */
   1106     volatile int vflag = flag;
            /* NOTE(review): macro from checkmem.h; it looks like a check that r->n
             * is initialized when run under a memory checker; confirm. */
   1107     SECP256K1_CHECKMEM_CHECK_VERIFY(r->n, sizeof(r->n));
            /* flag + 0xFFFFFFFF: all ones for flag == 0, wraps to zero for flag == 1. */
   1108     mask0 = vflag + ~((uint32_t)0);
   1109     mask1 = ~mask0;
   1110     r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1);
   1111     r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1);
   1112     r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1);
   1113     r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1);
   1114     r->n[4] = (r->n[4] & mask0) | (a->n[4] & mask1);
   1115     r->n[5] = (r->n[5] & mask0) | (a->n[5] & mask1);
   1116     r->n[6] = (r->n[6] & mask0) | (a->n[6] & mask1);
   1117     r->n[7] = (r->n[7] & mask0) | (a->n[7] & mask1);
   1118 }
   1119 
   1120 static void haskellsecp256k1_v0_1_0_fe_impl_to_storage(haskellsecp256k1_v0_1_0_fe_storage *r, const haskellsecp256k1_v0_1_0_fe *a) {
            /* Pack the ten limbs of a (spaced 26 bits apart) into the eight words of
             * r. Each output word ORs together the limb fragments that fall into its
             * 32-bit window (limb i starts at bit 26*i); bits shifted past the top
             * of a word are dropped.
             * NOTE(review): the fragments only stay disjoint if every limb fits its
             * nominal width (26 bits, 22 for n[9]); presumably a must already be
             * normalized by the caller; confirm. */
   1121     r->n[0] = a->n[0] | a->n[1] << 26;
   1122     r->n[1] = a->n[1] >> 6 | a->n[2] << 20;
   1123     r->n[2] = a->n[2] >> 12 | a->n[3] << 14;
   1124     r->n[3] = a->n[3] >> 18 | a->n[4] << 8;
            /* Word 4 covers bits 128..159: the tail of limb 4, all of limb 5 and the
             * first four bits of limb 6. */
   1125     r->n[4] = a->n[4] >> 24 | a->n[5] << 2 | a->n[6] << 28;
   1126     r->n[5] = a->n[6] >> 4 | a->n[7] << 22;
   1127     r->n[6] = a->n[7] >> 10 | a->n[8] << 16;
   1128     r->n[7] = a->n[8] >> 16 | a->n[9] << 10;
   1129 }
   1130 
   1131 static SECP256K1_INLINE void haskellsecp256k1_v0_1_0_fe_impl_from_storage(haskellsecp256k1_v0_1_0_fe *r, const haskellsecp256k1_v0_1_0_fe_storage *a) {
            /* Unpack the eight words of a into ten limbs spaced 26 bits apart.
             * Limbs 0..8 are masked to 26 bits; limb 9 receives the remaining top
             * 22 bits of a->n[7]. Only r->n is written here. */
   1132     r->n[0] = a->n[0] & 0x3FFFFFFUL;
   1133     r->n[1] = a->n[0] >> 26 | ((a->n[1] << 6) & 0x3FFFFFFUL);
   1134     r->n[2] = a->n[1] >> 20 | ((a->n[2] << 12) & 0x3FFFFFFUL);
   1135     r->n[3] = a->n[2] >> 14 | ((a->n[3] << 18) & 0x3FFFFFFUL);
   1136     r->n[4] = a->n[3] >> 8 | ((a->n[4] << 24) & 0x3FFFFFFUL);
            /* Limb 5 lies entirely inside word 4 (bits 2..27 of it). */
   1137     r->n[5] = (a->n[4] >> 2) & 0x3FFFFFFUL;
   1138     r->n[6] = a->n[4] >> 28 | ((a->n[5] << 4) & 0x3FFFFFFUL);
   1139     r->n[7] = a->n[5] >> 22 | ((a->n[6] << 10) & 0x3FFFFFFUL);
   1140     r->n[8] = a->n[6] >> 16 | ((a->n[7] << 16) & 0x3FFFFFFUL);
   1141     r->n[9] = a->n[7] >> 10;
   1142 }
   1143 
   1144 static void haskellsecp256k1_v0_1_0_fe_from_signed30(haskellsecp256k1_v0_1_0_fe *r, const haskellsecp256k1_v0_1_0_modinv32_signed30 *a) {
            /* Convert a value held as nine limbs spaced 30 bits apart (a->v[0..8])
             * into ten limbs spaced 26 bits apart (r->n[0..9]). Only r->n is
             * written here. */
            /* M26 is the 26-bit limb mask 0x3FFFFFF. */
   1145     const uint32_t M26 = UINT32_MAX >> 6;
            /* The limbs are copied into uint32_t locals so the shifts below operate
             * on unsigned values.
             * NOTE(review): a->v[] is presumably a signed 32-bit type; confirm. */
   1146     const uint32_t a0 = a->v[0], a1 = a->v[1], a2 = a->v[2], a3 = a->v[3], a4 = a->v[4],
   1147                    a5 = a->v[5], a6 = a->v[6], a7 = a->v[7], a8 = a->v[8];
   1148 
   1149     /* The output from haskellsecp256k1_v0_1_0_modinv32{_var} should be normalized to range [0,modulus), and
   1150      * have limbs in [0,2^30). The modulus is < 2^256, so the top limb must be below 2^(256-30*8).
   1151      */
   1152     VERIFY_CHECK(a0 >> 30 == 0);
   1153     VERIFY_CHECK(a1 >> 30 == 0);
   1154     VERIFY_CHECK(a2 >> 30 == 0);
   1155     VERIFY_CHECK(a3 >> 30 == 0);
   1156     VERIFY_CHECK(a4 >> 30 == 0);
   1157     VERIFY_CHECK(a5 >> 30 == 0);
   1158     VERIFY_CHECK(a6 >> 30 == 0);
   1159     VERIFY_CHECK(a7 >> 30 == 0);
   1160     VERIFY_CHECK(a8 >> 16 == 0);
   1161 
            /* Output limb i takes bits 26*i .. 26*i+25 of the value; each is
             * assembled from at most two input limbs and masked to 26 bits. */
   1162     r->n[0] =  a0                   & M26;
   1163     r->n[1] = (a0 >> 26 | a1 <<  4) & M26;
   1164     r->n[2] = (a1 >> 22 | a2 <<  8) & M26;
   1165     r->n[3] = (a2 >> 18 | a3 << 12) & M26;
   1166     r->n[4] = (a3 >> 14 | a4 << 16) & M26;
   1167     r->n[5] = (a4 >> 10 | a5 << 20) & M26;
   1168     r->n[6] = (a5 >>  6 | a6 << 24) & M26;
   1169     r->n[7] = (a6 >>  2           ) & M26;
   1170     r->n[8] = (a6 >> 28 | a7 <<  2) & M26;
            /* The top limb is not masked; with a8 below 2^16 (checked above) it
             * fits in 22 bits. */
   1171     r->n[9] = (a7 >> 24 | a8 <<  6);
   1172 }
   1173 
   1174 static void haskellsecp256k1_v0_1_0_fe_to_signed30(haskellsecp256k1_v0_1_0_modinv32_signed30 *r, const haskellsecp256k1_v0_1_0_fe *a) {
            /* Convert ten limbs spaced 26 bits apart (a->n[0..9]) into nine limbs
             * spaced 30 bits apart (r->v[0..8]). The callers in this file normalize
             * the element before passing it in. */
            /* M30 is the 30-bit limb mask 0x3FFFFFFF. */
   1175     const uint32_t M30 = UINT32_MAX >> 2;
            /* Limbs are widened to uint64_t so the left shifts below (up to 28 bits)
             * do not lose high bits before the mask is applied. */
   1176     const uint64_t a0 = a->n[0], a1 = a->n[1], a2 = a->n[2], a3 = a->n[3], a4 = a->n[4],
   1177                    a5 = a->n[5], a6 = a->n[6], a7 = a->n[7], a8 = a->n[8], a9 = a->n[9];
   1178 
   1179     r->v[0] = (a0       | a1 << 26) & M30;
   1180     r->v[1] = (a1 >>  4 | a2 << 22) & M30;
   1181     r->v[2] = (a2 >>  8 | a3 << 18) & M30;
   1182     r->v[3] = (a3 >> 12 | a4 << 14) & M30;
   1183     r->v[4] = (a4 >> 16 | a5 << 10) & M30;
   1184     r->v[5] = (a5 >> 20 | a6 <<  6) & M30;
            /* Output limb 6 spans three input limbs: the tail of a6, all of a7 and
             * the first two bits of a8. */
   1185     r->v[6] = (a6 >> 24 | a7 <<  2
   1186                         | a8 << 28) & M30;
   1187     r->v[7] = (a8 >>  2 | a9 << 24) & M30;
            /* The top limb is not masked; it holds whatever remains of a9. */
   1188     r->v[8] =  a9 >>  6;
   1189 }
   1190 
   1191 static const haskellsecp256k1_v0_1_0_modinv32_modinfo haskellsecp256k1_v0_1_0_const_modinfo_fe = {
            /* First member: nine limbs spaced 30 bits apart, least significant
             * first, whose sum is 65536*2^240 - 4*2^30 - 0x3D1 = 2^256 - 2^32 - 0x3D1,
             * i.e. the field modulus passed to the modinv32/jacobi32 routines below.
             * Second member: 0x2DDACACF is the inverse of that modulus mod 2^30
             * (0x3D1 * 0x2DDACACF == -1 mod 2^30).
             * NOTE(review): the member names and exact contract are declared in the
             * modinv32 header, not visible here; confirm there. */
   1192     {{-0x3D1, -4, 0, 0, 0, 0, 0, 0, 65536}},
   1193     0x2DDACACFL
   1194 };
   1195 
   1196 static void haskellsecp256k1_v0_1_0_fe_impl_inv(haskellsecp256k1_v0_1_0_fe *r, const haskellsecp256k1_v0_1_0_fe *x) {
            /* Modular inverse of x, written to r. Works on a local copy of x so the
             * input is not modified (r and x may therefore be the same object as far
             * as this function's own accesses are concerned). */
   1197     haskellsecp256k1_v0_1_0_fe tmp = *x;
   1198     haskellsecp256k1_v0_1_0_modinv32_signed30 s;
   1199 
            /* Normalize the copy, repack it into 30-bit limbs, let modinv32 replace s
             * with its inverse for the field modulus described by
             * haskellsecp256k1_v0_1_0_const_modinfo_fe, then repack into 26-bit limbs. */
   1200     haskellsecp256k1_v0_1_0_fe_normalize(&tmp);
   1201     haskellsecp256k1_v0_1_0_fe_to_signed30(&s, &tmp);
   1202     haskellsecp256k1_v0_1_0_modinv32(&s, &haskellsecp256k1_v0_1_0_const_modinfo_fe);
   1203     haskellsecp256k1_v0_1_0_fe_from_signed30(r, &s);
   1204 }
   1205 
   1206 static void haskellsecp256k1_v0_1_0_fe_impl_inv_var(haskellsecp256k1_v0_1_0_fe *r, const haskellsecp256k1_v0_1_0_fe *x) {
            /* Modular inverse of x, written to r, using the _var variants of
             * normalize and modinv32. Works on a local copy of x so the input is not
             * modified.
             * NOTE(review): "_var" presumably means variable-time, i.e. not for
             * secret inputs; confirm against the declarations in field.h. */
   1207     haskellsecp256k1_v0_1_0_fe tmp = *x;
   1208     haskellsecp256k1_v0_1_0_modinv32_signed30 s;
   1209 
            /* Same four steps as the non-_var inverse: normalize, repack to 30-bit
             * limbs, invert in place, repack to 26-bit limbs. */
   1210     haskellsecp256k1_v0_1_0_fe_normalize_var(&tmp);
   1211     haskellsecp256k1_v0_1_0_fe_to_signed30(&s, &tmp);
   1212     haskellsecp256k1_v0_1_0_modinv32_var(&s, &haskellsecp256k1_v0_1_0_const_modinfo_fe);
   1213     haskellsecp256k1_v0_1_0_fe_from_signed30(r, &s);
   1214 }
   1215 
   1216 static int haskellsecp256k1_v0_1_0_fe_impl_is_square_var(const haskellsecp256k1_v0_1_0_fe *x) {
            /* Reports whether x is a square in the field. Returns 1 when x
             * normalizes to zero. Otherwise returns 1 if
             * haskellsecp256k1_v0_1_0_jacobi32_maybe_var yields a positive value and
             * 0 if it yields a negative one; when that helper yields 0 (undecided),
             * the return value of haskellsecp256k1_v0_1_0_fe_sqrt is returned
             * instead. x itself is not modified; all work is done on a copy. */
   1217     haskellsecp256k1_v0_1_0_fe tmp;
   1218     haskellsecp256k1_v0_1_0_modinv32_signed30 s;
   1219     int jac, ret;
   1220 
   1221     tmp = *x;
   1222     haskellsecp256k1_v0_1_0_fe_normalize_var(&tmp);
   1223     /* haskellsecp256k1_v0_1_0_jacobi32_maybe_var cannot deal with input 0. */
   1224     if (haskellsecp256k1_v0_1_0_fe_is_zero(&tmp)) return 1;
   1225     haskellsecp256k1_v0_1_0_fe_to_signed30(&s, &tmp);
   1226     jac = haskellsecp256k1_v0_1_0_jacobi32_maybe_var(&s, &haskellsecp256k1_v0_1_0_const_modinfo_fe);
   1227     if (jac == 0) {
   1228         /* haskellsecp256k1_v0_1_0_jacobi32_maybe_var failed to compute the Jacobi symbol. Fall back
   1229          * to computing a square root. This should be extremely rare with random
   1230          * input (except in VERIFY mode, where a lower iteration count is used). */
                /* The root itself is discarded; only the return value is used. */
   1231         haskellsecp256k1_v0_1_0_fe dummy;
   1232         ret = haskellsecp256k1_v0_1_0_fe_sqrt(&dummy, &tmp);
   1233     } else {
                /* jac is nonzero in this branch, so this is 1 exactly when jac > 0. */
   1234         ret = jac >= 0;
   1235     }
   1236     return ret;
   1237 }
   1238 
   1239 #endif /* SECP256K1_FIELD_REPR_IMPL_H */