csecp256k1

Haskell FFI bindings to bitcoin-core/secp256k1 (docs.ppad.tech/csecp256k1).
git clone git://git.ppad.tech/csecp256k1.git

scalar_4x64_impl.h (35385B)


      1 /***********************************************************************
      2  * Copyright (c) 2013, 2014 Pieter Wuille                              *
      3  * Distributed under the MIT software license, see the accompanying    *
      4  * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
      5  ***********************************************************************/
      6 
      7 #ifndef SECP256K1_SCALAR_REPR_IMPL_H
      8 #define SECP256K1_SCALAR_REPR_IMPL_H
      9 
     10 #include "checkmem.h"
     11 #include "int128.h"
     12 #include "modinv64_impl.h"
     13 #include "util.h"
     14 
     15 /* Limbs of the secp256k1 order. */
     16 #define SECP256K1_N_0 ((uint64_t)0xBFD25E8CD0364141ULL)
     17 #define SECP256K1_N_1 ((uint64_t)0xBAAEDCE6AF48A03BULL)
     18 #define SECP256K1_N_2 ((uint64_t)0xFFFFFFFFFFFFFFFEULL)
     19 #define SECP256K1_N_3 ((uint64_t)0xFFFFFFFFFFFFFFFFULL)
     20 
     21 /* Limbs of 2^256 minus the secp256k1 order. */
     22 #define SECP256K1_N_C_0 (~SECP256K1_N_0 + 1)
     23 #define SECP256K1_N_C_1 (~SECP256K1_N_1)
     24 #define SECP256K1_N_C_2 (1)
     25 
     26 /* Limbs of half the secp256k1 order. */
     27 #define SECP256K1_N_H_0 ((uint64_t)0xDFE92F46681B20A0ULL)
     28 #define SECP256K1_N_H_1 ((uint64_t)0x5D576E7357A4501DULL)
     29 #define SECP256K1_N_H_2 ((uint64_t)0xFFFFFFFFFFFFFFFFULL)
     30 #define SECP256K1_N_H_3 ((uint64_t)0x7FFFFFFFFFFFFFFFULL)
     31 
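A quick standalone sanity sketch (not part of the library) of how the two groups of constants above relate: SECP256K1_N_C is 2^256 minus the order, computed here limb by limb as (~n) + 1 with carry propagation. Because the top limb comes out zero, 2^256 - n fits in 129 bits, which is why only three N_C constants exist and why the reduction code further down can fold the high half cheaply. The values are copied from the defines; the program itself is hypothetical demo code.

#include <assert.h>
#include <stdint.h>

int main(void) {
    const uint64_t N[4] = {
        0xBFD25E8CD0364141ULL, 0xBAAEDCE6AF48A03BULL,
        0xFFFFFFFFFFFFFFFEULL, 0xFFFFFFFFFFFFFFFFULL
    };
    uint64_t c[4];
    unsigned carry = 1;          /* 2^256 - n == (~n) + 1 (mod 2^256) */
    int i;
    for (i = 0; i < 4; i++) {
        uint64_t t = ~N[i] + carry;
        carry = (t < carry);     /* carry out only if ~N[i] + carry wrapped */
        c[i] = t;
    }
    assert(c[0] == ~N[0] + 1);   /* SECP256K1_N_C_0 */
    assert(c[1] == ~N[1]);       /* SECP256K1_N_C_1 */
    assert(c[2] == 1);           /* SECP256K1_N_C_2 */
    assert(c[3] == 0);           /* no limb 3: 2^256 - n < 2^129 */
    return 0;
}
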
     32 SECP256K1_INLINE static void haskellsecp256k1_v0_1_0_scalar_clear(haskellsecp256k1_v0_1_0_scalar *r) {
     33     r->d[0] = 0;
     34     r->d[1] = 0;
     35     r->d[2] = 0;
     36     r->d[3] = 0;
     37 }
     38 
     39 SECP256K1_INLINE static void haskellsecp256k1_v0_1_0_scalar_set_int(haskellsecp256k1_v0_1_0_scalar *r, unsigned int v) {
     40     r->d[0] = v;
     41     r->d[1] = 0;
     42     r->d[2] = 0;
     43     r->d[3] = 0;
     44 
     45     SECP256K1_SCALAR_VERIFY(r);
     46 }
     47 
     48 SECP256K1_INLINE static unsigned int haskellsecp256k1_v0_1_0_scalar_get_bits(const haskellsecp256k1_v0_1_0_scalar *a, unsigned int offset, unsigned int count) {
     49     SECP256K1_SCALAR_VERIFY(a);
     50     VERIFY_CHECK((offset + count - 1) >> 6 == offset >> 6);
     51 
     52     return (a->d[offset >> 6] >> (offset & 0x3F)) & ((((uint64_t)1) << count) - 1);
     53 }
     54 
     55 SECP256K1_INLINE static unsigned int haskellsecp256k1_v0_1_0_scalar_get_bits_var(const haskellsecp256k1_v0_1_0_scalar *a, unsigned int offset, unsigned int count) {
     56     SECP256K1_SCALAR_VERIFY(a);
     57     VERIFY_CHECK(count < 32);
     58     VERIFY_CHECK(offset + count <= 256);
     59 
     60     if ((offset + count - 1) >> 6 == offset >> 6) {
     61         return haskellsecp256k1_v0_1_0_scalar_get_bits(a, offset, count);
     62     } else {
     63         VERIFY_CHECK((offset >> 6) + 1 < 4);
     64         return ((a->d[offset >> 6] >> (offset & 0x3F)) | (a->d[(offset >> 6) + 1] << (64 - (offset & 0x3F)))) & ((((uint64_t)1) << count) - 1);
     65     }
     66 }
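
A minimal standalone sketch of the cross-limb read performed by _scalar_get_bits_var above: the helper below (a hypothetical stand-in operating on a bare uint64_t[4] rather than the scalar type) stitches two adjacent limbs together whenever the requested bit window straddles a 64-bit boundary, mirroring the expression in the function. Demo code only.

#include <assert.h>
#include <stdint.h>

static unsigned int get_bits_var(const uint64_t d[4], unsigned int offset, unsigned int count) {
    if ((offset + count - 1) >> 6 == offset >> 6) {
        /* window lies within one limb */
        return (unsigned int)((d[offset >> 6] >> (offset & 0x3F)) & ((((uint64_t)1) << count) - 1));
    }
    /* window straddles two limbs: combine the top of one with the bottom of the next */
    return (unsigned int)(((d[offset >> 6] >> (offset & 0x3F))
            | (d[(offset >> 6) + 1] << (64 - (offset & 0x3F))))
            & ((((uint64_t)1) << count) - 1));
}

int main(void) {
    /* limb 0 has bit 63 set, limb 1 has bit 0 set, i.e. bits 63 and 64 of the value */
    const uint64_t d[4] = { 0x8000000000000000ULL, 1, 0, 0 };
    assert(get_bits_var(d, 63, 2) == 3);  /* window spans bits 63..64 across two limbs */
    assert(get_bits_var(d, 0, 8) == 0);   /* low byte is zero */
    return 0;
}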
     67 
     68 SECP256K1_INLINE static int haskellsecp256k1_v0_1_0_scalar_check_overflow(const haskellsecp256k1_v0_1_0_scalar *a) {
     69     int yes = 0;
     70     int no = 0;
     71     no |= (a->d[3] < SECP256K1_N_3); /* No need for a > check. */
     72     no |= (a->d[2] < SECP256K1_N_2);
     73     yes |= (a->d[2] > SECP256K1_N_2) & ~no;
     74     no |= (a->d[1] < SECP256K1_N_1);
     75     yes |= (a->d[1] > SECP256K1_N_1) & ~no;
     76     yes |= (a->d[0] >= SECP256K1_N_0) & ~no;
     77     return yes;
     78 }
     79 
     80 SECP256K1_INLINE static int haskellsecp256k1_v0_1_0_scalar_reduce(haskellsecp256k1_v0_1_0_scalar *r, unsigned int overflow) {
     81     haskellsecp256k1_v0_1_0_uint128 t;
     82     VERIFY_CHECK(overflow <= 1);
     83 
     84     haskellsecp256k1_v0_1_0_u128_from_u64(&t, r->d[0]);
     85     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, overflow * SECP256K1_N_C_0);
     86     r->d[0] = haskellsecp256k1_v0_1_0_u128_to_u64(&t); haskellsecp256k1_v0_1_0_u128_rshift(&t, 64);
     87     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, r->d[1]);
     88     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, overflow * SECP256K1_N_C_1);
     89     r->d[1] = haskellsecp256k1_v0_1_0_u128_to_u64(&t); haskellsecp256k1_v0_1_0_u128_rshift(&t, 64);
     90     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, r->d[2]);
     91     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, overflow * SECP256K1_N_C_2);
     92     r->d[2] = haskellsecp256k1_v0_1_0_u128_to_u64(&t); haskellsecp256k1_v0_1_0_u128_rshift(&t, 64);
     93     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, r->d[3]);
     94     r->d[3] = haskellsecp256k1_v0_1_0_u128_to_u64(&t);
     95 
     96     SECP256K1_SCALAR_VERIFY(r);
     97     return overflow;
     98 }
     99 
    100 static int haskellsecp256k1_v0_1_0_scalar_add(haskellsecp256k1_v0_1_0_scalar *r, const haskellsecp256k1_v0_1_0_scalar *a, const haskellsecp256k1_v0_1_0_scalar *b) {
    101     int overflow;
    102     haskellsecp256k1_v0_1_0_uint128 t;
    103     SECP256K1_SCALAR_VERIFY(a);
    104     SECP256K1_SCALAR_VERIFY(b);
    105 
    106     haskellsecp256k1_v0_1_0_u128_from_u64(&t, a->d[0]);
    107     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, b->d[0]);
    108     r->d[0] = haskellsecp256k1_v0_1_0_u128_to_u64(&t); haskellsecp256k1_v0_1_0_u128_rshift(&t, 64);
    109     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, a->d[1]);
    110     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, b->d[1]);
    111     r->d[1] = haskellsecp256k1_v0_1_0_u128_to_u64(&t); haskellsecp256k1_v0_1_0_u128_rshift(&t, 64);
    112     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, a->d[2]);
    113     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, b->d[2]);
    114     r->d[2] = haskellsecp256k1_v0_1_0_u128_to_u64(&t); haskellsecp256k1_v0_1_0_u128_rshift(&t, 64);
    115     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, a->d[3]);
    116     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, b->d[3]);
    117     r->d[3] = haskellsecp256k1_v0_1_0_u128_to_u64(&t); haskellsecp256k1_v0_1_0_u128_rshift(&t, 64);
    118     overflow = haskellsecp256k1_v0_1_0_u128_to_u64(&t) + haskellsecp256k1_v0_1_0_scalar_check_overflow(r);
    119     VERIFY_CHECK(overflow == 0 || overflow == 1);
    120     haskellsecp256k1_v0_1_0_scalar_reduce(r, overflow);
    121 
    122     SECP256K1_SCALAR_VERIFY(r);
    123     return overflow;
    124 }
    125 
    126 static void haskellsecp256k1_v0_1_0_scalar_cadd_bit(haskellsecp256k1_v0_1_0_scalar *r, unsigned int bit, int flag) {
    127     haskellsecp256k1_v0_1_0_uint128 t;
    128     volatile int vflag = flag;
    129     SECP256K1_SCALAR_VERIFY(r);
    130     VERIFY_CHECK(bit < 256);
    131 
    132     bit += ((uint32_t) vflag - 1) & 0x100;  /* forcing (bit >> 6) > 3 makes this a noop */
    133     haskellsecp256k1_v0_1_0_u128_from_u64(&t, r->d[0]);
    134     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, ((uint64_t)((bit >> 6) == 0)) << (bit & 0x3F));
    135     r->d[0] = haskellsecp256k1_v0_1_0_u128_to_u64(&t); haskellsecp256k1_v0_1_0_u128_rshift(&t, 64);
    136     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, r->d[1]);
    137     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, ((uint64_t)((bit >> 6) == 1)) << (bit & 0x3F));
    138     r->d[1] = haskellsecp256k1_v0_1_0_u128_to_u64(&t); haskellsecp256k1_v0_1_0_u128_rshift(&t, 64);
    139     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, r->d[2]);
    140     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, ((uint64_t)((bit >> 6) == 2)) << (bit & 0x3F));
    141     r->d[2] = haskellsecp256k1_v0_1_0_u128_to_u64(&t); haskellsecp256k1_v0_1_0_u128_rshift(&t, 64);
    142     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, r->d[3]);
    143     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, ((uint64_t)((bit >> 6) == 3)) << (bit & 0x3F));
    144     r->d[3] = haskellsecp256k1_v0_1_0_u128_to_u64(&t);
    145 
    146     SECP256K1_SCALAR_VERIFY(r);
    147     VERIFY_CHECK(haskellsecp256k1_v0_1_0_u128_hi_u64(&t) == 0);
    148 }
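
A standalone sketch of the flag trick in the function above: when flag is zero, adding ((uint32_t)flag - 1) & 0x100 moves bit into the 256..511 range, so every (bit >> 6) == k comparison fails and no limb is changed, all without branching on flag. Hypothetical demo code.

#include <assert.h>
#include <stdint.h>

int main(void) {
    unsigned int bit = 37;
    int flag;
    for (flag = 0; flag <= 1; flag++) {
        unsigned int b = bit + (((uint32_t)flag - 1) & 0x100);
        int touched = ((b >> 6) == 0) | ((b >> 6) == 1) | ((b >> 6) == 2) | ((b >> 6) == 3);
        assert(touched == flag);   /* a limb would be modified only when flag is set */
    }
    return 0;
}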
    149 
    150 static void haskellsecp256k1_v0_1_0_scalar_set_b32(haskellsecp256k1_v0_1_0_scalar *r, const unsigned char *b32, int *overflow) {
    151     int over;
    152     r->d[0] = haskellsecp256k1_v0_1_0_read_be64(&b32[24]);
    153     r->d[1] = haskellsecp256k1_v0_1_0_read_be64(&b32[16]);
    154     r->d[2] = haskellsecp256k1_v0_1_0_read_be64(&b32[8]);
    155     r->d[3] = haskellsecp256k1_v0_1_0_read_be64(&b32[0]);
    156     over = haskellsecp256k1_v0_1_0_scalar_reduce(r, haskellsecp256k1_v0_1_0_scalar_check_overflow(r));
    157     if (overflow) {
    158         *overflow = over;
    159     }
    160 
    161     SECP256K1_SCALAR_VERIFY(r);
    162 }
    163 
    164 static void haskellsecp256k1_v0_1_0_scalar_get_b32(unsigned char *bin, const haskellsecp256k1_v0_1_0_scalar* a) {
    165     SECP256K1_SCALAR_VERIFY(a);
    166 
    167     haskellsecp256k1_v0_1_0_write_be64(&bin[0],  a->d[3]);
    168     haskellsecp256k1_v0_1_0_write_be64(&bin[8],  a->d[2]);
    169     haskellsecp256k1_v0_1_0_write_be64(&bin[16], a->d[1]);
    170     haskellsecp256k1_v0_1_0_write_be64(&bin[24], a->d[0]);
    171 }
    172 
    173 SECP256K1_INLINE static int haskellsecp256k1_v0_1_0_scalar_is_zero(const haskellsecp256k1_v0_1_0_scalar *a) {
    174     SECP256K1_SCALAR_VERIFY(a);
    175 
    176     return (a->d[0] | a->d[1] | a->d[2] | a->d[3]) == 0;
    177 }
    178 
    179 static void haskellsecp256k1_v0_1_0_scalar_negate(haskellsecp256k1_v0_1_0_scalar *r, const haskellsecp256k1_v0_1_0_scalar *a) {
    180     uint64_t nonzero = 0xFFFFFFFFFFFFFFFFULL * (haskellsecp256k1_v0_1_0_scalar_is_zero(a) == 0);
    181     haskellsecp256k1_v0_1_0_uint128 t;
    182     SECP256K1_SCALAR_VERIFY(a);
    183 
    184     haskellsecp256k1_v0_1_0_u128_from_u64(&t, ~a->d[0]);
    185     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, SECP256K1_N_0 + 1);
    186     r->d[0] = haskellsecp256k1_v0_1_0_u128_to_u64(&t) & nonzero; haskellsecp256k1_v0_1_0_u128_rshift(&t, 64);
    187     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, ~a->d[1]);
    188     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, SECP256K1_N_1);
    189     r->d[1] = haskellsecp256k1_v0_1_0_u128_to_u64(&t) & nonzero; haskellsecp256k1_v0_1_0_u128_rshift(&t, 64);
    190     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, ~a->d[2]);
    191     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, SECP256K1_N_2);
    192     r->d[2] = haskellsecp256k1_v0_1_0_u128_to_u64(&t) & nonzero; haskellsecp256k1_v0_1_0_u128_rshift(&t, 64);
    193     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, ~a->d[3]);
    194     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, SECP256K1_N_3);
    195     r->d[3] = haskellsecp256k1_v0_1_0_u128_to_u64(&t) & nonzero;
    196 
    197     SECP256K1_SCALAR_VERIFY(r);
    198 }
    199 
    200 static void haskellsecp256k1_v0_1_0_scalar_half(haskellsecp256k1_v0_1_0_scalar *r, const haskellsecp256k1_v0_1_0_scalar *a) {
    201     /* Writing `/` for field division and `//` for integer division, we compute
    202      *
    203      *   a/2 = (a - (a&1))/2 + (a&1)/2
    204      *       = (a >> 1) + (a&1 ?    1/2 : 0)
    205      *       = (a >> 1) + (a&1 ? n//2+1 : 0),
    206      *
    207      * where n is the group order and in the last equality we have used 1/2 = n//2+1 (mod n).
    208      * For n//2, we have the constants SECP256K1_N_H_0, ...
    209      *
    210      * This sum does not overflow. The most extreme case is a = -2, the largest odd scalar. Here:
    211      * - the left summand is:  a >> 1 = (a - a&1)/2 = (n-2-1)//2           = (n-3)//2
    212      * - the right summand is: a&1 ? n//2+1 : 0 = n//2+1 = (n-1)//2 + 2//2 = (n+1)//2
    213      * Together they sum to (n-3)//2 + (n+1)//2 = (2n-2)//2 = n - 1, which is less than n.
    214      */
    215     uint64_t mask = -(uint64_t)(a->d[0] & 1U);
    216     haskellsecp256k1_v0_1_0_uint128 t;
    217     SECP256K1_SCALAR_VERIFY(a);
    218 
    219     haskellsecp256k1_v0_1_0_u128_from_u64(&t, (a->d[0] >> 1) | (a->d[1] << 63));
    220     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, (SECP256K1_N_H_0 + 1U) & mask);
    221     r->d[0] = haskellsecp256k1_v0_1_0_u128_to_u64(&t); haskellsecp256k1_v0_1_0_u128_rshift(&t, 64);
    222     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, (a->d[1] >> 1) | (a->d[2] << 63));
    223     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, SECP256K1_N_H_1 & mask);
    224     r->d[1] = haskellsecp256k1_v0_1_0_u128_to_u64(&t); haskellsecp256k1_v0_1_0_u128_rshift(&t, 64);
    225     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, (a->d[2] >> 1) | (a->d[3] << 63));
    226     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, SECP256K1_N_H_2 & mask);
    227     r->d[2] = haskellsecp256k1_v0_1_0_u128_to_u64(&t); haskellsecp256k1_v0_1_0_u128_rshift(&t, 64);
    228     r->d[3] = haskellsecp256k1_v0_1_0_u128_to_u64(&t) + (a->d[3] >> 1) + (SECP256K1_N_H_3 & mask);
    229 #ifdef VERIFY
    230     /* The line above only computed the bottom 64 bits of r->d[3]; redo the computation
    231      * in full 128 bits to make sure the top 64 bits are indeed zero. */
    232     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, a->d[3] >> 1);
    233     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, SECP256K1_N_H_3 & mask);
    234     haskellsecp256k1_v0_1_0_u128_rshift(&t, 64);
    235     VERIFY_CHECK(haskellsecp256k1_v0_1_0_u128_to_u64(&t) == 0);
    236 
    237     SECP256K1_SCALAR_VERIFY(r);
    238 #endif
    239 }
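
The identity in the comment above, checked exhaustively at toy size: for any odd modulus (13 here, standing in for the group order), the inverse of 2 is n/2 + 1, so (a >> 1) + (a & 1 ? n/2 + 1 : 0) halves a modulo n and the sum never reaches n. Hypothetical demo code, not the library's code path.

#include <assert.h>
#include <stdint.h>

int main(void) {
    const uint32_t n = 13;              /* any odd modulus works; the group order is odd too */
    const uint32_t half = n / 2 + 1;    /* multiplicative inverse of 2 mod n */
    uint32_t a;
    assert((2 * half) % n == 1);
    for (a = 0; a < n; a++) {
        uint32_t h = (a >> 1) + ((a & 1) ? half : 0);
        assert(h < n);                  /* the sum stays below the modulus */
        assert((2 * h) % n == a);       /* h really is a/2 mod n */
    }
    return 0;
}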
    240 
    241 SECP256K1_INLINE static int haskellsecp256k1_v0_1_0_scalar_is_one(const haskellsecp256k1_v0_1_0_scalar *a) {
    242     SECP256K1_SCALAR_VERIFY(a);
    243 
    244     return ((a->d[0] ^ 1) | a->d[1] | a->d[2] | a->d[3]) == 0;
    245 }
    246 
    247 static int haskellsecp256k1_v0_1_0_scalar_is_high(const haskellsecp256k1_v0_1_0_scalar *a) {
    248     int yes = 0;
    249     int no = 0;
    250     SECP256K1_SCALAR_VERIFY(a);
    251 
    252     no |= (a->d[3] < SECP256K1_N_H_3);
    253     yes |= (a->d[3] > SECP256K1_N_H_3) & ~no;
    254     no |= (a->d[2] < SECP256K1_N_H_2) & ~yes; /* No need for a > check. */
    255     no |= (a->d[1] < SECP256K1_N_H_1) & ~yes;
    256     yes |= (a->d[1] > SECP256K1_N_H_1) & ~no;
    257     yes |= (a->d[0] > SECP256K1_N_H_0) & ~no;
    258     return yes;
    259 }
    260 
    261 static int haskellsecp256k1_v0_1_0_scalar_cond_negate(haskellsecp256k1_v0_1_0_scalar *r, int flag) {
    262     /* If we are flag = 0, mask = 00...00 and this is a no-op;
    263      * if we are flag = 1, mask = 11...11 and this is identical to haskellsecp256k1_v0_1_0_scalar_negate */
    264     volatile int vflag = flag;
    265     uint64_t mask = -vflag;
    266     uint64_t nonzero = (haskellsecp256k1_v0_1_0_scalar_is_zero(r) != 0) - 1;
    267     haskellsecp256k1_v0_1_0_uint128 t;
    268     SECP256K1_SCALAR_VERIFY(r);
    269 
    270     haskellsecp256k1_v0_1_0_u128_from_u64(&t, r->d[0] ^ mask);
    271     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, (SECP256K1_N_0 + 1) & mask);
    272     r->d[0] = haskellsecp256k1_v0_1_0_u128_to_u64(&t) & nonzero; haskellsecp256k1_v0_1_0_u128_rshift(&t, 64);
    273     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, r->d[1] ^ mask);
    274     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, SECP256K1_N_1 & mask);
    275     r->d[1] = haskellsecp256k1_v0_1_0_u128_to_u64(&t) & nonzero; haskellsecp256k1_v0_1_0_u128_rshift(&t, 64);
    276     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, r->d[2] ^ mask);
    277     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, SECP256K1_N_2 & mask);
    278     r->d[2] = haskellsecp256k1_v0_1_0_u128_to_u64(&t) & nonzero; haskellsecp256k1_v0_1_0_u128_rshift(&t, 64);
    279     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, r->d[3] ^ mask);
    280     haskellsecp256k1_v0_1_0_u128_accum_u64(&t, SECP256K1_N_3 & mask);
    281     r->d[3] = haskellsecp256k1_v0_1_0_u128_to_u64(&t) & nonzero;
    282 
    283     SECP256K1_SCALAR_VERIFY(r);
    284     return 2 * (mask == 0) - 1;
    285 }
    286 
    287 /* Inspired by the macros in OpenSSL's crypto/bn/asm/x86_64-gcc.c. */
    288 
    289 /** Add a*b to the number defined by (c0,c1,c2). c2 must never overflow. */
    290 #define muladd(a,b) { \
    291     uint64_t tl, th; \
    292     { \
    293         haskellsecp256k1_v0_1_0_uint128 t; \
    294         haskellsecp256k1_v0_1_0_u128_mul(&t, a, b); \
    295         th = haskellsecp256k1_v0_1_0_u128_hi_u64(&t);  /* at most 0xFFFFFFFFFFFFFFFE */ \
    296         tl = haskellsecp256k1_v0_1_0_u128_to_u64(&t); \
    297     } \
    298     c0 += tl;                 /* overflow is handled on the next line */ \
    299     th += (c0 < tl);          /* at most 0xFFFFFFFFFFFFFFFF */ \
    300     c1 += th;                 /* overflow is handled on the next line */ \
    301     c2 += (c1 < th);          /* never overflows by contract (verified in the next line) */ \
    302     VERIFY_CHECK((c1 >= th) || (c2 != 0)); \
    303 }
    304 
    305 /** Add a*b to the number defined by (c0,c1). c1 must never overflow. */
    306 #define muladd_fast(a,b) { \
    307     uint64_t tl, th; \
    308     { \
    309         haskellsecp256k1_v0_1_0_uint128 t; \
    310         haskellsecp256k1_v0_1_0_u128_mul(&t, a, b); \
    311         th = haskellsecp256k1_v0_1_0_u128_hi_u64(&t);  /* at most 0xFFFFFFFFFFFFFFFE */ \
    312         tl = haskellsecp256k1_v0_1_0_u128_to_u64(&t); \
    313     } \
    314     c0 += tl;                 /* overflow is handled on the next line */ \
    315     th += (c0 < tl);          /* at most 0xFFFFFFFFFFFFFFFF */ \
    316     c1 += th;                 /* never overflows by contract (verified in the next line) */ \
    317     VERIFY_CHECK(c1 >= th); \
    318 }
    319 
    320 /** Add a to the number defined by (c0,c1,c2). c2 must never overflow. */
    321 #define sumadd(a) { \
    322     unsigned int over; \
    323     c0 += (a);                  /* overflow is handled on the next line */ \
    324     over = (c0 < (a));         \
    325     c1 += over;                 /* overflow is handled on the next line */ \
    326     c2 += (c1 < over);          /* never overflows by contract */ \
    327 }
    328 
    329 /** Add a to the number defined by (c0,c1). c1 must never overflow, c2 must be zero. */
    330 #define sumadd_fast(a) { \
    331     c0 += (a);                 /* overflow is handled on the next line */ \
     332     c1 += (c0 < (a));          /* never overflows by contract (verified in the next line) */ \
    333     VERIFY_CHECK((c1 != 0) | (c0 >= (a))); \
    334     VERIFY_CHECK(c2 == 0); \
    335 }
    336 
    337 /** Extract the lowest 64 bits of (c0,c1,c2) into n, and left shift the number 64 bits. */
    338 #define extract(n) { \
    339     (n) = c0; \
    340     c0 = c1; \
    341     c1 = c2; \
    342     c2 = 0; \
    343 }
    344 
    345 /** Extract the lowest 64 bits of (c0,c1,c2) into n, and left shift the number 64 bits. c2 is required to be zero. */
    346 #define extract_fast(n) { \
    347     (n) = c0; \
    348     c0 = c1; \
    349     c1 = 0; \
    350     VERIFY_CHECK(c2 == 0); \
    351 }
    352 
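A standalone sketch of the (c0,c1,c2) accumulator contract behind the muladd/sumadd/extract macros above: 64x64 products are added into a 192-bit value kept as three 64-bit words, with carries rippling one word at a time. The loop adds five maximal products and compares against a reference sum; it assumes a GCC/Clang-style compiler providing unsigned __int128 and __builtin_add_overflow, and is demo code only.

#include <assert.h>
#include <stdint.h>

int main(void) {
    uint64_t c0 = 0, c1 = 0, c2 = 0;      /* the three-word accumulator */
    unsigned __int128 ref = 0;            /* reference: low 128 bits of the sum */
    uint64_t ref_hi = 0;                  /* reference: bits 128 and up */
    int i;
    for (i = 0; i < 5; i++) {
        unsigned __int128 t = (unsigned __int128)UINT64_MAX * UINT64_MAX;
        uint64_t tl = (uint64_t)t;
        uint64_t th = (uint64_t)(t >> 64);
        /* body of muladd(UINT64_MAX, UINT64_MAX) from above */
        c0 += tl; th += (c0 < tl);
        c1 += th; c2 += (c1 < th);
        /* reference sum */
        ref_hi += __builtin_add_overflow(ref, t, &ref);
    }
    assert(c0 == (uint64_t)ref);
    assert(c1 == (uint64_t)(ref >> 64));
    assert(c2 == ref_hi);
    return 0;
}
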
    353 static void haskellsecp256k1_v0_1_0_scalar_reduce_512(haskellsecp256k1_v0_1_0_scalar *r, const uint64_t *l) {
    354 #ifdef USE_ASM_X86_64
    355     /* Reduce 512 bits into 385. */
    356     uint64_t m0, m1, m2, m3, m4, m5, m6;
    357     uint64_t p0, p1, p2, p3, p4;
    358     uint64_t c;
    359 
    360     __asm__ __volatile__(
    361     /* Preload. */
    362     "movq 32(%%rsi), %%r11\n"
    363     "movq 40(%%rsi), %%r12\n"
    364     "movq 48(%%rsi), %%r13\n"
    365     "movq 56(%%rsi), %%r14\n"
    366     /* Initialize r8,r9,r10 */
    367     "movq 0(%%rsi), %%r8\n"
    368     "xorq %%r9, %%r9\n"
    369     "xorq %%r10, %%r10\n"
    370     /* (r8,r9) += n0 * c0 */
    371     "movq %8, %%rax\n"
    372     "mulq %%r11\n"
    373     "addq %%rax, %%r8\n"
    374     "adcq %%rdx, %%r9\n"
    375     /* extract m0 */
    376     "movq %%r8, %q0\n"
    377     "xorq %%r8, %%r8\n"
    378     /* (r9,r10) += l1 */
    379     "addq 8(%%rsi), %%r9\n"
    380     "adcq $0, %%r10\n"
    381     /* (r9,r10,r8) += n1 * c0 */
    382     "movq %8, %%rax\n"
    383     "mulq %%r12\n"
    384     "addq %%rax, %%r9\n"
    385     "adcq %%rdx, %%r10\n"
    386     "adcq $0, %%r8\n"
    387     /* (r9,r10,r8) += n0 * c1 */
    388     "movq %9, %%rax\n"
    389     "mulq %%r11\n"
    390     "addq %%rax, %%r9\n"
    391     "adcq %%rdx, %%r10\n"
    392     "adcq $0, %%r8\n"
    393     /* extract m1 */
    394     "movq %%r9, %q1\n"
    395     "xorq %%r9, %%r9\n"
    396     /* (r10,r8,r9) += l2 */
    397     "addq 16(%%rsi), %%r10\n"
    398     "adcq $0, %%r8\n"
    399     "adcq $0, %%r9\n"
    400     /* (r10,r8,r9) += n2 * c0 */
    401     "movq %8, %%rax\n"
    402     "mulq %%r13\n"
    403     "addq %%rax, %%r10\n"
    404     "adcq %%rdx, %%r8\n"
    405     "adcq $0, %%r9\n"
    406     /* (r10,r8,r9) += n1 * c1 */
    407     "movq %9, %%rax\n"
    408     "mulq %%r12\n"
    409     "addq %%rax, %%r10\n"
    410     "adcq %%rdx, %%r8\n"
    411     "adcq $0, %%r9\n"
    412     /* (r10,r8,r9) += n0 */
    413     "addq %%r11, %%r10\n"
    414     "adcq $0, %%r8\n"
    415     "adcq $0, %%r9\n"
    416     /* extract m2 */
    417     "movq %%r10, %q2\n"
    418     "xorq %%r10, %%r10\n"
    419     /* (r8,r9,r10) += l3 */
    420     "addq 24(%%rsi), %%r8\n"
    421     "adcq $0, %%r9\n"
    422     "adcq $0, %%r10\n"
    423     /* (r8,r9,r10) += n3 * c0 */
    424     "movq %8, %%rax\n"
    425     "mulq %%r14\n"
    426     "addq %%rax, %%r8\n"
    427     "adcq %%rdx, %%r9\n"
    428     "adcq $0, %%r10\n"
    429     /* (r8,r9,r10) += n2 * c1 */
    430     "movq %9, %%rax\n"
    431     "mulq %%r13\n"
    432     "addq %%rax, %%r8\n"
    433     "adcq %%rdx, %%r9\n"
    434     "adcq $0, %%r10\n"
    435     /* (r8,r9,r10) += n1 */
    436     "addq %%r12, %%r8\n"
    437     "adcq $0, %%r9\n"
    438     "adcq $0, %%r10\n"
    439     /* extract m3 */
    440     "movq %%r8, %q3\n"
    441     "xorq %%r8, %%r8\n"
    442     /* (r9,r10,r8) += n3 * c1 */
    443     "movq %9, %%rax\n"
    444     "mulq %%r14\n"
    445     "addq %%rax, %%r9\n"
    446     "adcq %%rdx, %%r10\n"
    447     "adcq $0, %%r8\n"
    448     /* (r9,r10,r8) += n2 */
    449     "addq %%r13, %%r9\n"
    450     "adcq $0, %%r10\n"
    451     "adcq $0, %%r8\n"
    452     /* extract m4 */
    453     "movq %%r9, %q4\n"
    454     /* (r10,r8) += n3 */
    455     "addq %%r14, %%r10\n"
    456     "adcq $0, %%r8\n"
    457     /* extract m5 */
    458     "movq %%r10, %q5\n"
    459     /* extract m6 */
    460     "movq %%r8, %q6\n"
    461     : "=&g"(m0), "=&g"(m1), "=&g"(m2), "=g"(m3), "=g"(m4), "=g"(m5), "=g"(m6)
    462     : "S"(l), "i"(SECP256K1_N_C_0), "i"(SECP256K1_N_C_1)
    463     : "rax", "rdx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "cc");
    464 
    465     /* Reduce 385 bits into 258. */
    466     __asm__ __volatile__(
    467     /* Preload */
    468     "movq %q9, %%r11\n"
    469     "movq %q10, %%r12\n"
    470     "movq %q11, %%r13\n"
    471     /* Initialize (r8,r9,r10) */
    472     "movq %q5, %%r8\n"
    473     "xorq %%r9, %%r9\n"
    474     "xorq %%r10, %%r10\n"
    475     /* (r8,r9) += m4 * c0 */
    476     "movq %12, %%rax\n"
    477     "mulq %%r11\n"
    478     "addq %%rax, %%r8\n"
    479     "adcq %%rdx, %%r9\n"
    480     /* extract p0 */
    481     "movq %%r8, %q0\n"
    482     "xorq %%r8, %%r8\n"
    483     /* (r9,r10) += m1 */
    484     "addq %q6, %%r9\n"
    485     "adcq $0, %%r10\n"
    486     /* (r9,r10,r8) += m5 * c0 */
    487     "movq %12, %%rax\n"
    488     "mulq %%r12\n"
    489     "addq %%rax, %%r9\n"
    490     "adcq %%rdx, %%r10\n"
    491     "adcq $0, %%r8\n"
    492     /* (r9,r10,r8) += m4 * c1 */
    493     "movq %13, %%rax\n"
    494     "mulq %%r11\n"
    495     "addq %%rax, %%r9\n"
    496     "adcq %%rdx, %%r10\n"
    497     "adcq $0, %%r8\n"
    498     /* extract p1 */
    499     "movq %%r9, %q1\n"
    500     "xorq %%r9, %%r9\n"
    501     /* (r10,r8,r9) += m2 */
    502     "addq %q7, %%r10\n"
    503     "adcq $0, %%r8\n"
    504     "adcq $0, %%r9\n"
    505     /* (r10,r8,r9) += m6 * c0 */
    506     "movq %12, %%rax\n"
    507     "mulq %%r13\n"
    508     "addq %%rax, %%r10\n"
    509     "adcq %%rdx, %%r8\n"
    510     "adcq $0, %%r9\n"
    511     /* (r10,r8,r9) += m5 * c1 */
    512     "movq %13, %%rax\n"
    513     "mulq %%r12\n"
    514     "addq %%rax, %%r10\n"
    515     "adcq %%rdx, %%r8\n"
    516     "adcq $0, %%r9\n"
    517     /* (r10,r8,r9) += m4 */
    518     "addq %%r11, %%r10\n"
    519     "adcq $0, %%r8\n"
    520     "adcq $0, %%r9\n"
    521     /* extract p2 */
    522     "movq %%r10, %q2\n"
    523     /* (r8,r9) += m3 */
    524     "addq %q8, %%r8\n"
    525     "adcq $0, %%r9\n"
    526     /* (r8,r9) += m6 * c1 */
    527     "movq %13, %%rax\n"
    528     "mulq %%r13\n"
    529     "addq %%rax, %%r8\n"
    530     "adcq %%rdx, %%r9\n"
    531     /* (r8,r9) += m5 */
    532     "addq %%r12, %%r8\n"
    533     "adcq $0, %%r9\n"
    534     /* extract p3 */
    535     "movq %%r8, %q3\n"
    536     /* (r9) += m6 */
    537     "addq %%r13, %%r9\n"
    538     /* extract p4 */
    539     "movq %%r9, %q4\n"
    540     : "=&g"(p0), "=&g"(p1), "=&g"(p2), "=g"(p3), "=g"(p4)
    541     : "g"(m0), "g"(m1), "g"(m2), "g"(m3), "g"(m4), "g"(m5), "g"(m6), "i"(SECP256K1_N_C_0), "i"(SECP256K1_N_C_1)
    542     : "rax", "rdx", "r8", "r9", "r10", "r11", "r12", "r13", "cc");
    543 
    544     /* Reduce 258 bits into 256. */
    545     __asm__ __volatile__(
    546     /* Preload */
    547     "movq %q5, %%r10\n"
    548     /* (rax,rdx) = p4 * c0 */
    549     "movq %7, %%rax\n"
    550     "mulq %%r10\n"
    551     /* (rax,rdx) += p0 */
    552     "addq %q1, %%rax\n"
    553     "adcq $0, %%rdx\n"
    554     /* extract r0 */
    555     "movq %%rax, 0(%q6)\n"
    556     /* Move to (r8,r9) */
    557     "movq %%rdx, %%r8\n"
    558     "xorq %%r9, %%r9\n"
    559     /* (r8,r9) += p1 */
    560     "addq %q2, %%r8\n"
    561     "adcq $0, %%r9\n"
    562     /* (r8,r9) += p4 * c1 */
    563     "movq %8, %%rax\n"
    564     "mulq %%r10\n"
    565     "addq %%rax, %%r8\n"
    566     "adcq %%rdx, %%r9\n"
    567     /* Extract r1 */
    568     "movq %%r8, 8(%q6)\n"
    569     "xorq %%r8, %%r8\n"
    570     /* (r9,r8) += p4 */
    571     "addq %%r10, %%r9\n"
    572     "adcq $0, %%r8\n"
    573     /* (r9,r8) += p2 */
    574     "addq %q3, %%r9\n"
    575     "adcq $0, %%r8\n"
    576     /* Extract r2 */
    577     "movq %%r9, 16(%q6)\n"
    578     "xorq %%r9, %%r9\n"
    579     /* (r8,r9) += p3 */
    580     "addq %q4, %%r8\n"
    581     "adcq $0, %%r9\n"
    582     /* Extract r3 */
    583     "movq %%r8, 24(%q6)\n"
    584     /* Extract c */
    585     "movq %%r9, %q0\n"
    586     : "=g"(c)
    587     : "g"(p0), "g"(p1), "g"(p2), "g"(p3), "g"(p4), "D"(r), "i"(SECP256K1_N_C_0), "i"(SECP256K1_N_C_1)
    588     : "rax", "rdx", "r8", "r9", "r10", "cc", "memory");
    589 #else
    590     haskellsecp256k1_v0_1_0_uint128 c128;
    591     uint64_t c, c0, c1, c2;
    592     uint64_t n0 = l[4], n1 = l[5], n2 = l[6], n3 = l[7];
    593     uint64_t m0, m1, m2, m3, m4, m5;
    594     uint32_t m6;
    595     uint64_t p0, p1, p2, p3;
    596     uint32_t p4;
    597 
    598     /* Reduce 512 bits into 385. */
    599     /* m[0..6] = l[0..3] + n[0..3] * SECP256K1_N_C. */
    600     c0 = l[0]; c1 = 0; c2 = 0;
    601     muladd_fast(n0, SECP256K1_N_C_0);
    602     extract_fast(m0);
    603     sumadd_fast(l[1]);
    604     muladd(n1, SECP256K1_N_C_0);
    605     muladd(n0, SECP256K1_N_C_1);
    606     extract(m1);
    607     sumadd(l[2]);
    608     muladd(n2, SECP256K1_N_C_0);
    609     muladd(n1, SECP256K1_N_C_1);
    610     sumadd(n0);
    611     extract(m2);
    612     sumadd(l[3]);
    613     muladd(n3, SECP256K1_N_C_0);
    614     muladd(n2, SECP256K1_N_C_1);
    615     sumadd(n1);
    616     extract(m3);
    617     muladd(n3, SECP256K1_N_C_1);
    618     sumadd(n2);
    619     extract(m4);
    620     sumadd_fast(n3);
    621     extract_fast(m5);
    622     VERIFY_CHECK(c0 <= 1);
    623     m6 = c0;
    624 
    625     /* Reduce 385 bits into 258. */
    626     /* p[0..4] = m[0..3] + m[4..6] * SECP256K1_N_C. */
    627     c0 = m0; c1 = 0; c2 = 0;
    628     muladd_fast(m4, SECP256K1_N_C_0);
    629     extract_fast(p0);
    630     sumadd_fast(m1);
    631     muladd(m5, SECP256K1_N_C_0);
    632     muladd(m4, SECP256K1_N_C_1);
    633     extract(p1);
    634     sumadd(m2);
    635     muladd(m6, SECP256K1_N_C_0);
    636     muladd(m5, SECP256K1_N_C_1);
    637     sumadd(m4);
    638     extract(p2);
    639     sumadd_fast(m3);
    640     muladd_fast(m6, SECP256K1_N_C_1);
    641     sumadd_fast(m5);
    642     extract_fast(p3);
    643     p4 = c0 + m6;
    644     VERIFY_CHECK(p4 <= 2);
    645 
    646     /* Reduce 258 bits into 256. */
    647     /* r[0..3] = p[0..3] + p[4] * SECP256K1_N_C. */
    648     haskellsecp256k1_v0_1_0_u128_from_u64(&c128, p0);
    649     haskellsecp256k1_v0_1_0_u128_accum_mul(&c128, SECP256K1_N_C_0, p4);
    650     r->d[0] = haskellsecp256k1_v0_1_0_u128_to_u64(&c128); haskellsecp256k1_v0_1_0_u128_rshift(&c128, 64);
    651     haskellsecp256k1_v0_1_0_u128_accum_u64(&c128, p1);
    652     haskellsecp256k1_v0_1_0_u128_accum_mul(&c128, SECP256K1_N_C_1, p4);
    653     r->d[1] = haskellsecp256k1_v0_1_0_u128_to_u64(&c128); haskellsecp256k1_v0_1_0_u128_rshift(&c128, 64);
    654     haskellsecp256k1_v0_1_0_u128_accum_u64(&c128, p2);
    655     haskellsecp256k1_v0_1_0_u128_accum_u64(&c128, p4);
    656     r->d[2] = haskellsecp256k1_v0_1_0_u128_to_u64(&c128); haskellsecp256k1_v0_1_0_u128_rshift(&c128, 64);
    657     haskellsecp256k1_v0_1_0_u128_accum_u64(&c128, p3);
    658     r->d[3] = haskellsecp256k1_v0_1_0_u128_to_u64(&c128);
    659     c = haskellsecp256k1_v0_1_0_u128_hi_u64(&c128);
    660 #endif
    661 
    662     /* Final reduction of r. */
    663     haskellsecp256k1_v0_1_0_scalar_reduce(r, c + haskellsecp256k1_v0_1_0_scalar_check_overflow(r));
    664 }
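
A standalone sketch of the folding idea behind the staged reduction above, at toy size: since 2^256 is congruent to 2^256 - n modulo n, and that constant is small (129 bits, the SECP256K1_N_C_* limbs), the high half of a product can be folded down with one multiplication; repeating the fold shrinks 512 bits to 385, then 258, then 256 plus a final conditional subtraction. The demo does the same with a 64-bit value and a modulus just under 2^32; reduce64 is a hypothetical helper, not the library's code path.

#include <assert.h>
#include <stdint.h>

static uint32_t reduce64(uint64_t x, uint32_t m) {
    const uint64_t c = ((uint64_t)1 << 32) - m;   /* 2^32 == c (mod m) */
    /* Fold the high word twice; with c this small the value drops to just
     * over 32 bits, after which a couple of subtractions finish the job. */
    x = (x >> 32) * c + (uint32_t)x;
    x = (x >> 32) * c + (uint32_t)x;
    while (x >= m) x -= m;
    return (uint32_t)x;
}

int main(void) {
    const uint32_t m = 0xFFFFFFFBu;               /* a modulus just under 2^32 */
    uint64_t x = 0x123456789ABCDEF0ULL;
    assert(reduce64(x, m) == (uint32_t)(x % m));
    assert(reduce64(UINT64_MAX, m) == (uint32_t)(UINT64_MAX % m));
    return 0;
}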
    665 
    666 static void haskellsecp256k1_v0_1_0_scalar_mul_512(uint64_t l[8], const haskellsecp256k1_v0_1_0_scalar *a, const haskellsecp256k1_v0_1_0_scalar *b) {
    667 #ifdef USE_ASM_X86_64
    668     const uint64_t *pb = b->d;
    669     __asm__ __volatile__(
    670     /* Preload */
    671     "movq 0(%%rdi), %%r15\n"
    672     "movq 8(%%rdi), %%rbx\n"
    673     "movq 16(%%rdi), %%rcx\n"
    674     "movq 0(%%rdx), %%r11\n"
    675     "movq 8(%%rdx), %%r12\n"
    676     "movq 16(%%rdx), %%r13\n"
    677     "movq 24(%%rdx), %%r14\n"
    678     /* (rax,rdx) = a0 * b0 */
    679     "movq %%r15, %%rax\n"
    680     "mulq %%r11\n"
    681     /* Extract l0 */
    682     "movq %%rax, 0(%%rsi)\n"
    683     /* (r8,r9,r10) = (rdx) */
    684     "movq %%rdx, %%r8\n"
    685     "xorq %%r9, %%r9\n"
    686     "xorq %%r10, %%r10\n"
    687     /* (r8,r9,r10) += a0 * b1 */
    688     "movq %%r15, %%rax\n"
    689     "mulq %%r12\n"
    690     "addq %%rax, %%r8\n"
    691     "adcq %%rdx, %%r9\n"
    692     "adcq $0, %%r10\n"
    693     /* (r8,r9,r10) += a1 * b0 */
    694     "movq %%rbx, %%rax\n"
    695     "mulq %%r11\n"
    696     "addq %%rax, %%r8\n"
    697     "adcq %%rdx, %%r9\n"
    698     "adcq $0, %%r10\n"
    699     /* Extract l1 */
    700     "movq %%r8, 8(%%rsi)\n"
    701     "xorq %%r8, %%r8\n"
    702     /* (r9,r10,r8) += a0 * b2 */
    703     "movq %%r15, %%rax\n"
    704     "mulq %%r13\n"
    705     "addq %%rax, %%r9\n"
    706     "adcq %%rdx, %%r10\n"
    707     "adcq $0, %%r8\n"
    708     /* (r9,r10,r8) += a1 * b1 */
    709     "movq %%rbx, %%rax\n"
    710     "mulq %%r12\n"
    711     "addq %%rax, %%r9\n"
    712     "adcq %%rdx, %%r10\n"
    713     "adcq $0, %%r8\n"
    714     /* (r9,r10,r8) += a2 * b0 */
    715     "movq %%rcx, %%rax\n"
    716     "mulq %%r11\n"
    717     "addq %%rax, %%r9\n"
    718     "adcq %%rdx, %%r10\n"
    719     "adcq $0, %%r8\n"
    720     /* Extract l2 */
    721     "movq %%r9, 16(%%rsi)\n"
    722     "xorq %%r9, %%r9\n"
    723     /* (r10,r8,r9) += a0 * b3 */
    724     "movq %%r15, %%rax\n"
    725     "mulq %%r14\n"
    726     "addq %%rax, %%r10\n"
    727     "adcq %%rdx, %%r8\n"
    728     "adcq $0, %%r9\n"
    729     /* Preload a3 */
    730     "movq 24(%%rdi), %%r15\n"
    731     /* (r10,r8,r9) += a1 * b2 */
    732     "movq %%rbx, %%rax\n"
    733     "mulq %%r13\n"
    734     "addq %%rax, %%r10\n"
    735     "adcq %%rdx, %%r8\n"
    736     "adcq $0, %%r9\n"
    737     /* (r10,r8,r9) += a2 * b1 */
    738     "movq %%rcx, %%rax\n"
    739     "mulq %%r12\n"
    740     "addq %%rax, %%r10\n"
    741     "adcq %%rdx, %%r8\n"
    742     "adcq $0, %%r9\n"
    743     /* (r10,r8,r9) += a3 * b0 */
    744     "movq %%r15, %%rax\n"
    745     "mulq %%r11\n"
    746     "addq %%rax, %%r10\n"
    747     "adcq %%rdx, %%r8\n"
    748     "adcq $0, %%r9\n"
    749     /* Extract l3 */
    750     "movq %%r10, 24(%%rsi)\n"
    751     "xorq %%r10, %%r10\n"
    752     /* (r8,r9,r10) += a1 * b3 */
    753     "movq %%rbx, %%rax\n"
    754     "mulq %%r14\n"
    755     "addq %%rax, %%r8\n"
    756     "adcq %%rdx, %%r9\n"
    757     "adcq $0, %%r10\n"
    758     /* (r8,r9,r10) += a2 * b2 */
    759     "movq %%rcx, %%rax\n"
    760     "mulq %%r13\n"
    761     "addq %%rax, %%r8\n"
    762     "adcq %%rdx, %%r9\n"
    763     "adcq $0, %%r10\n"
    764     /* (r8,r9,r10) += a3 * b1 */
    765     "movq %%r15, %%rax\n"
    766     "mulq %%r12\n"
    767     "addq %%rax, %%r8\n"
    768     "adcq %%rdx, %%r9\n"
    769     "adcq $0, %%r10\n"
    770     /* Extract l4 */
    771     "movq %%r8, 32(%%rsi)\n"
    772     "xorq %%r8, %%r8\n"
    773     /* (r9,r10,r8) += a2 * b3 */
    774     "movq %%rcx, %%rax\n"
    775     "mulq %%r14\n"
    776     "addq %%rax, %%r9\n"
    777     "adcq %%rdx, %%r10\n"
    778     "adcq $0, %%r8\n"
    779     /* (r9,r10,r8) += a3 * b2 */
    780     "movq %%r15, %%rax\n"
    781     "mulq %%r13\n"
    782     "addq %%rax, %%r9\n"
    783     "adcq %%rdx, %%r10\n"
    784     "adcq $0, %%r8\n"
    785     /* Extract l5 */
    786     "movq %%r9, 40(%%rsi)\n"
    787     /* (r10,r8) += a3 * b3 */
    788     "movq %%r15, %%rax\n"
    789     "mulq %%r14\n"
    790     "addq %%rax, %%r10\n"
    791     "adcq %%rdx, %%r8\n"
    792     /* Extract l6 */
    793     "movq %%r10, 48(%%rsi)\n"
    794     /* Extract l7 */
    795     "movq %%r8, 56(%%rsi)\n"
    796     : "+d"(pb)
    797     : "S"(l), "D"(a->d)
    798     : "rax", "rbx", "rcx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "cc", "memory");
    799 #else
    800     /* 160 bit accumulator. */
    801     uint64_t c0 = 0, c1 = 0;
    802     uint32_t c2 = 0;
    803 
    804     /* l[0..7] = a[0..3] * b[0..3]. */
    805     muladd_fast(a->d[0], b->d[0]);
    806     extract_fast(l[0]);
    807     muladd(a->d[0], b->d[1]);
    808     muladd(a->d[1], b->d[0]);
    809     extract(l[1]);
    810     muladd(a->d[0], b->d[2]);
    811     muladd(a->d[1], b->d[1]);
    812     muladd(a->d[2], b->d[0]);
    813     extract(l[2]);
    814     muladd(a->d[0], b->d[3]);
    815     muladd(a->d[1], b->d[2]);
    816     muladd(a->d[2], b->d[1]);
    817     muladd(a->d[3], b->d[0]);
    818     extract(l[3]);
    819     muladd(a->d[1], b->d[3]);
    820     muladd(a->d[2], b->d[2]);
    821     muladd(a->d[3], b->d[1]);
    822     extract(l[4]);
    823     muladd(a->d[2], b->d[3]);
    824     muladd(a->d[3], b->d[2]);
    825     extract(l[5]);
    826     muladd_fast(a->d[3], b->d[3]);
    827     extract_fast(l[6]);
    828     VERIFY_CHECK(c1 == 0);
    829     l[7] = c0;
    830 #endif
    831 }
    832 
    833 #undef sumadd
    834 #undef sumadd_fast
    835 #undef muladd
    836 #undef muladd_fast
    837 #undef extract
    838 #undef extract_fast
    839 
    840 static void haskellsecp256k1_v0_1_0_scalar_mul(haskellsecp256k1_v0_1_0_scalar *r, const haskellsecp256k1_v0_1_0_scalar *a, const haskellsecp256k1_v0_1_0_scalar *b) {
    841     uint64_t l[8];
    842     SECP256K1_SCALAR_VERIFY(a);
    843     SECP256K1_SCALAR_VERIFY(b);
    844 
    845     haskellsecp256k1_v0_1_0_scalar_mul_512(l, a, b);
    846     haskellsecp256k1_v0_1_0_scalar_reduce_512(r, l);
    847 
    848     SECP256K1_SCALAR_VERIFY(r);
    849 }
    850 
    851 static void haskellsecp256k1_v0_1_0_scalar_split_128(haskellsecp256k1_v0_1_0_scalar *r1, haskellsecp256k1_v0_1_0_scalar *r2, const haskellsecp256k1_v0_1_0_scalar *k) {
    852     SECP256K1_SCALAR_VERIFY(k);
    853 
    854     r1->d[0] = k->d[0];
    855     r1->d[1] = k->d[1];
    856     r1->d[2] = 0;
    857     r1->d[3] = 0;
    858     r2->d[0] = k->d[2];
    859     r2->d[1] = k->d[3];
    860     r2->d[2] = 0;
    861     r2->d[3] = 0;
    862 
    863     SECP256K1_SCALAR_VERIFY(r1);
    864     SECP256K1_SCALAR_VERIFY(r2);
    865 }
    866 
    867 SECP256K1_INLINE static int haskellsecp256k1_v0_1_0_scalar_eq(const haskellsecp256k1_v0_1_0_scalar *a, const haskellsecp256k1_v0_1_0_scalar *b) {
    868     SECP256K1_SCALAR_VERIFY(a);
    869     SECP256K1_SCALAR_VERIFY(b);
    870 
    871     return ((a->d[0] ^ b->d[0]) | (a->d[1] ^ b->d[1]) | (a->d[2] ^ b->d[2]) | (a->d[3] ^ b->d[3])) == 0;
    872 }
    873 
    874 SECP256K1_INLINE static void haskellsecp256k1_v0_1_0_scalar_mul_shift_var(haskellsecp256k1_v0_1_0_scalar *r, const haskellsecp256k1_v0_1_0_scalar *a, const haskellsecp256k1_v0_1_0_scalar *b, unsigned int shift) {
    875     uint64_t l[8];
    876     unsigned int shiftlimbs;
    877     unsigned int shiftlow;
    878     unsigned int shifthigh;
    879     SECP256K1_SCALAR_VERIFY(a);
    880     SECP256K1_SCALAR_VERIFY(b);
    881     VERIFY_CHECK(shift >= 256);
    882 
    883     haskellsecp256k1_v0_1_0_scalar_mul_512(l, a, b);
    884     shiftlimbs = shift >> 6;
    885     shiftlow = shift & 0x3F;
    886     shifthigh = 64 - shiftlow;
    887     r->d[0] = shift < 512 ? (l[0 + shiftlimbs] >> shiftlow | (shift < 448 && shiftlow ? (l[1 + shiftlimbs] << shifthigh) : 0)) : 0;
    888     r->d[1] = shift < 448 ? (l[1 + shiftlimbs] >> shiftlow | (shift < 384 && shiftlow ? (l[2 + shiftlimbs] << shifthigh) : 0)) : 0;
    889     r->d[2] = shift < 384 ? (l[2 + shiftlimbs] >> shiftlow | (shift < 320 && shiftlow ? (l[3 + shiftlimbs] << shifthigh) : 0)) : 0;
    890     r->d[3] = shift < 320 ? (l[3 + shiftlimbs] >> shiftlow) : 0;
    891     haskellsecp256k1_v0_1_0_scalar_cadd_bit(r, 0, (l[(shift - 1) >> 6] >> ((shift - 1) & 0x3f)) & 1);
    892 
    893     SECP256K1_SCALAR_VERIFY(r);
    894 }
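
A standalone sketch of the final rounding step above: after shifting the 512-bit product right by shift, _scalar_cadd_bit conditionally adds 1 based on the bit just below the cut, which rounds to nearest rather than truncating. The same identity at single-word size, as hypothetical demo code:

#include <assert.h>
#include <stdint.h>

int main(void) {
    const unsigned int shift = 4;
    uint64_t x;
    for (x = 0; x < 64; x++) {
        /* shift right, then add the bit just below the cut */
        uint64_t rounded = (x >> shift) + ((x >> (shift - 1)) & 1);
        /* identical to round-half-up of x / 16 */
        assert(rounded == (x + 8) >> shift);
    }
    return 0;
}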
    895 
    896 static SECP256K1_INLINE void haskellsecp256k1_v0_1_0_scalar_cmov(haskellsecp256k1_v0_1_0_scalar *r, const haskellsecp256k1_v0_1_0_scalar *a, int flag) {
    897     uint64_t mask0, mask1;
    898     volatile int vflag = flag;
    899     SECP256K1_SCALAR_VERIFY(a);
    900     SECP256K1_CHECKMEM_CHECK_VERIFY(r->d, sizeof(r->d));
    901 
    902     mask0 = vflag + ~((uint64_t)0);
    903     mask1 = ~mask0;
    904     r->d[0] = (r->d[0] & mask0) | (a->d[0] & mask1);
    905     r->d[1] = (r->d[1] & mask0) | (a->d[1] & mask1);
    906     r->d[2] = (r->d[2] & mask0) | (a->d[2] & mask1);
    907     r->d[3] = (r->d[3] & mask0) | (a->d[3] & mask1);
    908 
    909     SECP256K1_SCALAR_VERIFY(r);
    910 }
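
A standalone sketch of the branchless select above: mask0 = flag + ~0 is flag - 1, i.e. all ones when flag is 0 and zero when flag is 1, so (r & mask0) | (a & ~mask0) keeps r or takes a without a data-dependent branch. cmov64 is a hypothetical single-word stand-in for the limb-wise code above; demo only.

#include <assert.h>
#include <stdint.h>

static uint64_t cmov64(uint64_t r, uint64_t a, int flag) {
    uint64_t mask0 = (uint64_t)flag + ~((uint64_t)0);  /* flag - 1 */
    uint64_t mask1 = ~mask0;
    return (r & mask0) | (a & mask1);
}

int main(void) {
    assert(cmov64(0x1111, 0x2222, 0) == 0x1111);  /* flag clear: keep r */
    assert(cmov64(0x1111, 0x2222, 1) == 0x2222);  /* flag set: take a */
    return 0;
}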
    911 
    912 static void haskellsecp256k1_v0_1_0_scalar_from_signed62(haskellsecp256k1_v0_1_0_scalar *r, const haskellsecp256k1_v0_1_0_modinv64_signed62 *a) {
    913     const uint64_t a0 = a->v[0], a1 = a->v[1], a2 = a->v[2], a3 = a->v[3], a4 = a->v[4];
    914 
    915     /* The output from haskellsecp256k1_v0_1_0_modinv64{_var} should be normalized to range [0,modulus), and
    916      * have limbs in [0,2^62). The modulus is < 2^256, so the top limb must be below 2^(256-62*4).
    917      */
    918     VERIFY_CHECK(a0 >> 62 == 0);
    919     VERIFY_CHECK(a1 >> 62 == 0);
    920     VERIFY_CHECK(a2 >> 62 == 0);
    921     VERIFY_CHECK(a3 >> 62 == 0);
    922     VERIFY_CHECK(a4 >> 8 == 0);
    923 
    924     r->d[0] = a0      | a1 << 62;
    925     r->d[1] = a1 >> 2 | a2 << 60;
    926     r->d[2] = a2 >> 4 | a3 << 58;
    927     r->d[3] = a3 >> 6 | a4 << 56;
    928 
    929     SECP256K1_SCALAR_VERIFY(r);
    930 }
    931 
    932 static void haskellsecp256k1_v0_1_0_scalar_to_signed62(haskellsecp256k1_v0_1_0_modinv64_signed62 *r, const haskellsecp256k1_v0_1_0_scalar *a) {
    933     const uint64_t M62 = UINT64_MAX >> 2;
    934     const uint64_t a0 = a->d[0], a1 = a->d[1], a2 = a->d[2], a3 = a->d[3];
    935     SECP256K1_SCALAR_VERIFY(a);
    936 
    937     r->v[0] =  a0                   & M62;
    938     r->v[1] = (a0 >> 62 | a1 <<  2) & M62;
    939     r->v[2] = (a1 >> 60 | a2 <<  4) & M62;
    940     r->v[3] = (a2 >> 58 | a3 <<  6) & M62;
    941     r->v[4] =  a3 >> 56;
    942 }
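
A standalone round-trip sketch of the repacking done by the two functions above: 256 bits move between four 64-bit limbs and five little-endian 62-bit limbs (the representation expected by modinv64) and back without loss. Plain arrays stand in for the scalar and signed62 types; hypothetical demo code.

#include <assert.h>
#include <stdint.h>
#include <string.h>

int main(void) {
    const uint64_t M62 = UINT64_MAX >> 2;
    const uint64_t d[4] = { 0x0123456789ABCDEFULL, 0xFEDCBA9876543210ULL,
                            0xDEADBEEFCAFEBABEULL, 0x0102030405060708ULL };
    uint64_t v[5], e[4];

    /* as in _scalar_to_signed62 */
    v[0] =  d[0]                     & M62;
    v[1] = (d[0] >> 62 | d[1] <<  2) & M62;
    v[2] = (d[1] >> 60 | d[2] <<  4) & M62;
    v[3] = (d[2] >> 58 | d[3] <<  6) & M62;
    v[4] =  d[3] >> 56;

    /* as in _scalar_from_signed62 */
    e[0] = v[0]      | v[1] << 62;
    e[1] = v[1] >> 2 | v[2] << 60;
    e[2] = v[2] >> 4 | v[3] << 58;
    e[3] = v[3] >> 6 | v[4] << 56;

    assert(memcmp(d, e, sizeof(d)) == 0);  /* exact round trip */
    return 0;
}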
    943 
    944 static const haskellsecp256k1_v0_1_0_modinv64_modinfo haskellsecp256k1_v0_1_0_const_modinfo_scalar = {
    945     {{0x3FD25E8CD0364141LL, 0x2ABB739ABD2280EELL, -0x15LL, 0, 256}},
    946     0x34F20099AA774EC1LL
    947 };
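
A standalone check of the trailing constant in the table above, under the assumption (taken from how modinv64 uses the field) that it is the inverse of the group order modulo 2^62, with the first signed62 limb being n mod 2^62. Newton/Hensel lifting doubles the precision of an inverse modulo a power of two at each step; hypothetical demo code.

#include <assert.h>
#include <stdint.h>

int main(void) {
    const uint64_t M62 = UINT64_MAX >> 2;
    const uint64_t n0 = 0x3FD25E8CD0364141ULL;   /* n mod 2^62, from the table above */
    uint64_t inv = 1;                            /* inverse of n0 mod 2 */
    int i;
    for (i = 0; i < 6; i++) {
        inv *= 2 - n0 * inv;                     /* precision doubles: mod 2^64 after 6 passes */
    }
    assert(((n0 * inv) & M62) == 1);                  /* inv is an inverse mod 2^62 */
    assert((inv & M62) == 0x34F20099AA774EC1ULL);     /* matches the constant above */
    return 0;
}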
    948 
    949 static void haskellsecp256k1_v0_1_0_scalar_inverse(haskellsecp256k1_v0_1_0_scalar *r, const haskellsecp256k1_v0_1_0_scalar *x) {
    950     haskellsecp256k1_v0_1_0_modinv64_signed62 s;
    951 #ifdef VERIFY
    952     int zero_in = haskellsecp256k1_v0_1_0_scalar_is_zero(x);
    953 #endif
    954     SECP256K1_SCALAR_VERIFY(x);
    955 
    956     haskellsecp256k1_v0_1_0_scalar_to_signed62(&s, x);
    957     haskellsecp256k1_v0_1_0_modinv64(&s, &haskellsecp256k1_v0_1_0_const_modinfo_scalar);
    958     haskellsecp256k1_v0_1_0_scalar_from_signed62(r, &s);
    959 
    960     SECP256K1_SCALAR_VERIFY(r);
    961     VERIFY_CHECK(haskellsecp256k1_v0_1_0_scalar_is_zero(r) == zero_in);
    962 }
    963 
    964 static void haskellsecp256k1_v0_1_0_scalar_inverse_var(haskellsecp256k1_v0_1_0_scalar *r, const haskellsecp256k1_v0_1_0_scalar *x) {
    965     haskellsecp256k1_v0_1_0_modinv64_signed62 s;
    966 #ifdef VERIFY
    967     int zero_in = haskellsecp256k1_v0_1_0_scalar_is_zero(x);
    968 #endif
    969     SECP256K1_SCALAR_VERIFY(x);
    970 
    971     haskellsecp256k1_v0_1_0_scalar_to_signed62(&s, x);
    972     haskellsecp256k1_v0_1_0_modinv64_var(&s, &haskellsecp256k1_v0_1_0_const_modinfo_scalar);
    973     haskellsecp256k1_v0_1_0_scalar_from_signed62(r, &s);
    974 
    975     SECP256K1_SCALAR_VERIFY(r);
    976     VERIFY_CHECK(haskellsecp256k1_v0_1_0_scalar_is_zero(r) == zero_in);
    977 }
    978 
    979 SECP256K1_INLINE static int haskellsecp256k1_v0_1_0_scalar_is_even(const haskellsecp256k1_v0_1_0_scalar *a) {
    980     SECP256K1_SCALAR_VERIFY(a);
    981 
    982     return !(a->d[0] & 1);
    983 }
    984 
    985 #endif /* SECP256K1_SCALAR_REPR_IMPL_H */