/* field_10x26_impl.h */
1 /*********************************************************************** 2 * Copyright (c) 2013, 2014 Pieter Wuille * 3 * Distributed under the MIT software license, see the accompanying * 4 * file COPYING or https://www.opensource.org/licenses/mit-license.php.* 5 ***********************************************************************/ 6 7 #ifndef SECP256K1_FIELD_REPR_IMPL_H 8 #define SECP256K1_FIELD_REPR_IMPL_H 9 10 #include "checkmem.h" 11 #include "util.h" 12 #include "field.h" 13 #include "modinv32_impl.h" 14 15 #ifdef VERIFY 16 static void haskellsecp256k1_v0_1_0_fe_impl_verify(const haskellsecp256k1_v0_1_0_fe *a) { 17 const uint32_t *d = a->n; 18 int m = a->normalized ? 1 : 2 * a->magnitude; 19 VERIFY_CHECK(d[0] <= 0x3FFFFFFUL * m); 20 VERIFY_CHECK(d[1] <= 0x3FFFFFFUL * m); 21 VERIFY_CHECK(d[2] <= 0x3FFFFFFUL * m); 22 VERIFY_CHECK(d[3] <= 0x3FFFFFFUL * m); 23 VERIFY_CHECK(d[4] <= 0x3FFFFFFUL * m); 24 VERIFY_CHECK(d[5] <= 0x3FFFFFFUL * m); 25 VERIFY_CHECK(d[6] <= 0x3FFFFFFUL * m); 26 VERIFY_CHECK(d[7] <= 0x3FFFFFFUL * m); 27 VERIFY_CHECK(d[8] <= 0x3FFFFFFUL * m); 28 VERIFY_CHECK(d[9] <= 0x03FFFFFUL * m); 29 if (a->normalized) { 30 if (d[9] == 0x03FFFFFUL) { 31 uint32_t mid = d[8] & d[7] & d[6] & d[5] & d[4] & d[3] & d[2]; 32 if (mid == 0x3FFFFFFUL) { 33 VERIFY_CHECK((d[1] + 0x40UL + ((d[0] + 0x3D1UL) >> 26)) <= 0x3FFFFFFUL); 34 } 35 } 36 } 37 } 38 #endif 39 40 static void haskellsecp256k1_v0_1_0_fe_impl_get_bounds(haskellsecp256k1_v0_1_0_fe *r, int m) { 41 r->n[0] = 0x3FFFFFFUL * 2 * m; 42 r->n[1] = 0x3FFFFFFUL * 2 * m; 43 r->n[2] = 0x3FFFFFFUL * 2 * m; 44 r->n[3] = 0x3FFFFFFUL * 2 * m; 45 r->n[4] = 0x3FFFFFFUL * 2 * m; 46 r->n[5] = 0x3FFFFFFUL * 2 * m; 47 r->n[6] = 0x3FFFFFFUL * 2 * m; 48 r->n[7] = 0x3FFFFFFUL * 2 * m; 49 r->n[8] = 0x3FFFFFFUL * 2 * m; 50 r->n[9] = 0x03FFFFFUL * 2 * m; 51 } 52 53 static void haskellsecp256k1_v0_1_0_fe_impl_normalize(haskellsecp256k1_v0_1_0_fe *r) { 54 uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = 
r->n[4], 55 t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9]; 56 57 /* Reduce t9 at the start so there will be at most a single carry from the first pass */ 58 uint32_t m; 59 uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL; 60 61 /* The first pass ensures the magnitude is 1, ... */ 62 t0 += x * 0x3D1UL; t1 += (x << 6); 63 t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; 64 t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; 65 t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; m = t2; 66 t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; m &= t3; 67 t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; m &= t4; 68 t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; m &= t5; 69 t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; m &= t6; 70 t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; m &= t7; 71 t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; m &= t8; 72 73 /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */ 74 VERIFY_CHECK(t9 >> 23 == 0); 75 76 /* At most a single final reduction is needed; check if the value is >= the field characteristic */ 77 x = (t9 >> 22) | ((t9 == 0x03FFFFFUL) & (m == 0x3FFFFFFUL) 78 & ((t1 + 0x40UL + ((t0 + 0x3D1UL) >> 26)) > 0x3FFFFFFUL)); 79 80 /* Apply the final reduction (for constant-time behaviour, we do it always) */ 81 t0 += x * 0x3D1UL; t1 += (x << 6); 82 t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; 83 t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; 84 t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; 85 t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; 86 t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; 87 t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; 88 t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; 89 t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; 90 t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; 91 92 /* If t9 didn't carry to bit 22 already, then it should have after any final reduction */ 93 VERIFY_CHECK(t9 >> 22 == x); 94 95 /* Mask off the possible multiple of 2^256 from the final reduction */ 96 t9 &= 0x03FFFFFUL; 97 98 r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4; 99 r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9; 100 } 101 102 static void 
haskellsecp256k1_v0_1_0_fe_impl_normalize_weak(haskellsecp256k1_v0_1_0_fe *r) { 103 uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4], 104 t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9]; 105 106 /* Reduce t9 at the start so there will be at most a single carry from the first pass */ 107 uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL; 108 109 /* The first pass ensures the magnitude is 1, ... */ 110 t0 += x * 0x3D1UL; t1 += (x << 6); 111 t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; 112 t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; 113 t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; 114 t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; 115 t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; 116 t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; 117 t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; 118 t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; 119 t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; 120 121 /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */ 122 VERIFY_CHECK(t9 >> 23 == 0); 123 124 r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4; 125 r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9; 126 } 127 128 static void haskellsecp256k1_v0_1_0_fe_impl_normalize_var(haskellsecp256k1_v0_1_0_fe *r) { 129 uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4], 130 t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9]; 131 132 /* Reduce t9 at the start so there will be at most a single carry from the first pass */ 133 uint32_t m; 134 uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL; 135 136 /* The first pass ensures the magnitude is 1, ... 
*/ 137 t0 += x * 0x3D1UL; t1 += (x << 6); 138 t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; 139 t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; 140 t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; m = t2; 141 t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; m &= t3; 142 t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; m &= t4; 143 t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; m &= t5; 144 t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; m &= t6; 145 t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; m &= t7; 146 t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; m &= t8; 147 148 /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */ 149 VERIFY_CHECK(t9 >> 23 == 0); 150 151 /* At most a single final reduction is needed; check if the value is >= the field characteristic */ 152 x = (t9 >> 22) | ((t9 == 0x03FFFFFUL) & (m == 0x3FFFFFFUL) 153 & ((t1 + 0x40UL + ((t0 + 0x3D1UL) >> 26)) > 0x3FFFFFFUL)); 154 155 if (x) { 156 t0 += 0x3D1UL; t1 += (x << 6); 157 t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; 158 t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; 159 t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; 160 t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; 161 t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; 162 t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; 163 t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; 164 t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; 165 t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; 166 167 /* If t9 didn't carry to bit 22 already, then it should have after any final reduction */ 168 VERIFY_CHECK(t9 >> 22 == x); 169 170 /* Mask off the possible multiple of 2^256 from the final reduction */ 171 t9 &= 0x03FFFFFUL; 172 } 173 174 r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4; 175 r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9; 176 } 177 178 static int haskellsecp256k1_v0_1_0_fe_impl_normalizes_to_zero(const haskellsecp256k1_v0_1_0_fe *r) { 179 uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4], 180 t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9]; 181 182 /* z0 tracks a possible raw value of 0, z1 tracks a possible raw 
value of P */ 183 uint32_t z0, z1; 184 185 /* Reduce t9 at the start so there will be at most a single carry from the first pass */ 186 uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL; 187 188 /* The first pass ensures the magnitude is 1, ... */ 189 t0 += x * 0x3D1UL; t1 += (x << 6); 190 t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL; z0 = t0; z1 = t0 ^ 0x3D0UL; 191 t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; z0 |= t1; z1 &= t1 ^ 0x40UL; 192 t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 |= t2; z1 &= t2; 193 t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3; z1 &= t3; 194 t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4; z1 &= t4; 195 t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5; z1 &= t5; 196 t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6; z1 &= t6; 197 t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7; z1 &= t7; 198 t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8; z1 &= t8; 199 z0 |= t9; z1 &= t9 ^ 0x3C00000UL; 200 201 /* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */ 202 VERIFY_CHECK(t9 >> 23 == 0); 203 204 return (z0 == 0) | (z1 == 0x3FFFFFFUL); 205 } 206 207 static int haskellsecp256k1_v0_1_0_fe_impl_normalizes_to_zero_var(const haskellsecp256k1_v0_1_0_fe *r) { 208 uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9; 209 uint32_t z0, z1; 210 uint32_t x; 211 212 t0 = r->n[0]; 213 t9 = r->n[9]; 214 215 /* Reduce t9 at the start so there will be at most a single carry from the first pass */ 216 x = t9 >> 22; 217 218 /* The first pass ensures the magnitude is 1, ... 
*/ 219 t0 += x * 0x3D1UL; 220 221 /* z0 tracks a possible raw value of 0, z1 tracks a possible raw value of P */ 222 z0 = t0 & 0x3FFFFFFUL; 223 z1 = z0 ^ 0x3D0UL; 224 225 /* Fast return path should catch the majority of cases */ 226 if ((z0 != 0UL) & (z1 != 0x3FFFFFFUL)) { 227 return 0; 228 } 229 230 t1 = r->n[1]; 231 t2 = r->n[2]; 232 t3 = r->n[3]; 233 t4 = r->n[4]; 234 t5 = r->n[5]; 235 t6 = r->n[6]; 236 t7 = r->n[7]; 237 t8 = r->n[8]; 238 239 t9 &= 0x03FFFFFUL; 240 t1 += (x << 6); 241 242 t1 += (t0 >> 26); 243 t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL; z0 |= t1; z1 &= t1 ^ 0x40UL; 244 t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; z0 |= t2; z1 &= t2; 245 t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; z0 |= t3; z1 &= t3; 246 t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; z0 |= t4; z1 &= t4; 247 t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; z0 |= t5; z1 &= t5; 248 t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; z0 |= t6; z1 &= t6; 249 t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; z0 |= t7; z1 &= t7; 250 t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; z0 |= t8; z1 &= t8; 251 z0 |= t9; z1 &= t9 ^ 0x3C00000UL; 252 253 /* ... except for a possible carry at bit 22 of t9 (i.e. 
bit 256 of the field element) */ 254 VERIFY_CHECK(t9 >> 23 == 0); 255 256 return (z0 == 0) | (z1 == 0x3FFFFFFUL); 257 } 258 259 SECP256K1_INLINE static void haskellsecp256k1_v0_1_0_fe_impl_set_int(haskellsecp256k1_v0_1_0_fe *r, int a) { 260 r->n[0] = a; 261 r->n[1] = r->n[2] = r->n[3] = r->n[4] = r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0; 262 } 263 264 SECP256K1_INLINE static int haskellsecp256k1_v0_1_0_fe_impl_is_zero(const haskellsecp256k1_v0_1_0_fe *a) { 265 const uint32_t *t = a->n; 266 return (t[0] | t[1] | t[2] | t[3] | t[4] | t[5] | t[6] | t[7] | t[8] | t[9]) == 0; 267 } 268 269 SECP256K1_INLINE static int haskellsecp256k1_v0_1_0_fe_impl_is_odd(const haskellsecp256k1_v0_1_0_fe *a) { 270 return a->n[0] & 1; 271 } 272 273 SECP256K1_INLINE static void haskellsecp256k1_v0_1_0_fe_impl_clear(haskellsecp256k1_v0_1_0_fe *a) { 274 int i; 275 for (i=0; i<10; i++) { 276 a->n[i] = 0; 277 } 278 } 279 280 static int haskellsecp256k1_v0_1_0_fe_impl_cmp_var(const haskellsecp256k1_v0_1_0_fe *a, const haskellsecp256k1_v0_1_0_fe *b) { 281 int i; 282 for (i = 9; i >= 0; i--) { 283 if (a->n[i] > b->n[i]) { 284 return 1; 285 } 286 if (a->n[i] < b->n[i]) { 287 return -1; 288 } 289 } 290 return 0; 291 } 292 293 static void haskellsecp256k1_v0_1_0_fe_impl_set_b32_mod(haskellsecp256k1_v0_1_0_fe *r, const unsigned char *a) { 294 r->n[0] = (uint32_t)a[31] | ((uint32_t)a[30] << 8) | ((uint32_t)a[29] << 16) | ((uint32_t)(a[28] & 0x3) << 24); 295 r->n[1] = (uint32_t)((a[28] >> 2) & 0x3f) | ((uint32_t)a[27] << 6) | ((uint32_t)a[26] << 14) | ((uint32_t)(a[25] & 0xf) << 22); 296 r->n[2] = (uint32_t)((a[25] >> 4) & 0xf) | ((uint32_t)a[24] << 4) | ((uint32_t)a[23] << 12) | ((uint32_t)(a[22] & 0x3f) << 20); 297 r->n[3] = (uint32_t)((a[22] >> 6) & 0x3) | ((uint32_t)a[21] << 2) | ((uint32_t)a[20] << 10) | ((uint32_t)a[19] << 18); 298 r->n[4] = (uint32_t)a[18] | ((uint32_t)a[17] << 8) | ((uint32_t)a[16] << 16) | ((uint32_t)(a[15] & 0x3) << 24); 299 r->n[5] = (uint32_t)((a[15] >> 2) & 
0x3f) | ((uint32_t)a[14] << 6) | ((uint32_t)a[13] << 14) | ((uint32_t)(a[12] & 0xf) << 22); 300 r->n[6] = (uint32_t)((a[12] >> 4) & 0xf) | ((uint32_t)a[11] << 4) | ((uint32_t)a[10] << 12) | ((uint32_t)(a[9] & 0x3f) << 20); 301 r->n[7] = (uint32_t)((a[9] >> 6) & 0x3) | ((uint32_t)a[8] << 2) | ((uint32_t)a[7] << 10) | ((uint32_t)a[6] << 18); 302 r->n[8] = (uint32_t)a[5] | ((uint32_t)a[4] << 8) | ((uint32_t)a[3] << 16) | ((uint32_t)(a[2] & 0x3) << 24); 303 r->n[9] = (uint32_t)((a[2] >> 2) & 0x3f) | ((uint32_t)a[1] << 6) | ((uint32_t)a[0] << 14); 304 } 305 306 static int haskellsecp256k1_v0_1_0_fe_impl_set_b32_limit(haskellsecp256k1_v0_1_0_fe *r, const unsigned char *a) { 307 haskellsecp256k1_v0_1_0_fe_impl_set_b32_mod(r, a); 308 return !((r->n[9] == 0x3FFFFFUL) & ((r->n[8] & r->n[7] & r->n[6] & r->n[5] & r->n[4] & r->n[3] & r->n[2]) == 0x3FFFFFFUL) & ((r->n[1] + 0x40UL + ((r->n[0] + 0x3D1UL) >> 26)) > 0x3FFFFFFUL)); 309 } 310 311 /** Convert a field element to a 32-byte big endian value. Requires the input to be normalized */ 312 static void haskellsecp256k1_v0_1_0_fe_impl_get_b32(unsigned char *r, const haskellsecp256k1_v0_1_0_fe *a) { 313 r[0] = (a->n[9] >> 14) & 0xff; 314 r[1] = (a->n[9] >> 6) & 0xff; 315 r[2] = ((a->n[9] & 0x3F) << 2) | ((a->n[8] >> 24) & 0x3); 316 r[3] = (a->n[8] >> 16) & 0xff; 317 r[4] = (a->n[8] >> 8) & 0xff; 318 r[5] = a->n[8] & 0xff; 319 r[6] = (a->n[7] >> 18) & 0xff; 320 r[7] = (a->n[7] >> 10) & 0xff; 321 r[8] = (a->n[7] >> 2) & 0xff; 322 r[9] = ((a->n[7] & 0x3) << 6) | ((a->n[6] >> 20) & 0x3f); 323 r[10] = (a->n[6] >> 12) & 0xff; 324 r[11] = (a->n[6] >> 4) & 0xff; 325 r[12] = ((a->n[6] & 0xf) << 4) | ((a->n[5] >> 22) & 0xf); 326 r[13] = (a->n[5] >> 14) & 0xff; 327 r[14] = (a->n[5] >> 6) & 0xff; 328 r[15] = ((a->n[5] & 0x3f) << 2) | ((a->n[4] >> 24) & 0x3); 329 r[16] = (a->n[4] >> 16) & 0xff; 330 r[17] = (a->n[4] >> 8) & 0xff; 331 r[18] = a->n[4] & 0xff; 332 r[19] = (a->n[3] >> 18) & 0xff; 333 r[20] = (a->n[3] >> 10) & 0xff; 334 r[21] = 
(a->n[3] >> 2) & 0xff; 335 r[22] = ((a->n[3] & 0x3) << 6) | ((a->n[2] >> 20) & 0x3f); 336 r[23] = (a->n[2] >> 12) & 0xff; 337 r[24] = (a->n[2] >> 4) & 0xff; 338 r[25] = ((a->n[2] & 0xf) << 4) | ((a->n[1] >> 22) & 0xf); 339 r[26] = (a->n[1] >> 14) & 0xff; 340 r[27] = (a->n[1] >> 6) & 0xff; 341 r[28] = ((a->n[1] & 0x3f) << 2) | ((a->n[0] >> 24) & 0x3); 342 r[29] = (a->n[0] >> 16) & 0xff; 343 r[30] = (a->n[0] >> 8) & 0xff; 344 r[31] = a->n[0] & 0xff; 345 } 346 347 SECP256K1_INLINE static void haskellsecp256k1_v0_1_0_fe_impl_negate_unchecked(haskellsecp256k1_v0_1_0_fe *r, const haskellsecp256k1_v0_1_0_fe *a, int m) { 348 /* For all legal values of m (0..31), the following properties hold: */ 349 VERIFY_CHECK(0x3FFFC2FUL * 2 * (m + 1) >= 0x3FFFFFFUL * 2 * m); 350 VERIFY_CHECK(0x3FFFFBFUL * 2 * (m + 1) >= 0x3FFFFFFUL * 2 * m); 351 VERIFY_CHECK(0x3FFFFFFUL * 2 * (m + 1) >= 0x3FFFFFFUL * 2 * m); 352 VERIFY_CHECK(0x03FFFFFUL * 2 * (m + 1) >= 0x03FFFFFUL * 2 * m); 353 354 /* Due to the properties above, the left hand in the subtractions below is never less than 355 * the right hand. 
*/ 356 r->n[0] = 0x3FFFC2FUL * 2 * (m + 1) - a->n[0]; 357 r->n[1] = 0x3FFFFBFUL * 2 * (m + 1) - a->n[1]; 358 r->n[2] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[2]; 359 r->n[3] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[3]; 360 r->n[4] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[4]; 361 r->n[5] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[5]; 362 r->n[6] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[6]; 363 r->n[7] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[7]; 364 r->n[8] = 0x3FFFFFFUL * 2 * (m + 1) - a->n[8]; 365 r->n[9] = 0x03FFFFFUL * 2 * (m + 1) - a->n[9]; 366 } 367 368 SECP256K1_INLINE static void haskellsecp256k1_v0_1_0_fe_impl_mul_int_unchecked(haskellsecp256k1_v0_1_0_fe *r, int a) { 369 r->n[0] *= a; 370 r->n[1] *= a; 371 r->n[2] *= a; 372 r->n[3] *= a; 373 r->n[4] *= a; 374 r->n[5] *= a; 375 r->n[6] *= a; 376 r->n[7] *= a; 377 r->n[8] *= a; 378 r->n[9] *= a; 379 } 380 381 SECP256K1_INLINE static void haskellsecp256k1_v0_1_0_fe_impl_add(haskellsecp256k1_v0_1_0_fe *r, const haskellsecp256k1_v0_1_0_fe *a) { 382 r->n[0] += a->n[0]; 383 r->n[1] += a->n[1]; 384 r->n[2] += a->n[2]; 385 r->n[3] += a->n[3]; 386 r->n[4] += a->n[4]; 387 r->n[5] += a->n[5]; 388 r->n[6] += a->n[6]; 389 r->n[7] += a->n[7]; 390 r->n[8] += a->n[8]; 391 r->n[9] += a->n[9]; 392 } 393 394 SECP256K1_INLINE static void haskellsecp256k1_v0_1_0_fe_impl_add_int(haskellsecp256k1_v0_1_0_fe *r, int a) { 395 r->n[0] += a; 396 } 397 398 #if defined(USE_EXTERNAL_ASM) 399 400 /* External assembler implementation */ 401 void haskellsecp256k1_v0_1_0_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b); 402 void haskellsecp256k1_v0_1_0_fe_sqr_inner(uint32_t *r, const uint32_t *a); 403 404 #else 405 406 #define VERIFY_BITS(x, n) VERIFY_CHECK(((x) >> (n)) == 0) 407 408 SECP256K1_INLINE static void haskellsecp256k1_v0_1_0_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b) { 409 uint64_t c, d; 410 uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8; 411 uint32_t t9, t1, t0, t2, t3, t4, t5, t6, t7; 412 const 
uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL; 413 414 VERIFY_BITS(a[0], 30); 415 VERIFY_BITS(a[1], 30); 416 VERIFY_BITS(a[2], 30); 417 VERIFY_BITS(a[3], 30); 418 VERIFY_BITS(a[4], 30); 419 VERIFY_BITS(a[5], 30); 420 VERIFY_BITS(a[6], 30); 421 VERIFY_BITS(a[7], 30); 422 VERIFY_BITS(a[8], 30); 423 VERIFY_BITS(a[9], 26); 424 VERIFY_BITS(b[0], 30); 425 VERIFY_BITS(b[1], 30); 426 VERIFY_BITS(b[2], 30); 427 VERIFY_BITS(b[3], 30); 428 VERIFY_BITS(b[4], 30); 429 VERIFY_BITS(b[5], 30); 430 VERIFY_BITS(b[6], 30); 431 VERIFY_BITS(b[7], 30); 432 VERIFY_BITS(b[8], 30); 433 VERIFY_BITS(b[9], 26); 434 435 /** [... a b c] is a shorthand for ... + a<<52 + b<<26 + c<<0 mod n. 436 * for 0 <= x <= 9, px is a shorthand for sum(a[i]*b[x-i], i=0..x). 437 * for 9 <= x <= 18, px is a shorthand for sum(a[i]*b[x-i], i=(x-9)..9) 438 * Note that [x 0 0 0 0 0 0 0 0 0 0] = [x*R1 x*R0]. 439 */ 440 441 d = (uint64_t)a[0] * b[9] 442 + (uint64_t)a[1] * b[8] 443 + (uint64_t)a[2] * b[7] 444 + (uint64_t)a[3] * b[6] 445 + (uint64_t)a[4] * b[5] 446 + (uint64_t)a[5] * b[4] 447 + (uint64_t)a[6] * b[3] 448 + (uint64_t)a[7] * b[2] 449 + (uint64_t)a[8] * b[1] 450 + (uint64_t)a[9] * b[0]; 451 /* VERIFY_BITS(d, 64); */ 452 /* [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */ 453 t9 = d & M; d >>= 26; 454 VERIFY_BITS(t9, 26); 455 VERIFY_BITS(d, 38); 456 /* [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */ 457 458 c = (uint64_t)a[0] * b[0]; 459 VERIFY_BITS(c, 60); 460 /* [d t9 0 0 0 0 0 0 0 0 c] = [p9 0 0 0 0 0 0 0 0 p0] */ 461 d += (uint64_t)a[1] * b[9] 462 + (uint64_t)a[2] * b[8] 463 + (uint64_t)a[3] * b[7] 464 + (uint64_t)a[4] * b[6] 465 + (uint64_t)a[5] * b[5] 466 + (uint64_t)a[6] * b[4] 467 + (uint64_t)a[7] * b[3] 468 + (uint64_t)a[8] * b[2] 469 + (uint64_t)a[9] * b[1]; 470 VERIFY_BITS(d, 63); 471 /* [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ 472 u0 = d & M; d >>= 26; c += u0 * R0; 473 VERIFY_BITS(u0, 26); 474 VERIFY_BITS(d, 37); 475 VERIFY_BITS(c, 61); 476 /* [d u0 t9 0 0 0 0 0 0 
0 0 c-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ 477 t0 = c & M; c >>= 26; c += u0 * R1; 478 VERIFY_BITS(t0, 26); 479 VERIFY_BITS(c, 37); 480 /* [d u0 t9 0 0 0 0 0 0 0 c-u0*R1 t0-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ 481 /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ 482 483 c += (uint64_t)a[0] * b[1] 484 + (uint64_t)a[1] * b[0]; 485 VERIFY_BITS(c, 62); 486 /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 p1 p0] */ 487 d += (uint64_t)a[2] * b[9] 488 + (uint64_t)a[3] * b[8] 489 + (uint64_t)a[4] * b[7] 490 + (uint64_t)a[5] * b[6] 491 + (uint64_t)a[6] * b[5] 492 + (uint64_t)a[7] * b[4] 493 + (uint64_t)a[8] * b[3] 494 + (uint64_t)a[9] * b[2]; 495 VERIFY_BITS(d, 63); 496 /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ 497 u1 = d & M; d >>= 26; c += u1 * R0; 498 VERIFY_BITS(u1, 26); 499 VERIFY_BITS(d, 37); 500 VERIFY_BITS(c, 63); 501 /* [d u1 0 t9 0 0 0 0 0 0 0 c-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ 502 t1 = c & M; c >>= 26; c += u1 * R1; 503 VERIFY_BITS(t1, 26); 504 VERIFY_BITS(c, 38); 505 /* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ 506 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ 507 508 c += (uint64_t)a[0] * b[2] 509 + (uint64_t)a[1] * b[1] 510 + (uint64_t)a[2] * b[0]; 511 VERIFY_BITS(c, 62); 512 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ 513 d += (uint64_t)a[3] * b[9] 514 + (uint64_t)a[4] * b[8] 515 + (uint64_t)a[5] * b[7] 516 + (uint64_t)a[6] * b[6] 517 + (uint64_t)a[7] * b[5] 518 + (uint64_t)a[8] * b[4] 519 + (uint64_t)a[9] * b[3]; 520 VERIFY_BITS(d, 63); 521 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ 522 u2 = d & M; d >>= 26; c += u2 * R0; 523 VERIFY_BITS(u2, 26); 524 VERIFY_BITS(d, 37); 525 VERIFY_BITS(c, 63); 526 /* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ 527 t2 = c & M; c >>= 26; c += u2 * R1; 528 VERIFY_BITS(t2, 26); 529 VERIFY_BITS(c, 
38); 530 /* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ 531 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ 532 533 c += (uint64_t)a[0] * b[3] 534 + (uint64_t)a[1] * b[2] 535 + (uint64_t)a[2] * b[1] 536 + (uint64_t)a[3] * b[0]; 537 VERIFY_BITS(c, 63); 538 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ 539 d += (uint64_t)a[4] * b[9] 540 + (uint64_t)a[5] * b[8] 541 + (uint64_t)a[6] * b[7] 542 + (uint64_t)a[7] * b[6] 543 + (uint64_t)a[8] * b[5] 544 + (uint64_t)a[9] * b[4]; 545 VERIFY_BITS(d, 63); 546 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ 547 u3 = d & M; d >>= 26; c += u3 * R0; 548 VERIFY_BITS(u3, 26); 549 VERIFY_BITS(d, 37); 550 /* VERIFY_BITS(c, 64); */ 551 /* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ 552 t3 = c & M; c >>= 26; c += u3 * R1; 553 VERIFY_BITS(t3, 26); 554 VERIFY_BITS(c, 39); 555 /* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ 556 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ 557 558 c += (uint64_t)a[0] * b[4] 559 + (uint64_t)a[1] * b[3] 560 + (uint64_t)a[2] * b[2] 561 + (uint64_t)a[3] * b[1] 562 + (uint64_t)a[4] * b[0]; 563 VERIFY_BITS(c, 63); 564 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ 565 d += (uint64_t)a[5] * b[9] 566 + (uint64_t)a[6] * b[8] 567 + (uint64_t)a[7] * b[7] 568 + (uint64_t)a[8] * b[6] 569 + (uint64_t)a[9] * b[5]; 570 VERIFY_BITS(d, 62); 571 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ 572 u4 = d & M; d >>= 26; c += u4 * R0; 573 VERIFY_BITS(u4, 26); 574 VERIFY_BITS(d, 36); 575 /* VERIFY_BITS(c, 64); */ 576 /* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ 577 t4 = c & M; c >>= 26; c += u4 * R1; 
578 VERIFY_BITS(t4, 26); 579 VERIFY_BITS(c, 39); 580 /* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ 581 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ 582 583 c += (uint64_t)a[0] * b[5] 584 + (uint64_t)a[1] * b[4] 585 + (uint64_t)a[2] * b[3] 586 + (uint64_t)a[3] * b[2] 587 + (uint64_t)a[4] * b[1] 588 + (uint64_t)a[5] * b[0]; 589 VERIFY_BITS(c, 63); 590 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ 591 d += (uint64_t)a[6] * b[9] 592 + (uint64_t)a[7] * b[8] 593 + (uint64_t)a[8] * b[7] 594 + (uint64_t)a[9] * b[6]; 595 VERIFY_BITS(d, 62); 596 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ 597 u5 = d & M; d >>= 26; c += u5 * R0; 598 VERIFY_BITS(u5, 26); 599 VERIFY_BITS(d, 36); 600 /* VERIFY_BITS(c, 64); */ 601 /* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ 602 t5 = c & M; c >>= 26; c += u5 * R1; 603 VERIFY_BITS(t5, 26); 604 VERIFY_BITS(c, 39); 605 /* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ 606 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ 607 608 c += (uint64_t)a[0] * b[6] 609 + (uint64_t)a[1] * b[5] 610 + (uint64_t)a[2] * b[4] 611 + (uint64_t)a[3] * b[3] 612 + (uint64_t)a[4] * b[2] 613 + (uint64_t)a[5] * b[1] 614 + (uint64_t)a[6] * b[0]; 615 VERIFY_BITS(c, 63); 616 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ 617 d += (uint64_t)a[7] * b[9] 618 + (uint64_t)a[8] * b[8] 619 + (uint64_t)a[9] * b[7]; 620 VERIFY_BITS(d, 61); 621 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ 622 u6 = d & M; d >>= 26; c += u6 * R0; 623 VERIFY_BITS(u6, 26); 624 
VERIFY_BITS(d, 35); 625 /* VERIFY_BITS(c, 64); */ 626 /* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ 627 t6 = c & M; c >>= 26; c += u6 * R1; 628 VERIFY_BITS(t6, 26); 629 VERIFY_BITS(c, 39); 630 /* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ 631 /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ 632 633 c += (uint64_t)a[0] * b[7] 634 + (uint64_t)a[1] * b[6] 635 + (uint64_t)a[2] * b[5] 636 + (uint64_t)a[3] * b[4] 637 + (uint64_t)a[4] * b[3] 638 + (uint64_t)a[5] * b[2] 639 + (uint64_t)a[6] * b[1] 640 + (uint64_t)a[7] * b[0]; 641 /* VERIFY_BITS(c, 64); */ 642 VERIFY_CHECK(c <= 0x8000007C00000007ULL); 643 /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ 644 d += (uint64_t)a[8] * b[9] 645 + (uint64_t)a[9] * b[8]; 646 VERIFY_BITS(d, 58); 647 /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ 648 u7 = d & M; d >>= 26; c += u7 * R0; 649 VERIFY_BITS(u7, 26); 650 VERIFY_BITS(d, 32); 651 /* VERIFY_BITS(c, 64); */ 652 VERIFY_CHECK(c <= 0x800001703FFFC2F7ULL); 653 /* [d u7 0 0 0 0 0 0 0 t9 0 c-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ 654 t7 = c & M; c >>= 26; c += u7 * R1; 655 VERIFY_BITS(t7, 26); 656 VERIFY_BITS(c, 38); 657 /* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ 658 /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ 659 660 c += (uint64_t)a[0] * b[8] 661 + (uint64_t)a[1] * b[7] 662 + (uint64_t)a[2] * b[6] 663 + (uint64_t)a[3] * b[5] 664 + (uint64_t)a[4] * b[4] 665 + (uint64_t)a[5] * b[3] 666 + (uint64_t)a[6] * b[2] 667 + 
(uint64_t)a[7] * b[1] 668 + (uint64_t)a[8] * b[0]; 669 /* VERIFY_BITS(c, 64); */ 670 VERIFY_CHECK(c <= 0x9000007B80000008ULL); 671 /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 672 d += (uint64_t)a[9] * b[9]; 673 VERIFY_BITS(d, 57); 674 /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 675 u8 = d & M; d >>= 26; c += u8 * R0; 676 VERIFY_BITS(u8, 26); 677 VERIFY_BITS(d, 31); 678 /* VERIFY_BITS(c, 64); */ 679 VERIFY_CHECK(c <= 0x9000016FBFFFC2F8ULL); 680 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 681 682 r[3] = t3; 683 VERIFY_BITS(r[3], 26); 684 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 685 r[4] = t4; 686 VERIFY_BITS(r[4], 26); 687 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 688 r[5] = t5; 689 VERIFY_BITS(r[5], 26); 690 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 691 r[6] = t6; 692 VERIFY_BITS(r[6], 26); 693 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 694 r[7] = t7; 695 VERIFY_BITS(r[7], 26); 696 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 697 698 r[8] = c & M; c >>= 26; c += u8 * R1; 699 VERIFY_BITS(r[8], 26); 700 VERIFY_BITS(c, 39); 701 /* [d u8 0 0 0 0 0 0 0 0 t9+c-u8*R1 r8-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 702 /* [d 0 0 0 0 0 0 0 0 0 t9+c r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 
p5 p4 p3 p2 p1 p0] */ 703 c += d * R0 + t9; 704 VERIFY_BITS(c, 45); 705 /* [d 0 0 0 0 0 0 0 0 0 c-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 706 r[9] = c & (M >> 4); c >>= 22; c += d * (R1 << 4); 707 VERIFY_BITS(r[9], 22); 708 VERIFY_BITS(c, 46); 709 /* [d 0 0 0 0 0 0 0 0 r9+((c-d*R1<<4)<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 710 /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 711 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 712 713 d = c * (R0 >> 4) + t0; 714 VERIFY_BITS(d, 56); 715 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 716 r[0] = d & M; d >>= 26; 717 VERIFY_BITS(r[0], 26); 718 VERIFY_BITS(d, 30); 719 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 720 d += c * (R1 >> 4) + t1; 721 VERIFY_BITS(d, 53); 722 VERIFY_CHECK(d <= 0x10000003FFFFBFULL); 723 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 724 /* [r9 r8 r7 r6 r5 r4 r3 t2 d r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 725 r[1] = d & M; d >>= 26; 726 VERIFY_BITS(r[1], 26); 727 VERIFY_BITS(d, 27); 728 VERIFY_CHECK(d <= 0x4000000ULL); 729 /* [r9 r8 r7 r6 r5 r4 r3 t2+d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 730 d += t2; 731 VERIFY_BITS(d, 27); 732 /* [r9 r8 r7 r6 r5 r4 r3 d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 733 r[2] = d; 734 VERIFY_BITS(r[2], 27); 735 /* [r9 r8 r7 r6 r5 r4 r3 r2 r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 736 
} 737 738 SECP256K1_INLINE static void haskellsecp256k1_v0_1_0_fe_sqr_inner(uint32_t *r, const uint32_t *a) { 739 uint64_t c, d; 740 uint64_t u0, u1, u2, u3, u4, u5, u6, u7, u8; 741 uint32_t t9, t0, t1, t2, t3, t4, t5, t6, t7; 742 const uint32_t M = 0x3FFFFFFUL, R0 = 0x3D10UL, R1 = 0x400UL; 743 744 VERIFY_BITS(a[0], 30); 745 VERIFY_BITS(a[1], 30); 746 VERIFY_BITS(a[2], 30); 747 VERIFY_BITS(a[3], 30); 748 VERIFY_BITS(a[4], 30); 749 VERIFY_BITS(a[5], 30); 750 VERIFY_BITS(a[6], 30); 751 VERIFY_BITS(a[7], 30); 752 VERIFY_BITS(a[8], 30); 753 VERIFY_BITS(a[9], 26); 754 755 /** [... a b c] is a shorthand for ... + a<<52 + b<<26 + c<<0 mod n. 756 * px is a shorthand for sum(a[i]*a[x-i], i=0..x). 757 * Note that [x 0 0 0 0 0 0 0 0 0 0] = [x*R1 x*R0]. 758 */ 759 760 d = (uint64_t)(a[0]*2) * a[9] 761 + (uint64_t)(a[1]*2) * a[8] 762 + (uint64_t)(a[2]*2) * a[7] 763 + (uint64_t)(a[3]*2) * a[6] 764 + (uint64_t)(a[4]*2) * a[5]; 765 /* VERIFY_BITS(d, 64); */ 766 /* [d 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */ 767 t9 = d & M; d >>= 26; 768 VERIFY_BITS(t9, 26); 769 VERIFY_BITS(d, 38); 770 /* [d t9 0 0 0 0 0 0 0 0 0] = [p9 0 0 0 0 0 0 0 0 0] */ 771 772 c = (uint64_t)a[0] * a[0]; 773 VERIFY_BITS(c, 60); 774 /* [d t9 0 0 0 0 0 0 0 0 c] = [p9 0 0 0 0 0 0 0 0 p0] */ 775 d += (uint64_t)(a[1]*2) * a[9] 776 + (uint64_t)(a[2]*2) * a[8] 777 + (uint64_t)(a[3]*2) * a[7] 778 + (uint64_t)(a[4]*2) * a[6] 779 + (uint64_t)a[5] * a[5]; 780 VERIFY_BITS(d, 63); 781 /* [d t9 0 0 0 0 0 0 0 0 c] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ 782 u0 = d & M; d >>= 26; c += u0 * R0; 783 VERIFY_BITS(u0, 26); 784 VERIFY_BITS(d, 37); 785 VERIFY_BITS(c, 61); 786 /* [d u0 t9 0 0 0 0 0 0 0 0 c-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ 787 t0 = c & M; c >>= 26; c += u0 * R1; 788 VERIFY_BITS(t0, 26); 789 VERIFY_BITS(c, 37); 790 /* [d u0 t9 0 0 0 0 0 0 0 c-u0*R1 t0-u0*R0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ 791 /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 0 p0] */ 792 793 c += (uint64_t)(a[0]*2) * a[1]; 794 
VERIFY_BITS(c, 62); 795 /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p10 p9 0 0 0 0 0 0 0 p1 p0] */ 796 d += (uint64_t)(a[2]*2) * a[9] 797 + (uint64_t)(a[3]*2) * a[8] 798 + (uint64_t)(a[4]*2) * a[7] 799 + (uint64_t)(a[5]*2) * a[6]; 800 VERIFY_BITS(d, 63); 801 /* [d 0 t9 0 0 0 0 0 0 0 c t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ 802 u1 = d & M; d >>= 26; c += u1 * R0; 803 VERIFY_BITS(u1, 26); 804 VERIFY_BITS(d, 37); 805 VERIFY_BITS(c, 63); 806 /* [d u1 0 t9 0 0 0 0 0 0 0 c-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ 807 t1 = c & M; c >>= 26; c += u1 * R1; 808 VERIFY_BITS(t1, 26); 809 VERIFY_BITS(c, 38); 810 /* [d u1 0 t9 0 0 0 0 0 0 c-u1*R1 t1-u1*R0 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ 811 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 0 p1 p0] */ 812 813 c += (uint64_t)(a[0]*2) * a[2] 814 + (uint64_t)a[1] * a[1]; 815 VERIFY_BITS(c, 62); 816 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ 817 d += (uint64_t)(a[3]*2) * a[9] 818 + (uint64_t)(a[4]*2) * a[8] 819 + (uint64_t)(a[5]*2) * a[7] 820 + (uint64_t)a[6] * a[6]; 821 VERIFY_BITS(d, 63); 822 /* [d 0 0 t9 0 0 0 0 0 0 c t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ 823 u2 = d & M; d >>= 26; c += u2 * R0; 824 VERIFY_BITS(u2, 26); 825 VERIFY_BITS(d, 37); 826 VERIFY_BITS(c, 63); 827 /* [d u2 0 0 t9 0 0 0 0 0 0 c-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ 828 t2 = c & M; c >>= 26; c += u2 * R1; 829 VERIFY_BITS(t2, 26); 830 VERIFY_BITS(c, 38); 831 /* [d u2 0 0 t9 0 0 0 0 0 c-u2*R1 t2-u2*R0 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ 832 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 0 p2 p1 p0] */ 833 834 c += (uint64_t)(a[0]*2) * a[3] 835 + (uint64_t)(a[1]*2) * a[2]; 836 VERIFY_BITS(c, 63); 837 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ 838 d += (uint64_t)(a[4]*2) * a[9] 839 + (uint64_t)(a[5]*2) * a[8] 840 + (uint64_t)(a[6]*2) * a[7]; 841 VERIFY_BITS(d, 63); 842 /* [d 0 0 0 t9 0 0 0 0 0 c t2 t1 t0] = [p13 p12 
p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ 843 u3 = d & M; d >>= 26; c += u3 * R0; 844 VERIFY_BITS(u3, 26); 845 VERIFY_BITS(d, 37); 846 /* VERIFY_BITS(c, 64); */ 847 /* [d u3 0 0 0 t9 0 0 0 0 0 c-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ 848 t3 = c & M; c >>= 26; c += u3 * R1; 849 VERIFY_BITS(t3, 26); 850 VERIFY_BITS(c, 39); 851 /* [d u3 0 0 0 t9 0 0 0 0 c-u3*R1 t3-u3*R0 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ 852 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 0 p3 p2 p1 p0] */ 853 854 c += (uint64_t)(a[0]*2) * a[4] 855 + (uint64_t)(a[1]*2) * a[3] 856 + (uint64_t)a[2] * a[2]; 857 VERIFY_BITS(c, 63); 858 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ 859 d += (uint64_t)(a[5]*2) * a[9] 860 + (uint64_t)(a[6]*2) * a[8] 861 + (uint64_t)a[7] * a[7]; 862 VERIFY_BITS(d, 62); 863 /* [d 0 0 0 0 t9 0 0 0 0 c t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ 864 u4 = d & M; d >>= 26; c += u4 * R0; 865 VERIFY_BITS(u4, 26); 866 VERIFY_BITS(d, 36); 867 /* VERIFY_BITS(c, 64); */ 868 /* [d u4 0 0 0 0 t9 0 0 0 0 c-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ 869 t4 = c & M; c >>= 26; c += u4 * R1; 870 VERIFY_BITS(t4, 26); 871 VERIFY_BITS(c, 39); 872 /* [d u4 0 0 0 0 t9 0 0 0 c-u4*R1 t4-u4*R0 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ 873 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 0 p4 p3 p2 p1 p0] */ 874 875 c += (uint64_t)(a[0]*2) * a[5] 876 + (uint64_t)(a[1]*2) * a[4] 877 + (uint64_t)(a[2]*2) * a[3]; 878 VERIFY_BITS(c, 63); 879 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ 880 d += (uint64_t)(a[6]*2) * a[9] 881 + (uint64_t)(a[7]*2) * a[8]; 882 VERIFY_BITS(d, 62); 883 /* [d 0 0 0 0 0 t9 0 0 0 c t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ 884 u5 = d & M; d >>= 26; c += u5 * R0; 885 VERIFY_BITS(u5, 26); 886 
VERIFY_BITS(d, 36); 887 /* VERIFY_BITS(c, 64); */ 888 /* [d u5 0 0 0 0 0 t9 0 0 0 c-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ 889 t5 = c & M; c >>= 26; c += u5 * R1; 890 VERIFY_BITS(t5, 26); 891 VERIFY_BITS(c, 39); 892 /* [d u5 0 0 0 0 0 t9 0 0 c-u5*R1 t5-u5*R0 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ 893 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 0 p5 p4 p3 p2 p1 p0] */ 894 895 c += (uint64_t)(a[0]*2) * a[6] 896 + (uint64_t)(a[1]*2) * a[5] 897 + (uint64_t)(a[2]*2) * a[4] 898 + (uint64_t)a[3] * a[3]; 899 VERIFY_BITS(c, 63); 900 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ 901 d += (uint64_t)(a[7]*2) * a[9] 902 + (uint64_t)a[8] * a[8]; 903 VERIFY_BITS(d, 61); 904 /* [d 0 0 0 0 0 0 t9 0 0 c t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ 905 u6 = d & M; d >>= 26; c += u6 * R0; 906 VERIFY_BITS(u6, 26); 907 VERIFY_BITS(d, 35); 908 /* VERIFY_BITS(c, 64); */ 909 /* [d u6 0 0 0 0 0 0 t9 0 0 c-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ 910 t6 = c & M; c >>= 26; c += u6 * R1; 911 VERIFY_BITS(t6, 26); 912 VERIFY_BITS(c, 39); 913 /* [d u6 0 0 0 0 0 0 t9 0 c-u6*R1 t6-u6*R0 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ 914 /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 0 p6 p5 p4 p3 p2 p1 p0] */ 915 916 c += (uint64_t)(a[0]*2) * a[7] 917 + (uint64_t)(a[1]*2) * a[6] 918 + (uint64_t)(a[2]*2) * a[5] 919 + (uint64_t)(a[3]*2) * a[4]; 920 /* VERIFY_BITS(c, 64); */ 921 VERIFY_CHECK(c <= 0x8000007C00000007ULL); 922 /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ 923 d += (uint64_t)(a[8]*2) * a[9]; 924 VERIFY_BITS(d, 58); 925 /* [d 0 0 0 0 0 0 0 t9 0 c t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 
p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ 926 u7 = d & M; d >>= 26; c += u7 * R0; 927 VERIFY_BITS(u7, 26); 928 VERIFY_BITS(d, 32); 929 /* VERIFY_BITS(c, 64); */ 930 VERIFY_CHECK(c <= 0x800001703FFFC2F7ULL); 931 /* [d u7 0 0 0 0 0 0 0 t9 0 c-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ 932 t7 = c & M; c >>= 26; c += u7 * R1; 933 VERIFY_BITS(t7, 26); 934 VERIFY_BITS(c, 38); 935 /* [d u7 0 0 0 0 0 0 0 t9 c-u7*R1 t7-u7*R0 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ 936 /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 0 p7 p6 p5 p4 p3 p2 p1 p0] */ 937 938 c += (uint64_t)(a[0]*2) * a[8] 939 + (uint64_t)(a[1]*2) * a[7] 940 + (uint64_t)(a[2]*2) * a[6] 941 + (uint64_t)(a[3]*2) * a[5] 942 + (uint64_t)a[4] * a[4]; 943 /* VERIFY_BITS(c, 64); */ 944 VERIFY_CHECK(c <= 0x9000007B80000008ULL); 945 /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 946 d += (uint64_t)a[9] * a[9]; 947 VERIFY_BITS(d, 57); 948 /* [d 0 0 0 0 0 0 0 0 t9 c t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 949 u8 = d & M; d >>= 26; c += u8 * R0; 950 VERIFY_BITS(u8, 26); 951 VERIFY_BITS(d, 31); 952 /* VERIFY_BITS(c, 64); */ 953 VERIFY_CHECK(c <= 0x9000016FBFFFC2F8ULL); 954 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 t3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 955 956 r[3] = t3; 957 VERIFY_BITS(r[3], 26); 958 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 t4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 959 r[4] = t4; 960 VERIFY_BITS(r[4], 26); 961 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 t5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 962 r[5] = t5; 963 VERIFY_BITS(r[5], 26); 964 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 t6 r5 r4 r3 t2 t1 
t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 965 r[6] = t6; 966 VERIFY_BITS(r[6], 26); 967 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 t7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 968 r[7] = t7; 969 VERIFY_BITS(r[7], 26); 970 /* [d u8 0 0 0 0 0 0 0 0 t9 c-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 971 972 r[8] = c & M; c >>= 26; c += u8 * R1; 973 VERIFY_BITS(r[8], 26); 974 VERIFY_BITS(c, 39); 975 /* [d u8 0 0 0 0 0 0 0 0 t9+c-u8*R1 r8-u8*R0 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 976 /* [d 0 0 0 0 0 0 0 0 0 t9+c r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 977 c += d * R0 + t9; 978 VERIFY_BITS(c, 45); 979 /* [d 0 0 0 0 0 0 0 0 0 c-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 980 r[9] = c & (M >> 4); c >>= 22; c += d * (R1 << 4); 981 VERIFY_BITS(r[9], 22); 982 VERIFY_BITS(c, 46); 983 /* [d 0 0 0 0 0 0 0 0 r9+((c-d*R1<<4)<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 984 /* [d 0 0 0 0 0 0 0 -d*R1 r9+(c<<22)-d*R0 r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 985 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 t0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 986 987 d = c * (R0 >> 4) + t0; 988 VERIFY_BITS(d, 56); 989 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1 d-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 990 r[0] = d & M; d >>= 26; 991 VERIFY_BITS(r[0], 26); 992 VERIFY_BITS(d, 30); 993 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 t1+d r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 994 d += c * (R1 >> 4) + t1; 995 VERIFY_BITS(d, 53); 996 VERIFY_CHECK(d <= 
0x10000003FFFFBFULL); 997 /* [r9+(c<<22) r8 r7 r6 r5 r4 r3 t2 d-c*R1>>4 r0-c*R0>>4] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 998 /* [r9 r8 r7 r6 r5 r4 r3 t2 d r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 999 r[1] = d & M; d >>= 26; 1000 VERIFY_BITS(r[1], 26); 1001 VERIFY_BITS(d, 27); 1002 VERIFY_CHECK(d <= 0x4000000ULL); 1003 /* [r9 r8 r7 r6 r5 r4 r3 t2+d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 1004 d += t2; 1005 VERIFY_BITS(d, 27); 1006 /* [r9 r8 r7 r6 r5 r4 r3 d r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 1007 r[2] = d; 1008 VERIFY_BITS(r[2], 27); 1009 /* [r9 r8 r7 r6 r5 r4 r3 r2 r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */ 1010 } 1011 #endif 1012 1013 SECP256K1_INLINE static void haskellsecp256k1_v0_1_0_fe_impl_mul(haskellsecp256k1_v0_1_0_fe *r, const haskellsecp256k1_v0_1_0_fe *a, const haskellsecp256k1_v0_1_0_fe * SECP256K1_RESTRICT b) { 1014 haskellsecp256k1_v0_1_0_fe_mul_inner(r->n, a->n, b->n); 1015 } 1016 1017 SECP256K1_INLINE static void haskellsecp256k1_v0_1_0_fe_impl_sqr(haskellsecp256k1_v0_1_0_fe *r, const haskellsecp256k1_v0_1_0_fe *a) { 1018 haskellsecp256k1_v0_1_0_fe_sqr_inner(r->n, a->n); 1019 } 1020 1021 SECP256K1_INLINE static void haskellsecp256k1_v0_1_0_fe_impl_cmov(haskellsecp256k1_v0_1_0_fe *r, const haskellsecp256k1_v0_1_0_fe *a, int flag) { 1022 uint32_t mask0, mask1; 1023 volatile int vflag = flag; 1024 SECP256K1_CHECKMEM_CHECK_VERIFY(r->n, sizeof(r->n)); 1025 mask0 = vflag + ~((uint32_t)0); 1026 mask1 = ~mask0; 1027 r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1); 1028 r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1); 1029 r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1); 1030 r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1); 1031 r->n[4] = (r->n[4] & mask0) | (a->n[4] & mask1); 1032 r->n[5] = (r->n[5] & mask0) | (a->n[5] & mask1); 1033 r->n[6] = (r->n[6] & mask0) | 
(a->n[6] & mask1); 1034 r->n[7] = (r->n[7] & mask0) | (a->n[7] & mask1); 1035 r->n[8] = (r->n[8] & mask0) | (a->n[8] & mask1); 1036 r->n[9] = (r->n[9] & mask0) | (a->n[9] & mask1); 1037 } 1038 1039 static SECP256K1_INLINE void haskellsecp256k1_v0_1_0_fe_impl_half(haskellsecp256k1_v0_1_0_fe *r) { 1040 uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4], 1041 t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9]; 1042 uint32_t one = (uint32_t)1; 1043 uint32_t mask = -(t0 & one) >> 6; 1044 1045 /* Bounds analysis (over the rationals). 1046 * 1047 * Let m = r->magnitude 1048 * C = 0x3FFFFFFUL * 2 1049 * D = 0x03FFFFFUL * 2 1050 * 1051 * Initial bounds: t0..t8 <= C * m 1052 * t9 <= D * m 1053 */ 1054 1055 t0 += 0x3FFFC2FUL & mask; 1056 t1 += 0x3FFFFBFUL & mask; 1057 t2 += mask; 1058 t3 += mask; 1059 t4 += mask; 1060 t5 += mask; 1061 t6 += mask; 1062 t7 += mask; 1063 t8 += mask; 1064 t9 += mask >> 4; 1065 1066 VERIFY_CHECK((t0 & one) == 0); 1067 1068 /* t0..t8: added <= C/2 1069 * t9: added <= D/2 1070 * 1071 * Current bounds: t0..t8 <= C * (m + 1/2) 1072 * t9 <= D * (m + 1/2) 1073 */ 1074 1075 r->n[0] = (t0 >> 1) + ((t1 & one) << 25); 1076 r->n[1] = (t1 >> 1) + ((t2 & one) << 25); 1077 r->n[2] = (t2 >> 1) + ((t3 & one) << 25); 1078 r->n[3] = (t3 >> 1) + ((t4 & one) << 25); 1079 r->n[4] = (t4 >> 1) + ((t5 & one) << 25); 1080 r->n[5] = (t5 >> 1) + ((t6 & one) << 25); 1081 r->n[6] = (t6 >> 1) + ((t7 & one) << 25); 1082 r->n[7] = (t7 >> 1) + ((t8 & one) << 25); 1083 r->n[8] = (t8 >> 1) + ((t9 & one) << 25); 1084 r->n[9] = (t9 >> 1); 1085 1086 /* t0..t8: shifted right and added <= C/4 + 1/2 1087 * t9: shifted right 1088 * 1089 * Current bounds: t0..t8 <= C * (m/2 + 1/2) 1090 * t9 <= D * (m/2 + 1/4) 1091 * 1092 * Therefore the output magnitude (M) has to be set such that: 1093 * t0..t8: C * M >= C * (m/2 + 1/2) 1094 * t9: D * M >= D * (m/2 + 1/4) 1095 * 1096 * It suffices for all limbs that, for any input magnitude m: 1097 * M >= m/2 + 
1/2 1098 * 1099 * and since we want the smallest such integer value for M: 1100 * M == floor(m/2) + 1 1101 */ 1102 } 1103 1104 static SECP256K1_INLINE void haskellsecp256k1_v0_1_0_fe_storage_cmov(haskellsecp256k1_v0_1_0_fe_storage *r, const haskellsecp256k1_v0_1_0_fe_storage *a, int flag) { 1105 uint32_t mask0, mask1; 1106 volatile int vflag = flag; 1107 SECP256K1_CHECKMEM_CHECK_VERIFY(r->n, sizeof(r->n)); 1108 mask0 = vflag + ~((uint32_t)0); 1109 mask1 = ~mask0; 1110 r->n[0] = (r->n[0] & mask0) | (a->n[0] & mask1); 1111 r->n[1] = (r->n[1] & mask0) | (a->n[1] & mask1); 1112 r->n[2] = (r->n[2] & mask0) | (a->n[2] & mask1); 1113 r->n[3] = (r->n[3] & mask0) | (a->n[3] & mask1); 1114 r->n[4] = (r->n[4] & mask0) | (a->n[4] & mask1); 1115 r->n[5] = (r->n[5] & mask0) | (a->n[5] & mask1); 1116 r->n[6] = (r->n[6] & mask0) | (a->n[6] & mask1); 1117 r->n[7] = (r->n[7] & mask0) | (a->n[7] & mask1); 1118 } 1119 1120 static void haskellsecp256k1_v0_1_0_fe_impl_to_storage(haskellsecp256k1_v0_1_0_fe_storage *r, const haskellsecp256k1_v0_1_0_fe *a) { 1121 r->n[0] = a->n[0] | a->n[1] << 26; 1122 r->n[1] = a->n[1] >> 6 | a->n[2] << 20; 1123 r->n[2] = a->n[2] >> 12 | a->n[3] << 14; 1124 r->n[3] = a->n[3] >> 18 | a->n[4] << 8; 1125 r->n[4] = a->n[4] >> 24 | a->n[5] << 2 | a->n[6] << 28; 1126 r->n[5] = a->n[6] >> 4 | a->n[7] << 22; 1127 r->n[6] = a->n[7] >> 10 | a->n[8] << 16; 1128 r->n[7] = a->n[8] >> 16 | a->n[9] << 10; 1129 } 1130 1131 static SECP256K1_INLINE void haskellsecp256k1_v0_1_0_fe_impl_from_storage(haskellsecp256k1_v0_1_0_fe *r, const haskellsecp256k1_v0_1_0_fe_storage *a) { 1132 r->n[0] = a->n[0] & 0x3FFFFFFUL; 1133 r->n[1] = a->n[0] >> 26 | ((a->n[1] << 6) & 0x3FFFFFFUL); 1134 r->n[2] = a->n[1] >> 20 | ((a->n[2] << 12) & 0x3FFFFFFUL); 1135 r->n[3] = a->n[2] >> 14 | ((a->n[3] << 18) & 0x3FFFFFFUL); 1136 r->n[4] = a->n[3] >> 8 | ((a->n[4] << 24) & 0x3FFFFFFUL); 1137 r->n[5] = (a->n[4] >> 2) & 0x3FFFFFFUL; 1138 r->n[6] = a->n[4] >> 28 | ((a->n[5] << 4) & 0x3FFFFFFUL); 1139 
r->n[7] = a->n[5] >> 22 | ((a->n[6] << 10) & 0x3FFFFFFUL); 1140 r->n[8] = a->n[6] >> 16 | ((a->n[7] << 16) & 0x3FFFFFFUL); 1141 r->n[9] = a->n[7] >> 10; 1142 } 1143 1144 static void haskellsecp256k1_v0_1_0_fe_from_signed30(haskellsecp256k1_v0_1_0_fe *r, const haskellsecp256k1_v0_1_0_modinv32_signed30 *a) { 1145 const uint32_t M26 = UINT32_MAX >> 6; 1146 const uint32_t a0 = a->v[0], a1 = a->v[1], a2 = a->v[2], a3 = a->v[3], a4 = a->v[4], 1147 a5 = a->v[5], a6 = a->v[6], a7 = a->v[7], a8 = a->v[8]; 1148 1149 /* The output from haskellsecp256k1_v0_1_0_modinv32{_var} should be normalized to range [0,modulus), and 1150 * have limbs in [0,2^30). The modulus is < 2^256, so the top limb must be below 2^(256-30*8). 1151 */ 1152 VERIFY_CHECK(a0 >> 30 == 0); 1153 VERIFY_CHECK(a1 >> 30 == 0); 1154 VERIFY_CHECK(a2 >> 30 == 0); 1155 VERIFY_CHECK(a3 >> 30 == 0); 1156 VERIFY_CHECK(a4 >> 30 == 0); 1157 VERIFY_CHECK(a5 >> 30 == 0); 1158 VERIFY_CHECK(a6 >> 30 == 0); 1159 VERIFY_CHECK(a7 >> 30 == 0); 1160 VERIFY_CHECK(a8 >> 16 == 0); 1161 1162 r->n[0] = a0 & M26; 1163 r->n[1] = (a0 >> 26 | a1 << 4) & M26; 1164 r->n[2] = (a1 >> 22 | a2 << 8) & M26; 1165 r->n[3] = (a2 >> 18 | a3 << 12) & M26; 1166 r->n[4] = (a3 >> 14 | a4 << 16) & M26; 1167 r->n[5] = (a4 >> 10 | a5 << 20) & M26; 1168 r->n[6] = (a5 >> 6 | a6 << 24) & M26; 1169 r->n[7] = (a6 >> 2 ) & M26; 1170 r->n[8] = (a6 >> 28 | a7 << 2) & M26; 1171 r->n[9] = (a7 >> 24 | a8 << 6); 1172 } 1173 1174 static void haskellsecp256k1_v0_1_0_fe_to_signed30(haskellsecp256k1_v0_1_0_modinv32_signed30 *r, const haskellsecp256k1_v0_1_0_fe *a) { 1175 const uint32_t M30 = UINT32_MAX >> 2; 1176 const uint64_t a0 = a->n[0], a1 = a->n[1], a2 = a->n[2], a3 = a->n[3], a4 = a->n[4], 1177 a5 = a->n[5], a6 = a->n[6], a7 = a->n[7], a8 = a->n[8], a9 = a->n[9]; 1178 1179 r->v[0] = (a0 | a1 << 26) & M30; 1180 r->v[1] = (a1 >> 4 | a2 << 22) & M30; 1181 r->v[2] = (a2 >> 8 | a3 << 18) & M30; 1182 r->v[3] = (a3 >> 12 | a4 << 14) & M30; 1183 r->v[4] = (a4 >> 16 | a5 
<< 10) & M30; 1184 r->v[5] = (a5 >> 20 | a6 << 6) & M30; 1185 r->v[6] = (a6 >> 24 | a7 << 2 1186 | a8 << 28) & M30; 1187 r->v[7] = (a8 >> 2 | a9 << 24) & M30; 1188 r->v[8] = a9 >> 6; 1189 } 1190 1191 static const haskellsecp256k1_v0_1_0_modinv32_modinfo haskellsecp256k1_v0_1_0_const_modinfo_fe = { 1192 {{-0x3D1, -4, 0, 0, 0, 0, 0, 0, 65536}}, 1193 0x2DDACACFL 1194 }; 1195 1196 static void haskellsecp256k1_v0_1_0_fe_impl_inv(haskellsecp256k1_v0_1_0_fe *r, const haskellsecp256k1_v0_1_0_fe *x) { 1197 haskellsecp256k1_v0_1_0_fe tmp = *x; 1198 haskellsecp256k1_v0_1_0_modinv32_signed30 s; 1199 1200 haskellsecp256k1_v0_1_0_fe_normalize(&tmp); 1201 haskellsecp256k1_v0_1_0_fe_to_signed30(&s, &tmp); 1202 haskellsecp256k1_v0_1_0_modinv32(&s, &haskellsecp256k1_v0_1_0_const_modinfo_fe); 1203 haskellsecp256k1_v0_1_0_fe_from_signed30(r, &s); 1204 } 1205 1206 static void haskellsecp256k1_v0_1_0_fe_impl_inv_var(haskellsecp256k1_v0_1_0_fe *r, const haskellsecp256k1_v0_1_0_fe *x) { 1207 haskellsecp256k1_v0_1_0_fe tmp = *x; 1208 haskellsecp256k1_v0_1_0_modinv32_signed30 s; 1209 1210 haskellsecp256k1_v0_1_0_fe_normalize_var(&tmp); 1211 haskellsecp256k1_v0_1_0_fe_to_signed30(&s, &tmp); 1212 haskellsecp256k1_v0_1_0_modinv32_var(&s, &haskellsecp256k1_v0_1_0_const_modinfo_fe); 1213 haskellsecp256k1_v0_1_0_fe_from_signed30(r, &s); 1214 } 1215 1216 static int haskellsecp256k1_v0_1_0_fe_impl_is_square_var(const haskellsecp256k1_v0_1_0_fe *x) { 1217 haskellsecp256k1_v0_1_0_fe tmp; 1218 haskellsecp256k1_v0_1_0_modinv32_signed30 s; 1219 int jac, ret; 1220 1221 tmp = *x; 1222 haskellsecp256k1_v0_1_0_fe_normalize_var(&tmp); 1223 /* haskellsecp256k1_v0_1_0_jacobi32_maybe_var cannot deal with input 0. 
*/ 1224 if (haskellsecp256k1_v0_1_0_fe_is_zero(&tmp)) return 1; 1225 haskellsecp256k1_v0_1_0_fe_to_signed30(&s, &tmp); 1226 jac = haskellsecp256k1_v0_1_0_jacobi32_maybe_var(&s, &haskellsecp256k1_v0_1_0_const_modinfo_fe); 1227 if (jac == 0) { 1228 /* haskellsecp256k1_v0_1_0_jacobi32_maybe_var failed to compute the Jacobi symbol. Fall back 1229 * to computing a square root. This should be extremely rare with random 1230 * input (except in VERIFY mode, where a lower iteration count is used). */ 1231 haskellsecp256k1_v0_1_0_fe dummy; 1232 ret = haskellsecp256k1_v0_1_0_fe_sqrt(&dummy, &tmp); 1233 } else { 1234 ret = jac >= 0; 1235 } 1236 return ret; 1237 } 1238 1239 #endif /* SECP256K1_FIELD_REPR_IMPL_H */