fixed

Pure Haskell large fixed-width integers and Montgomery arithmetic.
git clone git://git.ppad.tech/fixed.git
Log | Files | Refs | README | LICENSE

commit 33d61325056e4e3622768b153faaaa57c90cefbc
parent fd8ecca55626601b8c55357966519ad2fb426888
Author: Jared Tobin <jared@jtobin.io>
Date:   Fri, 19 Dec 2025 12:28:49 -0330

lib: use unrolled sqrt#

Improves the allocation story dramatically.

Diffstat:
Aetc/generate_sqrt.sh | 54++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mlib/Numeric/Montgomery/Secp256k1/Curve.hs | 523++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 572 insertions(+), 5 deletions(-)

diff --git a/etc/generate_sqrt.sh b/etc/generate_sqrt.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash + +# generates a constant-time haskell function for performing modular +# square root with montgomery arithmetic on the secp256k1 field prime. +# +# tonelli-shanks is used. since p = 3 mod 4 for secp256k1, the square +# root simplifies to a single exponentiation: +# +# sqrt(a) = a ^ ((p + 1) / 4) mod p +# +# one proceeds through the (fixed, known) bit-string of the exponent +# in MSB order, montgomery-squaring an accumulator each time, and +# montgomery-multiplying on every '1' bit. this script generates a +# function consisting of this loop, unrolled. +# +# since the square-and-multiply schedule is fixed, then given +# constant-time 'sqr#' and 'mul#", 'sqrt#' is also constant-time by +# construction. + +# for tonelli-shanks on secp256k1, p = 3 mod 4, so we compute: +# +# sqrt(a) = a ^ ((p + 1) / 4) mod p +# +# where p is the secp256k1 field prime: +# +# p = 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F +# +# and the exponent (p + 1) / 4 is: +# +# e = 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffff0c + +exponent="0011111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111110111111111111111111111100001100" + +echo "-- generated by etc/generate_sqrt.sh" +echo "sqrt#" +echo " :: (# Limb, Limb, Limb, Limb #)" +echo " -> (# Limb, Limb, Limb, Limb #)" +echo "sqrt# a =" +echo " let !t0 = (# Limb 0x1000003D1##, Limb 0##, Limb 0##, Limb 0## #)" + +label=1 + +for ((i = 0; i < ${#exponent}; i++)); do + echo " !t""$label"" = sqr# t""$((label-1))" + if [[ "${exponent:i:1}" == "1" ]]; then + label=$((label+1)) + echo " !t""$label"" = mul# a t""$((label-1))" + fi + label=$((label+1)) +done + +echo " !r = t""$((label-1))" +echo " in r" +echo '{-# INLINE sqrt# #-}' diff --git a/lib/Numeric/Montgomery/Secp256k1/Curve.hs b/lib/Numeric/Montgomery/Secp256k1/Curve.hs @@ -52,6 +52,7 @@ module Numeric.Montgomery.Secp256k1.Curve ( , inv , inv# , sqrt + , sqrt# , exp , odd# , odd @@ -996,13 +997,525 @@ inv (Montgomery w) = Montgomery (inv# w) -- >>> (*) <$> sqrt 15 <*> sqrt 15 -- Just 15 sqrt :: Montgomery -> Maybe Montgomery -sqrt n = - let !e0 = 0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffbfffff0c - !rv = exp n e0 - in if C.decide (eq (sqr rv) n) - then Just $! rv +sqrt (Montgomery n) = + let !rv = sqrt# n + in if C.decide (WW.eq# (sqr# rv) n) + then Just $! Montgomery rv else Nothing +-- generated by etc/generate_sqrt.sh +sqrt# + :: (# Limb, Limb, Limb, Limb #) + -> (# Limb, Limb, Limb, Limb #) +sqrt# a = + let !t0 = (# Limb 0x1000003D1##, Limb 0##, Limb 0##, Limb 0## #) + !t1 = sqr# t0 + !t2 = sqr# t1 + !t3 = sqr# t2 + !t4 = mul# a t3 + !t5 = sqr# t4 + !t6 = mul# a t5 + !t7 = sqr# t6 + !t8 = mul# a t7 + !t9 = sqr# t8 + !t10 = mul# a t9 + !t11 = sqr# t10 + !t12 = mul# a t11 + !t13 = sqr# t12 + !t14 = mul# a t13 + !t15 = sqr# t14 + !t16 = mul# a t15 + !t17 = sqr# t16 + !t18 = mul# a t17 + !t19 = sqr# t18 + !t20 = mul# a t19 + !t21 = sqr# t20 + !t22 = mul# a t21 + !t23 = sqr# t22 + !t24 = mul# a t23 + !t25 = sqr# t24 + !t26 = mul# a t25 + !t27 = sqr# t26 + !t28 = mul# a t27 + !t29 = sqr# t28 + !t30 = mul# a t29 + !t31 = sqr# t30 + !t32 = mul# a t31 + !t33 = sqr# t32 + !t34 = mul# a t33 + !t35 = sqr# t34 + !t36 = mul# a t35 + !t37 = sqr# t36 + !t38 = mul# a t37 + !t39 = sqr# t38 + !t40 = mul# a t39 + !t41 = sqr# t40 + !t42 = mul# a t41 + !t43 = sqr# t42 + !t44 = mul# a t43 + !t45 = sqr# t44 + !t46 = mul# a t45 + !t47 = sqr# t46 + !t48 = mul# a t47 + !t49 = sqr# t48 + !t50 = mul# a t49 + !t51 = sqr# t50 + !t52 = mul# a t51 + !t53 = sqr# t52 + !t54 = mul# a t53 + !t55 = sqr# t54 + !t56 = mul# a t55 + !t57 = sqr# t56 + !t58 = mul# a t57 + !t59 = sqr# t58 + !t60 = mul# a t59 + !t61 = sqr# t60 + !t62 = mul# a t61 + !t63 = sqr# t62 + !t64 = mul# a t63 + !t65 = sqr# t64 + !t66 = mul# a t65 + !t67 = sqr# t66 + !t68 = mul# a t67 + !t69 = sqr# t68 + !t70 = mul# a t69 + !t71 = sqr# t70 + !t72 = mul# a t71 + !t73 = sqr# t72 + !t74 = mul# a t73 + !t75 = sqr# t74 + !t76 = mul# a t75 + !t77 = sqr# t76 + !t78 = mul# a t77 + !t79 = sqr# t78 + !t80 = mul# a t79 + !t81 = sqr# t80 + !t82 = mul# a t81 + !t83 = sqr# t82 + !t84 = mul# a t83 + !t85 = sqr# t84 + !t86 = mul# a t85 + !t87 = sqr# t86 + !t88 = mul# a t87 + !t89 = sqr# t88 + !t90 = mul# a t89 + !t91 = sqr# t90 + !t92 = mul# a t91 + !t93 = sqr# t92 + !t94 = mul# a t93 + !t95 = sqr# t94 + !t96 = mul# a t95 + !t97 = sqr# t96 + !t98 = mul# a t97 + !t99 = sqr# t98 + !t100 = mul# a t99 + !t101 = sqr# t100 + !t102 = mul# a t101 + !t103 = sqr# t102 + !t104 = mul# a t103 + !t105 = sqr# t104 + !t106 = mul# a t105 + !t107 = sqr# t106 + !t108 = mul# a t107 + !t109 = sqr# t108 + !t110 = mul# a t109 + !t111 = sqr# t110 + !t112 = mul# a t111 + !t113 = sqr# t112 + !t114 = mul# a t113 + !t115 = sqr# t114 + !t116 = mul# a t115 + !t117 = sqr# t116 + !t118 = mul# a t117 + !t119 = sqr# t118 + !t120 = mul# a t119 + !t121 = sqr# t120 + !t122 = mul# a t121 + !t123 = sqr# t122 + !t124 = mul# a t123 + !t125 = sqr# t124 + !t126 = mul# a t125 + !t127 = sqr# t126 + !t128 = mul# a t127 + !t129 = sqr# t128 + !t130 = mul# a t129 + !t131 = sqr# t130 + !t132 = mul# a t131 + !t133 = sqr# t132 + !t134 = mul# a t133 + !t135 = sqr# t134 + !t136 = mul# a t135 + !t137 = sqr# t136 + !t138 = mul# a t137 + !t139 = sqr# t138 + !t140 = mul# a t139 + !t141 = sqr# t140 + !t142 = mul# a t141 + !t143 = sqr# t142 + !t144 = mul# a t143 + !t145 = sqr# t144 + !t146 = mul# a t145 + !t147 = sqr# t146 + !t148 = mul# a t147 + !t149 = sqr# t148 + !t150 = mul# a t149 + !t151 = sqr# t150 + !t152 = mul# a t151 + !t153 = sqr# t152 + !t154 = mul# a t153 + !t155 = sqr# t154 + !t156 = mul# a t155 + !t157 = sqr# t156 + !t158 = mul# a t157 + !t159 = sqr# t158 + !t160 = mul# a t159 + !t161 = sqr# t160 + !t162 = mul# a t161 + !t163 = sqr# t162 + !t164 = mul# a t163 + !t165 = sqr# t164 + !t166 = mul# a t165 + !t167 = sqr# t166 + !t168 = mul# a t167 + !t169 = sqr# t168 + !t170 = mul# a t169 + !t171 = sqr# t170 + !t172 = mul# a t171 + !t173 = sqr# t172 + !t174 = mul# a t173 + !t175 = sqr# t174 + !t176 = mul# a t175 + !t177 = sqr# t176 + !t178 = mul# a t177 + !t179 = sqr# t178 + !t180 = mul# a t179 + !t181 = sqr# t180 + !t182 = mul# a t181 + !t183 = sqr# t182 + !t184 = mul# a t183 + !t185 = sqr# t184 + !t186 = mul# a t185 + !t187 = sqr# t186 + !t188 = mul# a t187 + !t189 = sqr# t188 + !t190 = mul# a t189 + !t191 = sqr# t190 + !t192 = mul# a t191 + !t193 = sqr# t192 + !t194 = mul# a t193 + !t195 = sqr# t194 + !t196 = mul# a t195 + !t197 = sqr# t196 + !t198 = mul# a t197 + !t199 = sqr# t198 + !t200 = mul# a t199 + !t201 = sqr# t200 + !t202 = mul# a t201 + !t203 = sqr# t202 + !t204 = mul# a t203 + !t205 = sqr# t204 + !t206 = mul# a t205 + !t207 = sqr# t206 + !t208 = mul# a t207 + !t209 = sqr# t208 + !t210 = mul# a t209 + !t211 = sqr# t210 + !t212 = mul# a t211 + !t213 = sqr# t212 + !t214 = mul# a t213 + !t215 = sqr# t214 + !t216 = mul# a t215 + !t217 = sqr# t216 + !t218 = mul# a t217 + !t219 = sqr# t218 + !t220 = mul# a t219 + !t221 = sqr# t220 + !t222 = mul# a t221 + !t223 = sqr# t222 + !t224 = mul# a t223 + !t225 = sqr# t224 + !t226 = mul# a t225 + !t227 = sqr# t226 + !t228 = mul# a t227 + !t229 = sqr# t228 + !t230 = mul# a t229 + !t231 = sqr# t230 + !t232 = mul# a t231 + !t233 = sqr# t232 + !t234 = mul# a t233 + !t235 = sqr# t234 + !t236 = mul# a t235 + !t237 = sqr# t236 + !t238 = mul# a t237 + !t239 = sqr# t238 + !t240 = mul# a t239 + !t241 = sqr# t240 + !t242 = mul# a t241 + !t243 = sqr# t242 + !t244 = mul# a t243 + !t245 = sqr# t244 + !t246 = mul# a t245 + !t247 = sqr# t246 + !t248 = mul# a t247 + !t249 = sqr# t248 + !t250 = mul# a t249 + !t251 = sqr# t250 + !t252 = mul# a t251 + !t253 = sqr# t252 + !t254 = mul# a t253 + !t255 = sqr# t254 + !t256 = mul# a t255 + !t257 = sqr# t256 + !t258 = mul# a t257 + !t259 = sqr# t258 + !t260 = mul# a t259 + !t261 = sqr# t260 + !t262 = mul# a t261 + !t263 = sqr# t262 + !t264 = mul# a t263 + !t265 = sqr# t264 + !t266 = mul# a t265 + !t267 = sqr# t266 + !t268 = mul# a t267 + !t269 = sqr# t268 + !t270 = mul# a t269 + !t271 = sqr# t270 + !t272 = mul# a t271 + !t273 = sqr# t272 + !t274 = mul# a t273 + !t275 = sqr# t274 + !t276 = mul# a t275 + !t277 = sqr# t276 + !t278 = mul# a t277 + !t279 = sqr# t278 + !t280 = mul# a t279 + !t281 = sqr# t280 + !t282 = mul# a t281 + !t283 = sqr# t282 + !t284 = mul# a t283 + !t285 = sqr# t284 + !t286 = mul# a t285 + !t287 = sqr# t286 + !t288 = mul# a t287 + !t289 = sqr# t288 + !t290 = mul# a t289 + !t291 = sqr# t290 + !t292 = mul# a t291 + !t293 = sqr# t292 + !t294 = mul# a t293 + !t295 = sqr# t294 + !t296 = mul# a t295 + !t297 = sqr# t296 + !t298 = mul# a t297 + !t299 = sqr# t298 + !t300 = mul# a t299 + !t301 = sqr# t300 + !t302 = mul# a t301 + !t303 = sqr# t302 + !t304 = mul# a t303 + !t305 = sqr# t304 + !t306 = mul# a t305 + !t307 = sqr# t306 + !t308 = mul# a t307 + !t309 = sqr# t308 + !t310 = mul# a t309 + !t311 = sqr# t310 + !t312 = mul# a t311 + !t313 = sqr# t312 + !t314 = mul# a t313 + !t315 = sqr# t314 + !t316 = mul# a t315 + !t317 = sqr# t316 + !t318 = mul# a t317 + !t319 = sqr# t318 + !t320 = mul# a t319 + !t321 = sqr# t320 + !t322 = mul# a t321 + !t323 = sqr# t322 + !t324 = mul# a t323 + !t325 = sqr# t324 + !t326 = mul# a t325 + !t327 = sqr# t326 + !t328 = mul# a t327 + !t329 = sqr# t328 + !t330 = mul# a t329 + !t331 = sqr# t330 + !t332 = mul# a t331 + !t333 = sqr# t332 + !t334 = mul# a t333 + !t335 = sqr# t334 + !t336 = mul# a t335 + !t337 = sqr# t336 + !t338 = mul# a t337 + !t339 = sqr# t338 + !t340 = mul# a t339 + !t341 = sqr# t340 + !t342 = mul# a t341 + !t343 = sqr# t342 + !t344 = mul# a t343 + !t345 = sqr# t344 + !t346 = mul# a t345 + !t347 = sqr# t346 + !t348 = mul# a t347 + !t349 = sqr# t348 + !t350 = mul# a t349 + !t351 = sqr# t350 + !t352 = mul# a t351 + !t353 = sqr# t352 + !t354 = mul# a t353 + !t355 = sqr# t354 + !t356 = mul# a t355 + !t357 = sqr# t356 + !t358 = mul# a t357 + !t359 = sqr# t358 + !t360 = mul# a t359 + !t361 = sqr# t360 + !t362 = mul# a t361 + !t363 = sqr# t362 + !t364 = mul# a t363 + !t365 = sqr# t364 + !t366 = mul# a t365 + !t367 = sqr# t366 + !t368 = mul# a t367 + !t369 = sqr# t368 + !t370 = mul# a t369 + !t371 = sqr# t370 + !t372 = mul# a t371 + !t373 = sqr# t372 + !t374 = mul# a t373 + !t375 = sqr# t374 + !t376 = mul# a t375 + !t377 = sqr# t376 + !t378 = mul# a t377 + !t379 = sqr# t378 + !t380 = mul# a t379 + !t381 = sqr# t380 + !t382 = mul# a t381 + !t383 = sqr# t382 + !t384 = mul# a t383 + !t385 = sqr# t384 + !t386 = mul# a t385 + !t387 = sqr# t386 + !t388 = mul# a t387 + !t389 = sqr# t388 + !t390 = mul# a t389 + !t391 = sqr# t390 + !t392 = mul# a t391 + !t393 = sqr# t392 + !t394 = mul# a t393 + !t395 = sqr# t394 + !t396 = mul# a t395 + !t397 = sqr# t396 + !t398 = mul# a t397 + !t399 = sqr# t398 + !t400 = mul# a t399 + !t401 = sqr# t400 + !t402 = mul# a t401 + !t403 = sqr# t402 + !t404 = mul# a t403 + !t405 = sqr# t404 + !t406 = mul# a t405 + !t407 = sqr# t406 + !t408 = mul# a t407 + !t409 = sqr# t408 + !t410 = mul# a t409 + !t411 = sqr# t410 + !t412 = mul# a t411 + !t413 = sqr# t412 + !t414 = mul# a t413 + !t415 = sqr# t414 + !t416 = mul# a t415 + !t417 = sqr# t416 + !t418 = mul# a t417 + !t419 = sqr# t418 + !t420 = mul# a t419 + !t421 = sqr# t420 + !t422 = mul# a t421 + !t423 = sqr# t422 + !t424 = mul# a t423 + !t425 = sqr# t424 + !t426 = mul# a t425 + !t427 = sqr# t426 + !t428 = mul# a t427 + !t429 = sqr# t428 + !t430 = mul# a t429 + !t431 = sqr# t430 + !t432 = mul# a t431 + !t433 = sqr# t432 + !t434 = mul# a t433 + !t435 = sqr# t434 + !t436 = mul# a t435 + !t437 = sqr# t436 + !t438 = mul# a t437 + !t439 = sqr# t438 + !t440 = mul# a t439 + !t441 = sqr# t440 + !t442 = mul# a t441 + !t443 = sqr# t442 + !t444 = mul# a t443 + !t445 = sqr# t444 + !t446 = mul# a t445 + !t447 = sqr# t446 + !t448 = mul# a t447 + !t449 = sqr# t448 + !t450 = sqr# t449 + !t451 = mul# a t450 + !t452 = sqr# t451 + !t453 = mul# a t452 + !t454 = sqr# t453 + !t455 = mul# a t454 + !t456 = sqr# t455 + !t457 = mul# a t456 + !t458 = sqr# t457 + !t459 = mul# a t458 + !t460 = sqr# t459 + !t461 = mul# a t460 + !t462 = sqr# t461 + !t463 = mul# a t462 + !t464 = sqr# t463 + !t465 = mul# a t464 + !t466 = sqr# t465 + !t467 = mul# a t466 + !t468 = sqr# t467 + !t469 = mul# a t468 + !t470 = sqr# t469 + !t471 = mul# a t470 + !t472 = sqr# t471 + !t473 = mul# a t472 + !t474 = sqr# t473 + !t475 = mul# a t474 + !t476 = sqr# t475 + !t477 = mul# a t476 + !t478 = sqr# t477 + !t479 = mul# a t478 + !t480 = sqr# t479 + !t481 = mul# a t480 + !t482 = sqr# t481 + !t483 = mul# a t482 + !t484 = sqr# t483 + !t485 = mul# a t484 + !t486 = sqr# t485 + !t487 = mul# a t486 + !t488 = sqr# t487 + !t489 = mul# a t488 + !t490 = sqr# t489 + !t491 = mul# a t490 + !t492 = sqr# t491 + !t493 = mul# a t492 + !t494 = sqr# t493 + !t495 = sqr# t494 + !t496 = sqr# t495 + !t497 = sqr# t496 + !t498 = sqr# t497 + !t499 = mul# a t498 + !t500 = sqr# t499 + !t501 = mul# a t500 + !t502 = sqr# t501 + !t503 = sqr# t502 + !r = t503 + in r +{-# INLINE sqrt# #-} + -- XX want unboxed variants -- | Exponentiation in the Montgomery domain.