fixed

Pure Haskell large fixed-width integers and Montgomery arithmetic.
git clone git://git.ppad.tech/fixed.git
Log | Files | Refs | README | LICENSE

Scalar.hs (29474B)


      1 {-# LANGUAGE BangPatterns #-}
      2 {-# LANGUAGE MagicHash #-}
      3 {-# LANGUAGE NumericUnderscores #-}
      4 {-# LANGUAGE PatternSynonyms #-}
      5 {-# LANGUAGE ViewPatterns #-}
      6 {-# LANGUAGE UnboxedSums #-}
      7 {-# LANGUAGE UnboxedTuples #-}
      8 {-# LANGUAGE UnliftedNewtypes #-}
      9 
     10 -- |
     11 -- Module: Numeric.Montgomery.Secp256k1.Scalar
     12 -- Copyright: (c) 2025 Jared Tobin
     13 -- License: MIT
     14 -- Maintainer: Jared Tobin <jared@ppad.tech>
     15 --
     16 -- Montgomery form 'Wider' words, as well as arithmetic operations, with
     17 -- domain derived from the secp256k1 elliptic curve scalar group order.
     18 
     19 module Numeric.Montgomery.Secp256k1.Scalar (
     20   -- * Montgomery form, secp256k1 scalar group order modulus
     21     Montgomery(..)
     22   , render
     23   , to
     24   , from
     25   , zero
     26   , one
     27 
     28   -- * Comparison
     29   , eq
     30   , eq_vartime
     31 
     32   -- * Reduction and retrieval
     33   , redc
     34   , redc#
     35   , retr
     36   , retr#
     37 
     38   -- * Constant-time selection
     39   , select
     40   , select#
     41 
     42   -- * Montgomery arithmetic
     43   , add
     44   , add#
     45   , sub
     46   , sub#
     47   , mul
     48   , mul#
     49   , sqr
     50   , sqr#
     51   , neg
     52   , neg#
     53   , inv
     54   , inv#
     55   , exp
     56   , exp#
     57   , odd_vartime
     58   , odd#
     59   ) where
     60 
     61 import Control.DeepSeq
     62 import qualified Data.Choice as C
     63 import Data.Word.Limb (Limb(..))
     64 import qualified Data.Word.Limb as L
     65 import qualified Data.Word.Wide as W
     66 import Data.Word.Wider (Wider(..))
     67 import qualified Data.Word.Wider as WW
     68 import GHC.Exts (Word(..), Word#)
     69 import Prelude hiding (or, and, not, exp)
     70 
     71 -- montgomery arithmetic, specialized to the secp256k1 scalar group order
     72 -- 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141
     73 
     74 -- | Montgomery-form 'Wider' words, on the Montgomery domain defined by
     75 --   the secp256k1 scalar group order.
     76 --
        --   The single strict field holds the four limbs of the Montgomery
        --   representation. 'show' prints the value after conversion out of
        --   the domain (via 'from'), whereas 'render' prints the raw limbs.
        --
     77 --   >>> let one = 1 :: Montgomery
     78 --   >>> one
     79 --   1
     80 --   >>> putStrLn (render one)
     81 --   (4624529908474429119, 4994812053365940164, 1, 0)
     82 data Montgomery = Montgomery !Limb4
     83 
     84 instance Show Montgomery where
     85   show = show . from
     86 
     87 -- | Produce a 'String' displaying the four raw 'Limb's of a 'Montgomery'
     88 --   value as a parenthesised, comma-separated tuple.
     89 --
     90 --   >>> putStrLn (render 1)
     91 --   (4624529908474429119, 4994812053365940164, 1, 0)
     92 render :: Montgomery -> String
     93 render (Montgomery (L4 w0 w1 w2 w3)) = mconcat
     94   [ "(", show (W# w0), ", ", show (W# w1), ", "
     95   , show (W# w2), ", ", show (W# w3), ")" ]
     96 
     97 -- | Note that 'fromInteger' necessarily runs in variable time due
     98 --   to conversion from the variable-size, potentially heap-allocated
     99 --   'Integer' type.
        --
        --   'abs' is the identity, and 'signum' yields 0 for zero and 1 for
        --   anything else. The 'signum' result is itself encoded in Montgomery
        --   form, so that @abs x * signum x == x@ holds.
    100 instance Num Montgomery where
    101   a + b = add a b
    102   a - b = sub a b
    103   a * b = mul a b
    104   negate a = neg a
    105   abs = id
    106   fromInteger = to . WW.to_vartime
    107   signum (Montgomery (# l0, l1, l2, l3 #)) =
            -- OR all limbs together: the result is nonzero iff any limb is.
    108     let !(Limb l) = l0 `L.or#` l1 `L.or#` l2 `L.or#` l3
    109         !n = C.from_word_nonzero# l
    110         !b = C.to_word# n
            -- b is expected to be the plain word 0 or 1. It must be moved into
            -- the Montgomery domain with 'to#': the raw limbs (1, 0, 0, 0) do
            -- not represent the value one (compare 'one').
    111     in  Montgomery (to# (L4 b 0## 0## 0##))
    112 
    113 instance NFData Montgomery where
    114   rnf (Montgomery a) = case a of (# _, _, _, _ #) -> ()
    115 
    116 -- utilities ------------------------------------------------------------------
    117 
        -- A two-limb ("wide") word as an unboxed pair. The helpers in this
        -- module treat the first component as the low limb.
    118 type Limb2 = (# Limb, Limb #)
    119 
        -- A four-limb word as an unboxed 4-tuple. The constants in this module
        -- are written with the least-significant limb first.
    120 type Limb4 = (# Limb, Limb, Limb, Limb #)
    121 
        -- Pattern synonym that builds or matches a 'Limb4' from four raw
        -- 'Word#'s, wrapping (or unwrapping) each one in the 'Limb'
        -- constructor. Declared COMPLETE, so a single 'L4' match is exhaustive.
    122 pattern L4 :: Word# -> Word# -> Word# -> Word# -> Limb4
    123 pattern L4 w0 w1 w2 w3 = (# Limb w0, Limb w1, Limb w2, Limb w3 #)
    124 {-# COMPLETE L4 #-}
    125 
    126 -- Add a single limb to a wide word, wrapping on overflow of the high limb.
    127 wadd_w# :: Limb2 -> Limb -> Limb2
    128 wadd_w# (# lo_x, hi_x #) y =
    129   case L.add_o# lo_x y of
    130     (# lo_sum, carry #) -> case L.add_o# hi_x carry of
    131       (# hi_sum, _ #) -> (# lo_sum, hi_sum #)
    132 {-# INLINE wadd_w# #-}
    133 
    134 -- Project out the first (low) 'Limb' of a wide word, dropping the other.
    135 lo :: Limb2 -> Limb
    136 lo w = case w of (# low, _ #) -> low
    137 {-# INLINE lo #-}
    138 
    139 -- comparison -----------------------------------------------------------------
    140 
    141 -- | Constant-time equality comparison, via 'C.eq_wider#' on the raw words.
    142 eq :: Montgomery -> Montgomery -> C.Choice
    143 eq (Montgomery (L4 x0 x1 x2 x3)) (Montgomery (L4 y0 y1 y2 y3)) =
    144   (# x0, x1, x2, x3 #) `C.eq_wider#` (# y0, y1, y2, y3 #)
    145 {-# INLINE eq #-}
    146 
    147 -- | Variable-time equality comparison, via 'WW.eq_vartime' on the limbs.
    148 eq_vartime :: Montgomery -> Montgomery -> Bool
    149 eq_vartime (Montgomery x) (Montgomery y) =
    150   WW.eq_vartime (Wider x) (Wider y)
    151 
    152 -- innards --------------------------------------------------------------------
    153 
        -- Word-by-word Montgomery reduction of an eight-limb value, given as
        -- its upper and lower four-limb halves. Returns four limbs plus a
        -- "meta-carry" limb; the caller ('redc#') hands both to
        -- 'WW.sub_mod_c#' together with the modulus.
        --
        -- There are four rounds. Each round derives a multiplier from the
        -- current lowest limb and 'n', accumulates that multiple of the
        -- modulus limbs with 'L.mac#' (the lowest output limb is discarded),
        -- and folds the final carry into the next upper limb with 'L.add_c#'.
    154 redc_inner#
    155   :: Limb4             -- ^ upper limbs
    156   -> Limb4             -- ^ lower limbs
    157   -> (# Limb4, Limb #) -- ^ upper limbs, meta-carry
    158 redc_inner# (# u0, u1, u2, u3 #) (# l0, l1, l2, l3 #) =
              -- modulus limbs (the group order), least-significant limb first
    159   let !(# m0, m1, m2, m3 #) =
    160         L4 0xBFD25E8CD0364141## 0xBAAEDCE6AF48A03B##
    161            0xFFFFFFFFFFFFFFFE## 0xFFFFFFFFFFFFFFFF##
              -- per-limb reduction constant
              -- NOTE(review): presumably -m^{-1} mod 2^64 -- confirm
    162       !n                = Limb 0x4B0DFF665588B13F##
              -- round 0: multiplier from l0; carry lands in u0
    163       !w_0              = L.mul_w# l0 n
    164       !(# _, c_00 #)    = L.mac# w_0 m0 l0 (Limb 0##)
    165       !(# l0_1, c_01 #) = L.mac# w_0 m1 l1 c_00
    166       !(# l0_2, c_02 #) = L.mac# w_0 m2 l2 c_01
    167       !(# l0_3, c_03 #) = L.mac# w_0 m3 l3 c_02
    168       !(# u_0, mc_0 #)  = L.add_c# u0 c_03 (Limb 0##)
              -- round 1: multiplier from l0_1; carry (plus mc_0) lands in u1
    169       !w_1              = L.mul_w# l0_1 n
    170       !(# _, c_10 #)    = L.mac# w_1 m0 l0_1 (Limb 0##)
    171       !(# l1_1, c_11 #) = L.mac# w_1 m1 l0_2 c_10
    172       !(# l1_2, c_12 #) = L.mac# w_1 m2 l0_3 c_11
    173       !(# u1_3, c_13 #) = L.mac# w_1 m3 u_0 c_12
    174       !(# u_1, mc_1 #)  = L.add_c# u1 c_13 mc_0
              -- round 2: multiplier from l1_1; carry (plus mc_1) lands in u2
    175       !w_2              = L.mul_w# l1_1 n
    176       !(# _, c_20 #)    = L.mac# w_2 m0 l1_1 (Limb 0##)
    177       !(# l2_1, c_21 #) = L.mac# w_2 m1 l1_2 c_20
    178       !(# u2_2, c_22 #) = L.mac# w_2 m2 u1_3 c_21
    179       !(# u2_3, c_23 #) = L.mac# w_2 m3 u_1 c_22
    180       !(# u_2, mc_2 #)  = L.add_c# u2 c_23 mc_1
              -- round 3: multiplier from l2_1; carry (plus mc_2) lands in u3
    181       !w_3              = L.mul_w# l2_1 n
    182       !(# _, c_30 #)    = L.mac# w_3 m0 l2_1 (Limb 0##)
    183       !(# u3_1, c_31 #) = L.mac# w_3 m1 u2_2 c_30
    184       !(# u3_2, c_32 #) = L.mac# w_3 m2 u2_3 c_31
    185       !(# u3_3, c_33 #) = L.mac# w_3 m3 u_2 c_32
    186       !(# u_3, mc_3 #)  = L.add_c# u3 c_33 mc_2
    187   in  (# (# u3_1, u3_2, u3_3, u_3 #), mc_3 #)
    188 {-# INLINE redc_inner# #-}
    189 
    190 redc#
    191   :: Limb4 -- ^ lower limbs
    192   -> Limb4 -- ^ upper limbs
    193   -> Limb4 -- ^ result
    194 redc# l u =
    195   let -- group order
    196       !m = L4 0xBFD25E8CD0364141## 0xBAAEDCE6AF48A03B##
    197               0xFFFFFFFFFFFFFFFE## 0xFFFFFFFFFFFFFFFF##
    198       !(# nu, mc #) = redc_inner# u l
    199   in  WW.sub_mod_c# nu mc m m
    200 {-# INLINE redc# #-}
    201 
    202 -- | Montgomery reduction of a double-width (eight-limb) value.
    203 --
    204 --   The value is supplied as two halves: its low four limbs first,
    205 --   followed by its high four limbs.
    206 redc
    207   :: Montgomery -- ^ low half
    208   -> Montgomery -- ^ high half
    209   -> Montgomery -- ^ reduced value
    210 redc (Montgomery lower) (Montgomery upper) =
    211   case redc# lower upper of
    212     reduced -> Montgomery reduced
    213 
        -- Take four limbs out of the Montgomery domain.
        --
        -- There are four rounds, one per input limb. Each round derives a
        -- multiplier from the running low limb and 'n', then accumulates that
        -- multiple of the modulus limbs with 'L.mac#', discarding the lowest
        -- output limb. Zeros are supplied in round 0 where no higher limb
        -- exists yet. The caller ('retr#') uses the result as is.
    214 retr_inner#
    215   :: Limb4 -- ^ value in montgomery form
    216   -> Limb4 -- ^ retrieved value
    217 retr_inner# (# x0, x1, x2, x3 #) =
              -- modulus limbs (the group order), least-significant limb first
    218   let !(# m0, m1, m2, m3 #) =
    219         L4 0xBFD25E8CD0364141## 0xBAAEDCE6AF48A03B##
    220            0xFFFFFFFFFFFFFFFE## 0xFFFFFFFFFFFFFFFF##
              -- per-limb reduction constant
              -- NOTE(review): presumably -m^{-1} mod 2^64 -- confirm
    221       !n                = Limb 0x4B0DFF665588B13F##
              -- round 0: consumes x0
    222       !u_0              = L.mul_w# x0 n
    223       !(# _, o0 #)      = L.mac# u_0 m0 x0 (Limb 0##)
    224       !(# o0_1, p0_1 #) = L.mac# u_0 m1 (Limb 0##) o0
    225       !(# p0_2, q0_2 #) = L.mac# u_0 m2 (Limb 0##) p0_1
    226       !(# q0_3, r0_3 #) = L.mac# u_0 m3 (Limb 0##) q0_2
              -- round 1: consumes x1 together with the previous low limb
    227       !u_1              = L.mul_w# (L.add_w# o0_1 x1) n
    228       !(# _, o1 #)      = L.mac# u_1 m0 x1 o0_1
    229       !(# o1_1, p1_1 #) = L.mac# u_1 m1 p0_2 o1
    230       !(# p1_2, q1_2 #) = L.mac# u_1 m2 q0_3 p1_1
    231       !(# q1_3, r1_3 #) = L.mac# u_1 m3 r0_3 q1_2
              -- round 2: consumes x2
    232       !u_2              = L.mul_w# (L.add_w# o1_1 x2) n
    233       !(# _, o2 #)      = L.mac# u_2 m0 x2 o1_1
    234       !(# o2_1, p2_1 #) = L.mac# u_2 m1 p1_2 o2
    235       !(# p2_2, q2_2 #) = L.mac# u_2 m2 q1_3 p2_1
    236       !(# q2_3, r2_3 #) = L.mac# u_2 m3 r1_3 q2_2
              -- round 3: consumes x3
    237       !u_3              = L.mul_w# (L.add_w# o2_1 x3) n
    238       !(# _, o3 #)      = L.mac# u_3 m0 x3 o2_1
    239       !(# o3_1, p3_1 #) = L.mac# u_3 m1 p2_2 o3
    240       !(# p3_2, q3_2 #) = L.mac# u_3 m2 q2_3 p3_1
    241       !(# q3_3, r3_3 #) = L.mac# u_3 m3 r2_3 q3_2
    242   in  (# o3_1, p3_2, q3_3, r3_3 #)
    243 {-# INLINE retr_inner# #-}
    244 
    245 retr#
    246   :: Limb4
    247   -> Limb4
    248 retr# f = retr_inner# f
    249 {-# INLINE retr# #-}
    250 
    251 -- | Take a 'Montgomery' value out of the Montgomery domain, yielding
    252 --   the corresponding 'Wider' word.
    253 retr
    254   :: Montgomery -- ^ value in Montgomery form
    255   -> Wider      -- ^ retrieved value
    256 retr (Montgomery limbs) =
    257   case retr# limbs of
    258     plain -> Wider plain
    259 
    260 -- | Montgomery multiplication (FIOS), without conditional subtract.
        --
        --   Multiplication and reduction are interleaved across four rounds,
        --   one per limb of @x@. In round @i@, for each limb @y_j@ the product
        --   @x_i * y_j@ ('L.mul_c#') is combined with the matching limb of the
        --   previous round's result, and a multiple @u_i * m_j@ of the modulus
        --   is added on top, where @u_i@ is the low limb of the first column
        --   times 'n'. Overflow of each wide addition ('W.add_o#') is carried
        --   into the next column, and the last column's overflow is threaded
        --   between rounds as the meta-carry.
        --
        --   The caller ('mul#') passes the four result limbs and the final
        --   meta-carry to 'WW.sub_mod_c#'.
    261 mul_inner#
    262   :: Limb4              -- ^ x
    263   -> Limb4              -- ^ y
    264   -> (# Limb4, Limb #)  -- ^ product, meta-carry
    265 mul_inner# (# x0, x1, x2, x3 #) (# y0, y1, y2, y3 #) =
              -- modulus limbs (the group order), least-significant limb first
    266   let !(# m0, m1, m2, m3 #) =
    267         L4 0xBFD25E8CD0364141## 0xBAAEDCE6AF48A03B##
    268            0xFFFFFFFFFFFFFFFE## 0xFFFFFFFFFFFFFFFF##
              -- per-limb reduction constant
              -- NOTE(review): presumably -m^{-1} mod 2^64 -- confirm
    269       !n                           = Limb 0x4B0DFF665588B13F##
              -- round 0 (x0): nothing to add in from a previous round
    270       !axy0                        = L.mul_c# x0 y0
    271       !u0                          = L.mul_w# (lo axy0) n
    272       !(# (# _, a0 #), c0 #)       = W.add_o# (L.mul_c# u0 m0) axy0
    273       !carry0                      = (# a0, c0 #)
    274       !axy0_1                      = L.mul_c# x0 y1
    275       !umc0_1                      = W.add_w# (L.mul_c# u0 m1) carry0
    276       !(# (# o0, ab0_1 #), c0_1 #) = W.add_o# axy0_1 umc0_1
    277       !carry0_1                    = (# ab0_1, c0_1 #)
    278       !axy0_2                      = L.mul_c# x0 y2
    279       !umc0_2                      = W.add_w# (L.mul_c# u0 m2) carry0_1
    280       !(# (# p0, ab0_2 #), c0_2 #) = W.add_o# axy0_2 umc0_2
    281       !carry0_2                    = (# ab0_2, c0_2 #)
    282       !axy0_3                      = L.mul_c# x0 y3
    283       !umc0_3                      = W.add_w# (L.mul_c# u0 m3) carry0_2
    284       !(# (# q0, ab0_3 #), c0_3 #) = W.add_o# axy0_3 umc0_3
    285       !carry0_3                    = (# ab0_3, c0_3 #)
    286       !(# r0, mc0 #)               = carry0_3
              -- round 1 (x1): adds in o0, p0, q0, r0 and meta-carry mc0
    287       !axy1                        = wadd_w# (L.mul_c# x1 y0) o0
    288       !u1                          = L.mul_w# (lo axy1) n
    289       !(# (# _, a1 #), c1 #)       = W.add_o# (L.mul_c# u1 m0) axy1
    290       !carry1                      = (# a1, c1 #)
    291       !axy1_1                      = wadd_w# (L.mul_c# x1 y1) p0
    292       !umc1_1                      = W.add_w# (L.mul_c# u1 m1) carry1
    293       !(# (# o1, ab1_1 #), c1_1 #) = W.add_o# axy1_1 umc1_1
    294       !carry1_1                    = (# ab1_1, c1_1 #)
    295       !axy1_2                      = wadd_w# (L.mul_c# x1 y2) q0
    296       !umc1_2                      = W.add_w# (L.mul_c# u1 m2) carry1_1
    297       !(# (# p1, ab1_2 #), c1_2 #) = W.add_o# axy1_2 umc1_2
    298       !carry1_2                    = (# ab1_2, c1_2 #)
    299       !axy1_3                      = wadd_w# (L.mul_c# x1 y3) r0
    300       !umc1_3                      = W.add_w# (L.mul_c# u1 m3) carry1_2
    301       !(# (# q1, ab1_3 #), c1_3 #) = W.add_o# axy1_3 umc1_3
    302       !carry1_3                    = (# ab1_3, c1_3 #)
    303       !(# r1, mc1 #)               = wadd_w# carry1_3 mc0
              -- round 2 (x2): adds in o1, p1, q1, r1 and meta-carry mc1
    304       !axy2                        = wadd_w# (L.mul_c# x2 y0) o1
    305       !u2                          = L.mul_w# (lo axy2) n
    306       !(# (# _, a2 #), c2 #)       = W.add_o# (L.mul_c# u2 m0) axy2
    307       !carry2                      = (# a2, c2 #)
    308       !axy2_1                      = wadd_w# (L.mul_c# x2 y1) p1
    309       !umc2_1                      = W.add_w# (L.mul_c# u2 m1) carry2
    310       !(# (# o2, ab2_1 #), c2_1 #) = W.add_o# axy2_1 umc2_1
    311       !carry2_1                    = (# ab2_1, c2_1 #)
    312       !axy2_2                      = wadd_w# (L.mul_c# x2 y2) q1
    313       !umc2_2                      = W.add_w# (L.mul_c# u2 m2) carry2_1
    314       !(# (# p2, ab2_2 #), c2_2 #) = W.add_o# axy2_2 umc2_2
    315       !carry2_2                    = (# ab2_2, c2_2 #)
    316       !axy2_3                      = wadd_w# (L.mul_c# x2 y3) r1
    317       !umc2_3                      = W.add_w# (L.mul_c# u2 m3) carry2_2
    318       !(# (# q2, ab2_3 #), c2_3 #) = W.add_o# axy2_3 umc2_3
    319       !carry2_3                    = (# ab2_3, c2_3 #)
    320       !(# r2, mc2 #)               = wadd_w# carry2_3 mc1
              -- round 3 (x3): adds in o2, p2, q2, r2 and meta-carry mc2
    321       !axy3                        = wadd_w# (L.mul_c# x3 y0) o2
    322       !u3                          = L.mul_w# (lo axy3) n
    323       !(# (# _, a3 #), c3 #)       = W.add_o# (L.mul_c# u3 m0) axy3
    324       !carry3                      = (# a3, c3 #)
    325       !axy3_1                      = wadd_w# (L.mul_c# x3 y1) p2
    326       !umc3_1                      = W.add_w# (L.mul_c# u3 m1) carry3
    327       !(# (# o3, ab3_1 #), c3_1 #) = W.add_o# axy3_1 umc3_1
    328       !carry3_1                    = (# ab3_1, c3_1 #)
    329       !axy3_2                      = wadd_w# (L.mul_c# x3 y2) q2
    330       !umc3_2                      = W.add_w# (L.mul_c# u3 m2) carry3_1
    331       !(# (# p3, ab3_2 #), c3_2 #) = W.add_o# axy3_2 umc3_2
    332       !carry3_2                    = (# ab3_2, c3_2 #)
    333       !axy3_3                      = wadd_w# (L.mul_c# x3 y3) r2
    334       !umc3_3                      = W.add_w# (L.mul_c# u3 m3) carry3_2
    335       !(# (# q3, ab3_3 #), c3_3 #) = W.add_o# axy3_3 umc3_3
    336       !carry3_3                    = (# ab3_3, c3_3 #)
    337       !(# r3, mc3 #)               = wadd_w# carry3_3 mc2
    338   in  (# (# o3, p3, q3, r3 #), mc3 #)
    339 {-# INLINE mul_inner# #-}
    340 
    341 mul#
    342   :: Limb4
    343   -> Limb4
    344   -> Limb4
    345 mul# a b =
    346   let -- group order
    347       !m = L4 0xBFD25E8CD0364141## 0xBAAEDCE6AF48A03B##
    348               0xFFFFFFFFFFFFFFFE## 0xFFFFFFFFFFFFFFFF##
    349       !(# nu, mc #) = mul_inner# a b
    350   in  WW.sub_mod_c# nu mc m m
    351 {-# NOINLINE mul# #-} -- cannot be inlined without exploding comp time
    352 
    353 -- | Multiply two values in the Montgomery domain.
    354 --
    355 --   'Montgomery' is an instance of 'Num', so '*' may be used in place
    356 --   of this function.
    357 --
    358 --   >>> 1 * 1 :: Montgomery
    359 --   1
    360 mul
    361   :: Montgomery -- ^ multiplicand, Montgomery form
    362   -> Montgomery -- ^ multiplier, Montgomery form
    363   -> Montgomery -- ^ product, Montgomery form
    364 mul (Montgomery x) (Montgomery y) = Montgomery (x `mul#` y)
    365 
    366 to#
    367   :: Limb4 -- ^ integer
    368   -> Limb4
    369 to# x =
    370   let !r2 = L4 0x896CF21467D7D140## 0x741496C20E7CF878## -- r^2 mod m
    371                0xE697F5E45BCD07C6## 0x9D671CD581C69BC5##
    372   in  mul# x r2
    373 {-# INLINE to# #-}
    374 
    375 -- | Move a 'Wider' word into the Montgomery domain.
    376 to :: Wider -> Montgomery
    377 to w = case w of Wider limbs -> Montgomery (to# limbs)
    378 
    379 -- | Take a 'Montgomery' word out of the Montgomery domain.
    380 --
    381 --   This is simply another name for 'retr'.
    382 from :: Montgomery -> Wider
    383 from m = retr m
    384 
    385 add#
    386   :: Limb4 -- ^ augend
    387   -> Limb4 -- ^ addend
    388   -> Limb4 -- ^ sum
    389 add# a b =
    390   let -- group order
    391       !m = L4 0xBFD25E8CD0364141## 0xBAAEDCE6AF48A03B##
    392               0xFFFFFFFFFFFFFFFE## 0xFFFFFFFFFFFFFFFF##
    393   in  WW.add_mod# a b m
    394 {-# INLINE add# #-}
    395 
    396 -- | Add two values in the Montgomery domain.
    397 --
    398 --   'Montgomery' is an instance of 'Num', so '+' may be used in place
    399 --   of this function.
    400 --
    401 --   >>> 1 + 1 :: Montgomery
    402 --   2
    403 add
    404   :: Montgomery -- ^ augend
    405   -> Montgomery -- ^ addend
    406   -> Montgomery -- ^ sum
    407 add (Montgomery x) (Montgomery y) = Montgomery (x `add#` y)
    408 
    409 sub#
    410   :: Limb4 -- ^ minuend
    411   -> Limb4 -- ^ subtrahend
    412   -> Limb4 -- ^ difference
    413 sub# a b =
    414   let !m = L4 0xBFD25E8CD0364141## 0xBAAEDCE6AF48A03B##
    415               0xFFFFFFFFFFFFFFFE## 0xFFFFFFFFFFFFFFFF##
    416   in  WW.sub_mod# a b m
    417 {-# INLINE sub# #-}
    418 
    419 -- | Subtract one value from another in the Montgomery domain.
    420 --
    421 --   'Montgomery' is an instance of 'Num', so '-' may be used in place
    422 --   of this function.
    423 --
    424 --   >>> 1 - 1 :: Montgomery
    425 --   0
    426 sub
    427   :: Montgomery -- ^ minuend
    428   -> Montgomery -- ^ subtrahend
    429   -> Montgomery -- ^ difference
    430 sub (Montgomery x) (Montgomery y) = Montgomery (x `sub#` y)
    431 
    432 neg#
    433   :: Limb4 -- ^ argument
    434   -> Limb4 -- ^ modular negation
    435 neg# a = sub# (L4 0## 0## 0## 0##) a
    436 {-# INLINE neg# #-}
    437 
    438 -- | The additive inverse of a value in the Montgomery domain.
    439 --
    440 --   'Montgomery' is an instance of 'Num', so 'negate' may be used in
    441 --   place of this function.
    442 --
    443 --   >>> negate 1 :: Montgomery
    444 --   115792089237316195423570985008687907852837564279074904382605163141518161494336
    445 --   >>> (negate 1 :: Montgomery) + 1
    446 --   0
    447 neg :: Montgomery -> Montgomery
    448 neg m = case m of Montgomery x -> Montgomery (neg# x)
    449 
    450 sqr# :: Limb4 -> Limb4
    451 sqr# a =
    452   let !(# l, h #) = WW.sqr# a
    453   in  redc# l h
    454 {-# NOINLINE sqr# #-} -- cannot be inlined without exploding comp time
    455 
    456 -- | Square a value in the Montgomery domain.
    457 --
    458 --   >>> sqr 1
    459 --   1
    460 --   >>> sqr 2
    461 --   4
    462 --   >>> sqr (negate 2)
    463 --   4
        --
        --   Implemented as a Montgomery multiplication of the argument with
        --   itself. NOTE(review): the dedicated 'sqr#' is not used here --
        --   confirm that this is intentional.
    464 sqr
    465   :: Montgomery -- ^ argument
    466   -> Montgomery -- ^ square
    467 sqr (Montgomery x) = Montgomery (x `mul#` x)
    468 
    469 -- | Zero (the additive unit) in the Montgomery domain. All four limbs
        --   are zero.
    470 zero :: Montgomery
    471 zero = Montgomery (L4 0## 0## 0## 0##)
    472 
    473 -- | One (the multiplicative unit) in the Montgomery domain.
        --
        --   The limbs are written least-significant first, and are the same
        --   ones that 'render' prints for @1 :: Montgomery@.
    474 one :: Montgomery
    475 one = Montgomery (L4 0x402DA1732FC9BEBF## 0x4551231950B75FC4##
    476                      0x0000000000000001## 0x0000000000000000##)
    477 
    478 -- generated by etc/generate_inv.sh
    479 inv#
    480   :: Limb4
    481   -> Limb4
    482 inv# a =
    483   let !t0 = L4 0x402DA1732FC9BEBF## 0x4551231950B75FC4##
    484                0x0000000000000001## 0x0000000000000000##
    485       !t1 = sqr# t0
    486       !t2 = mul# a t1
    487       !t3 = sqr# t2
    488       !t4 = mul# a t3
    489       !t5 = sqr# t4
    490       !t6 = mul# a t5
    491       !t7 = sqr# t6
    492       !t8 = mul# a t7
    493       !t9 = sqr# t8
    494       !t10 = mul# a t9
    495       !t11 = sqr# t10
    496       !t12 = mul# a t11
    497       !t13 = sqr# t12
    498       !t14 = mul# a t13
    499       !t15 = sqr# t14
    500       !t16 = mul# a t15
    501       !t17 = sqr# t16
    502       !t18 = mul# a t17
    503       !t19 = sqr# t18
    504       !t20 = mul# a t19
    505       !t21 = sqr# t20
    506       !t22 = mul# a t21
    507       !t23 = sqr# t22
    508       !t24 = mul# a t23
    509       !t25 = sqr# t24
    510       !t26 = mul# a t25
    511       !t27 = sqr# t26
    512       !t28 = mul# a t27
    513       !t29 = sqr# t28
    514       !t30 = mul# a t29
    515       !t31 = sqr# t30
    516       !t32 = mul# a t31
    517       !t33 = sqr# t32
    518       !t34 = mul# a t33
    519       !t35 = sqr# t34
    520       !t36 = mul# a t35
    521       !t37 = sqr# t36
    522       !t38 = mul# a t37
    523       !t39 = sqr# t38
    524       !t40 = mul# a t39
    525       !t41 = sqr# t40
    526       !t42 = mul# a t41
    527       !t43 = sqr# t42
    528       !t44 = mul# a t43
    529       !t45 = sqr# t44
    530       !t46 = mul# a t45
    531       !t47 = sqr# t46
    532       !t48 = mul# a t47
    533       !t49 = sqr# t48
    534       !t50 = mul# a t49
    535       !t51 = sqr# t50
    536       !t52 = mul# a t51
    537       !t53 = sqr# t52
    538       !t54 = mul# a t53
    539       !t55 = sqr# t54
    540       !t56 = mul# a t55
    541       !t57 = sqr# t56
    542       !t58 = mul# a t57
    543       !t59 = sqr# t58
    544       !t60 = mul# a t59
    545       !t61 = sqr# t60
    546       !t62 = mul# a t61
    547       !t63 = sqr# t62
    548       !t64 = mul# a t63
    549       !t65 = sqr# t64
    550       !t66 = mul# a t65
    551       !t67 = sqr# t66
    552       !t68 = mul# a t67
    553       !t69 = sqr# t68
    554       !t70 = mul# a t69
    555       !t71 = sqr# t70
    556       !t72 = mul# a t71
    557       !t73 = sqr# t72
    558       !t74 = mul# a t73
    559       !t75 = sqr# t74
    560       !t76 = mul# a t75
    561       !t77 = sqr# t76
    562       !t78 = mul# a t77
    563       !t79 = sqr# t78
    564       !t80 = mul# a t79
    565       !t81 = sqr# t80
    566       !t82 = mul# a t81
    567       !t83 = sqr# t82
    568       !t84 = mul# a t83
    569       !t85 = sqr# t84
    570       !t86 = mul# a t85
    571       !t87 = sqr# t86
    572       !t88 = mul# a t87
    573       !t89 = sqr# t88
    574       !t90 = mul# a t89
    575       !t91 = sqr# t90
    576       !t92 = mul# a t91
    577       !t93 = sqr# t92
    578       !t94 = mul# a t93
    579       !t95 = sqr# t94
    580       !t96 = mul# a t95
    581       !t97 = sqr# t96
    582       !t98 = mul# a t97
    583       !t99 = sqr# t98
    584       !t100 = mul# a t99
    585       !t101 = sqr# t100
    586       !t102 = mul# a t101
    587       !t103 = sqr# t102
    588       !t104 = mul# a t103
    589       !t105 = sqr# t104
    590       !t106 = mul# a t105
    591       !t107 = sqr# t106
    592       !t108 = mul# a t107
    593       !t109 = sqr# t108
    594       !t110 = mul# a t109
    595       !t111 = sqr# t110
    596       !t112 = mul# a t111
    597       !t113 = sqr# t112
    598       !t114 = mul# a t113
    599       !t115 = sqr# t114
    600       !t116 = mul# a t115
    601       !t117 = sqr# t116
    602       !t118 = mul# a t117
    603       !t119 = sqr# t118
    604       !t120 = mul# a t119
    605       !t121 = sqr# t120
    606       !t122 = mul# a t121
    607       !t123 = sqr# t122
    608       !t124 = mul# a t123
    609       !t125 = sqr# t124
    610       !t126 = mul# a t125
    611       !t127 = sqr# t126
    612       !t128 = mul# a t127
    613       !t129 = sqr# t128
    614       !t130 = mul# a t129
    615       !t131 = sqr# t130
    616       !t132 = mul# a t131
    617       !t133 = sqr# t132
    618       !t134 = mul# a t133
    619       !t135 = sqr# t134
    620       !t136 = mul# a t135
    621       !t137 = sqr# t136
    622       !t138 = mul# a t137
    623       !t139 = sqr# t138
    624       !t140 = mul# a t139
    625       !t141 = sqr# t140
    626       !t142 = mul# a t141
    627       !t143 = sqr# t142
    628       !t144 = mul# a t143
    629       !t145 = sqr# t144
    630       !t146 = mul# a t145
    631       !t147 = sqr# t146
    632       !t148 = mul# a t147
    633       !t149 = sqr# t148
    634       !t150 = mul# a t149
    635       !t151 = sqr# t150
    636       !t152 = mul# a t151
    637       !t153 = sqr# t152
    638       !t154 = mul# a t153
    639       !t155 = sqr# t154
    640       !t156 = mul# a t155
    641       !t157 = sqr# t156
    642       !t158 = mul# a t157
    643       !t159 = sqr# t158
    644       !t160 = mul# a t159
    645       !t161 = sqr# t160
    646       !t162 = mul# a t161
    647       !t163 = sqr# t162
    648       !t164 = mul# a t163
    649       !t165 = sqr# t164
    650       !t166 = mul# a t165
    651       !t167 = sqr# t166
    652       !t168 = mul# a t167
    653       !t169 = sqr# t168
    654       !t170 = mul# a t169
    655       !t171 = sqr# t170
    656       !t172 = mul# a t171
    657       !t173 = sqr# t172
    658       !t174 = mul# a t173
    659       !t175 = sqr# t174
    660       !t176 = mul# a t175
    661       !t177 = sqr# t176
    662       !t178 = mul# a t177
    663       !t179 = sqr# t178
    664       !t180 = mul# a t179
    665       !t181 = sqr# t180
    666       !t182 = mul# a t181
    667       !t183 = sqr# t182
    668       !t184 = mul# a t183
    669       !t185 = sqr# t184
    670       !t186 = mul# a t185
    671       !t187 = sqr# t186
    672       !t188 = mul# a t187
    673       !t189 = sqr# t188
    674       !t190 = mul# a t189
    675       !t191 = sqr# t190
    676       !t192 = mul# a t191
    677       !t193 = sqr# t192
    678       !t194 = mul# a t193
    679       !t195 = sqr# t194
    680       !t196 = mul# a t195
    681       !t197 = sqr# t196
    682       !t198 = mul# a t197
    683       !t199 = sqr# t198
    684       !t200 = mul# a t199
    685       !t201 = sqr# t200
    686       !t202 = mul# a t201
    687       !t203 = sqr# t202
    688       !t204 = mul# a t203
    689       !t205 = sqr# t204
    690       !t206 = mul# a t205
    691       !t207 = sqr# t206
    692       !t208 = mul# a t207
    693       !t209 = sqr# t208
    694       !t210 = mul# a t209
    695       !t211 = sqr# t210
    696       !t212 = mul# a t211
    697       !t213 = sqr# t212
    698       !t214 = mul# a t213
    699       !t215 = sqr# t214
    700       !t216 = mul# a t215
    701       !t217 = sqr# t216
    702       !t218 = mul# a t217
    703       !t219 = sqr# t218
    704       !t220 = mul# a t219
    705       !t221 = sqr# t220
    706       !t222 = mul# a t221
    707       !t223 = sqr# t222
    708       !t224 = mul# a t223
    709       !t225 = sqr# t224
    710       !t226 = mul# a t225
    711       !t227 = sqr# t226
    712       !t228 = mul# a t227
    713       !t229 = sqr# t228
    714       !t230 = mul# a t229
    715       !t231 = sqr# t230
    716       !t232 = mul# a t231
    717       !t233 = sqr# t232
    718       !t234 = mul# a t233
    719       !t235 = sqr# t234
    720       !t236 = mul# a t235
    721       !t237 = sqr# t236
    722       !t238 = mul# a t237
    723       !t239 = sqr# t238
    724       !t240 = mul# a t239
    725       !t241 = sqr# t240
    726       !t242 = mul# a t241
    727       !t243 = sqr# t242
    728       !t244 = mul# a t243
    729       !t245 = sqr# t244
    730       !t246 = mul# a t245
    731       !t247 = sqr# t246
    732       !t248 = mul# a t247
    733       !t249 = sqr# t248
    734       !t250 = mul# a t249
    735       !t251 = sqr# t250
    736       !t252 = mul# a t251
    737       !t253 = sqr# t252
    738       !t254 = mul# a t253
    739       !t255 = sqr# t254
    740       !t256 = sqr# t255
    741       !t257 = mul# a t256
    742       !t258 = sqr# t257
    743       !t259 = sqr# t258
    744       !t260 = mul# a t259
    745       !t261 = sqr# t260
    746       !t262 = mul# a t261
    747       !t263 = sqr# t262
    748       !t264 = mul# a t263
    749       !t265 = sqr# t264
    750       !t266 = sqr# t265
    751       !t267 = mul# a t266
    752       !t268 = sqr# t267
    753       !t269 = sqr# t268
    754       !t270 = mul# a t269
    755       !t271 = sqr# t270
    756       !t272 = sqr# t271
    757       !t273 = mul# a t272
    758       !t274 = sqr# t273
    759       !t275 = sqr# t274
    760       !t276 = mul# a t275
    761       !t277 = sqr# t276
    762       !t278 = mul# a t277
    763       !t279 = sqr# t278
    764       !t280 = mul# a t279
    765       !t281 = sqr# t280
    766       !t282 = sqr# t281
    767       !t283 = mul# a t282
    768       !t284 = sqr# t283
    769       !t285 = mul# a t284
    770       !t286 = sqr# t285
    771       !t287 = sqr# t286
    772       !t288 = mul# a t287
    773       !t289 = sqr# t288
    774       !t290 = mul# a t289
    775       !t291 = sqr# t290
    776       !t292 = mul# a t291
    777       !t293 = sqr# t292
    778       !t294 = sqr# t293
    779       !t295 = sqr# t294
    780       !t296 = mul# a t295
    781       !t297 = sqr# t296
    782       !t298 = mul# a t297
    783       !t299 = sqr# t298
    784       !t300 = mul# a t299
    785       !t301 = sqr# t300
    786       !t302 = sqr# t301
    787       !t303 = sqr# t302
    788       !t304 = mul# a t303
    789       !t305 = sqr# t304
    790       !t306 = mul# a t305
    791       !t307 = sqr# t306
    792       !t308 = sqr# t307
    793       !t309 = mul# a t308
    794       !t310 = sqr# t309
    795       !t311 = sqr# t310
    796       !t312 = mul# a t311
    797       !t313 = sqr# t312
    798       !t314 = sqr# t313
    799       !t315 = mul# a t314
    800       !t316 = sqr# t315
    801       !t317 = mul# a t316
    802       !t318 = sqr# t317
    803       !t319 = mul# a t318
    804       !t320 = sqr# t319
    805       !t321 = mul# a t320
    806       !t322 = sqr# t321
    807       !t323 = sqr# t322
    808       !t324 = mul# a t323
    809       !t325 = sqr# t324
    810       !t326 = sqr# t325
    811       !t327 = sqr# t326
    812       !t328 = mul# a t327
    813       !t329 = sqr# t328
    814       !t330 = sqr# t329
    815       !t331 = sqr# t330
    816       !t332 = sqr# t331
    817       !t333 = mul# a t332
    818       !t334 = sqr# t333
    819       !t335 = sqr# t334
    820       !t336 = mul# a t335
    821       !t337 = sqr# t336
    822       !t338 = sqr# t337
    823       !t339 = sqr# t338
    824       !t340 = sqr# t339
    825       !t341 = sqr# t340
    826       !t342 = sqr# t341
    827       !t343 = sqr# t342
    828       !t344 = sqr# t343
    829       !t345 = mul# a t344
    830       !t346 = sqr# t345
    831       !t347 = mul# a t346
    832       !t348 = sqr# t347
    833       !t349 = mul# a t348
    834       !t350 = sqr# t349
    835       !t351 = sqr# t350
    836       !t352 = mul# a t351
    837       !t353 = sqr# t352
    838       !t354 = mul# a t353
    839       !t355 = sqr# t354
    840       !t356 = mul# a t355
    841       !t357 = sqr# t356
    842       !t358 = sqr# t357
    843       !t359 = mul# a t358
    844       !t360 = sqr# t359
    845       !t361 = mul# a t360
    846       !t362 = sqr# t361
    847       !t363 = mul# a t362
    848       !t364 = sqr# t363
    849       !t365 = mul# a t364
    850       !t366 = sqr# t365
    851       !t367 = mul# a t366
    852       !t368 = sqr# t367
    853       !t369 = mul# a t368
    854       !t370 = sqr# t369
    855       !t371 = mul# a t370
    856       !t372 = sqr# t371
    857       !t373 = mul# a t372
    858       !t374 = sqr# t373
    859       !t375 = sqr# t374
    860       !t376 = mul# a t375
    861       !t377 = sqr# t376
    862       !t378 = sqr# t377
    863       !t379 = sqr# t378
    864       !t380 = mul# a t379
    865       !t381 = sqr# t380
    866       !t382 = sqr# t381
    867       !t383 = sqr# t382
    868       !t384 = mul# a t383
    869       !t385 = sqr# t384
    870       !t386 = sqr# t385
    871       !t387 = mul# a t386
    872       !t388 = sqr# t387
    873       !t389 = mul# a t388
    874       !t390 = sqr# t389
    875       !t391 = mul# a t390
    876       !t392 = sqr# t391
    877       !t393 = mul# a t392
    878       !t394 = sqr# t393
    879       !t395 = sqr# t394
    880       !t396 = mul# a t395
    881       !t397 = sqr# t396
    882       !t398 = sqr# t397
    883       !t399 = sqr# t398
    884       !t400 = sqr# t399
    885       !t401 = mul# a t400
    886       !t402 = sqr# t401
    887       !t403 = mul# a t402
    888       !t404 = sqr# t403
    889       !t405 = sqr# t404
    890       !t406 = sqr# t405
    891       !t407 = mul# a t406
    892       !t408 = sqr# t407
    893       !t409 = mul# a t408
    894       !t410 = sqr# t409
    895       !t411 = sqr# t410
    896       !t412 = mul# a t411
    897       !t413 = sqr# t412
    898       !t414 = sqr# t413
    899       !t415 = sqr# t414
    900       !t416 = sqr# t415
    901       !t417 = sqr# t416
    902       !t418 = sqr# t417
    903       !t419 = sqr# t418
    904       !t420 = mul# a t419
    905       !t421 = sqr# t420
    906       !t422 = mul# a t421
    907       !t423 = sqr# t422
    908       !t424 = sqr# t423
    909       !t425 = mul# a t424
    910       !t426 = sqr# t425
    911       !t427 = mul# a t426
    912       !t428 = sqr# t427
    913       !t429 = sqr# t428
    914       !t430 = sqr# t429
    915       !t431 = mul# a t430
    916       !t432 = sqr# t431
    917       !t433 = sqr# t432
    918       !t434 = sqr# t433
    919       !t435 = sqr# t434
    920       !t436 = sqr# t435
    921       !t437 = sqr# t436
    922       !t438 = mul# a t437
    923       !t439 = sqr# t438
    924       !t440 = sqr# t439
    925       !t441 = sqr# t440
    926       !t442 = mul# a t441
    927       !t443 = sqr# t442
    928       !t444 = mul# a t443
    929       !t445 = sqr# t444
    930       !t446 = mul# a t445
    931       !t447 = sqr# t446
    932       !t448 = mul# a t447
    933       !t449 = sqr# t448
    934       !t450 = mul# a t449
    935       !t451 = sqr# t450
    936       !t452 = mul# a t451
    937       !r = t452
    938   in  r
    939 {-# INLINE inv# #-}
    940 
    941 -- | Multiplicative inverse in the Montgomery domain.
    942 --
    943 --   >>> inv 2
    944 --   57896044618658097711785492504343953926418782139537452191302581570759080747169
    945 --   >>> inv 2 * 2
    946 --   1
    947 inv
    948   :: Montgomery -- ^ value to invert
    949   -> Montgomery -- ^ its multiplicative inverse
    950 inv (Montgomery w) = Montgomery (inv# w)
    951 
    952 -- | Raise a 'Montgomery' base to the power given by a 'Wider' exponent.
    953 --
    954 --   >>> exp 2 3
    955 --   8
    956 --   >>> exp 2 10
    957 --   1024
    958 exp :: Montgomery -> Wider -> Montgomery
    959 exp (Montgomery base) (Wider expo) = Montgomery (exp# base expo)
    960 
    961 exp#
    962   :: Limb4 -- ^ base
    963   -> Limb4 -- ^ exponent
    964   -> Limb4 -- ^ base raised to exponent
    965 exp# base expo =
    966   let !unity = L4 0x402DA1732FC9BEBF## 0x4551231950B75FC4##
    967                   0x0000000000000001## 0x0000000000000000##
    968       go !acc !pow !bits k -- k counts the rounds still to run
    969         | k == 0 = acc
    970         | otherwise =
    971           let !(# rest, lsb #) = WW.shr1_c# bits
    972               !prod = mul# acc pow -- computed every round, used or not
    973               !next = select# acc prod lsb
    974               !pow2 = sqr# pow
    975           in  go next pow2 rest (k - 1)
    976   in  go unity base expo (256 :: Word) -- round count is fixed at 256
    977 {-# INLINE exp# #-}
    978 
    979 odd# :: Limb4 -> C.Choice
    980 odd# w = WW.odd# w -- defers to the "Data.Word.Wider" primitive
    981 {-# INLINE odd# #-}
    982 
    983 -- | Check if a 'Montgomery' value is odd.
    984 --
    985 --   Note that the comparison is performed in constant time, but we
    986 --   branch when converting to 'Bool'.
    987 --
    988 --   >>> odd_vartime 1
    989 --   True
    990 --   >>> odd_vartime 2
    991 --   False
    992 --   >>> Data.Word.Wider.odd (retr 3) -- parity is preserved
    993 --   True
    994 odd_vartime :: Montgomery -> Bool
    995 odd_vartime (Montgomery m) = C.decide (odd# m)
    996 
    997 -- constant-time selection ----------------------------------------------------
    998 
    999 select#
   1000   :: Limb4    -- ^ a, the result when c is unset
   1001   -> Limb4    -- ^ b, the result when c is set
   1002   -> C.Choice -- ^ c, the selector
   1003   -> Limb4    -- ^ either a or b
   1004 select# a b c = WW.select# a b c
   1005 {-# INLINE select# #-}
   1006 
   1007 -- | Return b if c is truthy, otherwise return a.
   1008 --
   1009 --   >>> import qualified Data.Choice as C
   1010 --   >>> select 0 1 (C.true# ())
   1011 --   1
   1012 select
   1013   :: Montgomery    -- ^ a
   1014   -> Montgomery    -- ^ b
   1015   -> C.Choice      -- ^ c
   1016   -> Montgomery    -- ^ result
   1017 select (Montgomery a) (Montgomery b) c = Montgomery (select# a b c)
   1018