fixed

Pure Haskell large fixed-width integers.
git clone git://git.ppad.tech/fixed.git
Log | Files | Refs | README | LICENSE

Extended.hs (31350B)


      1 {-# LANGUAGE BangPatterns #-}
      2 {-# LANGUAGE DeriveGeneric #-}
      3 {-# LANGUAGE MagicHash #-}
      4 {-# LANGUAGE NumericUnderscores #-}
      5 {-# LANGUAGE UnboxedTuples #-}
      6 {-# LANGUAGE ViewPatterns #-}
      7 
      8 -- |
      9 -- Module: Data.Word.Extended
     10 -- Copyright: (c) 2025 Jared Tobin
     11 -- License: MIT
     12 -- Maintainer: Jared Tobin <jared@ppad.tech>
     13 --
     14 -- Large fixed-width words, complete with support for conversion,
     15 -- comparison, bitwise operations, arithmetic, and modular arithmetic.
     16 
     17 module Data.Word.Extended (
     18     Word256(..)
     19   , zero
     20   , one
     21 
     22   -- * Conversion
     23   , to_integer
     24   , to_word256
     25 
     26   -- * Comparison
     27   , lt
     28   , gt
     29   , is_zero
     30 
     31   -- * Bit Operations
     32   , or
     33   , and
     34   , xor
     35 
     36   -- * Arithmetic
     37   , add
     38   , sub
     39   , mul
     40   , div
     41   , mod
     42 
     43   -- for testing/benchmarking
     44   , Word128(..)
     45   , quotrem
     46   , quot_r
     47   , quotrem_r
     48   , quotrem_by1
     49   , rem_by1
     50   , quotrem_2by1
     51   , quotrem_knuth
     52   , recip_2by1
     53   , mul_c
     54   , mul_c#
     55   , umul_hop#
     56   , umul_step#
     57   , mul_512#
     58   ) where
     59 
     60 import Control.DeepSeq
     61 import Control.Monad.Primitive
     62 import Control.Monad.ST
     63 import Data.Bits ((.|.), (.&.), (.<<.), (.>>.), (.^.))
     64 import qualified Data.Bits as B
     65 import qualified Data.Primitive.PrimArray as PA
     66 import GHC.Exts
     67 import GHC.Generics
     68 import GHC.Word
     69 import Prelude hiding (div, mod, or, and, quot, rem)
     70 import qualified Prelude (quot, rem)
     71 
     72 fi :: (Integral a, Num b) => a -> b
     73 fi = fromIntegral
     74 {-# INLINE fi #-}
     75 
     76 -- word256 --------------------------------------------------------------------
     77 
     78 -- | Little-endian Word256.
     79 data Word256 = Word256
     80   {-# UNPACK #-} !Word64
     81   {-# UNPACK #-} !Word64
     82   {-# UNPACK #-} !Word64
     83   {-# UNPACK #-} !Word64
     84   deriving (Eq, Generic)
     85 
     86 instance NFData Word256
     87 
     88 instance Show Word256 where
     89   show = show . to_integer
     90 
     91 -- utility words ------------------------------------------------------------
     92 
     93 data Word128 = P
     94   {-# UNPACK #-} !Word64
     95   {-# UNPACK #-} !Word64
     96   deriving (Eq, Show, Generic)
     97 
     98 instance NFData Word128
     99 
    100 -- conversion -----------------------------------------------------------------
    101 
    102 -- | Convert a fixed-width 'Word256' into a variable-length 'Integer'.
    103 --
    104 --   >>> let foo = to_integer (Word256 0x1 0x10 0x100 0x1000)
    105 --   >>> foo
    106 --   25711008708143844408758505763390361887002166947932397379780609
    107 to_integer :: Word256 -> Integer
    108 to_integer (Word256 w0 w1 w2 w3) =
    109       fi w3 .<<. 192
    110   .|. fi w2 .<<. 128
    111   .|. fi w1 .<<. 64
    112   .|. fi w0
    113 
    114 -- | Convert a fixed-width 'Word256' into a variable-length 'Integer'.
    115 --
    116 --   >>> (\(Word256 l _ _ _) -> l) (to_word256 foo)
    117 --   1
    118 to_word256 :: Integer -> Word256
    119 to_word256 n =
    120   let !mask64 = 2 ^ (64 :: Int) - 1
    121       !w0 = fi (n .&. mask64)
    122       !w1 = fi ((n .>>. 64) .&. mask64)
    123       !w2 = fi ((n .>>. 128) .&. mask64)
    124       !w3 = fi ((n .>>. 192) .&. mask64)
    125   in  Word256 w0 w1 w2 w3
    126 
    127 -- comparison -----------------------------------------------------------------
    128 
    129 -- | Strict less-than comparison on 'Word256' values.
    130 --
    131 --   >>> to_word256 0 `lt` to_word256 1
    132 --   True
    133 --   >>> to_word256 0 `lt` to_word256 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
    134 --   True
    135 lt :: Word256 -> Word256 -> Bool
    136 lt (Word256 a0 a1 a2 a3) (Word256 b0 b1 b2 b3) =
    137   let !(P _ c0) = sub_b a0 b0 0
    138       !(P _ c1) = sub_b a1 b1 c0
    139       !(P _ c2) = sub_b a2 b2 c1
    140       !(P _ c3) = sub_b a3 b3 c2
    141   in  c3 /= 0
    142 
    143 -- | Strict greater-than comparison on 'Word256' values.
    144 --
    145 --   >>> to_word256 0 `gt` to_word256 1
    146 --   False
    147 --   >>> to_word256 0 `gt` to_word256 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
    148 --   False
    149 gt :: Word256 -> Word256 -> Bool
    150 gt a b = lt b a
    151 
    152 -- | Zero, as a 'Word256'.
    153 zero :: Word256
    154 zero = Word256 0 0 0 0
    155 
    156 -- | One, as a 'Word256'.
    157 one :: Word256
    158 one = Word256 1 0 0 0
    159 
    160 -- | Test if a 'Word256' value is zero.
    161 is_zero :: Word256 -> Bool
    162 is_zero w = w == zero
    163 
    164 is_word64 :: Word256 -> Bool
    165 is_word64 (Word256 _ a b c) = a == 0 && b == 0 && c == 0
    166 
    167 -- bits -----------------------------------------------------------------------
    168 
    169 -- | Bitwise-or on 'Word256' values.
    170 or :: Word256 -> Word256 -> Word256
    171 or (Word256 a0 a1 a2 a3) (Word256 b0 b1 b2 b3) =
    172   Word256 (a0 .|. b0) (a1 .|. b1) (a2 .|. b2) (a3 .|. b3)
    173 
    174 -- | Bitwise-and on 'Word256' values.
    175 and :: Word256 -> Word256 -> Word256
    176 and (Word256 a0 a1 a2 a3) (Word256 b0 b1 b2 b3) =
    177   Word256 (a0 .&. b0) (a1 .&. b1) (a2 .&. b2) (a3 .&. b3)
    178 
    179 -- | Bitwise-xor on 'Word256' values.
    180 xor :: Word256 -> Word256 -> Word256
    181 xor (Word256 a0 a1 a2 a3) (Word256 b0 b1 b2 b3) =
    182   Word256 (a0 .^. b0) (a1 .^. b1) (a2 .^. b2) (a3 .^. b3)
    183 
    184 -- addition, subtraction ------------------------------------------------------
    185 
    186 -- add-with-carry
    187 add_c :: Word64 -> Word64 -> Word64 -> Word128
    188 add_c (W64# a) (W64# b) (W64# c) =
    189   let !(# s, n #) = add_c# a b c
    190   in  P (W64# s) (W64# n)
    191 
    192 add_c# :: Word64# -> Word64# -> Word64# -> (# Word64#, Word64# #)
    193 add_c# w64_0 w64_1 c =
    194   let !s = plusWord64# (plusWord64# w64_0 w64_1) c
    195       !n | isTrue# (orI# (ltWord64# s w64_0) (ltWord64# s w64_1)) = 1#
    196          | otherwise = 0#
    197   in  (# s, wordToWord64# (int2Word# n) #)
    198 {-# INLINE add_c# #-}
    199 
    200 -- add with overflow
    201 add_of#
    202   :: (# Word64#, Word64#, Word64#, Word64# #)
    203   -> (# Word64#, Word64#, Word64#, Word64# #)
    204   -> (# Word64#, Word64#, Word64#, Word64#, Word64# #)
    205 add_of# (# a0, a1, a2, a3 #)
    206         (# b0, b1, b2, b3 #) =
    207   let !(# s0, c0 #) = add_c# a0 b0 (wordToWord64# 0##)
    208       !(# s1, c1 #) = add_c# a1 b1 c0
    209       !(# s2, c2 #) = add_c# a2 b2 c1
    210       !(# s3, c3 #) = add_c# a3 b3 c2
    211   in  (# s0, s1, s2, s3, c3 #)
    212 {-# INLINE add_of# #-}
    213 
    214 -- | Addition on 'Word256' values, with overflow.
    215 --
    216 --   >>> to_word256 0xFFFFFFFFFF `add` to_word256 0xFFFFFF
    217 --   18446742974181146625
    218 add :: Word256 -> Word256 -> Word256
    219 add (Word256 (W64# a0) (W64# a1) (W64# a2) (W64# a3))
    220      (Word256 (W64# b0) (W64# b1) (W64# b2) (W64# b3)) =
    221   let !(# c0, c1, c2, c3, _ #) = add_of#
    222         (# a0, a1, a2, a3 #)
    223         (# b0, b1, b2, b3 #)
    224   in  Word256 (W64# c0) (W64# c1) (W64# c2) (W64# c3)
    225 
    226 -- subtract-with-borrow
    227 sub_b :: Word64 -> Word64 -> Word64 -> Word128
    228 sub_b (W64# wa) (W64# wb) (W64# b) =
    229   let !(# d, n #) = sub_b# wa wb b
    230   in  P (W64# d) (W64# n)
    231 
    232 sub_b# :: Word64# -> Word64# -> Word64# -> (# Word64#, Word64# #)
    233 sub_b# w64_0 w64_1 b =
    234   let !d = subWord64# (subWord64# w64_0 w64_1) b
    235       !n | isTrue# (ltWord64# w64_0 (plusWord64# w64_1 b)) = wordToWord64# 1##
    236          | otherwise = wordToWord64# 0##
    237   in  (# d, n #)
    238 {-# INLINE sub_b# #-}
    239 
    240 -- subtract-with-overflow
    241 sub_of#
    242   :: (# Word64#, Word64#, Word64#, Word64# #)
    243   -> (# Word64#, Word64#, Word64#, Word64# #)
    244   -> (# Word64#, Word64#, Word64#, Word64#, Word64# #)
    245 sub_of# (# a0, a1, a2, a3 #)
    246         (# b0, b1, b2, b3 #) =
    247   let !(# s0, c0 #) = sub_b# a0 b0 (wordToWord64# 0##)
    248       !(# s1, c1 #) = sub_b# a1 b1 c0
    249       !(# s2, c2 #) = sub_b# a2 b2 c1
    250       !(# s3, c3 #) = sub_b# a3 b3 c2
    251   in  (# s0, s1, s2, s3, c3 #)
    252 {-# INLINE sub_of# #-}
    253 
    254 -- | Subtraction on 'Word256' values.
    255 --
    256 --   >>> to_word256 0xFFFFFFFFFF `sub` to_word256 0xFFFFFF
    257 --   1099494850560
    258 sub :: Word256 -> Word256 -> Word256
    259 sub (Word256 (W64# a0) (W64# a1) (W64# a2) (W64# a3))
    260      (Word256 (W64# b0) (W64# b1) (W64# b2) (W64# b3)) =
    261   let !(# c0, c1, c2, c3, _ #) = sub_of#
    262         (# a0, a1, a2, a3 #)
    263         (# b0, b1, b2, b3 #)
    264   in  Word256 (W64# c0) (W64# c1) (W64# c2) (W64# c3)
    265 
    266 -- multiplication -------------------------------------------------------------
    267 
    268 -- multiply-with-carry
    269 mul_c :: Word64 -> Word64 -> Word128
    270 mul_c (W64# x) (W64# y) =
    271   let !(# hi, lo #) = mul_c# x y
    272   in  P (W64# hi) (W64# lo)
    273 
    274 -- translated from Mul64 in go's math/bits package
    275 mul_c# :: Word64# -> Word64# -> (# Word64#, Word64# #)
    276 mul_c# x y =
    277   let !mask32 = wordToWord64# 0xffffffff##
    278       !x0 = and64# x mask32
    279       !y0 = and64# y mask32
    280       !x1 = uncheckedShiftRL64# x 32#
    281       !y1 = uncheckedShiftRL64# y 32#
    282 
    283       !w0   = timesWord64# x0 y0
    284       !t    = plusWord64# (timesWord64# x1 y0) (uncheckedShiftRL64# w0 32#)
    285       !w1   = and64# t mask32
    286       !w2   = uncheckedShiftRL64# t 32#
    287       !w1_1 = plusWord64# w1 (timesWord64# x0 y1)
    288 
    289       !hi = plusWord64#
    290         (timesWord64# x1 y1)
    291         (plusWord64# w2 (uncheckedShiftRL64# w1_1 32#))
    292       !lo = timesWord64# x y
    293   in  (# hi, lo #)
    294 {-# INLINE mul_c# #-}
    295 
    296 umul_hop# :: Word64# -> Word64# -> Word64# -> (# Word64#, Word64# #)
    297 umul_hop# z x y =
    298   let !(# hi_0, lo_0 #) = mul_c# x y
    299       !(# lo, c #)      = add_c# lo_0 z (wordToWord64# 0##)
    300       !(# hi, _ #)      = add_c# hi_0 (wordToWord64# 0##) c
    301   in  (# hi, lo #)
    302 {-# INLINE umul_hop# #-}
    303 
    304 umul_step#
    305   :: Word64#
    306   -> Word64#
    307   -> Word64#
    308   -> Word64#
    309   -> (# Word64#, Word64# #)
    310 umul_step# z x y c =
    311   let !(# hi_0, lo_0 #) = mul_c# x y
    312       !(# lo_1, c_0 #)  = add_c# lo_0 c (wordToWord64# 0##)
    313       !(# hi_1, _ #)    = add_c# hi_0 (wordToWord64# 0##) c_0
    314       !(# lo, c_1 #)    = add_c# lo_1 z (wordToWord64# 0##)
    315       !(# hi, _ #)      = add_c# hi_1 (wordToWord64# 0##) c_1
    316   in  (# hi, lo #)
    317 {-# INLINE umul_step# #-}
    318 
    319 -- | Multiplication on 'Word256' values, with overflow.
    320 --
    321 --   >>> to_word256 0xFFFFFFFFFF `mul` to_word256 0xFFFFFF
    322 --   18446742974181146625
    323 mul :: Word256 -> Word256 -> Word256
    324 mul (Word256 (W64# a0) (W64# a1) (W64# a2) (W64# a3))
    325     (Word256 (W64# b0) (W64# b1) (W64# b2) (W64# b3)) =
    326   let !(# c0_0, s0 #) = mul_c# a0 b0
    327       !(# c0_1, r0 #) = umul_hop# c0_0 a1 b0
    328       !(# c0_2, r1 #) = umul_hop# c0_1 a2 b0
    329       !(# c1_0, s1 #) = umul_hop# r0 a0 b1
    330       !(# c1_1, r2 #) = umul_step# r1 a1 b1 c1_0
    331       !(# c2, s2 #)   = umul_hop# r2 a1 b1
    332       !s3 = plusWord64# (timesWord64# a3 b0)
    333              (plusWord64# (timesWord64# a2 b1)
    334                (plusWord64# (timesWord64# a0 b3)
    335                  (plusWord64# (timesWord64# a1 b2)
    336                    (plusWord64# c0_2 (plusWord64# c1_1 c2)))))
    337   in  Word256 (W64# s0) (W64# s1) (W64# s2) (W64# s3)
    338 
    339 -- full 256-bit x 256-bit -> 512-bit multiplication
    340 mul_512#
    341   :: (# Word64#, Word64#, Word64#, Word64# #)
    342   -> (# Word64#, Word64#, Word64#, Word64# #)
    343   -> (# Word64#, Word64#, Word64#, Word64#, Word64#, Word64#, Word64#, Word64# #)
    344 mul_512# (# x0, x1, x2, x3 #) (# y0, y1, y2, y3 #) =
    345   let !(# c4_0,   r0 #) = mul_c#    x0 y0
    346       !(# c4_1, r0_1 #) = umul_hop# c4_0 x1 y0
    347       !(# c4_2, r0_2 #) = umul_hop# c4_1 x2 y0
    348       !(# c4,   r0_3 #) = umul_hop# c4_2 x3 y0
    349 
    350       !(# c5_0,   r1 #) = umul_hop#  r0_1 x0 y1
    351       !(# c5_1, r1_2 #) = umul_step# r0_2 x1 y1 c5_0
    352       !(# c5_2, r1_3 #) = umul_step# r0_3 x2 y1 c5_1
    353       !(# c5,   r1_4 #) = umul_step# c4 x3 y1 c5_2
    354 
    355       !(# c6_0,   r2 #) = umul_hop#  r1_2 x0 y2
    356       !(# c6_1, r2_3 #) = umul_step# r1_3 x1 y2 c6_0
    357       !(# c6_2, r2_4 #) = umul_step# r1_4 x2 y2 c6_1
    358       !(# c6,   r2_5 #) = umul_step# c5 x3 y2 c6_2
    359 
    360       !(# c7_0,   r3 #) = umul_hop#  r2_3 x0 y3
    361       !(# c7_1,   r4 #) = umul_step# r2_4 x1 y3 c7_0
    362       !(# c7_2,   r5 #) = umul_step# r2_5 x2 y3 c7_1
    363       !(# r7,     r6 #) = umul_step# c6 x3 y3 c7_2
    364   in  (# r0, r1, r2, r3, r4, r5, r6, r7 #)
    365 
    366 -- division -------------------------------------------------------------------
    367 
    368 -- quotient, remainder of (hi, lo) divided by y
    369 quotrem_r :: Word64 -> Word64 -> Word64 -> Word128
    370 quotrem_r (W64# hi) (W64# lo) (W64# y) =
    371   let !(# q, r #) = quotrem_r# hi lo y
    372   in  P (W64# q) (W64# r)
    373 
    374 -- translated from Div64 in go's math/bits package
    375 quotrem_r# :: Word64# -> Word64# -> Word64# -> (# Word64#, Word64# #)
    376 quotrem_r# hi lo y_0
    377     | isTrue# (eqWord64# y_0 (wordToWord64# 0##)) =
    378         error "ppad-fixed (quotrem_r): division by zero"
    379     | isTrue# (leWord64# y_0 hi) =
    380         error "ppad-fixed: overflow"
    381     | isTrue# (eqWord64# hi (wordToWord64# 0##)) =
    382         (# quotWord64# lo y_0, remWord64# lo y_0 #)
    383     | otherwise =
    384         let !s = int64ToInt# (word64ToInt64# (wordToWord64# (clz64# y_0)))
    385             !y = uncheckedShiftL64# y_0 s
    386 
    387             !yn1  = uncheckedShiftRL64# y 32#
    388             !yn0  = and64# y mask32
    389             !un32 = or64#
    390               (uncheckedShiftL64# hi s)
    391               (if   (isTrue# (s ==# 0#))
    392                then wordToWord64# 0##
    393                else uncheckedShiftRL64# lo (64# -# s))
    394 
    395             !un10 = uncheckedShiftL64# lo s
    396             !un1 = uncheckedShiftRL64# un10 32#
    397             !un0 = and64# un10 mask32
    398             !q1 = quotWord64# un32 yn1
    399             !rhat = subWord64# un32 (timesWord64# q1 yn1)
    400 
    401             !q1_l = q_loop# q1 rhat yn0 yn1 un1
    402 
    403             !un21 = subWord64#
    404               (plusWord64# (timesWord64# un32 two32) un1)
    405               (timesWord64# q1_l y)
    406             !q0 = quotWord64# un21 yn1
    407             !rhat_n = subWord64# un21 (timesWord64# q0 yn1)
    408 
    409             !q0_l = q_loop# q0 rhat_n yn0 yn1 un0
    410 
    411             !q = plusWord64# (timesWord64# q1_l two32) q0_l
    412             !r = uncheckedShiftRL64#
    413               (subWord64#
    414                 (plusWord64# (timesWord64# un21 two32) un0)
    415                 (timesWord64# q0_l y))
    416               s
    417         in  (# q, r #)
    418   where
    419     !two32  = wordToWord64# 0x100000000##
    420     !mask32 = wordToWord64# 0x0ffffffff##
    421 
    422     q_loop# !q_acc !rhat_acc !yn_0 !yn_1 !un =
    423       let go# !qa !rha
    424             | isTrue# (orI#
    425                 (geWord64# qa two32)
    426                 (gtWord64#
    427                   (timesWord64# qa yn_0)
    428                   (plusWord64# (timesWord64# two32 rha) un))) =
    429                 let !qn = subWord64# qa (wordToWord64# 1##)
    430                     !rhn = plusWord64# rha yn_1
    431                 in  if   isTrue# (geWord64# rhn two32)
    432                     then qn
    433                     else go# qn rhn
    434             | otherwise = qa
    435       in  go# q_acc rhat_acc
    436     {-# INLINE q_loop# #-}
    437 {-# INLINE quotrem_r# #-}
    438 
    439 -- same as quotrem_r, except only computes quotient
    440 quot_r :: Word64 -> Word64 -> Word64 -> Word64
    441 quot_r (W64# hi) (W64# lo) (W64# y) =
    442   let !q = quot_r# hi lo y
    443   in  W64# q
    444 
    445 quot_r# :: Word64# -> Word64# -> Word64# -> Word64#
    446 quot_r# hi lo y_0
    447     | isTrue# (eqWord64# y_0 (wordToWord64# 0##)) =
    448         error "ppad-fixed (quot_r): division by zero"
    449     | isTrue# (leWord64# y_0 hi) =
    450         error "ppad-fixed: overflow"
    451     | isTrue# (eqWord64# hi (wordToWord64# 0##)) =
    452         quotWord64# lo y_0
    453     | otherwise =
    454         let !s = int64ToInt# (word64ToInt64# (wordToWord64# (clz64# y_0)))
    455             !y = uncheckedShiftL64# y_0 s
    456 
    457             !yn1  = uncheckedShiftRL64# y 32#
    458             !yn0  = and64# y mask32
    459             !un32 = or64#
    460               (uncheckedShiftL64# hi s)
    461               (if   (isTrue# (s ==# 0#))
    462                then wordToWord64# 0##
    463                else uncheckedShiftRL64# lo (64# -# s))
    464             !un10 = uncheckedShiftL64# lo s
    465             !un1 = uncheckedShiftRL64# un10 32#
    466             !un0 = and64# un10 mask32
    467             !q1 = quotWord64# un32 yn1
    468             !rhat = subWord64# un32 (timesWord64# q1 yn1)
    469 
    470             !q1_l = q_loop# q1 rhat yn0 yn1 un1
    471 
    472             !un21 = subWord64#
    473               (plusWord64# (timesWord64# un32 two32) un1)
    474               (timesWord64# q1_l y)
    475             !q0 = quotWord64# un21 yn1
    476             !rhat_n = subWord64# un21 (timesWord64# q0 yn1)
    477 
    478             !q0_l = q_loop# q0 rhat_n yn0 yn1 un0
    479 
    480         in  plusWord64# (timesWord64# q1_l two32) q0_l
    481   where
    482     !two32  = wordToWord64# 0x100000000##
    483     !mask32 = wordToWord64# 0x0ffffffff##
    484 
    485     q_loop# !q_acc !rhat_acc !yn_0 !yn_1 !un =
    486       let go# !qa !rha
    487             | isTrue# (orI#
    488                 (geWord64# qa two32)
    489                 (gtWord64#
    490                   (timesWord64# qa yn_0)
    491                   (plusWord64# (timesWord64# two32 rha) un))) =
    492                 let !qn = subWord64# qa (wordToWord64# 1##)
    493                     !rhn = plusWord64# rha yn_1
    494                 in  if   isTrue# (geWord64# rhn two32)
    495                     then qn
    496                     else go# qn rhn
    497             | otherwise = qa
    498       in  go# q_acc rhat_acc
    499     {-# INLINE q_loop# #-}
    500 {-# INLINE quot_r# #-}
    501 
    502 -- XX these 2by1 names suck
    503 
    504 -- quotient and remainder of (hi, lo) divided by d, using reciprocal
    505 quotrem_2by1 :: Word64 -> Word64 -> Word64 -> Word64 -> Word128
    506 quotrem_2by1 (W64# uh) (W64# ul) (W64# d) (W64# rec) =
    507   let !(# q, r #) = quotrem_2by1# uh ul d rec
    508   in  P (W64# q) (W64# r)
    509 
    510 quotrem_2by1#
    511   :: Word64# -> Word64# -> Word64# -> Word64# -> (# Word64#, Word64# #)
    512 quotrem_2by1# uh ul d rec =
    513   let !(# qh_0, ql #)  = mul_c# rec uh
    514       !(# ql_0, c #)   = add_c# ql ul (wordToWord64# 0##)
    515       !(# qh_1_l, _ #) = add_c# qh_0 uh c
    516       !qh_1            = plusWord64# qh_1_l (wordToWord64# 1##)
    517       !r               = subWord64# ul (timesWord64# qh_1 d)
    518 
    519       !(# qh_y, r_y #)
    520         | isTrue# (geWord64# r ql_0)  = (# qh_1_l, plusWord64# r d #)
    521         | otherwise = (# qh_1, r #)
    522 
    523   in  if   isTrue# (geWord64# r_y d)
    524       then (# plusWord64# qh_y (wordToWord64# 1##), subWord64# r_y d #)
    525       else (# qh_y, r_y #)
    526 {-# INLINE quotrem_2by1# #-}
    527 
    528 recip_2by1 :: Word64 -> Word64
    529 recip_2by1 (W64# d) = W64# (recip_2by1# d)
    530 
    531 recip_2by1# :: Word64# -> Word64#
    532 recip_2by1# d = quot_r# (not64# d) (wordToWord64# 0xffffffffffffffff##) d
    533 {-# INLINE recip_2by1# #-}
    534 
    535 -- quotient and remainder of variable-length u divided by 64-bit d
    536 quotrem_by1
    537   :: PrimMonad m
    538   => PA.MutablePrimArray (PrimState m) Word64 -- quotient
    539   -> PA.PrimArray Word64                      -- variable-length dividend
    540   -> Word64                                   -- divisor
    541   -> m Word64                                 -- remainder
    542 quotrem_by1 q u d = do
    543   let !rec = recip_2by1 d
    544       loop !j !hj
    545         | j < 0 = pure hj
    546         | otherwise = do
    547             let !lj = PA.indexPrimArray u j
    548                 !(P qj rj) = quotrem_2by1 hj lj d rec
    549             PA.writePrimArray q j qj
    550             loop (j - 1) rj
    551       !l  = PA.sizeofPrimArray u
    552       !hl = PA.indexPrimArray u (l - 1)
    553   loop (l - 2) hl
    554 
    555 -- remainder of variable-length u divided by 64-bit d
    556 rem_by1
    557   :: PA.PrimArray Word64                      -- variable-length dividend
    558   -> Word64                                   -- divisor
    559   -> Word64                                   -- remainder
    560 rem_by1 u d = do
    561   let !rec = recip_2by1 d
    562       loop !j !hj
    563         | j < 0 = hj
    564         | otherwise = do
    565             let !lj = PA.indexPrimArray u j
    566                 !(P _ rj) = quotrem_2by1 hj lj d rec
    567             loop (j - 1) rj
    568       !l  = PA.sizeofPrimArray u
    569       !hl = PA.indexPrimArray u (l - 1)
    570   loop (l - 2) hl
    571 
    572 -- x =- y * m
    573 -- requires (len x - x_offset) >= len y > 0
    574 sub_mul
    575   :: PrimMonad m
    576   => PA.MutablePrimArray (PrimState m) Word64
    577   -> Int
    578   -> PA.PrimArray Word64
    579   -> Int
    580   -> Word64
    581   -> m Word64
    582 sub_mul x x_offset y l (W64# m) = do
    583   let loop !j !borrow
    584         | j == l = pure (W64# borrow)
    585         | otherwise = do
    586             !(W64# x_j) <- PA.readPrimArray x (j + x_offset)
    587             let !(W64# y_j) = PA.indexPrimArray y j
    588             let !(# s, carry1 #) = sub_b# x_j borrow (wordToWord64# 0##)
    589                 !(# ph, pl #)    = mul_c# y_j m
    590                 !(# t, carry2 #) = sub_b# s pl (wordToWord64# 0##)
    591             PA.writePrimArray x (j + x_offset) (W64# t)
    592             loop (succ j) (plusWord64# (plusWord64# ph carry1) carry2)
    593   loop 0 (wordToWord64# 0##)
    594 
    595 -- x += y
    596 add_to
    597   :: PrimMonad m
    598   => PA.MutablePrimArray (PrimState m) Word64
    599   -> Int
    600   -> PA.PrimArray Word64
    601   -> Int
    602   -> m Word64
    603 add_to x x_offset y l = do
    604   let loop !j !cacc
    605         | j == l = pure cacc
    606         | otherwise = do
    607             xj <- PA.readPrimArray x (j + x_offset)
    608             let yj = PA.indexPrimArray y j
    609                 !(P nex carry) = add_c xj yj cacc
    610             PA.writePrimArray x (j + x_offset) nex
    611             loop (succ j) carry
    612   loop 0 0
    613 
    614 -- knuth's algorithm 4.3.1 for variable-length division
    615 -- divides normalized dividend by normalized divisor, writing quotient to
    616 -- 'quo' and remainder to dividend
    617 quotrem_knuth
    618   :: PrimMonad m
    619   => PA.MutablePrimArray (PrimState m) Word64  -- variable-length quotient
    620   -> PA.MutablePrimArray (PrimState m) Word64  -- normalized dividend
    621   -> Int                                       -- size of normalize dividend
    622   -> PA.PrimArray Word64                       -- normalized divisor
    623   -> m ()
    624 quotrem_knuth quo u ulen d = do
    625   let !ld = PA.sizeofPrimArray d
    626       !dh = PA.indexPrimArray d (ld - 1)
    627       !dl = PA.indexPrimArray d (ld - 2)
    628       !rec = recip_2by1 dh
    629       loop !j
    630         | j < 0 = pure ()
    631         | otherwise = do
    632             !u2 <- PA.readPrimArray u (j + ld)
    633             !u1 <- PA.readPrimArray u (j + ld - 1)
    634             !u0 <- PA.readPrimArray u (j + ld - 2)
    635             let !qhat
    636                   | u2 >= dh  = 0xffff_ffff_ffff_ffff
    637                   | otherwise =
    638                       let !(P qh rh) = quotrem_2by1 u2 u1 dh rec
    639                           !(P ph pl) = mul_c qh dl
    640                       in  if   ph > rh || (ph == rh && pl > u0)
    641                           then qh - 1
    642                           else qh
    643 
    644             !borrow <- sub_mul u j d ld qhat
    645             PA.writePrimArray u (j + ld) (u2 - borrow)
    646             if   u2 < borrow
    647             then do
    648               -- rare case
    649               let !qh = qhat - 1
    650               r <- add_to u j d ld
    651               PA.writePrimArray u (j + ld) r
    652               PA.writePrimArray quo j qh
    653             else
    654               PA.writePrimArray quo j qhat
    655             loop (pred j)
    656   loop (ulen - ld - 1)
    657 
    658 -- knuth's algorithm again, but only compute remainder
    659 rem_knuth
    660   :: PrimMonad m
    661   => PA.MutablePrimArray (PrimState m) Word64  -- normalized dividend
    662   -> Int                                       -- size of normalize dividend
    663   -> PA.PrimArray Word64                       -- normalized divisor
    664   -> m ()
    665 rem_knuth u ulen d = do
    666   let !ld = PA.sizeofPrimArray d
    667       !dh = PA.indexPrimArray d (ld - 1)
    668       !dl = PA.indexPrimArray d (ld - 2)
    669       !rec = recip_2by1 dh
    670       loop !j
    671         | j < 0 = pure ()
    672         | otherwise = do
    673             !u2 <- PA.readPrimArray u (j + ld)
    674             !u1 <- PA.readPrimArray u (j + ld - 1)
    675             !u0 <- PA.readPrimArray u (j + ld - 2)
    676             let !qhat
    677                   | u2 >= dh  = 0xffff_ffff_ffff_ffff
    678                   | otherwise =
    679                       let !(P qh rh) = quotrem_2by1 u2 u1 dh rec
    680                           !(P ph pl) = mul_c qh dl
    681                       in  if   ph > rh || (ph == rh && pl > u0)
    682                           then qh - 1
    683                           else qh
    684 
    685             !borrow <- sub_mul u j d ld qhat
    686             PA.writePrimArray u (j + ld) (u2 - borrow)
    687             if   u2 < borrow
    688             then do
    689               -- rare case
    690               r <- add_to u j d ld
    691               PA.writePrimArray u (j + ld) r
    692             else
    693               pure ()
    694             loop (pred j)
    695   loop (ulen - ld - 1)
    696 
    697 normalized_dividend_length
    698   :: PrimMonad m
    699   => PA.MutablePrimArray (PrimState m) Word64 -- dividend
    700   -> m Int
    701 normalized_dividend_length u = do
    702   !lu <- PA.getSizeofMutablePrimArray u
    703   let loop !j
    704         | j < 0 = pure 0
    705         | otherwise = do
    706             uj <- PA.readPrimArray u j
    707             if uj /= 0 then pure (j + 1) else loop (j - 1)
    708   loop (lu - 2) -- last word will be uninitialized, skip it
    709 {-# INLINE normalized_dividend_length #-}
    710 
    711 -- normalize 256-bit divisor
    712 normalize_divisor
    713   :: PrimMonad m
    714   => Word256
    715   -> m (PA.PrimArray Word64, Int, Int, Word64) -- XX more efficient
    716 normalize_divisor (Word256 d0 d1 d2 d3) = do
    717   let (dlen, d_last, shift)
    718         | d3 /= 0 = (4, d3, B.countLeadingZeros d3)
    719         | d2 /= 0 = (3, d2, B.countLeadingZeros d2)
    720         | d1 /= 0 = (2, d1, B.countLeadingZeros d1)
    721         | d0 /= 0 = (1, d0, B.countLeadingZeros d0)
    722         | otherwise = error "ppad-fixed (normalize): invalid 256-bit word"
    723   dn <- PA.newPrimArray dlen
    724   case dlen of
    725     4 -> do
    726       PA.writePrimArray dn 3 d3
    727       PA.writePrimArray dn 2 d2
    728       PA.writePrimArray dn 1 d1
    729       PA.writePrimArray dn 0 d0
    730     3 -> do
    731       PA.writePrimArray dn 2 d2
    732       PA.writePrimArray dn 1 d1
    733       PA.writePrimArray dn 0 d0
    734     2 -> do
    735       PA.writePrimArray dn 1 d1
    736       PA.writePrimArray dn 0 d0
    737     _ -> do
    738       PA.writePrimArray dn 0 d0
    739   let norm !j !dj
    740         | j == 0 = do
    741             let !dn_0 = dj .<<. shift
    742             PA.writePrimArray dn 0 dn_0
    743             pure dn_0
    744         | otherwise = do
    745             dj_1 <- PA.readPrimArray dn (j - 1)
    746             PA.writePrimArray dn j $
    747               (dj .<<. shift) .|. (dj_1 .>>. (64 - shift))
    748             norm (j - 1) dj_1
    749   dn_0 <- norm (dlen - 1) d_last
    750   d_final <- PA.unsafeFreezePrimArray dn
    751   pure (d_final, dlen, shift, dn_0)
    752 {-# INLINE normalize_divisor #-}
    753 
    754 -- normalize variable-length dividend
    755 normalize_dividend
    756   :: PrimMonad m
    757   => PA.MutablePrimArray (PrimState m) Word64
    758   -> Int
    759   -> Int
    760   -> m ()
    761 normalize_dividend u ulen s = do
    762   u_hi <- PA.readPrimArray u (ulen - 1)
    763   PA.writePrimArray u ulen (u_hi .>>. (64 - s))
    764   let loop !j !uj
    765         | j == 0 =
    766             PA.writePrimArray u 0 (uj .<<. s)
    767         | otherwise = do
    768             !uj_1 <- PA.readPrimArray u (j - 1)
    769             PA.writePrimArray u j $
    770               (uj .<<. s) .|. (uj_1 .>>. (64 - s))
    771             loop (j - 1) uj_1
    772   loop (ulen - 1) u_hi
    773 {-# INLINE normalize_dividend #-}
    774 
    775 -- quotient and remainder of variable-length u divided by variable-length d
    776 quotrem
    777   :: PrimMonad m
    778   => PA.MutablePrimArray (PrimState m) Word64 -- quotient (potentially large)
    779   -> PA.MutablePrimArray (PrimState m) Word64 -- unnormalized dividend
    780   -> Word256                                  -- unnormalized divisor
    781   -> m (PA.PrimArray Word64)                  -- remainder (256-bit)
    782 quotrem quo u d = do
    783   -- normalize divisor
    784   !(dn, dlen, shift, dn_0) <- normalize_divisor d
    785   -- get size of normalized dividend
    786   !ulen <- normalized_dividend_length u
    787   if   ulen < dlen
    788   then PA.freezePrimArray u 0 4
    789   else do
    790     -- normalize dividend
    791     normalize_dividend u ulen shift
    792     if   dlen == 1
    793     then do
    794      -- normalized divisor is small
    795       !un <- PA.unsafeFreezePrimArray u
    796       !r <- quotrem_by1 quo un dn_0
    797       pure $ PA.primArrayFromList [r .>>. shift, 0, 0, 0] -- XX
    798     else do
    799       -- quotrem of normalized dividend divided by normalized divisor
    800       quotrem_knuth quo u (ulen + 1) dn
    801       -- unnormalize remainder
    802       let unn_rem !j !unj
    803             | j == dlen = do
    804                 PA.unsafeFreezePrimArray u
    805             | j + 1 == ulen = do
    806                 PA.writePrimArray u j (unj .>>. shift)
    807                 PA.unsafeFreezePrimArray u
    808             | otherwise = do
    809                 !unj_1 <- PA.readPrimArray u (j + 1)
    810                 PA.writePrimArray u j $
    811                   (unj .>>. shift) .|. (unj_1 .<<. (64 - shift))
    812                 unn_rem (j + 1) unj_1
    813 
    814       !un_0 <- PA.readPrimArray u 0
    815       unn_rem 0 un_0
    816 {-# INLINE quotrem #-}
    817 
    818 -- quotient of u variable-length u divided by variable-length d
    819 quot
    820   :: PrimMonad m
    821   => PA.MutablePrimArray (PrimState m) Word64 -- quotient
    822   -> PA.MutablePrimArray (PrimState m) Word64 -- unnormalized dividend
    823   -> Word256                                  -- unnormalized divisor
    824   -> m Int                                    -- length of quotient
    825 quot quo u d = do
    826   -- normalize divisor
    827   !(dn, dlen, shift, dn_0) <- normalize_divisor d
    828   -- get size of normalized dividend
    829   !ulen <- normalized_dividend_length u
    830   if   ulen < dlen
    831   then pure 0
    832   else do
    833     normalize_dividend u ulen shift
    834     if   dlen == 1
    835     then do
    836      -- normalized divisor is small
    837       !un <- PA.unsafeFreezePrimArray u
    838       _ <- quotrem_by1 quo un dn_0
    839       pure ulen
    840     else do
    841       quotrem_knuth quo u (ulen + 1) dn
    842       pure (ulen + 1 - dlen)
    843 {-# INLINE quot #-}
    844 
    845 -- remainder of variable-length u divided by variable-length d
    846 rem
    847   :: PrimMonad m
    848   => PA.MutablePrimArray (PrimState m) Word64 -- quotient  (potentially large)
    849   -> PA.MutablePrimArray (PrimState m) Word64 -- unnormalized dividend
    850   -> Word256                                  -- unnormalized divisor
    851   -> m (PA.PrimArray Word64)                  -- remainder (256-bit)
    852 rem quo u d = do
    853   -- normalize divisor
    854   !(dn, dlen, shift, dn_0) <- normalize_divisor d
    855   -- get size of normalized dividend
    856   !ulen <- normalized_dividend_length u
    857   if   ulen < dlen
    858   then PA.freezePrimArray u 0 4
    859   else do
    860     -- normalize dividend
    861     normalize_dividend u ulen shift
    862     if   dlen == 1
    863     then do
    864      -- normalized divisor is small
    865       !un <- PA.unsafeFreezePrimArray u
    866       !r <- quotrem_by1 quo un dn_0
    867       pure $ PA.primArrayFromList [r .>>. shift, 0, 0, 0] -- XX
    868     else do
    869       -- quotrem of normalized dividend divided by normalized divisor
    870       rem_knuth u (ulen + 1) dn
    871       -- unnormalize remainder
    872       let unn_rem !j !unj
    873             | j == dlen = do
    874                 PA.unsafeFreezePrimArray u
    875             | j + 1 == ulen = do
    876                 PA.writePrimArray u j (unj .>>. shift)
    877                 PA.unsafeFreezePrimArray u
    878             | otherwise = do
    879                 !unj_1 <- PA.readPrimArray u (j + 1)
    880                 PA.writePrimArray u j $
    881                   (unj .>>. shift) .|. (unj_1 .<<. (64 - shift))
    882                 unn_rem (j + 1) unj_1
    883 
    884       !un_0 <- PA.readPrimArray u 0
    885       unn_rem 0 un_0
    886 {-# INLINE rem #-}
    887 
    888 -- | Division on 'Word256' values.
    889 --
    890 --   >>> to_word256 0xFFFFFFFFFF `div` to_word256 0xFFFFFF
    891 --   65536
    892 div :: Word256 -> Word256 -> Word256
    893 div u@(Word256 u0 u1 u2 u3) d@(Word256 d0 _ _ _)
    894   | is_zero d || d `gt` u = zero -- ?
    895   | u == d                = one
    896   | is_word64 u           = Word256 (u0 `Prelude.quot` d0) 0 0 0
    897   | otherwise = runST $ do
    898       -- allocate quotient
    899       quo <- PA.newPrimArray 4
    900       -- allocate dividend, leaving enough space for normalization
    901       u_arr <- PA.newPrimArray 5
    902       PA.writePrimArray u_arr 0 u0
    903       PA.writePrimArray u_arr 1 u1
    904       PA.writePrimArray u_arr 2 u2
    905       PA.writePrimArray u_arr 3 u3
    906       -- last index of u_hot intentionally unset
    907       l <- quot quo u_arr d
    908       case l of
    909         1 -> do
    910           q0 <- PA.readPrimArray quo 0
    911           pure (Word256 q0 0 0 0)
    912         2 -> do
    913           q0 <- PA.readPrimArray quo 0
    914           q1 <- PA.readPrimArray quo 1
    915           pure (Word256 q0 q1 0 0)
    916         3 -> do
    917           q0 <- PA.readPrimArray quo 0
    918           q1 <- PA.readPrimArray quo 1
    919           q2 <- PA.readPrimArray quo 2
    920           pure (Word256 q0 q1 q2 0)
    921         4 -> do
    922           q0 <- PA.readPrimArray quo 0
    923           q1 <- PA.readPrimArray quo 1
    924           q2 <- PA.readPrimArray quo 2
    925           q3 <- PA.readPrimArray quo 3
    926           pure (Word256 q0 q1 q2 q3)
    927         _ -> error "ppad-fixed (quot): invalid quotient length"
    928 
    929 -- | Modulo operation on 'Word256' values.
    930 --
    931 --   >>> to_word256 0xFFFFFFFFFF `mod` to_word256 0xFFFFFF
    932 --   65535
    933 mod :: Word256 -> Word256 -> Word256
    934 mod u@(Word256 u0 u1 u2 u3) d@(Word256 d0 _ _ _)
    935   | is_zero d || d `gt` u = zero -- ?
    936   | u == d                = one
    937   | is_word64 u           = Word256 (u0 `Prelude.rem` d0) 0 0 0
    938   | otherwise = runST $ do
    939       -- allocate quotient
    940       quo <- PA.newPrimArray 4
    941       -- allocate dividend, leaving enough space for normalization
    942       u_hot <- PA.newPrimArray 5
    943       PA.writePrimArray u_hot 0 u0
    944       PA.writePrimArray u_hot 1 u1
    945       PA.writePrimArray u_hot 2 u2
    946       PA.writePrimArray u_hot 3 u3
    947       -- last index of u_hot intentionally unset
    948       r <- rem quo u_hot d
    949       let r0 = PA.indexPrimArray r 0
    950           r1 = PA.indexPrimArray r 1
    951           r2 = PA.indexPrimArray r 2
    952           r3 = PA.indexPrimArray r 3
    953       pure (Word256 r0 r1 r2 r3)