Extended.hs (31350B)
1 {-# LANGUAGE BangPatterns #-} 2 {-# LANGUAGE DeriveGeneric #-} 3 {-# LANGUAGE MagicHash #-} 4 {-# LANGUAGE NumericUnderscores #-} 5 {-# LANGUAGE UnboxedTuples #-} 6 {-# LANGUAGE ViewPatterns #-} 7 8 -- | 9 -- Module: Data.Word.Extended 10 -- Copyright: (c) 2025 Jared Tobin 11 -- License: MIT 12 -- Maintainer: Jared Tobin <jared@ppad.tech> 13 -- 14 -- Large fixed-width words, complete with support for conversion, 15 -- comparison, bitwise operations, arithmetic, and modular arithmetic. 16 17 module Data.Word.Extended ( 18 Word256(..) 19 , zero 20 , one 21 22 -- * Conversion 23 , to_integer 24 , to_word256 25 26 -- * Comparison 27 , lt 28 , gt 29 , is_zero 30 31 -- * Bit Operations 32 , or 33 , and 34 , xor 35 36 -- * Arithmetic 37 , add 38 , sub 39 , mul 40 , div 41 , mod 42 43 -- for testing/benchmarking 44 , Word128(..) 45 , quotrem 46 , quot_r 47 , quotrem_r 48 , quotrem_by1 49 , rem_by1 50 , quotrem_2by1 51 , quotrem_knuth 52 , recip_2by1 53 , mul_c 54 , mul_c# 55 , umul_hop# 56 , umul_step# 57 , mul_512# 58 ) where 59 60 import Control.DeepSeq 61 import Control.Monad.Primitive 62 import Control.Monad.ST 63 import Data.Bits ((.|.), (.&.), (.<<.), (.>>.), (.^.)) 64 import qualified Data.Bits as B 65 import qualified Data.Primitive.PrimArray as PA 66 import GHC.Exts 67 import GHC.Generics 68 import GHC.Word 69 import Prelude hiding (div, mod, or, and, quot, rem) 70 import qualified Prelude (quot, rem) 71 72 fi :: (Integral a, Num b) => a -> b 73 fi = fromIntegral 74 {-# INLINE fi #-} 75 76 -- word256 -------------------------------------------------------------------- 77 78 -- | Little-endian Word256. 79 data Word256 = Word256 80 {-# UNPACK #-} !Word64 81 {-# UNPACK #-} !Word64 82 {-# UNPACK #-} !Word64 83 {-# UNPACK #-} !Word64 84 deriving (Eq, Generic) 85 86 instance NFData Word256 87 88 instance Show Word256 where 89 show = show . to_integer 90 91 -- utility words ------------------------------------------------------------ 92 93 data Word128 = P 94 {-# UNPACK #-} !Word64 95 {-# UNPACK #-} !Word64 96 deriving (Eq, Show, Generic) 97 98 instance NFData Word128 99 100 -- conversion ----------------------------------------------------------------- 101 102 -- | Convert a fixed-width 'Word256' into a variable-length 'Integer'. 103 -- 104 -- >>> let foo = to_integer (Word256 0x1 0x10 0x100 0x1000) 105 -- >>> foo 106 -- 25711008708143844408758505763390361887002166947932397379780609 107 to_integer :: Word256 -> Integer 108 to_integer (Word256 w0 w1 w2 w3) = 109 fi w3 .<<. 192 110 .|. fi w2 .<<. 128 111 .|. fi w1 .<<. 64 112 .|. fi w0 113 114 -- | Convert a fixed-width 'Word256' into a variable-length 'Integer'. 115 -- 116 -- >>> (\(Word256 l _ _ _) -> l) (to_word256 foo) 117 -- 1 118 to_word256 :: Integer -> Word256 119 to_word256 n = 120 let !mask64 = 2 ^ (64 :: Int) - 1 121 !w0 = fi (n .&. mask64) 122 !w1 = fi ((n .>>. 64) .&. mask64) 123 !w2 = fi ((n .>>. 128) .&. mask64) 124 !w3 = fi ((n .>>. 192) .&. mask64) 125 in Word256 w0 w1 w2 w3 126 127 -- comparison ----------------------------------------------------------------- 128 129 -- | Strict less-than comparison on 'Word256' values. 130 -- 131 -- >>> to_word256 0 `lt` to_word256 1 132 -- True 133 -- >>> to_word256 0 `lt` to_word256 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 134 -- True 135 lt :: Word256 -> Word256 -> Bool 136 lt (Word256 a0 a1 a2 a3) (Word256 b0 b1 b2 b3) = 137 let !(P _ c0) = sub_b a0 b0 0 138 !(P _ c1) = sub_b a1 b1 c0 139 !(P _ c2) = sub_b a2 b2 c1 140 !(P _ c3) = sub_b a3 b3 c2 141 in c3 /= 0 142 143 -- | Strict greater-than comparison on 'Word256' values. 144 -- 145 -- >>> to_word256 0 `gt` to_word256 1 146 -- False 147 -- >>> to_word256 0 `gt` to_word256 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF 148 -- False 149 gt :: Word256 -> Word256 -> Bool 150 gt a b = lt b a 151 152 -- | Zero, as a 'Word256'. 153 zero :: Word256 154 zero = Word256 0 0 0 0 155 156 -- | One, as a 'Word256'. 157 one :: Word256 158 one = Word256 1 0 0 0 159 160 -- | Test if a 'Word256' value is zero. 161 is_zero :: Word256 -> Bool 162 is_zero w = w == zero 163 164 is_word64 :: Word256 -> Bool 165 is_word64 (Word256 _ a b c) = a == 0 && b == 0 && c == 0 166 167 -- bits ----------------------------------------------------------------------- 168 169 -- | Bitwise-or on 'Word256' values. 170 or :: Word256 -> Word256 -> Word256 171 or (Word256 a0 a1 a2 a3) (Word256 b0 b1 b2 b3) = 172 Word256 (a0 .|. b0) (a1 .|. b1) (a2 .|. b2) (a3 .|. b3) 173 174 -- | Bitwise-and on 'Word256' values. 175 and :: Word256 -> Word256 -> Word256 176 and (Word256 a0 a1 a2 a3) (Word256 b0 b1 b2 b3) = 177 Word256 (a0 .&. b0) (a1 .&. b1) (a2 .&. b2) (a3 .&. b3) 178 179 -- | Bitwise-xor on 'Word256' values. 180 xor :: Word256 -> Word256 -> Word256 181 xor (Word256 a0 a1 a2 a3) (Word256 b0 b1 b2 b3) = 182 Word256 (a0 .^. b0) (a1 .^. b1) (a2 .^. b2) (a3 .^. b3) 183 184 -- addition, subtraction ------------------------------------------------------ 185 186 -- add-with-carry 187 add_c :: Word64 -> Word64 -> Word64 -> Word128 188 add_c (W64# a) (W64# b) (W64# c) = 189 let !(# s, n #) = add_c# a b c 190 in P (W64# s) (W64# n) 191 192 add_c# :: Word64# -> Word64# -> Word64# -> (# Word64#, Word64# #) 193 add_c# w64_0 w64_1 c = 194 let !s = plusWord64# (plusWord64# w64_0 w64_1) c 195 !n | isTrue# (orI# (ltWord64# s w64_0) (ltWord64# s w64_1)) = 1# 196 | otherwise = 0# 197 in (# s, wordToWord64# (int2Word# n) #) 198 {-# INLINE add_c# #-} 199 200 -- add with overflow 201 add_of# 202 :: (# Word64#, Word64#, Word64#, Word64# #) 203 -> (# Word64#, Word64#, Word64#, Word64# #) 204 -> (# Word64#, Word64#, Word64#, Word64#, Word64# #) 205 add_of# (# a0, a1, a2, a3 #) 206 (# b0, b1, b2, b3 #) = 207 let !(# s0, c0 #) = add_c# a0 b0 (wordToWord64# 0##) 208 !(# s1, c1 #) = add_c# a1 b1 c0 209 !(# s2, c2 #) = add_c# a2 b2 c1 210 !(# s3, c3 #) = add_c# a3 b3 c2 211 in (# s0, s1, s2, s3, c3 #) 212 {-# INLINE add_of# #-} 213 214 -- | Addition on 'Word256' values, with overflow. 215 -- 216 -- >>> to_word256 0xFFFFFFFFFF `add` to_word256 0xFFFFFF 217 -- 18446742974181146625 218 add :: Word256 -> Word256 -> Word256 219 add (Word256 (W64# a0) (W64# a1) (W64# a2) (W64# a3)) 220 (Word256 (W64# b0) (W64# b1) (W64# b2) (W64# b3)) = 221 let !(# c0, c1, c2, c3, _ #) = add_of# 222 (# a0, a1, a2, a3 #) 223 (# b0, b1, b2, b3 #) 224 in Word256 (W64# c0) (W64# c1) (W64# c2) (W64# c3) 225 226 -- subtract-with-borrow 227 sub_b :: Word64 -> Word64 -> Word64 -> Word128 228 sub_b (W64# wa) (W64# wb) (W64# b) = 229 let !(# d, n #) = sub_b# wa wb b 230 in P (W64# d) (W64# n) 231 232 sub_b# :: Word64# -> Word64# -> Word64# -> (# Word64#, Word64# #) 233 sub_b# w64_0 w64_1 b = 234 let !d = subWord64# (subWord64# w64_0 w64_1) b 235 !n | isTrue# (ltWord64# w64_0 (plusWord64# w64_1 b)) = wordToWord64# 1## 236 | otherwise = wordToWord64# 0## 237 in (# d, n #) 238 {-# INLINE sub_b# #-} 239 240 -- subtract-with-overflow 241 sub_of# 242 :: (# Word64#, Word64#, Word64#, Word64# #) 243 -> (# Word64#, Word64#, Word64#, Word64# #) 244 -> (# Word64#, Word64#, Word64#, Word64#, Word64# #) 245 sub_of# (# a0, a1, a2, a3 #) 246 (# b0, b1, b2, b3 #) = 247 let !(# s0, c0 #) = sub_b# a0 b0 (wordToWord64# 0##) 248 !(# s1, c1 #) = sub_b# a1 b1 c0 249 !(# s2, c2 #) = sub_b# a2 b2 c1 250 !(# s3, c3 #) = sub_b# a3 b3 c2 251 in (# s0, s1, s2, s3, c3 #) 252 {-# INLINE sub_of# #-} 253 254 -- | Subtraction on 'Word256' values. 255 -- 256 -- >>> to_word256 0xFFFFFFFFFF `sub` to_word256 0xFFFFFF 257 -- 1099494850560 258 sub :: Word256 -> Word256 -> Word256 259 sub (Word256 (W64# a0) (W64# a1) (W64# a2) (W64# a3)) 260 (Word256 (W64# b0) (W64# b1) (W64# b2) (W64# b3)) = 261 let !(# c0, c1, c2, c3, _ #) = sub_of# 262 (# a0, a1, a2, a3 #) 263 (# b0, b1, b2, b3 #) 264 in Word256 (W64# c0) (W64# c1) (W64# c2) (W64# c3) 265 266 -- multiplication ------------------------------------------------------------- 267 268 -- multiply-with-carry 269 mul_c :: Word64 -> Word64 -> Word128 270 mul_c (W64# x) (W64# y) = 271 let !(# hi, lo #) = mul_c# x y 272 in P (W64# hi) (W64# lo) 273 274 -- translated from Mul64 in go's math/bits package 275 mul_c# :: Word64# -> Word64# -> (# Word64#, Word64# #) 276 mul_c# x y = 277 let !mask32 = wordToWord64# 0xffffffff## 278 !x0 = and64# x mask32 279 !y0 = and64# y mask32 280 !x1 = uncheckedShiftRL64# x 32# 281 !y1 = uncheckedShiftRL64# y 32# 282 283 !w0 = timesWord64# x0 y0 284 !t = plusWord64# (timesWord64# x1 y0) (uncheckedShiftRL64# w0 32#) 285 !w1 = and64# t mask32 286 !w2 = uncheckedShiftRL64# t 32# 287 !w1_1 = plusWord64# w1 (timesWord64# x0 y1) 288 289 !hi = plusWord64# 290 (timesWord64# x1 y1) 291 (plusWord64# w2 (uncheckedShiftRL64# w1_1 32#)) 292 !lo = timesWord64# x y 293 in (# hi, lo #) 294 {-# INLINE mul_c# #-} 295 296 umul_hop# :: Word64# -> Word64# -> Word64# -> (# Word64#, Word64# #) 297 umul_hop# z x y = 298 let !(# hi_0, lo_0 #) = mul_c# x y 299 !(# lo, c #) = add_c# lo_0 z (wordToWord64# 0##) 300 !(# hi, _ #) = add_c# hi_0 (wordToWord64# 0##) c 301 in (# hi, lo #) 302 {-# INLINE umul_hop# #-} 303 304 umul_step# 305 :: Word64# 306 -> Word64# 307 -> Word64# 308 -> Word64# 309 -> (# Word64#, Word64# #) 310 umul_step# z x y c = 311 let !(# hi_0, lo_0 #) = mul_c# x y 312 !(# lo_1, c_0 #) = add_c# lo_0 c (wordToWord64# 0##) 313 !(# hi_1, _ #) = add_c# hi_0 (wordToWord64# 0##) c_0 314 !(# lo, c_1 #) = add_c# lo_1 z (wordToWord64# 0##) 315 !(# hi, _ #) = add_c# hi_1 (wordToWord64# 0##) c_1 316 in (# hi, lo #) 317 {-# INLINE umul_step# #-} 318 319 -- | Multiplication on 'Word256' values, with overflow. 320 -- 321 -- >>> to_word256 0xFFFFFFFFFF `mul` to_word256 0xFFFFFF 322 -- 18446742974181146625 323 mul :: Word256 -> Word256 -> Word256 324 mul (Word256 (W64# a0) (W64# a1) (W64# a2) (W64# a3)) 325 (Word256 (W64# b0) (W64# b1) (W64# b2) (W64# b3)) = 326 let !(# c0_0, s0 #) = mul_c# a0 b0 327 !(# c0_1, r0 #) = umul_hop# c0_0 a1 b0 328 !(# c0_2, r1 #) = umul_hop# c0_1 a2 b0 329 !(# c1_0, s1 #) = umul_hop# r0 a0 b1 330 !(# c1_1, r2 #) = umul_step# r1 a1 b1 c1_0 331 !(# c2, s2 #) = umul_hop# r2 a1 b1 332 !s3 = plusWord64# (timesWord64# a3 b0) 333 (plusWord64# (timesWord64# a2 b1) 334 (plusWord64# (timesWord64# a0 b3) 335 (plusWord64# (timesWord64# a1 b2) 336 (plusWord64# c0_2 (plusWord64# c1_1 c2))))) 337 in Word256 (W64# s0) (W64# s1) (W64# s2) (W64# s3) 338 339 -- full 256-bit x 256-bit -> 512-bit multiplication 340 mul_512# 341 :: (# Word64#, Word64#, Word64#, Word64# #) 342 -> (# Word64#, Word64#, Word64#, Word64# #) 343 -> (# Word64#, Word64#, Word64#, Word64#, Word64#, Word64#, Word64#, Word64# #) 344 mul_512# (# x0, x1, x2, x3 #) (# y0, y1, y2, y3 #) = 345 let !(# c4_0, r0 #) = mul_c# x0 y0 346 !(# c4_1, r0_1 #) = umul_hop# c4_0 x1 y0 347 !(# c4_2, r0_2 #) = umul_hop# c4_1 x2 y0 348 !(# c4, r0_3 #) = umul_hop# c4_2 x3 y0 349 350 !(# c5_0, r1 #) = umul_hop# r0_1 x0 y1 351 !(# c5_1, r1_2 #) = umul_step# r0_2 x1 y1 c5_0 352 !(# c5_2, r1_3 #) = umul_step# r0_3 x2 y1 c5_1 353 !(# c5, r1_4 #) = umul_step# c4 x3 y1 c5_2 354 355 !(# c6_0, r2 #) = umul_hop# r1_2 x0 y2 356 !(# c6_1, r2_3 #) = umul_step# r1_3 x1 y2 c6_0 357 !(# c6_2, r2_4 #) = umul_step# r1_4 x2 y2 c6_1 358 !(# c6, r2_5 #) = umul_step# c5 x3 y2 c6_2 359 360 !(# c7_0, r3 #) = umul_hop# r2_3 x0 y3 361 !(# c7_1, r4 #) = umul_step# r2_4 x1 y3 c7_0 362 !(# c7_2, r5 #) = umul_step# r2_5 x2 y3 c7_1 363 !(# r7, r6 #) = umul_step# c6 x3 y3 c7_2 364 in (# r0, r1, r2, r3, r4, r5, r6, r7 #) 365 366 -- division ------------------------------------------------------------------- 367 368 -- quotient, remainder of (hi, lo) divided by y 369 quotrem_r :: Word64 -> Word64 -> Word64 -> Word128 370 quotrem_r (W64# hi) (W64# lo) (W64# y) = 371 let !(# q, r #) = quotrem_r# hi lo y 372 in P (W64# q) (W64# r) 373 374 -- translated from Div64 in go's math/bits package 375 quotrem_r# :: Word64# -> Word64# -> Word64# -> (# Word64#, Word64# #) 376 quotrem_r# hi lo y_0 377 | isTrue# (eqWord64# y_0 (wordToWord64# 0##)) = 378 error "ppad-fixed (quotrem_r): division by zero" 379 | isTrue# (leWord64# y_0 hi) = 380 error "ppad-fixed: overflow" 381 | isTrue# (eqWord64# hi (wordToWord64# 0##)) = 382 (# quotWord64# lo y_0, remWord64# lo y_0 #) 383 | otherwise = 384 let !s = int64ToInt# (word64ToInt64# (wordToWord64# (clz64# y_0))) 385 !y = uncheckedShiftL64# y_0 s 386 387 !yn1 = uncheckedShiftRL64# y 32# 388 !yn0 = and64# y mask32 389 !un32 = or64# 390 (uncheckedShiftL64# hi s) 391 (if (isTrue# (s ==# 0#)) 392 then wordToWord64# 0## 393 else uncheckedShiftRL64# lo (64# -# s)) 394 395 !un10 = uncheckedShiftL64# lo s 396 !un1 = uncheckedShiftRL64# un10 32# 397 !un0 = and64# un10 mask32 398 !q1 = quotWord64# un32 yn1 399 !rhat = subWord64# un32 (timesWord64# q1 yn1) 400 401 !q1_l = q_loop# q1 rhat yn0 yn1 un1 402 403 !un21 = subWord64# 404 (plusWord64# (timesWord64# un32 two32) un1) 405 (timesWord64# q1_l y) 406 !q0 = quotWord64# un21 yn1 407 !rhat_n = subWord64# un21 (timesWord64# q0 yn1) 408 409 !q0_l = q_loop# q0 rhat_n yn0 yn1 un0 410 411 !q = plusWord64# (timesWord64# q1_l two32) q0_l 412 !r = uncheckedShiftRL64# 413 (subWord64# 414 (plusWord64# (timesWord64# un21 two32) un0) 415 (timesWord64# q0_l y)) 416 s 417 in (# q, r #) 418 where 419 !two32 = wordToWord64# 0x100000000## 420 !mask32 = wordToWord64# 0x0ffffffff## 421 422 q_loop# !q_acc !rhat_acc !yn_0 !yn_1 !un = 423 let go# !qa !rha 424 | isTrue# (orI# 425 (geWord64# qa two32) 426 (gtWord64# 427 (timesWord64# qa yn_0) 428 (plusWord64# (timesWord64# two32 rha) un))) = 429 let !qn = subWord64# qa (wordToWord64# 1##) 430 !rhn = plusWord64# rha yn_1 431 in if isTrue# (geWord64# rhn two32) 432 then qn 433 else go# qn rhn 434 | otherwise = qa 435 in go# q_acc rhat_acc 436 {-# INLINE q_loop# #-} 437 {-# INLINE quotrem_r# #-} 438 439 -- same as quotrem_r, except only computes quotient 440 quot_r :: Word64 -> Word64 -> Word64 -> Word64 441 quot_r (W64# hi) (W64# lo) (W64# y) = 442 let !q = quot_r# hi lo y 443 in W64# q 444 445 quot_r# :: Word64# -> Word64# -> Word64# -> Word64# 446 quot_r# hi lo y_0 447 | isTrue# (eqWord64# y_0 (wordToWord64# 0##)) = 448 error "ppad-fixed (quot_r): division by zero" 449 | isTrue# (leWord64# y_0 hi) = 450 error "ppad-fixed: overflow" 451 | isTrue# (eqWord64# hi (wordToWord64# 0##)) = 452 quotWord64# lo y_0 453 | otherwise = 454 let !s = int64ToInt# (word64ToInt64# (wordToWord64# (clz64# y_0))) 455 !y = uncheckedShiftL64# y_0 s 456 457 !yn1 = uncheckedShiftRL64# y 32# 458 !yn0 = and64# y mask32 459 !un32 = or64# 460 (uncheckedShiftL64# hi s) 461 (if (isTrue# (s ==# 0#)) 462 then wordToWord64# 0## 463 else uncheckedShiftRL64# lo (64# -# s)) 464 !un10 = uncheckedShiftL64# lo s 465 !un1 = uncheckedShiftRL64# un10 32# 466 !un0 = and64# un10 mask32 467 !q1 = quotWord64# un32 yn1 468 !rhat = subWord64# un32 (timesWord64# q1 yn1) 469 470 !q1_l = q_loop# q1 rhat yn0 yn1 un1 471 472 !un21 = subWord64# 473 (plusWord64# (timesWord64# un32 two32) un1) 474 (timesWord64# q1_l y) 475 !q0 = quotWord64# un21 yn1 476 !rhat_n = subWord64# un21 (timesWord64# q0 yn1) 477 478 !q0_l = q_loop# q0 rhat_n yn0 yn1 un0 479 480 in plusWord64# (timesWord64# q1_l two32) q0_l 481 where 482 !two32 = wordToWord64# 0x100000000## 483 !mask32 = wordToWord64# 0x0ffffffff## 484 485 q_loop# !q_acc !rhat_acc !yn_0 !yn_1 !un = 486 let go# !qa !rha 487 | isTrue# (orI# 488 (geWord64# qa two32) 489 (gtWord64# 490 (timesWord64# qa yn_0) 491 (plusWord64# (timesWord64# two32 rha) un))) = 492 let !qn = subWord64# qa (wordToWord64# 1##) 493 !rhn = plusWord64# rha yn_1 494 in if isTrue# (geWord64# rhn two32) 495 then qn 496 else go# qn rhn 497 | otherwise = qa 498 in go# q_acc rhat_acc 499 {-# INLINE q_loop# #-} 500 {-# INLINE quot_r# #-} 501 502 -- XX these 2by1 names suck 503 504 -- quotient and remainder of (hi, lo) divided by d, using reciprocal 505 quotrem_2by1 :: Word64 -> Word64 -> Word64 -> Word64 -> Word128 506 quotrem_2by1 (W64# uh) (W64# ul) (W64# d) (W64# rec) = 507 let !(# q, r #) = quotrem_2by1# uh ul d rec 508 in P (W64# q) (W64# r) 509 510 quotrem_2by1# 511 :: Word64# -> Word64# -> Word64# -> Word64# -> (# Word64#, Word64# #) 512 quotrem_2by1# uh ul d rec = 513 let !(# qh_0, ql #) = mul_c# rec uh 514 !(# ql_0, c #) = add_c# ql ul (wordToWord64# 0##) 515 !(# qh_1_l, _ #) = add_c# qh_0 uh c 516 !qh_1 = plusWord64# qh_1_l (wordToWord64# 1##) 517 !r = subWord64# ul (timesWord64# qh_1 d) 518 519 !(# qh_y, r_y #) 520 | isTrue# (geWord64# r ql_0) = (# qh_1_l, plusWord64# r d #) 521 | otherwise = (# qh_1, r #) 522 523 in if isTrue# (geWord64# r_y d) 524 then (# plusWord64# qh_y (wordToWord64# 1##), subWord64# r_y d #) 525 else (# qh_y, r_y #) 526 {-# INLINE quotrem_2by1# #-} 527 528 recip_2by1 :: Word64 -> Word64 529 recip_2by1 (W64# d) = W64# (recip_2by1# d) 530 531 recip_2by1# :: Word64# -> Word64# 532 recip_2by1# d = quot_r# (not64# d) (wordToWord64# 0xffffffffffffffff##) d 533 {-# INLINE recip_2by1# #-} 534 535 -- quotient and remainder of variable-length u divided by 64-bit d 536 quotrem_by1 537 :: PrimMonad m 538 => PA.MutablePrimArray (PrimState m) Word64 -- quotient 539 -> PA.PrimArray Word64 -- variable-length dividend 540 -> Word64 -- divisor 541 -> m Word64 -- remainder 542 quotrem_by1 q u d = do 543 let !rec = recip_2by1 d 544 loop !j !hj 545 | j < 0 = pure hj 546 | otherwise = do 547 let !lj = PA.indexPrimArray u j 548 !(P qj rj) = quotrem_2by1 hj lj d rec 549 PA.writePrimArray q j qj 550 loop (j - 1) rj 551 !l = PA.sizeofPrimArray u 552 !hl = PA.indexPrimArray u (l - 1) 553 loop (l - 2) hl 554 555 -- remainder of variable-length u divided by 64-bit d 556 rem_by1 557 :: PA.PrimArray Word64 -- variable-length dividend 558 -> Word64 -- divisor 559 -> Word64 -- remainder 560 rem_by1 u d = do 561 let !rec = recip_2by1 d 562 loop !j !hj 563 | j < 0 = hj 564 | otherwise = do 565 let !lj = PA.indexPrimArray u j 566 !(P _ rj) = quotrem_2by1 hj lj d rec 567 loop (j - 1) rj 568 !l = PA.sizeofPrimArray u 569 !hl = PA.indexPrimArray u (l - 1) 570 loop (l - 2) hl 571 572 -- x =- y * m 573 -- requires (len x - x_offset) >= len y > 0 574 sub_mul 575 :: PrimMonad m 576 => PA.MutablePrimArray (PrimState m) Word64 577 -> Int 578 -> PA.PrimArray Word64 579 -> Int 580 -> Word64 581 -> m Word64 582 sub_mul x x_offset y l (W64# m) = do 583 let loop !j !borrow 584 | j == l = pure (W64# borrow) 585 | otherwise = do 586 !(W64# x_j) <- PA.readPrimArray x (j + x_offset) 587 let !(W64# y_j) = PA.indexPrimArray y j 588 let !(# s, carry1 #) = sub_b# x_j borrow (wordToWord64# 0##) 589 !(# ph, pl #) = mul_c# y_j m 590 !(# t, carry2 #) = sub_b# s pl (wordToWord64# 0##) 591 PA.writePrimArray x (j + x_offset) (W64# t) 592 loop (succ j) (plusWord64# (plusWord64# ph carry1) carry2) 593 loop 0 (wordToWord64# 0##) 594 595 -- x += y 596 add_to 597 :: PrimMonad m 598 => PA.MutablePrimArray (PrimState m) Word64 599 -> Int 600 -> PA.PrimArray Word64 601 -> Int 602 -> m Word64 603 add_to x x_offset y l = do 604 let loop !j !cacc 605 | j == l = pure cacc 606 | otherwise = do 607 xj <- PA.readPrimArray x (j + x_offset) 608 let yj = PA.indexPrimArray y j 609 !(P nex carry) = add_c xj yj cacc 610 PA.writePrimArray x (j + x_offset) nex 611 loop (succ j) carry 612 loop 0 0 613 614 -- knuth's algorithm 4.3.1 for variable-length division 615 -- divides normalized dividend by normalized divisor, writing quotient to 616 -- 'quo' and remainder to dividend 617 quotrem_knuth 618 :: PrimMonad m 619 => PA.MutablePrimArray (PrimState m) Word64 -- variable-length quotient 620 -> PA.MutablePrimArray (PrimState m) Word64 -- normalized dividend 621 -> Int -- size of normalize dividend 622 -> PA.PrimArray Word64 -- normalized divisor 623 -> m () 624 quotrem_knuth quo u ulen d = do 625 let !ld = PA.sizeofPrimArray d 626 !dh = PA.indexPrimArray d (ld - 1) 627 !dl = PA.indexPrimArray d (ld - 2) 628 !rec = recip_2by1 dh 629 loop !j 630 | j < 0 = pure () 631 | otherwise = do 632 !u2 <- PA.readPrimArray u (j + ld) 633 !u1 <- PA.readPrimArray u (j + ld - 1) 634 !u0 <- PA.readPrimArray u (j + ld - 2) 635 let !qhat 636 | u2 >= dh = 0xffff_ffff_ffff_ffff 637 | otherwise = 638 let !(P qh rh) = quotrem_2by1 u2 u1 dh rec 639 !(P ph pl) = mul_c qh dl 640 in if ph > rh || (ph == rh && pl > u0) 641 then qh - 1 642 else qh 643 644 !borrow <- sub_mul u j d ld qhat 645 PA.writePrimArray u (j + ld) (u2 - borrow) 646 if u2 < borrow 647 then do 648 -- rare case 649 let !qh = qhat - 1 650 r <- add_to u j d ld 651 PA.writePrimArray u (j + ld) r 652 PA.writePrimArray quo j qh 653 else 654 PA.writePrimArray quo j qhat 655 loop (pred j) 656 loop (ulen - ld - 1) 657 658 -- knuth's algorithm again, but only compute remainder 659 rem_knuth 660 :: PrimMonad m 661 => PA.MutablePrimArray (PrimState m) Word64 -- normalized dividend 662 -> Int -- size of normalize dividend 663 -> PA.PrimArray Word64 -- normalized divisor 664 -> m () 665 rem_knuth u ulen d = do 666 let !ld = PA.sizeofPrimArray d 667 !dh = PA.indexPrimArray d (ld - 1) 668 !dl = PA.indexPrimArray d (ld - 2) 669 !rec = recip_2by1 dh 670 loop !j 671 | j < 0 = pure () 672 | otherwise = do 673 !u2 <- PA.readPrimArray u (j + ld) 674 !u1 <- PA.readPrimArray u (j + ld - 1) 675 !u0 <- PA.readPrimArray u (j + ld - 2) 676 let !qhat 677 | u2 >= dh = 0xffff_ffff_ffff_ffff 678 | otherwise = 679 let !(P qh rh) = quotrem_2by1 u2 u1 dh rec 680 !(P ph pl) = mul_c qh dl 681 in if ph > rh || (ph == rh && pl > u0) 682 then qh - 1 683 else qh 684 685 !borrow <- sub_mul u j d ld qhat 686 PA.writePrimArray u (j + ld) (u2 - borrow) 687 if u2 < borrow 688 then do 689 -- rare case 690 r <- add_to u j d ld 691 PA.writePrimArray u (j + ld) r 692 else 693 pure () 694 loop (pred j) 695 loop (ulen - ld - 1) 696 697 normalized_dividend_length 698 :: PrimMonad m 699 => PA.MutablePrimArray (PrimState m) Word64 -- dividend 700 -> m Int 701 normalized_dividend_length u = do 702 !lu <- PA.getSizeofMutablePrimArray u 703 let loop !j 704 | j < 0 = pure 0 705 | otherwise = do 706 uj <- PA.readPrimArray u j 707 if uj /= 0 then pure (j + 1) else loop (j - 1) 708 loop (lu - 2) -- last word will be uninitialized, skip it 709 {-# INLINE normalized_dividend_length #-} 710 711 -- normalize 256-bit divisor 712 normalize_divisor 713 :: PrimMonad m 714 => Word256 715 -> m (PA.PrimArray Word64, Int, Int, Word64) -- XX more efficient 716 normalize_divisor (Word256 d0 d1 d2 d3) = do 717 let (dlen, d_last, shift) 718 | d3 /= 0 = (4, d3, B.countLeadingZeros d3) 719 | d2 /= 0 = (3, d2, B.countLeadingZeros d2) 720 | d1 /= 0 = (2, d1, B.countLeadingZeros d1) 721 | d0 /= 0 = (1, d0, B.countLeadingZeros d0) 722 | otherwise = error "ppad-fixed (normalize): invalid 256-bit word" 723 dn <- PA.newPrimArray dlen 724 case dlen of 725 4 -> do 726 PA.writePrimArray dn 3 d3 727 PA.writePrimArray dn 2 d2 728 PA.writePrimArray dn 1 d1 729 PA.writePrimArray dn 0 d0 730 3 -> do 731 PA.writePrimArray dn 2 d2 732 PA.writePrimArray dn 1 d1 733 PA.writePrimArray dn 0 d0 734 2 -> do 735 PA.writePrimArray dn 1 d1 736 PA.writePrimArray dn 0 d0 737 _ -> do 738 PA.writePrimArray dn 0 d0 739 let norm !j !dj 740 | j == 0 = do 741 let !dn_0 = dj .<<. shift 742 PA.writePrimArray dn 0 dn_0 743 pure dn_0 744 | otherwise = do 745 dj_1 <- PA.readPrimArray dn (j - 1) 746 PA.writePrimArray dn j $ 747 (dj .<<. shift) .|. (dj_1 .>>. (64 - shift)) 748 norm (j - 1) dj_1 749 dn_0 <- norm (dlen - 1) d_last 750 d_final <- PA.unsafeFreezePrimArray dn 751 pure (d_final, dlen, shift, dn_0) 752 {-# INLINE normalize_divisor #-} 753 754 -- normalize variable-length dividend 755 normalize_dividend 756 :: PrimMonad m 757 => PA.MutablePrimArray (PrimState m) Word64 758 -> Int 759 -> Int 760 -> m () 761 normalize_dividend u ulen s = do 762 u_hi <- PA.readPrimArray u (ulen - 1) 763 PA.writePrimArray u ulen (u_hi .>>. (64 - s)) 764 let loop !j !uj 765 | j == 0 = 766 PA.writePrimArray u 0 (uj .<<. s) 767 | otherwise = do 768 !uj_1 <- PA.readPrimArray u (j - 1) 769 PA.writePrimArray u j $ 770 (uj .<<. s) .|. (uj_1 .>>. (64 - s)) 771 loop (j - 1) uj_1 772 loop (ulen - 1) u_hi 773 {-# INLINE normalize_dividend #-} 774 775 -- quotient and remainder of variable-length u divided by variable-length d 776 quotrem 777 :: PrimMonad m 778 => PA.MutablePrimArray (PrimState m) Word64 -- quotient (potentially large) 779 -> PA.MutablePrimArray (PrimState m) Word64 -- unnormalized dividend 780 -> Word256 -- unnormalized divisor 781 -> m (PA.PrimArray Word64) -- remainder (256-bit) 782 quotrem quo u d = do 783 -- normalize divisor 784 !(dn, dlen, shift, dn_0) <- normalize_divisor d 785 -- get size of normalized dividend 786 !ulen <- normalized_dividend_length u 787 if ulen < dlen 788 then PA.freezePrimArray u 0 4 789 else do 790 -- normalize dividend 791 normalize_dividend u ulen shift 792 if dlen == 1 793 then do 794 -- normalized divisor is small 795 !un <- PA.unsafeFreezePrimArray u 796 !r <- quotrem_by1 quo un dn_0 797 pure $ PA.primArrayFromList [r .>>. shift, 0, 0, 0] -- XX 798 else do 799 -- quotrem of normalized dividend divided by normalized divisor 800 quotrem_knuth quo u (ulen + 1) dn 801 -- unnormalize remainder 802 let unn_rem !j !unj 803 | j == dlen = do 804 PA.unsafeFreezePrimArray u 805 | j + 1 == ulen = do 806 PA.writePrimArray u j (unj .>>. shift) 807 PA.unsafeFreezePrimArray u 808 | otherwise = do 809 !unj_1 <- PA.readPrimArray u (j + 1) 810 PA.writePrimArray u j $ 811 (unj .>>. shift) .|. (unj_1 .<<. (64 - shift)) 812 unn_rem (j + 1) unj_1 813 814 !un_0 <- PA.readPrimArray u 0 815 unn_rem 0 un_0 816 {-# INLINE quotrem #-} 817 818 -- quotient of u variable-length u divided by variable-length d 819 quot 820 :: PrimMonad m 821 => PA.MutablePrimArray (PrimState m) Word64 -- quotient 822 -> PA.MutablePrimArray (PrimState m) Word64 -- unnormalized dividend 823 -> Word256 -- unnormalized divisor 824 -> m Int -- length of quotient 825 quot quo u d = do 826 -- normalize divisor 827 !(dn, dlen, shift, dn_0) <- normalize_divisor d 828 -- get size of normalized dividend 829 !ulen <- normalized_dividend_length u 830 if ulen < dlen 831 then pure 0 832 else do 833 normalize_dividend u ulen shift 834 if dlen == 1 835 then do 836 -- normalized divisor is small 837 !un <- PA.unsafeFreezePrimArray u 838 _ <- quotrem_by1 quo un dn_0 839 pure ulen 840 else do 841 quotrem_knuth quo u (ulen + 1) dn 842 pure (ulen + 1 - dlen) 843 {-# INLINE quot #-} 844 845 -- remainder of variable-length u divided by variable-length d 846 rem 847 :: PrimMonad m 848 => PA.MutablePrimArray (PrimState m) Word64 -- quotient (potentially large) 849 -> PA.MutablePrimArray (PrimState m) Word64 -- unnormalized dividend 850 -> Word256 -- unnormalized divisor 851 -> m (PA.PrimArray Word64) -- remainder (256-bit) 852 rem quo u d = do 853 -- normalize divisor 854 !(dn, dlen, shift, dn_0) <- normalize_divisor d 855 -- get size of normalized dividend 856 !ulen <- normalized_dividend_length u 857 if ulen < dlen 858 then PA.freezePrimArray u 0 4 859 else do 860 -- normalize dividend 861 normalize_dividend u ulen shift 862 if dlen == 1 863 then do 864 -- normalized divisor is small 865 !un <- PA.unsafeFreezePrimArray u 866 !r <- quotrem_by1 quo un dn_0 867 pure $ PA.primArrayFromList [r .>>. shift, 0, 0, 0] -- XX 868 else do 869 -- quotrem of normalized dividend divided by normalized divisor 870 rem_knuth u (ulen + 1) dn 871 -- unnormalize remainder 872 let unn_rem !j !unj 873 | j == dlen = do 874 PA.unsafeFreezePrimArray u 875 | j + 1 == ulen = do 876 PA.writePrimArray u j (unj .>>. shift) 877 PA.unsafeFreezePrimArray u 878 | otherwise = do 879 !unj_1 <- PA.readPrimArray u (j + 1) 880 PA.writePrimArray u j $ 881 (unj .>>. shift) .|. (unj_1 .<<. (64 - shift)) 882 unn_rem (j + 1) unj_1 883 884 !un_0 <- PA.readPrimArray u 0 885 unn_rem 0 un_0 886 {-# INLINE rem #-} 887 888 -- | Division on 'Word256' values. 889 -- 890 -- >>> to_word256 0xFFFFFFFFFF `div` to_word256 0xFFFFFF 891 -- 65536 892 div :: Word256 -> Word256 -> Word256 893 div u@(Word256 u0 u1 u2 u3) d@(Word256 d0 _ _ _) 894 | is_zero d || d `gt` u = zero -- ? 895 | u == d = one 896 | is_word64 u = Word256 (u0 `Prelude.quot` d0) 0 0 0 897 | otherwise = runST $ do 898 -- allocate quotient 899 quo <- PA.newPrimArray 4 900 -- allocate dividend, leaving enough space for normalization 901 u_arr <- PA.newPrimArray 5 902 PA.writePrimArray u_arr 0 u0 903 PA.writePrimArray u_arr 1 u1 904 PA.writePrimArray u_arr 2 u2 905 PA.writePrimArray u_arr 3 u3 906 -- last index of u_hot intentionally unset 907 l <- quot quo u_arr d 908 case l of 909 1 -> do 910 q0 <- PA.readPrimArray quo 0 911 pure (Word256 q0 0 0 0) 912 2 -> do 913 q0 <- PA.readPrimArray quo 0 914 q1 <- PA.readPrimArray quo 1 915 pure (Word256 q0 q1 0 0) 916 3 -> do 917 q0 <- PA.readPrimArray quo 0 918 q1 <- PA.readPrimArray quo 1 919 q2 <- PA.readPrimArray quo 2 920 pure (Word256 q0 q1 q2 0) 921 4 -> do 922 q0 <- PA.readPrimArray quo 0 923 q1 <- PA.readPrimArray quo 1 924 q2 <- PA.readPrimArray quo 2 925 q3 <- PA.readPrimArray quo 3 926 pure (Word256 q0 q1 q2 q3) 927 _ -> error "ppad-fixed (quot): invalid quotient length" 928 929 -- | Modulo operation on 'Word256' values. 930 -- 931 -- >>> to_word256 0xFFFFFFFFFF `mod` to_word256 0xFFFFFF 932 -- 65535 933 mod :: Word256 -> Word256 -> Word256 934 mod u@(Word256 u0 u1 u2 u3) d@(Word256 d0 _ _ _) 935 | is_zero d || d `gt` u = zero -- ? 936 | u == d = one 937 | is_word64 u = Word256 (u0 `Prelude.rem` d0) 0 0 0 938 | otherwise = runST $ do 939 -- allocate quotient 940 quo <- PA.newPrimArray 4 941 -- allocate dividend, leaving enough space for normalization 942 u_hot <- PA.newPrimArray 5 943 PA.writePrimArray u_hot 0 u0 944 PA.writePrimArray u_hot 1 u1 945 PA.writePrimArray u_hot 2 u2 946 PA.writePrimArray u_hot 3 u3 947 -- last index of u_hot intentionally unset 948 r <- rem quo u_hot d 949 let r0 = PA.indexPrimArray r 0 950 r1 = PA.indexPrimArray r 1 951 r2 = PA.indexPrimArray r 2 952 r3 = PA.indexPrimArray r 3 953 pure (Word256 r0 r1 r2 r3)