commit 83dfead9e9cba75d7b927a61741df8bc28e20be8
parent fc6cae57cd0293ead91bbba9d250cb3e9901a1e2
Author: Jared Tobin <jared@jtobin.io>
Date: Sun, 26 Jan 2025 20:39:14 +0400
Revert "lib: switch to primarrays again"
This reverts commit 9c0401625cd96c6764076f044e3944db332639a3.
Diffstat:
5 files changed, 722 insertions(+), 242 deletions(-)
diff --git a/bench/Main.hs b/bench/Main.hs
@@ -5,6 +5,7 @@ module Main where
import Criterion.Main
import Data.Bits ((.|.), (.&.), (.^.))
+import qualified Data.Bits as B
import qualified Data.Word.Extended as W
import Prelude hiding (or, and, div, mod)
import qualified Prelude (div)
@@ -133,6 +134,24 @@ mod = bench "mod (pure)" $ nf (W.mod w0) w1 where
!w1 = W.to_word256
0x066bd4c3c10e30260cb6e7832af25f15527b089b258a1fef13b6eec3ce73bf06
+quotrem_by1 :: Benchmark
+quotrem_by1 =
+ bench "quotrem_by1" $
+ nf (W.quotrem_by1 (W.Word576 300 200 100 0 0 0 0 0 0) 3)
+ (B.complement 50)
+
+quotrem_knuth :: Benchmark
+quotrem_knuth =
+ bench "quotrem_knuth" $
+ nf (W.quotrem_knuth u 5 d) 4
+ where
+ !u = W.Word576
+ 2162362899639802732 8848548347662387477 13702897166684377657
+ 16799544643779908154 1 0 0 0 0
+ !d = W.Word256
+ 16950798510782491100 2612788699139816405
+ 5146719872810836952 14966148379609982000
+
arithmetic :: Benchmark
arithmetic = bgroup "arithmetic" [
add
@@ -172,7 +191,6 @@ bits = bgroup "bits" [
main :: IO ()
main = defaultMain [
- div
- , div_baseline
+ baseline_comparison
]
diff --git a/bench/Weight.hs b/bench/Weight.hs
@@ -5,6 +5,7 @@
module Main where
+import qualified Data.Bits as B
import qualified Data.Word.Extended as E
import qualified Weigh as W
@@ -25,13 +26,42 @@ w2 = E.to_word256 i2
w3 = E.to_word256 i3
main :: IO ()
-main = W.mainWith $ do
- W.func "add (baseline)" ((+) i0) i1
- W.func "add" (E.add w0) w1
- W.func "sub (baseline)" ((-) i0) i1
- W.func "sub" (E.sub w0) w1
- W.func "mul (baseline)" ((*) i0) i1
- W.func "mul" (E.mul w0) w1
- W.func "div (baseline)" (Prelude.div i2) i3
- W.func "div" (E.div w2) w3
+main = do
+ let !u0 = E.Word576 300 200 100 0 0 0 0 0 0
+ !u1 = E.Word576
+ 0x1234567890ABCDEF
+ 0xFEDCBA0987654321
+ 0x123456789ABCDEF0
+ 0 0 0 0 0 0
+
+ !u2 = E.Word576
+ 2162362899639802732
+ 8848548347662387477
+ 13702897166684377657
+ 16799544643779908154
+ 1 0 0 0 0
+
+ !d0 = B.complement 50
+
+ !d1 = E.Word256 0x0 0x0 0x1 0x100000000
+
+ !d2 = E.Word256
+ 16950798510782491100
+ 2612788699139816405
+ 5146719872810836952
+ 14966148379609982000
+
+ W.mainWith $ do
+ W.func "add (baseline)" ((+) i0) i1
+ W.func "add" (E.add w0) w1
+ W.func "sub (baseline)" ((-) i0) i1
+ W.func "sub" (E.sub w0) w1
+ W.func "mul (baseline)" ((*) i0) i1
+ W.func "mul" (E.mul w0) w1
+ W.func "div (baseline)" (Prelude.div i2) i3
+ W.func "div" (E.div w2) w3
+ W.func "quotrem_by1" (E.quotrem_by1 u0 3) d0
+ W.func "quotrem" (E.quotrem u1) d1
+ W.func "quotrem_knuth" (E.quotrem_knuth u2 5 d2) 4
+
diff --git a/lib/Data/Word/Extended.hs b/lib/Data/Word/Extended.hs
@@ -43,6 +43,10 @@ module Data.Word.Extended (
-- for testing/benchmarking
, Word128(..)
+ , Word576(..)
+ , Word640(..)
+ , Word832(..)
+ , Word1152(..)
, quotrem
, quotrem_r
, quotrem_by1
@@ -58,11 +62,8 @@ module Data.Word.Extended (
) where
import Control.DeepSeq
-import Control.Monad.Primitive
-import Control.Monad.ST
import Data.Bits ((.|.), (.&.), (.<<.), (.>>.), (.^.))
import qualified Data.Bits as B
-import qualified Data.Primitive.PrimArray as PA
import Data.Word (Word64)
import GHC.Generics
import Prelude hiding (div, mod, or, and)
@@ -86,6 +87,21 @@ instance NFData Word256
instance Show Word256 where
show = show . to_integer
+-- XX avoid
+sel256 :: Word256 -> Int -> Word64
+sel256 (Word256 a0 a1 a2 a3) = \case
+ 0 -> a0; 1 -> a1; 2 -> a2; 3 -> a3
+ _ -> error "ppad-fixed (sel256): bad index"
+
+-- XX avoid
+set256 :: Word256 -> Int -> Word64 -> Word256
+set256 (Word256 a0 a1 a2 a3) j wo = case j of
+ 0 -> Word256 wo a1 a2 a3
+ 1 -> Word256 a0 wo a2 a3
+ 2 -> Word256 a0 a1 wo a3
+ 3 -> Word256 a0 a1 a2 wo
+ _ -> error "ppad-fixed (set256): bad index"
+
-- | Little-endian Word512.
data Word512 = Word512
{-# UNPACK #-} !Word64
@@ -130,6 +146,30 @@ data Word576 = Word576
instance NFData Word576
+zero576 :: Word576
+zero576 = Word576 0 0 0 0 0 0 0 0 0
+
+-- XX avoid
+sel576 :: Word576 -> Int -> Word64
+sel576 (Word576 a0 a1 a2 a3 a4 a5 a6 a7 a8) = \case
+ 0 -> a0; 1 -> a1; 2 -> a2; 3 -> a3; 4 -> a4
+ 5 -> a5; 6 -> a6; 7 -> a7; 8 -> a8
+ _ -> error "ppad-fixed (sel576): bad index"
+
+-- XX avoid
+set576 :: Word576 -> Int -> Word64 -> Word576
+set576 (Word576 a0 a1 a2 a3 a4 a5 a6 a7 a8) j wo = case j of
+ 0 -> Word576 wo a1 a2 a3 a4 a5 a6 a7 a8
+ 1 -> Word576 a0 wo a2 a3 a4 a5 a6 a7 a8
+ 2 -> Word576 a0 a1 wo a3 a4 a5 a6 a7 a8
+ 3 -> Word576 a0 a1 a2 wo a4 a5 a6 a7 a8
+ 4 -> Word576 a0 a1 a2 a3 wo a5 a6 a7 a8
+ 5 -> Word576 a0 a1 a2 a3 a4 wo a6 a7 a8
+ 6 -> Word576 a0 a1 a2 a3 a4 a5 wo a7 a8
+ 7 -> Word576 a0 a1 a2 a3 a4 a5 a6 wo a8
+ 8 -> Word576 a0 a1 a2 a3 a4 a5 a6 a7 wo
+ _ -> error "ppad-fixed (set576): bad index"
+
data Word640 = Word640
{-# UNPACK #-} !Word576
{-# UNPACK #-} !Word64
@@ -410,192 +450,284 @@ mul_512 (Word256 x0 x1 x2 x3) (Word256 y0 y1 y2 y3) =
-- division -------------------------------------------------------------------
--- primitive -------------------------
-
--- x =- y * m
--- requires (len x - x_offset) >= len y > 0
-sub_mul
- :: PrimMonad m
- => PA.MutablePrimArray (PrimState m) Word64
- -> Int
- -> Word256
- -> Int
- -> Word64
- -> m Word64
-sub_mul x x_offset (Word256 y0 y1 y2 y3) l m = do
- let loop !j !borrow
- | j == l = pure borrow
- | otherwise = do
- !x_j <- PA.readPrimArray x (j + x_offset)
- let !(P s carry1) = sub_b x_j borrow 0
- !(P ph pl) = case j of
- 0 -> mul_c y0 m
- 1 -> mul_c y1 m
- 2 -> mul_c y2 m
- 3 -> mul_c y3 m
- _ -> error "ppad-fixed (sub_mul): bad index"
- !(P t carry2) = sub_b s pl 0
- PA.writePrimArray x (j + x_offset) t
- loop (succ j) (ph + carry1 + carry2)
- loop 0 0
-
-quotrem_by1
- :: PrimMonad m
- => PA.MutablePrimArray (PrimState m) Word64
- -> PA.PrimArray Word64
- -> Word64
- -> m Word64
-quotrem_by1 quo u d = do
- let !rec = recip_2by1 d
- !lu = PA.sizeofPrimArray u
- !r0 = PA.indexPrimArray u (lu - 1)
- loop !j !racc
- | j < 0 = pure racc
- | otherwise = do
- let uj = PA.indexPrimArray u j
- !(P qj rnex) = quotrem_2by1 racc uj d rec
- PA.writePrimArray quo j qj
- loop (pred j) rnex
- loop (lu - 2) r0
-
-add_to
- :: PrimMonad m
- => PA.MutablePrimArray (PrimState m) Word64
- -> Int
- -> Word256
- -> Int
- -> m Word64
-add_to x x_offset (Word256 y0 y1 y2 y3) l = do
- let loop !j !cacc
- | j == l = pure cacc
- | otherwise = do
- xj <- PA.readPrimArray x (j + x_offset)
- let !(P nex carry) = case j of
- 0 -> add_c xj y0 cacc
- 1 -> add_c xj y1 cacc
- 2 -> add_c xj y2 cacc
- 3 -> add_c xj y3 cacc
- _ -> error "ppad-fixed (add_to): bad index"
- PA.writePrimArray x (j + x_offset) nex
- loop (succ j) carry
- loop 0 0
-
-quotrem
- :: PrimMonad m
- => PA.MutablePrimArray (PrimState m) Word64 -- quotient (potentially large)
- -> PA.PrimArray Word64 -- dividend (potentially large)
- -> Word256 -- divisor (256-bit)
- -> m Word256 -- remainder (256-bit)
-quotrem quo u (Word256 d0 d1 d2 d3) = do
- let -- normalize divisor
- (dlen, shift)
- | d3 /= 0 = (4, B.countLeadingZeros d3)
- | d2 /= 0 = (3, B.countLeadingZeros d2)
- | d1 /= 0 = (2, B.countLeadingZeros d1)
- | otherwise = (1, B.countLeadingZeros d0) -- zero not checked
- dn_3 = (d3 .<<. shift) .|. (d2 .>>. (64 - shift))
- dn_2 = (d2 .<<. shift) .|. (d1 .>>. (64 - shift))
- dn_1 = (d1 .<<. shift) .|. (d0 .>>. (64 - shift))
- dn_0 = d0 .<<. shift
- !dn = Word256 dn_0 dn_1 dn_2 dn_3
- -- get size of normalized dividend
- lu = PA.sizeofPrimArray u
- ulen = let loop !j
- | j < 0 = 0
- | PA.indexPrimArray u j /= 0 = j + 1
- | otherwise = loop (j - 1)
- in loop (lu - 1)
- if ulen < dlen
- then do
- -- u always has size at least 4
- let u0 = PA.indexPrimArray u 0; u1 = PA.indexPrimArray u 1
- u2 = PA.indexPrimArray u 2; u3 = PA.indexPrimArray u 3
- pure (Word256 u0 u1 u2 u3)
- else do
- -- normalize dividend
- !un <- PA.newPrimArray (ulen + 1)
- PA.setPrimArray un 0 (ulen + 1) 0
- let u_hi = PA.indexPrimArray u (ulen - 1)
- PA.writePrimArray un ulen (u_hi .>>. (64 - shift))
- let normalize_u !j !uj
- | j == 0 =
- PA.writePrimArray un 0 (PA.indexPrimArray u 0 .<<. shift)
- | otherwise = do
- let !uj_1 = PA.indexPrimArray u (j - 1)
- !val = (uj .<<. shift) .|. (uj_1 .>>. (64 - shift))
- PA.writePrimArray un j val
- normalize_u (pred j) uj_1
- normalize_u (ulen - 1) u_hi
- if dlen == 1
- then do
- -- normalized divisor is small
- !un_final <- PA.unsafeFreezePrimArray un
- !r <- quotrem_by1 quo un_final dn_0
- pure (Word256 (r .>>. shift) 0 0 0)
- else do
- quotrem_knuth quo un dn dlen
- -- compute unnormalized remainder
- let unn_rem !j !(Word256 r0 r1 r2 r3) !un_j
- | j == dlen = pure $ case j of
- 2 -> Word256 r0 (un_j .>>. shift) r2 r3
- 3 -> Word256 r0 r1 (un_j .>>. shift) r3
- 4 -> Word256 r0 r1 r2 (un_j .>>. shift)
- _ -> error "ppad-fixed (quotrem): bad index"
- | otherwise = do
- un_j_1 <- PA.readPrimArray un (j + 1)
- let !unn_j = (un_j .>>. shift) .|. (un_j_1 .<<. (64 - shift))
- !nacc = case j of
- 0 -> Word256 unn_j 0 0 0
- 1 -> Word256 r0 unn_j 0 0
- 2 -> Word256 r0 r1 unn_j 0
- 3 -> Word256 r0 r1 r2 unn_j
- _ -> error "ppad-fixed (quotrem): bad index"
- unn_rem (j + 1) nacc un_j_1
- !un_0 <- PA.readPrimArray un 0
- unn_rem 0 zero un_0
-
-quotrem_knuth
- :: PrimMonad m
- => PA.MutablePrimArray (PrimState m) Word64 -- quotient (potentially large)
- -> PA.MutablePrimArray (PrimState m) Word64 -- normalized dividend
- -> Word256 -- normalized divisor
- -> Int -- words in normalized divisor
- -> m ()
-quotrem_knuth quo u d@(Word256 d0 d1 d2 d3) ld = do
- !lu <- PA.getSizeofMutablePrimArray u
- let (dh, dl) = case ld of
- 4 -> (d3, d2)
- 3 -> (d2, d1)
- 2 -> (d1, d0)
- _ -> error "ppad-fixed (quotrem_knuth): bad index"
- !rec = recip_2by1 dh
- loop !j
- | j < 0 = pure ()
- | otherwise = do
- !u2 <- PA.readPrimArray u (j + ld)
- !u1 <- PA.readPrimArray u (j + ld - 1)
- !u0 <- PA.readPrimArray u (j + ld - 2)
- let !qhat
- | u2 >= dh = 0xffff_ffff_ffff_ffff
- | otherwise =
- let !(P qh rh) = quotrem_2by1 u2 u1 dh rec
- !(P ph pl) = mul_c qh dl
- in if ph > rh || (ph == rh && pl > u0)
- then qh - 1
- else qh
-
- borrow <- sub_mul u j d ld qhat
- PA.writePrimArray u (j + ld) (u2 - borrow)
- if u2 < borrow
- then do
- let !qh = qhat - 1
- r <- add_to u j d ld
- PA.writePrimArray u (j + ld) r
- PA.writePrimArray quo j qh
- else
- PA.writePrimArray quo j qhat
- loop (pred j)
- loop (lu - ld - 1)
+sub_mul :: Word576 -> Int -> Word256 -> Int -> Word64 -> Word640
+sub_mul u u_start (Word256 d0 d1 d2 d3) d_len m = case d_len of
+ 2 -> let !(Word640 u_0 b_0) = step0 u 0
+ in step1 u_0 b_0
+ 3 -> let !(Word640 u_0 b_0) = step0 u 0
+ !(Word640 u_1 b_1) = step1 u_0 b_0
+ in step2 u_1 b_1
+ 4 -> let !(Word640 u_0 b_0) = step0 u 0
+ !(Word640 u_1 b_1) = step1 u_0 b_0
+ !(Word640 u_2 b_2) = step2 u_1 b_1
+ in step3 u_2 b_2
+ _ ->
+ error "ppad-fixed (sub_mul): bad index"
+ where
+ step0 (Word576 u0 u1 u2 u3 u4 u5 u6 u7 u8) bor = case u_start of
+ 0 ->
+ let !(P s car1) = sub_b u0 bor 0
+ !(P ph pl) = mul_c d0 m
+ !(P t car2) = sub_b s pl 0
+ in Word640 (Word576 t u1 u2 u3 u4 u5 u6 u7 u8) (ph + car1 + car2)
+ 1 ->
+ let !(P s car1) = sub_b u1 bor 0
+ !(P ph pl) = mul_c d0 m
+ !(P t car2) = sub_b s pl 0
+ in Word640 (Word576 u0 t u2 u3 u4 u5 u6 u7 u8) (ph + car1 + car2)
+ 2 ->
+ let !(P s car1) = sub_b u2 bor 0
+ !(P ph pl) = mul_c d0 m
+ !(P t car2) = sub_b s pl 0
+ in Word640 (Word576 u0 u1 t u3 u4 u5 u6 u7 u8) (ph + car1 + car2)
+ 3 ->
+ let !(P s car1) = sub_b u3 bor 0
+ !(P ph pl) = mul_c d0 m
+ !(P t car2) = sub_b s pl 0
+ in Word640 (Word576 u0 u1 u2 t u4 u5 u6 u7 u8) (ph + car1 + car2)
+ 4 ->
+ let !(P s car1) = sub_b u4 bor 0
+ !(P ph pl) = mul_c d0 m
+ !(P t car2) = sub_b s pl 0
+ in Word640 (Word576 u0 u1 u2 u3 t u5 u6 u7 u8) (ph + car1 + car2)
+ 5 ->
+ let !(P s car1) = sub_b u5 bor 0
+ !(P ph pl) = mul_c d0 m
+ !(P t car2) = sub_b s pl 0
+ in Word640 (Word576 u0 u1 u2 u3 u4 t u6 u7 u8) (ph + car1 + car2)
+ 6 ->
+ let !(P s car1) = sub_b u6 bor 0
+ !(P ph pl) = mul_c d0 m
+ !(P t car2) = sub_b s pl 0
+ in Word640 (Word576 u0 u1 u2 u3 u4 u5 t u7 u8) (ph + car1 + car2)
+ _ ->
+ error "ppad-fixed (step0): bad index"
+
+ step1 (Word576 u0 u1 u2 u3 u4 u5 u6 u7 u8) bor = case u_start of
+ 0 ->
+ let !(P s car1) = sub_b u1 bor 0
+ !(P ph pl) = mul_c d1 m
+ !(P t car2) = sub_b s pl 0
+ in Word640 (Word576 u0 t u2 u3 u4 u5 u6 u7 u8) (ph + car1 + car2)
+ 1 ->
+ let !(P s car1) = sub_b u2 bor 0
+ !(P ph pl) = mul_c d1 m
+ !(P t car2) = sub_b s pl 0
+ in Word640 (Word576 u0 u1 t u3 u4 u5 u6 u7 u8) (ph + car1 + car2)
+ 2 ->
+ let !(P s car1) = sub_b u3 bor 0
+ !(P ph pl) = mul_c d1 m
+ !(P t car2) = sub_b s pl 0
+ in Word640 (Word576 u0 u1 u3 t u4 u5 u6 u7 u8) (ph + car1 + car2)
+ 3 ->
+ let !(P s car1) = sub_b u4 bor 0
+ !(P ph pl) = mul_c d1 m
+ !(P t car2) = sub_b s pl 0
+ in Word640 (Word576 u0 u1 u3 u4 t u5 u6 u7 u8) (ph + car1 + car2)
+ 4 ->
+ let !(P s car1) = sub_b u5 bor 0
+ !(P ph pl) = mul_c d1 m
+ !(P t car2) = sub_b s pl 0
+ in Word640 (Word576 u0 u1 u3 u4 u5 t u6 u7 u8) (ph + car1 + car2)
+ 5 ->
+ let !(P s car1) = sub_b u6 bor 0
+ !(P ph pl) = mul_c d1 m
+ !(P t car2) = sub_b s pl 0
+ in Word640 (Word576 u0 u1 u3 u4 u5 u6 t u7 u8) (ph + car1 + car2)
+ 6 ->
+ let !(P s car1) = sub_b u7 bor 0
+ !(P ph pl) = mul_c d1 m
+ !(P t car2) = sub_b s pl 0
+ in Word640 (Word576 u0 u1 u3 u4 u5 u6 t u7 u8) (ph + car1 + car2)
+ _ ->
+ error "ppad-fixed (step1): bad index"
+
+ step2 (Word576 u0 u1 u2 u3 u4 u5 u6 u7 u8) bor = case u_start of
+ 0 ->
+ let !(P s car1) = sub_b u2 bor 0
+ !(P ph pl) = mul_c d2 m
+ !(P t car2) = sub_b s pl 0
+ in Word640 (Word576 u0 u1 t u3 u4 u5 u6 u7 u8) (ph + car1 + car2)
+ 1 ->
+ let !(P s car1) = sub_b u3 bor 0
+ !(P ph pl) = mul_c d2 m
+ !(P t car2) = sub_b s pl 0
+ in Word640 (Word576 u0 u1 u2 t u4 u5 u6 u7 u8) (ph + car1 + car2)
+ 2 ->
+ let !(P s car1) = sub_b u4 bor 0
+ !(P ph pl) = mul_c d2 m
+ !(P t car2) = sub_b s pl 0
+ in Word640 (Word576 u0 u1 u2 u3 t u5 u6 u7 u8) (ph + car1 + car2)
+ 3 ->
+ let !(P s car1) = sub_b u5 bor 0
+ !(P ph pl) = mul_c d2 m
+ !(P t car2) = sub_b s pl 0
+ in Word640 (Word576 u0 u1 u2 u3 u4 t u6 u7 u8) (ph + car1 + car2)
+ 4 ->
+ let !(P s car1) = sub_b u6 bor 0
+ !(P ph pl) = mul_c d2 m
+ !(P t car2) = sub_b s pl 0
+ in Word640 (Word576 u0 u1 u2 u3 u4 u5 t u7 u8) (ph + car1 + car2)
+ 5 ->
+ let !(P s car1) = sub_b u7 bor 0
+ !(P ph pl) = mul_c d2 m
+ !(P t car2) = sub_b s pl 0
+ in Word640 (Word576 u0 u1 u2 u3 u4 u5 u6 t u8) (ph + car1 + car2)
+ 6 ->
+ let !(P s car1) = sub_b u8 bor 0
+ !(P ph pl) = mul_c d2 m
+ !(P t car2) = sub_b s pl 0
+ in Word640 (Word576 u0 u1 u2 u3 u4 u5 u6 u7 t) (ph + car1 + car2)
+ _ ->
+ error "ppad-fixed (step2): bad index"
+
+ step3 (Word576 u0 u1 u2 u3 u4 u5 u6 u7 u8) bor = case u_start of
+ 0 ->
+ let !(P s car1) = sub_b u3 bor 0
+ !(P ph pl) = mul_c d3 m
+ !(P t car2) = sub_b s pl 0
+ in Word640 (Word576 u0 u1 u2 t u4 u5 u6 u7 u8) (ph + car1 + car2)
+ 1 ->
+ let !(P s car1) = sub_b u4 bor 0
+ !(P ph pl) = mul_c d3 m
+ !(P t car2) = sub_b s pl 0
+ in Word640 (Word576 u0 u1 u2 u3 t u5 u6 u7 u8) (ph + car1 + car2)
+ 2 ->
+ let !(P s car1) = sub_b u5 bor 0
+ !(P ph pl) = mul_c d3 m
+ !(P t car2) = sub_b s pl 0
+ in Word640 (Word576 u0 u1 u2 u3 u4 t u6 u7 u8) (ph + car1 + car2)
+ 3 ->
+ let !(P s car1) = sub_b u6 bor 0
+ !(P ph pl) = mul_c d3 m
+ !(P t car2) = sub_b s pl 0
+ in Word640 (Word576 u0 u1 u2 u3 u4 u5 t u7 u8) (ph + car1 + car2)
+ 4 ->
+ let !(P s car1) = sub_b u7 bor 0
+ !(P ph pl) = mul_c d3 m
+ !(P t car2) = sub_b s pl 0
+ in Word640 (Word576 u0 u1 u2 u3 u4 u5 u6 t u8) (ph + car1 + car2)
+ 5 ->
+ let !(P s car1) = sub_b u8 bor 0
+ !(P ph pl) = mul_c d3 m
+ !(P t car2) = sub_b s pl 0
+ in Word640 (Word576 u0 u1 u2 u3 u4 u5 u6 u7 t) (ph + car1 + car2)
+ _ ->
+ error "ppad-fixed (step3): bad index"
+
+add_big :: Word576 -> Int -> Word256 -> Int -> Word640
+add_big u u_start (Word256 d0 d1 d2 d3) d_len = case d_len of
+ 2 ->
+ let !(Word640 u_0 c_0) = step0 u 0
+ in step1 u_0 c_0
+ 3 ->
+ let !(Word640 u_0 c_0) = step0 u 0
+ !(Word640 u_1 c_1) = step1 u_0 c_0
+ in step2 u_1 c_1
+ 4 ->
+ let !(Word640 u_0 c_0) = step0 u 0
+ !(Word640 u_1 c_1) = step1 u_0 c_0
+ !(Word640 u_2 c_2) = step2 u_1 c_1
+ in step3 u_2 c_2
+ _ ->
+ error "ppad-fixed (add_big): bad index"
+ where
+ step0 (Word576 u0 u1 u2 u3 u4 u5 u6 u7 u8) car = case u_start of
+ 0 ->
+ let !(P t car1) = add_c u0 d0 car
+ in Word640 (Word576 t u1 u2 u3 u4 u5 u6 u7 u8) car1
+ 1 ->
+ let !(P t car1) = add_c u1 d0 car
+ in Word640 (Word576 u0 t u2 u3 u4 u5 u6 u7 u8) car1
+ 2 ->
+ let !(P t car1) = add_c u2 d0 car
+ in Word640 (Word576 u0 u1 t u3 u4 u5 u6 u7 u8) car1
+ 3 ->
+ let !(P t car1) = add_c u3 d0 car
+ in Word640 (Word576 u0 u1 u2 t u4 u5 u6 u7 u8) car1
+ 4 ->
+ let !(P t car1) = add_c u4 d0 car
+ in Word640 (Word576 u0 u1 u2 u3 t u5 u6 u7 u8) car1
+ 5 ->
+ let !(P t car1) = add_c u5 d0 car
+ in Word640 (Word576 u0 u1 u2 u3 u4 t u6 u7 u8) car1
+ 6 ->
+ let !(P t car1) = add_c u6 d0 car
+ in Word640 (Word576 u0 u1 u2 u3 u4 u5 t u7 u8) car1
+ _ ->
+ error "ppad-fixed (step0): bad index"
+
+ step1 (Word576 u0 u1 u2 u3 u4 u5 u6 u7 u8) car = case u_start of
+ 0 ->
+ let !(P t car1) = add_c u1 d1 car
+ in Word640 (Word576 u0 t u2 u3 u4 u5 u6 u7 u8) car1
+ 1 ->
+ let !(P t car1) = add_c u2 d1 car
+ in Word640 (Word576 u0 u1 t u3 u4 u5 u6 u7 u8) car1
+ 2 ->
+ let !(P t car1) = add_c u3 d1 car
+ in Word640 (Word576 u0 u1 u3 t u4 u5 u6 u7 u8) car1
+ 3 ->
+ let !(P t car1) = add_c u4 d1 car
+ in Word640 (Word576 u0 u1 u3 u4 t u5 u6 u7 u8) car1
+ 4 ->
+ let !(P t car1) = add_c u5 d1 car
+ in Word640 (Word576 u0 u1 u3 u4 u5 t u6 u7 u8) car1
+ 5 ->
+ let !(P t car1) = add_c u6 d1 car
+ in Word640 (Word576 u0 u1 u3 u4 u5 u6 t u7 u8) car1
+ 6 ->
+ let !(P t car1) = add_c u7 d1 car
+ in Word640 (Word576 u0 u1 u3 u4 u5 u6 t u7 u8) car1
+ _ ->
+ error "ppad-fixed (step1): bad index"
+
+ step2 (Word576 u0 u1 u2 u3 u4 u5 u6 u7 u8) car = case u_start of
+ 0 ->
+ let !(P t car1) = add_c u2 d2 car
+ in Word640 (Word576 u0 u1 t u3 u4 u5 u6 u7 u8) car1
+ 1 ->
+ let !(P t car1) = add_c u3 d2 car
+ in Word640 (Word576 u0 u1 u2 t u4 u5 u6 u7 u8) car1
+ 2 ->
+ let !(P t car1) = add_c u4 d2 car
+ in Word640 (Word576 u0 u1 u2 u3 t u5 u6 u7 u8) car1
+ 3 ->
+ let !(P t car1) = add_c u5 d2 car
+ in Word640 (Word576 u0 u1 u2 u3 u4 t u6 u7 u8) car1
+ 4 ->
+ let !(P t car1) = add_c u6 d2 car
+ in Word640 (Word576 u0 u1 u2 u3 u4 u5 t u7 u8) car1
+ 5 ->
+ let !(P t car1) = add_c u7 d2 car
+ in Word640 (Word576 u0 u1 u2 u3 u4 u5 u6 t u8) car1
+ 6 ->
+ let !(P t car1) = add_c u8 d2 car
+ in Word640 (Word576 u0 u1 u2 u3 u4 u5 u6 u7 t) car1
+ _ ->
+ error "ppad-fixed (step2): bad index"
+
+ step3 (Word576 u0 u1 u2 u3 u4 u5 u6 u7 u8) car = case u_start of
+ 0 ->
+ let !(P t car1) = add_c u3 d3 car
+ in Word640 (Word576 u0 u1 u2 t u4 u5 u6 u7 u8) car1
+ 1 ->
+ let !(P t car1) = add_c u4 d3 car
+ in Word640 (Word576 u0 u1 u2 u3 t u5 u6 u7 u8) car1
+ 2 ->
+ let !(P t car1) = add_c u5 d3 car
+ in Word640 (Word576 u0 u1 u2 u3 u4 t u6 u7 u8) car1
+ 3 ->
+ let !(P t car1) = add_c u6 d3 car
+ in Word640 (Word576 u0 u1 u2 u3 u4 u5 t u7 u8) car1
+ 4 ->
+ let !(P t car1) = add_c u7 d3 car
+ in Word640 (Word576 u0 u1 u2 u3 u4 u5 u6 t u8) car1
+ 5 ->
+ let !(P t car1) = add_c u8 d3 car
+ in Word640 (Word576 u0 u1 u2 u3 u4 u5 u6 u7 t) car1
+ _ ->
+ error "ppad-fixed (step3): bad index"
-- quotient, remainder of (hi, lo) divided by y
-- translated from Div64 in go's math/bits package
@@ -663,50 +795,261 @@ quotrem_2by1 uh ul d rec =
then P (qh_y + 1) (r_y - d)
else P qh_y r_y
+quotrem_by1
+ :: Word576 -- dividend
+ -> Int -- dividend length
+ -> Word64 -- divisor
+ -> Word640
+quotrem_by1 (Word576 u0 u1 u2 u3 u4 u5 u6 u7 u8) ulen d = case ulen of
+ 9 ->
+ let !r_0 = u8
+ !(Word640 q0 r0) = step7 zero576 r_0
+ !(Word640 q1 r1) = step6 q0 r0
+ !(Word640 q2 r2) = step5 q1 r1
+ !(Word640 q3 r3) = step4 q2 r2
+ !(Word640 q4 r4) = step3 q3 r3
+ !(Word640 q5 r5) = step2 q4 r4
+ !(Word640 q6 r6) = step1 q5 r5
+ in step0 q6 r6
+ 8 ->
+ let !r_0 = u7
+ !(Word640 q0 r0) = step6 zero576 r_0
+ !(Word640 q1 r1) = step5 q0 r0
+ !(Word640 q2 r2) = step4 q1 r1
+ !(Word640 q3 r3) = step3 q2 r2
+ !(Word640 q4 r4) = step2 q3 r3
+ !(Word640 q5 r5) = step1 q4 r4
+ in step0 q5 r5
+ 7 ->
+ let !r_0 = u6
+ !(Word640 q0 r0) = step5 zero576 r_0
+ !(Word640 q1 r1) = step4 q0 r0
+ !(Word640 q2 r2) = step3 q1 r1
+ !(Word640 q3 r3) = step2 q2 r2
+ !(Word640 q4 r4) = step1 q3 r3
+ in step0 q4 r4
+ 6 ->
+ let !r_0 = u5
+ !(Word640 q0 r0) = step4 zero576 r_0
+ !(Word640 q1 r1) = step3 q0 r0
+ !(Word640 q2 r2) = step2 q1 r1
+ !(Word640 q3 r3) = step1 q2 r2
+ in step0 q3 r3
+ 5 ->
+ let !r_0 = u4
+ !(Word640 q0 r0) = step3 zero576 r_0
+ !(Word640 q1 r1) = step2 q0 r0
+ !(Word640 q2 r2) = step1 q1 r1
+ in step0 q2 r2
+ 4 ->
+ let !r_0 = u3
+ !(Word640 q0 r0) = step2 zero576 r_0
+ !(Word640 q1 r1) = step1 q0 r0
+ in step0 q1 r1
+ 3 ->
+ let !r_0 = u2
+ !(Word640 q0 r0) = step1 zero576 r_0
+ in step0 q0 r0
+ 2 ->
+ let !r_0 = u1
+ in step0 zero576 r_0
+ _ ->
+ error "ppad-fixed (quotrem_by1): bad index"
+ where
+ !rec = recip_2by1 d
+
+ step0 (Word576 _ q1 q2 q3 q4 q5 q6 q7 q8) r =
+ let !(P q nr) = quotrem_2by1 r u0 d rec
+ in Word640 (Word576 q q1 q2 q3 q4 q5 q6 q7 q8) nr
+
+ step1 (Word576 q0 _ q2 q3 q4 q5 q6 q7 q8) r =
+ let !(P q nr) = quotrem_2by1 r u1 d rec
+ in Word640 (Word576 q0 q q2 q3 q4 q5 q6 q7 q8) nr
+
+ step2 (Word576 q0 q1 _ q3 q4 q5 q6 q7 q8) r =
+ let !(P q nr) = quotrem_2by1 r u2 d rec
+ in Word640 (Word576 q0 q1 q q3 q4 q5 q6 q7 q8) nr
+
+ step3 (Word576 q0 q1 q2 _ q4 q5 q6 q7 q8) r =
+ let !(P q nr) = quotrem_2by1 r u3 d rec
+ in Word640 (Word576 q0 q1 q2 q q4 q5 q6 q7 q8) nr
+
+ step4 (Word576 q0 q1 q2 q3 _ q5 q6 q7 q8) r =
+ let !(P q nr) = quotrem_2by1 r u4 d rec
+ in Word640 (Word576 q0 q1 q2 q3 q q5 q6 q7 q8) nr
+
+ step5 (Word576 q0 q1 q2 q3 q4 _ q6 q7 q8) r =
+ let !(P q nr) = quotrem_2by1 r u5 d rec
+ in Word640 (Word576 q0 q1 q2 q3 q4 q q6 q7 q8) nr
+ step6 (Word576 q0 q1 q2 q3 q4 q5 _ q7 q8) r =
+ let !(P q nr) = quotrem_2by1 r u6 d rec
+ in Word640 (Word576 q0 q1 q2 q3 q4 q5 q q7 q8) nr
+
+ step7 (Word576 q0 q1 q2 q3 q4 q5 q6 _ q8) r =
+ let !(P q nr) = quotrem_2by1 r u7 d rec
+ in Word640 (Word576 q0 q1 q2 q3 q4 q5 q6 q q8) nr
+
+ -- XX expensive
+quotrem_knuth
+ :: Word576
+ -> Int
+ -> Word256
+ -> Int
+ -> Word1152
+quotrem_knuth u ulen d dlen = loop (ulen - dlen - 1) zero576 u where
+ !d_hi = sel256 d (dlen - 1)
+ !d_lo = sel256 d (dlen - 2)
+ !rec = recip_2by1 d_hi
+ loop j !qacc !uacc
+ | j < 0 = Word1152 qacc uacc
+ | otherwise =
+ let !u_2 = sel576 u (j + dlen)
+ !u_1 = sel576 u (j + dlen - 1)
+ !u_0 = sel576 u (j + dlen - 2)
+ !qhat
+ | u_2 >= d_hi = 0xffff_ffff_ffff_ffff
+ | otherwise =
+ let !(P qh rh) = quotrem_2by1 u_2 u_1 d_hi rec
+ !(P ph pl) = mul_c qh d_lo
+ in if ph > rh || (ph == rh && pl > u_0)
+ then qh - 1
+ else qh
+
+ !(Word640 u0 borrow) = sub_mul uacc j d dlen qhat
+ !u1 = set576 u0 (j + dlen) (u_2 - borrow)
+ in if u_2 < borrow
+ then
+ let !qh = qhat - 1
+ !(Word640 u2 r) = add_big u1 j d dlen
+ !u3 = set576 u2 (j + dlen) r
+ !q = set576 qacc j qh
+ in loop (pred j) q u3
+ else
+ let !q = set576 qacc j qhat
+ in loop (pred j) q u1
+
+quotrem
+ :: Word576
+ -> Word256
+ -> Word832
+quotrem u@(Word576 u0 u1 u2 u3 u4 u5 u6 u7 u8) d@(Word256 d0 d1 d2 d3) =
+ let !dlen = setlen_256 d
+ !shift = B.countLeadingZeros d3
+ !dn = fill256 (dlen - 1) shift
+ !ulen = setlen_576 u
+ in if ulen < dlen
+ then Word832 zero576 (Word256 u0 u1 u2 u3)
+ else
+ let !u_ulen = sel576 u (ulen - 1)
+ !un_pre0 = set576 zero576 ulen (u_ulen .>>. (64 - shift))
+ !un = fill576 (ulen - 1) un_pre0 shift
+ in if dlen == 1
+ then
+ let !dn_0 = sel256 dn 0
+ !(Word640 q r) = quotrem_by1 un (ulen + 1) dn_0
+ in Word832 q (Word256 (r .>>. shift) 0 0 0)
+ else
+ let !(Word1152 q un0) = quotrem_knuth un (ulen + 1) dn dlen
+ !r_pre = fill_rem dlen un0 shift
+ !un_dlen_1 = sel576 un0 (dlen - 1)
+ !r = set256 r_pre (dlen - 1) (un_dlen_1 .>>.shift)
+ in Word832 q r
+ where
+ fill_rem !dl !(Word576 w0 w1 w2 w3 w4 _ _ _ _) !s =
+ let v0 = (w0 .>>. s) .|. (w1 .<<. (64 - s))
+ v1 = (w1 .>>. s) .|. (w2 .<<. (64 - s))
+ v2 = (w2 .>>. s) .|. (w3 .<<. (64 - s))
+ v3 = (w3 .>>. s) .|. (w4 .<<. (64 - s))
+ in case dl of
+ 0 -> zero
+ 1 -> Word256 v0 0 0 0
+ 2 -> Word256 v0 v1 0 0
+ 3 -> Word256 v0 v1 v2 0
+ 4 -> Word256 v0 v1 v2 v3
+ _ -> error "ppad-fixed (fill_rem): bad index"
+
+ fill576
+ !start
+ (Word576 _ tar1 tar2 tar3 tar4 tar5 tar6 tar7 tar8)
+ !s =
+ let v8 = (u8 .<<. s) .|. (u7 .>>. (64 - s))
+ v7 = (u7 .<<. s) .|. (u6 .>>. (64 - s))
+ v6 = (u6 .<<. s) .|. (u5 .>>. (64 - s))
+ v5 = (u5 .<<. s) .|. (u4 .>>. (64 - s))
+ v4 = (u4 .<<. s) .|. (u3 .>>. (64 - s))
+ v3 = (u3 .<<. s) .|. (u2 .>>. (64 - s))
+ v2 = (u2 .<<. s) .|. (u1 .>>. (64 - s))
+ v1 = (u1 .<<. s) .|. (u0 .>>. (64 - s))
+ in case start of
+ 8 -> Word576 (u0 .<<. s) v1 v2 v3 v4 v5 v6 v7 v8
+ 7 -> Word576 (u0 .<<. s) v1 v2 v3 v4 v5 v6 v7 tar8
+ 6 -> Word576 (u0 .<<. s) v1 v2 v3 v4 v5 v6 tar7 tar8
+ 5 -> Word576 (u0 .<<. s) v1 v2 v3 v4 v5 tar6 tar7 tar8
+ 4 -> Word576 (u0 .<<. s) v1 v2 v3 v4 tar5 tar6 tar7 tar8
+ 3 -> Word576 (u0 .<<. s) v1 v2 v3 tar4 tar5 tar6 tar7 tar8
+ 2 -> Word576 (u0 .<<. s) v1 v2 tar3 tar4 tar5 tar6 tar7 tar8
+ 1 -> Word576 (u0 .<<. s) v1 tar2 tar3 tar4 tar5 tar6 tar7 tar8
+ 0 -> Word576 (u0 .<<. s) tar1 tar2 tar3 tar4 tar5 tar6 tar7 tar8
+ _ -> error "ppad-fixed (fill576): bad index"
+
+ fill256 !start !s =
+ let v3 = (d3 .<<. s) .|. (d2 .>>. (64 - s))
+ v2 = (d2 .<<. s) .|. (d1 .>>. (64 - s))
+ v1 = (d1 .<<. s) .|. (d0 .>>. (64 - s))
+ in case start of
+ 3 -> Word256 (d0 .<<. s) v1 v2 v3
+ 2 -> Word256 (d0 .<<. s) v1 v2 0
+ 1 -> Word256 (d0 .<<. s) v1 0 0
+ 0 -> Word256 (d0 .<<. s) 0 0 0
+ _ -> error "ppad-fixed (fill576): bad index"
+
+ setlen_256 :: Word256 -> Int
+ setlen_256 (Word256 z0 z1 z2 z3)
+ | z3 /= 0 = 4
+ | z2 /= 0 = 3
+ | z1 /= 0 = 2
+ | z0 /= 0 = 1
+ | otherwise = error "ppad-fixed (quotrem_256): division by zero"
+
+ setlen_576 :: Word576 -> Int
+ setlen_576 (Word576 z0 z1 z2 z3 z4 z5 z6 z7 z8)
+ | z8 /= 0 = 9
+ | z7 /= 0 = 8
+ | z6 /= 0 = 7
+ | z5 /= 0 = 6
+ | z4 /= 0 = 5
+ | z3 /= 0 = 4
+ | z2 /= 0 = 3
+ | z1 /= 0 = 2
+ | z0 /= 0 = 1
+ | otherwise = error "ppad-fixed (quotrem_256): division by zero"
+
+-- | Division on 'Word256' values.
+--
+-- >>> to_word256 0xFFFFFFFFFF `div` to_word256 0xFFFFFF
+-- 65536
div :: Word256 -> Word256 -> Word256
-div u@(Word256 u0 u1 u2 u3) d@(Word256 d0 _ _ _)
- | is_zero d || d `gt` u = zero -- ?
- | u == d = one
- | is_word64 u = Word256 (u0 `quot` d0) 0 0 0
- | otherwise = runST $ do
- -- allocate quotient
- quo <- PA.newPrimArray 4
- PA.setPrimArray quo 0 4 0
- -- allocate dividend
- u_arr <- PA.newPrimArray 4
- PA.setPrimArray u_arr 0 4 0
- PA.writePrimArray u_arr 0 u0
- PA.writePrimArray u_arr 1 u1
- PA.writePrimArray u_arr 2 u2
- PA.writePrimArray u_arr 3 u3
- u_final <- PA.unsafeFreezePrimArray u_arr
- _ <- quotrem quo u_final d
- q0 <- PA.readPrimArray quo 0
- q1 <- PA.readPrimArray quo 1
- q2 <- PA.readPrimArray quo 2
- q3 <- PA.readPrimArray quo 3
- pure (Word256 q0 q1 q2 q3)
+div a@(Word256 a0 a1 a2 a3) b@(Word256 b0 _ _ _)
+ | is_zero b || b `gt` a = zero -- ?
+ | a == b = one
+ | is_word64 a = Word256 (a0 `quot` b0) 0 0 0
+ | otherwise =
+ let !u = Word576 a0 a1 a2 a3 0 0 0 0 0
+ !(Word832 (Word576 q0 q1 q2 q3 _ _ _ _ _) _) = quotrem u b
+ in Word256 q0 q1 q2 q3
-- | Modulo operation on 'Word256' values.
--
-- >>> to_word256 0xFFFFFFFFFF `mod` to_word256 0xFFFFFF
-- 65535
mod :: Word256 -> Word256 -> Word256
-mod u@(Word256 u0 u1 u2 u3) d@(Word256 d0 _ _ _)
- | is_zero d || d `gt` u = zero -- ?
- | u == d = one
- | is_word64 u = Word256 (u0 `quot` d0) 0 0 0
- | otherwise = runST $ do
- -- allocate quotient
- quo <- PA.newPrimArray 4
- PA.setPrimArray quo 0 4 0
- -- allocate dividend
- u_arr <- PA.newPrimArray 4
- PA.setPrimArray u_arr 0 4 0
- PA.writePrimArray u_arr 0 u0
- PA.writePrimArray u_arr 1 u1
- PA.writePrimArray u_arr 2 u2
- PA.writePrimArray u_arr 3 u3
- u_final <- PA.unsafeFreezePrimArray u_arr
- quotrem quo u_final d
+mod a@(Word256 a0 a1 a2 a3) b@(Word256 b0 _ _ _)
+ | is_zero b || a == b = zero -- ?
+ | a `lt` b = a
+ | is_word64 a = Word256 (a0 `Prelude.rem` b0) 0 0 0
+ | otherwise =
+ let !u = Word576 a0 a1 a2 a3 0 0 0 0 0
+ !(Word832 _ r) = quotrem u b
+ in r
+
diff --git a/ppad-fixed.cabal b/ppad-fixed.cabal
@@ -27,7 +27,6 @@ library
build-depends:
base >= 4.9 && < 5
, deepseq >= 1.5 && < 1.6
- , primitive >= 0.8 && < 0.10
test-suite fixed-tests
type: exitcode-stdio-1.0
@@ -59,7 +58,6 @@ benchmark fixed-bench
base
, criterion
, ppad-fixed
- , primitive
benchmark fixed-weigh
type: exitcode-stdio-1.0
@@ -73,7 +71,6 @@ benchmark fixed-weigh
build-depends:
base
, ppad-fixed
- , primitive
, weigh
executable fixed-profile
diff --git a/test/Main.hs b/test/Main.hs
@@ -184,6 +184,93 @@ quotrem_2by1_case0 = do
!o = quotrem_2by1 8 4 d (recip_2by1 d)
H.assertEqual mempty (P 8 2052) o
+quotrem_by1_case0 :: H.Assertion
+quotrem_by1_case0 = do
+ let !u = Word576 8 4 0 0 0 0 0 0 0
+ !d = B.complement 0xFF :: Word64
+ !(Word640 q r) = quotrem_by1 u 2 d
+ let pec_quo = Word576 4 0 0 0 0 0 0 0 0
+ pec_rem = 1032
+ H.assertEqual "remainder matches" pec_rem r
+ H.assertEqual "quotient matches" pec_quo q
+
+quotrem_by1_case1 :: H.Assertion
+quotrem_by1_case1 = do
+ let !u = Word576 8 26 0 0 0 0 0 0 0
+ !d = B.complement 0xFF :: Word64
+ !(Word640 q r) = quotrem_by1 u 2 d
+ let pec_quo = Word576 26 0 0 0 0 0 0 0 0
+ pec_rem = 6664
+ H.assertEqual "remainder matches" pec_rem r
+ H.assertEqual "quotient matches" pec_quo q
+
+quotrem_knuth_case0 :: H.Assertion
+quotrem_knuth_case0 = do
+ let !u = Word576
+ 2162362899639802732
+ 8848548347662387477
+ 13702897166684377657
+ 16799544643779908154
+ 1
+ 0 0 0 0
+ !d = Word256
+ 16950798510782491100
+ 2612788699139816405
+ 5146719872810836952
+ 14966148379609982000
+ !(Word1152 q nu) = quotrem_knuth u 5 d 4
+ !pec_q = Word576 2 0 0 0 0 0 0 0 0
+ !pec_u = Word576
+ 5154254025493923764
+ 3622970949382754665
+ 3409457421062703753
+ 5313991958269495770
+ 0
+ 0 0 0 0
+ H.assertEqual "divisor matches" pec_u nu
+ H.assertEqual "quotient matches" pec_q q
+
+quotrem_case0 :: H.Assertion
+quotrem_case0 = do
+ let !u = Word576
+ 0x1234567890ABCDEF
+ 0xFEDCBA0987654321
+ 0x123456789ABCDEF0
+ 0 0 0 0 0 0
+ !d = Word256 0x0 0x0 0x1 0x100000000
+ !(Word832 q r) = quotrem u d
+ !pec_q = Word576 0 0 0 0 0 0 0 0 0
+ !pec_r = Word256
+ 1311768467294899695
+ 18364757930599072545
+ 1311768467463790320
+ 0
+ H.assertEqual "remainder matches" pec_r r
+ H.assertEqual "quotient matches" pec_q q
+
+quotrem_case1 :: H.Assertion
+quotrem_case1 = do
+ let !u = Word576
+ 5152276743337338587
+ 6823823105342984773
+ 12649096328525870222
+ 8811572179372364942
+ 0 0 0 0 0
+ !d = Word256
+ 8849385646123010679
+ 653197174784954101
+ 1286679968202709238
+ 3741537094902495500
+ !(Word832 q r) = quotrem u d
+ !pec_q = Word576 2 0 0 0 0 0 0 0 0
+ !pec_r = Word256
+ 5900249524800868845
+ 5517428755773076570
+ 10075736392120451746
+ 1328497989567373942
+ H.assertEqual "remainder matches" pec_r r
+ H.assertEqual "quotient matches" pec_q q
+
-- main -----------------------------------------------------------------------
comparison :: TestTree
@@ -252,6 +339,11 @@ main = defaultMain $
, H.testCase "recip_2by1 matches case0" recip_2by1_case0
, H.testCase "recip_2by1 matches case1" recip_2by1_case1
, H.testCase "quotrem_2by1 matches case0" quotrem_2by1_case0
+ , H.testCase "quotrem_by1 matches case0" quotrem_by1_case0
+ , H.testCase "quotrem_by1 matches case1" quotrem_by1_case1
+ , H.testCase "quotrem_knuth matches case0" quotrem_knuth_case0
+ , H.testCase "quotrem matches case0" quotrem_case0
+ , H.testCase "quotrem matches case1" quotrem_case1
]
]