commit 318cd0eeb92cf38a4e9140f0f297973169b9c1b3
parent 32bb0252ce0d0b570efca037a9cffb48533ac52a
Author: Jared Tobin <jared@jtobin.io>
Date: Thu, 23 Jan 2025 22:22:06 +0400
src: profiling executable
Diffstat:
7 files changed, 96 insertions(+), 48 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -1 +1,2 @@
dist-newstyle/
+*.prof
diff --git a/bench/Main.hs b/bench/Main.hs
@@ -14,7 +14,7 @@ import qualified Prelude (div)
instance NFData W.Word256
instance NFData W.Word512
-instance NFData W.Word256WithOverflow
+instance NFData W.Word320
or_baseline :: Benchmark
or_baseline = bench "or (baseline)" $ nf ((.|.) w0) w1 where
@@ -169,10 +169,10 @@ main :: IO ()
main = defaultMain [
quotrem_by1
, quotrem_by1_256
- -- mul_baseline
- -- mul
- --, div_baseline
- --, div
+ , mul_baseline
+ , mul
+ , div_baseline
+ , div
--, mod_baseline
--, mod
--, div_baseline_small
diff --git a/bench/Weight.hs b/bench/Weight.hs
@@ -13,7 +13,7 @@ import qualified Weigh as W
instance NFData E.Word256
instance NFData E.Word512
-instance NFData E.Word256WithOverflow
+instance NFData E.Word320
i0, i1 :: Integer
i0 = 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffed
diff --git a/lib/Data/Word/Extended.hs b/lib/Data/Word/Extended.hs
@@ -144,24 +144,24 @@ add_c w64_0 w64_1 c =
in P s n
-- | A 'Word256' and overflow result, if any.
-data Word256WithOverflow = Word256WithOverflow
+data Word320 = Word320
!Word256
{-# UNPACK #-} !Word64
deriving (Eq, Show, Generic)
-- addition with overflow indication
-add_of :: Word256 -> Word256 -> Word256WithOverflow
+add_of :: Word256 -> Word256 -> Word320
add_of (Word256 a0 a1 a2 a3) (Word256 b0 b1 b2 b3) =
let !(P s0 c0) = add_c a0 b0 0
!(P s1 c1) = add_c a1 b1 c0
!(P s2 c2) = add_c a2 b2 c1
!(P s3 c3) = add_c a3 b3 c2
- in Word256WithOverflow (Word256 s0 s1 s2 s3) c3
+ in Word320 (Word256 s0 s1 s2 s3) c3
-- | Addition on 'Word256' values.
add :: Word256 -> Word256 -> Word256
add w0 w1 = s where
- !(Word256WithOverflow s _) = add_of w0 w1
+ !(Word320 s _) = add_of w0 w1
-- subtract-with-borrow
--
@@ -177,18 +177,18 @@ sub_b w64_0 w64_1 b =
| otherwise = 0
in P d n
-sub_of :: Word256 -> Word256 -> Word256WithOverflow
+sub_of :: Word256 -> Word256 -> Word320
sub_of (Word256 a0 a1 a2 a3) (Word256 b0 b1 b2 b3) =
let !(P s0 c0) = sub_b a0 b0 0
!(P s1 c1) = sub_b a1 b1 c0
!(P s2 c2) = sub_b a2 b2 c1
!(P s3 c3) = sub_b a3 b3 c2
- in Word256WithOverflow (Word256 s0 s1 s2 s3) c3
+ in Word320 (Word256 s0 s1 s2 s3) c3
-- | Subtraction on 'Word256' values.
sub :: Word256 -> Word256 -> Word256
sub w0 w1 = d where
- !(Word256WithOverflow d _) = sub_of w0 w1
+ !(Word320 d _) = sub_of w0 w1
-- multiplication -------------------------------------------------------------
@@ -299,29 +299,32 @@ sub_mul_to x x_offset y m = do
loop (succ j) (ph + carry1 + carry2)
loop 0 0
--- XX requires a way to select fields via the offset passed
-sub_mul256 :: Word256 -> Word256 -> Word64 -> Word256WithOverflow
-sub_mul256 (Word256 x0 x1 x2 x3) (Word256 y0 y1 y2 y3) m =
- let !s0 = x0
- !(P ph0 pl0) = mul_c y0 m
- !(P z0 c0) = sub_b s0 pl0 0
- !b0 = ph0 + c0
-
- !(P s1 c1) = sub_b x1 b0 0
- !(P ph1 pl1) = mul_c y1 m
- !(P z1 c2) = sub_b s1 pl1 0
- !b1 = ph1 + c1 + c2
-
- !(P s2 c3) = sub_b x2 b1 0
- !(P ph2 pl2) = mul_c y2 m
- !(P z2 c4) = sub_b s2 pl2 0
- !b2 = ph2 + c3 + c4
-
- !(P s3 c5) = sub_b x3 b2 0
- !(P ph3 pl3) = mul_c y3 m
- !(P z3 c6) = sub_b s3 pl3 0
- !b3 = ph3 + c5 + c6
- in Word256WithOverflow (Word256 z0 z1 z2 z3) b3
+sub_mul256 :: Word256 -> Int -> Word256 -> Word64 -> Word320
+sub_mul256 (Word256 x0 x1 x2 x3) offset (Word256 y0 y1 y2 y3) m =
+ let !s0 = x 0
+ !(P ph0 pl0) = mul_c y0 m
+ !(P z0 c0) = sub_b s0 pl0 0
+ !b0 = ph0 + c0
+
+ !(P s1 c1) = sub_b (x 1) b0 0
+ !(P ph1 pl1) = mul_c y1 m
+ !(P z1 c2) = sub_b s1 pl1 0
+ !b1 = ph1 + c1 + c2
+
+ !(P s2 c3) = sub_b (x 2) b1 0
+ !(P ph2 pl2) = mul_c y2 m
+ !(P z2 c4) = sub_b s2 pl2 0
+ !b2 = ph2 + c3 + c4
+
+ !(P s3 c5) = sub_b (x 3) b2 0
+ !(P ph3 pl3) = mul_c y3 m
+ !(P z3 c6) = sub_b s3 pl3 0
+ !b3 = ph3 + c5 + c6
+ in Word320 (Word256 z0 z1 z2 z3) b3
+ where
+ x j = case j + offset of
+ 0 -> x0; 1 -> x1; 2 -> x2; 3 -> x3
+ _ -> error "sub_mul256: invalid index"
-- XX primarray
-- requires (len x - x_offset) >= len y > 0
@@ -429,14 +432,14 @@ quotrem_by1 quo u d = do
loop (pred j) rnex
loop (lu - 2) r0
-quotrem_by1_256 :: Word256 -> Word64 -> Word256WithOverflow
+quotrem_by1_256 :: Word256 -> Word64 -> Word320
quotrem_by1_256 (Word256 u0 u1 u2 u3) d =
let !rec = recip_2by1 d
!r0 = u3
!(P q2 r1) = quotrem_2by1 r0 u2 d rec
!(P q1 r2) = quotrem_2by1 r1 u1 d rec
!(P q0 r3) = quotrem_2by1 r2 u0 d rec
- in Word256WithOverflow (Word256 q0 q1 q2 0) r3
+ in Word320 (Word256 q0 q1 q2 0) r3
-- XX primarray
quotrem_knuth
@@ -478,7 +481,7 @@ quotrem_knuth quo u d = do
loop (pred j)
loop (lu - ld - 1)
--- XX primarray
+-- XX primarray; dynamic size requirements
quotrem
:: PrimMonad m
=> PA.MutablePrimArray (PrimState m) Word64
diff --git a/ppad-fixed.cabal b/ppad-fixed.cabal
@@ -78,3 +78,19 @@ benchmark fixed-weigh
, primitive
, weigh
+executable fixed-profile
+ type: exitcode-stdio-1.0
+ default-language: Haskell2010
+ hs-source-dirs: src
+ main-is: Main.hs
+
+ ghc-options:
+ -rtsopts -O2 -Wall
+
+ build-depends:
+ base
+ , criterion
+ , deepseq
+ , ppad-fixed
+ , primitive
+
diff --git a/src/Main.hs b/src/Main.hs
@@ -0,0 +1,37 @@
+{-# LANGUAGE BangPatterns #-}
+
+module Main where
+
+import qualified Data.Primitive.PrimArray as PA
+import Data.Word.Extended
+
+main :: IO ()
+main = do
+ let !u = PA.primArrayFromList [
+ 5152276743337338587
+ , 6823823105342984773
+ , 12649096328525870222
+ , 8811572179372364942
+ ]
+ !d = PA.primArrayFromList [
+ 8849385646123010679
+ , 653197174784954101
+ , 1286679968202709238
+ , 3741537094902495500
+ ]
+
+ re <- PA.newPrimArray 4
+ PA.setPrimArray re 0 4 0
+ quo <- PA.newPrimArray 5
+ let go !j
+ | j == 10000 = pure ()
+ | otherwise = do
+ PA.setPrimArray quo 0 5 0
+ quotrem quo u d (Just re)
+ go (succ j)
+ go 0
+ q <- PA.unsafeFreezePrimArray quo
+ r <- PA.unsafeFreezePrimArray re
+ print r
+ print q
+
diff --git a/test/Main.hs b/test/Main.hs
@@ -100,13 +100,6 @@ umul_step_predicate_holds z x y c =
!rite = fi z + (fi x * fi y) + fi c :: Integer
in left == rite
-sub_mul_matches :: MulMonotonic -> Bool
-sub_mul_matches (MulMonotonic (x, y, m)) =
- let !left = to_word256 (x - y * m)
- !(Word256WithOverflow rite r)
- = sub_mul (to_word256 x) (to_word256 y) (fi m)
- in (left == rite && r == 0)
-
to_word256_inverts_to_integer :: Word256 -> Bool
to_word256_inverts_to_integer w256 =
to_word256 (to_integer w256) == w256
@@ -356,8 +349,6 @@ utils = testGroup "utils" [
Q.withMaxSuccess 1000 umul_hop_predicate_holds
, Q.testProperty "umul_step: (hi * 2 ^ 64 + lo) = z + (x * y) + c" $
Q.withMaxSuccess 1000 umul_step_predicate_holds
- , Q.testProperty "sub_mul matches integer sub_mul (nonneg, monotonic)" $
- Q.withMaxSuccess 1000 sub_mul_matches
]
main :: IO ()