bench: impure vs pure - fixed - Pure Haskell large fixed-width integers.

commit 32bb0252ce0d0b570efca037a9cffb48533ac52a
parent ead3bb7936b693deb349e0015eba99f84439f90f
Author: Jared Tobin <jared@jtobin.io>
Date:   Thu, 23 Jan 2025 20:58:40 +0400

bench: impure vs pure

Unsurprisingly, the pure quotrem_by1_256 function outperforms the impure
(and allocation-heavy) quotrem_by1 by a factor of 2-3. nfAppIO is
required to suss this out.

Per criterion:

> [nfAppIO] is useful when the bulk of the work performed by the
> function is not bound by IO, but rather by pure computations that may
> optimize away if the argument is known statically, as in nfIO/whnfIO.

Diffstat:
M bench/Main.hs  | 65 +++++++++++++++++++++++++++++++++++++++++++++--------------------
M bench/Weight.hs  | 57 +++++++++++++++++++++++++++++++++++++--------------------
M ppad-fixed.cabal  | 2 ++

3 files changed, 84 insertions(+), 40 deletions(-)
diff --git a/bench/Main.hs b/bench/Main.hs
@@ -3,15 +3,18 @@
 
 module Main where
 
-import Data.Bits ((.|.), (.&.), (.^.))
-import qualified Data.Word.Extended as W
 import Control.DeepSeq
 import Criterion.Main
+import Data.Bits ((.|.), (.&.), (.^.))
+import qualified Data.Bits as B
+import qualified Data.Word.Extended as W
+import qualified Data.Primitive.PrimArray as PA
 import Prelude hiding (or, and, div, mod)
 import qualified Prelude (div)
 
 instance NFData W.Word256
 instance NFData W.Word512
+instance NFData W.Word256WithOverflow
 
 or_baseline :: Benchmark
 or_baseline = bench "or (baseline)" $ nf ((.|.) w0) w1 where
@@ -142,25 +145,47 @@ mod = bench "mod" $ nf (W.mod w0) w1 where
   !w1 = W.to_word256
     0x066bd4c3c10e30260cb6e7832af25f15527b089b258a1fef13b6eec3ce73bf06
 
+quotrem_by1 :: Benchmark
+quotrem_by1 = env setup $ \ ~(quo, u, d) ->
+    bench "quotrem_by1" $ nfAppIO (W.quotrem_by1 quo u) d
+  where
+    setup = do
+      quo <- PA.newPrimArray 5
+      PA.setPrimArray quo 0 5 0
+      let u = PA.primArrayFromList [
+              300
+            , 200
+            , 100
+            ]
+          d = B.complement 50
+      pure (quo, u, d)
+
+quotrem_by1_256 :: Benchmark
+quotrem_by1_256 =
+  bench "quotrem_by1_256" $
+    nf (W.quotrem_by1_256 (W.Word256 300 200 100 0)) (B.complement 50)
+
 main :: IO ()
 main = defaultMain [
-    mul_baseline
-  , mul
-  , div_baseline
-  , div
-  , mod_baseline
-  , mod
-  , div_baseline_small
-  , div_small
-  , or_baseline
-  , or
-  , and_baseline
-  , and
-  , xor_baseline
-  , xor
-  , add_baseline
-  , add
-  , sub_baseline
-  , sub
+    quotrem_by1
+  , quotrem_by1_256
+  --  mul_baseline
+  --  mul
+  --, div_baseline
+  --, div
+  --, mod_baseline
+  --, mod
+  --, div_baseline_small
+  --, div_small
+  --, or_baseline
+  --, or
+  --, and_baseline
+  --, and
+  --, xor_baseline
+  --, xor
+  --, add_baseline
+  --, add
+  --, sub_baseline
+  --, sub
   ]
 
diff --git a/bench/Weight.hs b/bench/Weight.hs
@@ -1,42 +1,59 @@
 {-# OPTIONS_GHC -fno-warn-orphans #-}
+{-# LANGUAGE BangPatterns #-}
 {-# LANGUAGE OverloadedStrings #-}
 {-# LANGUAGE PackageImports #-}
 
 module Main where
 
 import Control.DeepSeq
-import qualified Data.Word.Extended as W
+import qualified Data.Bits as B
+import qualified Data.Primitive.PrimArray as PA
+import qualified Data.Word.Extended as E
 import qualified Weigh as W
 
-instance NFData W.Word256
-instance NFData W.Word512
+instance NFData E.Word256
+instance NFData E.Word512
+instance NFData E.Word256WithOverflow
 
 i0, i1 :: Integer
 i0 = 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffed
 i1 = 0x7fffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffed
 
-w0, w1 :: W.Word256
-w0 = W.to_word256 i0
-w1 = W.to_word256 i1
+w0, w1 :: E.Word256
+w0 = E.to_word256 i0
+w1 = E.to_word256 i1
 
 i2, i3 :: Integer
 i2 = 0x41cf50c7d0d65afabcf5ba37750dba71c7db29ec9f20a216d3ef013a59b9188a
 i3 = 0x066bd4c3c10e30260cb6e7832af25f15527b089b258a1fef13b6eec3ce73bf06
 
-w2, w3 :: W.Word256
-w2 = W.to_word256 i2
-w3 = W.to_word256 i3
+w2, w3 :: E.Word256
+w2 = E.to_word256 i2
+w3 = E.to_word256 i3
 
 main :: IO ()
-main = W.mainWith $ do
-  W.func "add (baseline)" ((+) i0) i1
-  W.func "add" (W.add w0) w1
-  W.func "sub (baseline)" ((-) i0) i1
-  W.func "sub" (W.sub w0) w1
-  W.func "mul (baseline)" ((-) i0) i1
-  W.func "mul" (W.mul_512 w0) w1
-  W.func "mul128 (baseline)" ((-) i0) i1
-  W.func "mul128" (W.mul w0) w1
-  W.func "div (baseline)" (Prelude.div i2) i3
-  W.func "div" (W.div w2) w3
+main = do
+  !(!q, !u, !d) <- do
+      quo <- PA.newPrimArray 5
+      PA.setPrimArray quo 0 5 0
+      let uf = PA.primArrayFromList [
+              300
+            , 200
+            , 100
+            ]
+          df = B.complement 50
+      pure (quo, uf, df)
+
+  W.mainWith $ do
+    W.func "add (baseline)" ((+) i0) i1
+    W.func "add" (E.add w0) w1
+    W.func "sub (baseline)" ((-) i0) i1
+    W.func "sub" (E.sub w0) w1
+    W.func "mul (baseline)" ((*) i0) i1
+    W.func "mul" (E.mul w0) w1
+    W.func "div (baseline)" (Prelude.div i2) i3
+    W.func "div" (E.div w2) w3
+    W.io "quotrem_by1" (E.quotrem_by1 q u) d
+    W.func "quotrem_by1_256"
+      (E.quotrem_by1_256 (E.Word256 300 200 100 0)) (B.complement 50)
 
diff --git a/ppad-fixed.cabal b/ppad-fixed.cabal
@@ -60,6 +60,7 @@ benchmark fixed-bench
     , criterion
     , deepseq
     , ppad-fixed
+    , primitive
 
 benchmark fixed-weigh
   type:                exitcode-stdio-1.0
@@ -74,5 +75,6 @@ benchmark fixed-weigh
       base
     , deepseq
     , ppad-fixed
+    , primitive
     , weigh

	fixed Pure Haskell large fixed-width integers.
	git clone git://git.ppad.tech/fixed.git
	Log \| Files \| Refs \| README \| LICENSE

M	bench/Main.hs	\|	65	+++++++++++++++++++++++++++++++++++++++++++++--------------------
M	bench/Weight.hs	\|	57	+++++++++++++++++++++++++++++++++++++--------------------
M	ppad-fixed.cabal	\|	2	++