commit 32bb0252ce0d0b570efca037a9cffb48533ac52a
parent ead3bb7936b693deb349e0015eba99f84439f90f
Author: Jared Tobin <jared@jtobin.io>
Date: Thu, 23 Jan 2025 20:58:40 +0400
bench: impure vs pure
Unsurprisingly, the pure quotrem_by1_256 function outperforms the impure
(and allocation-heavy) quotrem_by1 by a factor of 2-3. nfAppIO is
required to suss this out.
Per criterion:
> [nfAppIO] is useful when the bulk of the work performed by the
> function is not bound by IO, but rather by pure computations that may
> optimize away if the argument is known statically, as in nfIO/whnfIO.
Diffstat:
3 files changed, 84 insertions(+), 40 deletions(-)
diff --git a/bench/Main.hs b/bench/Main.hs
@@ -3,15 +3,18 @@
module Main where
-import Data.Bits ((.|.), (.&.), (.^.))
-import qualified Data.Word.Extended as W
import Control.DeepSeq
import Criterion.Main
+import Data.Bits ((.|.), (.&.), (.^.))
+import qualified Data.Bits as B
+import qualified Data.Word.Extended as W
+import qualified Data.Primitive.PrimArray as PA
import Prelude hiding (or, and, div, mod)
import qualified Prelude (div)
instance NFData W.Word256
instance NFData W.Word512
+instance NFData W.Word256WithOverflow
or_baseline :: Benchmark
or_baseline = bench "or (baseline)" $ nf ((.|.) w0) w1 where
@@ -142,25 +145,47 @@ mod = bench "mod" $ nf (W.mod w0) w1 where
!w1 = W.to_word256
0x066bd4c3c10e30260cb6e7832af25f15527b089b258a1fef13b6eec3ce73bf06
+quotrem_by1 :: Benchmark
+quotrem_by1 = env setup $ \ ~(quo, u, d) ->
+ bench "quotrem_by1" $ nfAppIO (W.quotrem_by1 quo u) d
+ where
+ setup = do
+ quo <- PA.newPrimArray 5
+ PA.setPrimArray quo 0 5 0
+ let u = PA.primArrayFromList [
+ 300
+ , 200
+ , 100
+ ]
+ d = B.complement 50
+ pure (quo, u, d)
+
+quotrem_by1_256 :: Benchmark
+quotrem_by1_256 =
+ bench "quotrem_by1_256" $
+ nf (W.quotrem_by1_256 (W.Word256 300 200 100 0)) (B.complement 50)
+
main :: IO ()
main = defaultMain [
- mul_baseline
- , mul
- , div_baseline
- , div
- , mod_baseline
- , mod
- , div_baseline_small
- , div_small
- , or_baseline
- , or
- , and_baseline
- , and
- , xor_baseline
- , xor
- , add_baseline
- , add
- , sub_baseline
- , sub
+ quotrem_by1
+ , quotrem_by1_256
+ -- mul_baseline
+ -- mul
+ --, div_baseline
+ --, div
+ --, mod_baseline
+ --, mod
+ --, div_baseline_small
+ --, div_small
+ --, or_baseline
+ --, or
+ --, and_baseline
+ --, and
+ --, xor_baseline
+ --, xor
+ --, add_baseline
+ --, add
+ --, sub_baseline
+ --, sub
]
diff --git a/bench/Weight.hs b/bench/Weight.hs
@@ -1,42 +1,59 @@
{-# OPTIONS_GHC -fno-warn-orphans #-}
+{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PackageImports #-}
module Main where
import Control.DeepSeq
-import qualified Data.Word.Extended as W
+import qualified Data.Bits as B
+import qualified Data.Primitive.PrimArray as PA
+import qualified Data.Word.Extended as E
import qualified Weigh as W
-instance NFData W.Word256
-instance NFData W.Word512
+instance NFData E.Word256
+instance NFData E.Word512
+instance NFData E.Word256WithOverflow
i0, i1 :: Integer
i0 = 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffed
i1 = 0x7fffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffed
-w0, w1 :: W.Word256
-w0 = W.to_word256 i0
-w1 = W.to_word256 i1
+w0, w1 :: E.Word256
+w0 = E.to_word256 i0
+w1 = E.to_word256 i1
i2, i3 :: Integer
i2 = 0x41cf50c7d0d65afabcf5ba37750dba71c7db29ec9f20a216d3ef013a59b9188a
i3 = 0x066bd4c3c10e30260cb6e7832af25f15527b089b258a1fef13b6eec3ce73bf06
-w2, w3 :: W.Word256
-w2 = W.to_word256 i2
-w3 = W.to_word256 i3
+w2, w3 :: E.Word256
+w2 = E.to_word256 i2
+w3 = E.to_word256 i3
main :: IO ()
-main = W.mainWith $ do
- W.func "add (baseline)" ((+) i0) i1
- W.func "add" (W.add w0) w1
- W.func "sub (baseline)" ((-) i0) i1
- W.func "sub" (W.sub w0) w1
- W.func "mul (baseline)" ((-) i0) i1
- W.func "mul" (W.mul_512 w0) w1
- W.func "mul128 (baseline)" ((-) i0) i1
- W.func "mul128" (W.mul w0) w1
- W.func "div (baseline)" (Prelude.div i2) i3
- W.func "div" (W.div w2) w3
+main = do
+ !(!q, !u, !d) <- do
+ quo <- PA.newPrimArray 5
+ PA.setPrimArray quo 0 5 0
+ let uf = PA.primArrayFromList [
+ 300
+ , 200
+ , 100
+ ]
+ df = B.complement 50
+ pure (quo, uf, df)
+
+ W.mainWith $ do
+ W.func "add (baseline)" ((+) i0) i1
+ W.func "add" (E.add w0) w1
+ W.func "sub (baseline)" ((-) i0) i1
+ W.func "sub" (E.sub w0) w1
+ W.func "mul (baseline)" ((*) i0) i1
+ W.func "mul" (E.mul w0) w1
+ W.func "div (baseline)" (Prelude.div i2) i3
+ W.func "div" (E.div w2) w3
+ W.io "quotrem_by1" (E.quotrem_by1 q u) d
+ W.func "quotrem_by1_256"
+ (E.quotrem_by1_256 (E.Word256 300 200 100 0)) (B.complement 50)
diff --git a/ppad-fixed.cabal b/ppad-fixed.cabal
@@ -60,6 +60,7 @@ benchmark fixed-bench
, criterion
, deepseq
, ppad-fixed
+ , primitive
benchmark fixed-weigh
type: exitcode-stdio-1.0
@@ -74,5 +75,6 @@ benchmark fixed-weigh
base
, deepseq
, ppad-fixed
+ , primitive
, weigh