fixed

Pure Haskell large fixed-width integers.
git clone git://git.ppad.tech/fixed.git
Log | Files | Refs | README | LICENSE

commit 32bb0252ce0d0b570efca037a9cffb48533ac52a
parent ead3bb7936b693deb349e0015eba99f84439f90f
Author: Jared Tobin <jared@jtobin.io>
Date:   Thu, 23 Jan 2025 20:58:40 +0400

bench: impure vs pure

Unsurprisingly, the pure quotrem_by1_256 function outperforms the impure
(and allocation-heavy) quotrem_by1 by a factor of 2-3. nfAppIO is
required to suss this out.

Per criterion:

> [nfAppIO] is useful when the bulk of the work performed by the
> function is not bound by IO, but rather by pure computations that may
> optimize away if the argument is known statically, as in nfIO/whnfIO.

Diffstat:
Mbench/Main.hs | 65+++++++++++++++++++++++++++++++++++++++++++++--------------------
Mbench/Weight.hs | 57+++++++++++++++++++++++++++++++++++++--------------------
Mppad-fixed.cabal | 2++
3 files changed, 84 insertions(+), 40 deletions(-)

diff --git a/bench/Main.hs b/bench/Main.hs @@ -3,15 +3,18 @@ module Main where -import Data.Bits ((.|.), (.&.), (.^.)) -import qualified Data.Word.Extended as W import Control.DeepSeq import Criterion.Main +import Data.Bits ((.|.), (.&.), (.^.)) +import qualified Data.Bits as B +import qualified Data.Word.Extended as W +import qualified Data.Primitive.PrimArray as PA import Prelude hiding (or, and, div, mod) import qualified Prelude (div) instance NFData W.Word256 instance NFData W.Word512 +instance NFData W.Word256WithOverflow or_baseline :: Benchmark or_baseline = bench "or (baseline)" $ nf ((.|.) w0) w1 where @@ -142,25 +145,47 @@ mod = bench "mod" $ nf (W.mod w0) w1 where !w1 = W.to_word256 0x066bd4c3c10e30260cb6e7832af25f15527b089b258a1fef13b6eec3ce73bf06 +quotrem_by1 :: Benchmark +quotrem_by1 = env setup $ \ ~(quo, u, d) -> + bench "quotrem_by1" $ nfAppIO (W.quotrem_by1 quo u) d + where + setup = do + quo <- PA.newPrimArray 5 + PA.setPrimArray quo 0 5 0 + let u = PA.primArrayFromList [ + 300 + , 200 + , 100 + ] + d = B.complement 50 + pure (quo, u, d) + +quotrem_by1_256 :: Benchmark +quotrem_by1_256 = + bench "quotrem_by1_256" $ + nf (W.quotrem_by1_256 (W.Word256 300 200 100 0)) (B.complement 50) + main :: IO () main = defaultMain [ - mul_baseline - , mul - , div_baseline - , div - , mod_baseline - , mod - , div_baseline_small - , div_small - , or_baseline - , or - , and_baseline - , and - , xor_baseline - , xor - , add_baseline - , add - , sub_baseline - , sub + quotrem_by1 + , quotrem_by1_256 + -- mul_baseline + -- mul + --, div_baseline + --, div + --, mod_baseline + --, mod + --, div_baseline_small + --, div_small + --, or_baseline + --, or + --, and_baseline + --, and + --, xor_baseline + --, xor + --, add_baseline + --, add + --, sub_baseline + --, sub ] diff --git a/bench/Weight.hs b/bench/Weight.hs @@ -1,42 +1,59 @@ {-# OPTIONS_GHC -fno-warn-orphans #-} +{-# LANGUAGE BangPatterns #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE PackageImports #-} module Main where import Control.DeepSeq -import qualified Data.Word.Extended as W +import qualified Data.Bits as B +import qualified Data.Primitive.PrimArray as PA +import qualified Data.Word.Extended as E import qualified Weigh as W -instance NFData W.Word256 -instance NFData W.Word512 +instance NFData E.Word256 +instance NFData E.Word512 +instance NFData E.Word256WithOverflow i0, i1 :: Integer i0 = 0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffed i1 = 0x7fffffffffffffffffffffffffffffffffffffffffffffbfffffffffffffffed -w0, w1 :: W.Word256 -w0 = W.to_word256 i0 -w1 = W.to_word256 i1 +w0, w1 :: E.Word256 +w0 = E.to_word256 i0 +w1 = E.to_word256 i1 i2, i3 :: Integer i2 = 0x41cf50c7d0d65afabcf5ba37750dba71c7db29ec9f20a216d3ef013a59b9188a i3 = 0x066bd4c3c10e30260cb6e7832af25f15527b089b258a1fef13b6eec3ce73bf06 -w2, w3 :: W.Word256 -w2 = W.to_word256 i2 -w3 = W.to_word256 i3 +w2, w3 :: E.Word256 +w2 = E.to_word256 i2 +w3 = E.to_word256 i3 main :: IO () -main = W.mainWith $ do - W.func "add (baseline)" ((+) i0) i1 - W.func "add" (W.add w0) w1 - W.func "sub (baseline)" ((-) i0) i1 - W.func "sub" (W.sub w0) w1 - W.func "mul (baseline)" ((-) i0) i1 - W.func "mul" (W.mul_512 w0) w1 - W.func "mul128 (baseline)" ((-) i0) i1 - W.func "mul128" (W.mul w0) w1 - W.func "div (baseline)" (Prelude.div i2) i3 - W.func "div" (W.div w2) w3 +main = do + !(!q, !u, !d) <- do + quo <- PA.newPrimArray 5 + PA.setPrimArray quo 0 5 0 + let uf = PA.primArrayFromList [ + 300 + , 200 + , 100 + ] + df = B.complement 50 + pure (quo, uf, df) + + W.mainWith $ do + W.func "add (baseline)" ((+) i0) i1 + W.func "add" (E.add w0) w1 + W.func "sub (baseline)" ((-) i0) i1 + W.func "sub" (E.sub w0) w1 + W.func "mul (baseline)" ((*) i0) i1 + W.func "mul" (E.mul w0) w1 + W.func "div (baseline)" (Prelude.div i2) i3 + W.func "div" (E.div w2) w3 + W.io "quotrem_by1" (E.quotrem_by1 q u) d + W.func "quotrem_by1_256" + (E.quotrem_by1_256 (E.Word256 300 200 100 0)) (B.complement 50) diff --git a/ppad-fixed.cabal b/ppad-fixed.cabal @@ -60,6 +60,7 @@ benchmark fixed-bench , criterion , deepseq , ppad-fixed + , primitive benchmark fixed-weigh type: exitcode-stdio-1.0 @@ -74,5 +75,6 @@ benchmark fixed-weigh base , deepseq , ppad-fixed + , primitive , weigh