fixed

Pure Haskell large fixed-width integers.
git clone git://git.ppad.tech/fixed.git
Log | Files | Refs | README | LICENSE

commit ba69286001068e7400376fbdf761041714569214
parent 2217c94f454868508766bc7142ff385a704debeb
Author: Jared Tobin <jared@jtobin.io>
Date:   Wed, 22 Jan 2025 13:29:53 +0400

lib: mul_512 (mul on w256 without overflow)

Diffstat:
Mlib/Data/Word/Extended.hs | 67+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 65 insertions(+), 2 deletions(-)

diff --git a/lib/Data/Word/Extended.hs b/lib/Data/Word/Extended.hs @@ -22,6 +22,18 @@ data Word256 = Word256 {-# UNPACK #-} !Word64 deriving (Eq, Show, Generic) +-- | Little-endian Word512. +data Word512 = Word512 + {-# UNPACK #-} !Word64 + {-# UNPACK #-} !Word64 + {-# UNPACK #-} !Word64 + {-# UNPACK #-} !Word64 + {-# UNPACK #-} !Word64 + {-# UNPACK #-} !Word64 + {-# UNPACK #-} !Word64 + {-# UNPACK #-} !Word64 + deriving (Eq, Show, Generic) + -- conversion ----------------------------------------------------------------- to_integer :: Word256 -> Integer @@ -40,6 +52,32 @@ to_word256 n = !w3 = fi ((n .>>. 192) .&. mask64) in Word256 w0 w1 w2 w3 +-- for testing +word512_to_integer :: Word512 -> Integer +word512_to_integer (Word512 w0 w1 w2 w3 w4 w5 w6 w7) = + fi w7 .<<. 448 + .|. fi w6 .<<. 384 + .|. fi w5 .<<. 320 + .|. fi w4 .<<. 256 + .|. fi w3 .<<. 192 + .|. fi w2 .<<. 128 + .|. fi w1 .<<. 64 + .|. fi w0 + +-- for testing +to_word512 :: Integer -> Word512 +to_word512 n = + let !mask64 = 2 ^ (64 :: Int) - 1 + !w0 = fi (n .&. mask64) + !w1 = fi ((n .>>. 64) .&. mask64) + !w2 = fi ((n .>>. 128) .&. mask64) + !w3 = fi ((n .>>. 192) .&. mask64) + !w4 = fi ((n .>>. 256) .&. mask64) + !w5 = fi ((n .>>. 320) .&. mask64) + !w6 = fi ((n .>>. 384) .&. mask64) + !w7 = fi ((n .>>. 448) .&. mask64) + in Word512 w0 w1 w2 w3 w4 w5 w6 w7 + -- addition, subtraction ------------------------------------------------------ -- strict, unboxed pair of Word64 @@ -102,8 +140,8 @@ sub w0 w1 = d where -- multiplication ------------------------------------------------------------- --- note that this is available in a single MULX instruction on e.g. --- x86_64 with BMI2 +-- this is available in a single MULX instruction on e.g. x86_64 +-- with BMI2 -- -- translated from Mul64 in go's math/bits package mul_c :: Word64 -> Word64 -> W64Pair @@ -142,6 +180,7 @@ umul_step z x y c = !(W64P hi _) = add_c hi_1 0 c_1 in W64P hi lo +-- | Multiplication on 'Word256' values, with overflow. mul :: Word256 -> Word256 -> Word256 mul (Word256 a0 a1 a2 a3) (Word256 b0 b1 b2 b3) = let !(W64P c0_0 s0) = mul_c a0 b0 @@ -156,3 +195,27 @@ mul (Word256 a0 a1 a2 a3) (Word256 b0 b1 b2 b3) = !s3 = a3 * b0 + a2 * b1 + a0 * b3 + a1 * b2 + c0_2 + c1_1 + c2 in Word256 s0 s1 s2 s3 +-- | Multiplication on 'Word256' values, to 'Word512'. +mul_512 :: Word256 -> Word256 -> Word512 +mul_512 (Word256 x0 x1 x2 x3) (Word256 y0 y1 y2 y3) = + let !(W64P c4_0 r0) = mul_c x0 y0 + !(W64P c4_1 r0_1) = umul_hop c4_0 x1 y0 + !(W64P c4_2 r0_2) = umul_hop c4_1 x2 y0 + !(W64P c4 r0_3) = umul_hop c4_2 x3 y0 + + !(W64P c5_0 r1) = umul_hop r0_1 x0 y1 + !(W64P c5_1 r1_2) = umul_step r0_2 x1 y1 c5_0 + !(W64P c5_2 r1_3) = umul_step r0_3 x2 y1 c5_1 + !(W64P c5 r1_4) = umul_step c4 x3 y1 c5_2 + + !(W64P c6_0 r2) = umul_hop r1_2 x0 y2 + !(W64P c6_1 r2_3) = umul_step r1_3 x1 y2 c6_0 + !(W64P c6_2 r2_4) = umul_step r1_4 x2 y2 c6_1 + !(W64P c6 r2_5) = umul_step c5 x3 y2 c6_2 + + !(W64P c7_0 r3) = umul_hop r2_3 x0 y3 + !(W64P c7_1 r4) = umul_step r2_4 x1 y3 c7_0 + !(W64P c7_2 r5) = umul_step r2_5 x2 y3 c7_1 + !(W64P r7 r6) = umul_step c6 x3 y3 c7_2 + in Word512 r0 r1 r2 r3 r4 r5 r6 r7 +