commit ba69286001068e7400376fbdf761041714569214
parent 2217c94f454868508766bc7142ff385a704debeb
Author: Jared Tobin <jared@jtobin.io>
Date: Wed, 22 Jan 2025 13:29:53 +0400
lib: mul_512 (mul on w256 without overflow)
Diffstat:
1 file changed, 65 insertions(+), 2 deletions(-)
diff --git a/lib/Data/Word/Extended.hs b/lib/Data/Word/Extended.hs
@@ -22,6 +22,18 @@ data Word256 = Word256
{-# UNPACK #-} !Word64
deriving (Eq, Show, Generic)
+-- | Little-endian Word512.
+data Word512 = Word512
+ {-# UNPACK #-} !Word64
+ {-# UNPACK #-} !Word64
+ {-# UNPACK #-} !Word64
+ {-# UNPACK #-} !Word64
+ {-# UNPACK #-} !Word64
+ {-# UNPACK #-} !Word64
+ {-# UNPACK #-} !Word64
+ {-# UNPACK #-} !Word64
+ deriving (Eq, Show, Generic)
+
-- conversion -----------------------------------------------------------------
to_integer :: Word256 -> Integer
@@ -40,6 +52,32 @@ to_word256 n =
!w3 = fi ((n .>>. 192) .&. mask64)
in Word256 w0 w1 w2 w3
+-- for testing
+word512_to_integer :: Word512 -> Integer
+word512_to_integer (Word512 w0 w1 w2 w3 w4 w5 w6 w7) =
+ fi w7 .<<. 448
+ .|. fi w6 .<<. 384
+ .|. fi w5 .<<. 320
+ .|. fi w4 .<<. 256
+ .|. fi w3 .<<. 192
+ .|. fi w2 .<<. 128
+ .|. fi w1 .<<. 64
+ .|. fi w0
+
+-- for testing
+to_word512 :: Integer -> Word512
+to_word512 n =
+ let !mask64 = 2 ^ (64 :: Int) - 1
+ !w0 = fi (n .&. mask64)
+ !w1 = fi ((n .>>. 64) .&. mask64)
+ !w2 = fi ((n .>>. 128) .&. mask64)
+ !w3 = fi ((n .>>. 192) .&. mask64)
+ !w4 = fi ((n .>>. 256) .&. mask64)
+ !w5 = fi ((n .>>. 320) .&. mask64)
+ !w6 = fi ((n .>>. 384) .&. mask64)
+ !w7 = fi ((n .>>. 448) .&. mask64)
+ in Word512 w0 w1 w2 w3 w4 w5 w6 w7
+
-- addition, subtraction ------------------------------------------------------
-- strict, unboxed pair of Word64
@@ -102,8 +140,8 @@ sub w0 w1 = d where
-- multiplication -------------------------------------------------------------
--- note that this is available in a single MULX instruction on e.g.
--- x86_64 with BMI2
+-- this is available in a single MULX instruction on e.g. x86_64
+-- with BMI2
--
-- translated from Mul64 in go's math/bits package
mul_c :: Word64 -> Word64 -> W64Pair
@@ -142,6 +180,7 @@ umul_step z x y c =
!(W64P hi _) = add_c hi_1 0 c_1
in W64P hi lo
+-- | Multiplication on 'Word256' values, with overflow.
mul :: Word256 -> Word256 -> Word256
mul (Word256 a0 a1 a2 a3) (Word256 b0 b1 b2 b3) =
let !(W64P c0_0 s0) = mul_c a0 b0
@@ -156,3 +195,27 @@ mul (Word256 a0 a1 a2 a3) (Word256 b0 b1 b2 b3) =
!s3 = a3 * b0 + a2 * b1 + a0 * b3 + a1 * b2 + c0_2 + c1_1 + c2
in Word256 s0 s1 s2 s3
+-- | Multiplication on 'Word256' values, to 'Word512'.
+mul_512 :: Word256 -> Word256 -> Word512
+mul_512 (Word256 x0 x1 x2 x3) (Word256 y0 y1 y2 y3) =
+ let !(W64P c4_0 r0) = mul_c x0 y0
+ !(W64P c4_1 r0_1) = umul_hop c4_0 x1 y0
+ !(W64P c4_2 r0_2) = umul_hop c4_1 x2 y0
+ !(W64P c4 r0_3) = umul_hop c4_2 x3 y0
+
+ !(W64P c5_0 r1) = umul_hop r0_1 x0 y1
+ !(W64P c5_1 r1_2) = umul_step r0_2 x1 y1 c5_0
+ !(W64P c5_2 r1_3) = umul_step r0_3 x2 y1 c5_1
+ !(W64P c5 r1_4) = umul_step c4 x3 y1 c5_2
+
+ !(W64P c6_0 r2) = umul_hop r1_2 x0 y2
+ !(W64P c6_1 r2_3) = umul_step r1_3 x1 y2 c6_0
+ !(W64P c6_2 r2_4) = umul_step r1_4 x2 y2 c6_1
+ !(W64P c6 r2_5) = umul_step c5 x3 y2 c6_2
+
+ !(W64P c7_0 r3) = umul_hop r2_3 x0 y3
+ !(W64P c7_1 r4) = umul_step r2_4 x1 y3 c7_0
+ !(W64P c7_2 r5) = umul_step r2_5 x2 y3 c7_1
+ !(W64P r7 r6) = umul_step c6 x3 y3 c7_2
+ in Word512 r0 r1 r2 r3 r4 r5 r6 r7
+