lib: mul_512 (mul on w256 without overflow) - fixed - Pure Haskell large fixed-width integers.

commit ba69286001068e7400376fbdf761041714569214
parent 2217c94f454868508766bc7142ff385a704debeb
Author: Jared Tobin <jared@jtobin.io>
Date:   Wed, 22 Jan 2025 13:29:53 +0400

lib: mul_512 (mul on w256 without overflow)

Diffstat:
M lib/Data/Word/Extended.hs  | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--

1 file changed, 65 insertions(+), 2 deletions(-)
diff --git a/lib/Data/Word/Extended.hs b/lib/Data/Word/Extended.hs
@@ -22,6 +22,18 @@ data Word256 = Word256
     {-# UNPACK #-} !Word64
   deriving (Eq, Show, Generic)
 
+-- | Little-endian Word512.
+data Word512 = Word512
+    {-# UNPACK #-} !Word64
+    {-# UNPACK #-} !Word64
+    {-# UNPACK #-} !Word64
+    {-# UNPACK #-} !Word64
+    {-# UNPACK #-} !Word64
+    {-# UNPACK #-} !Word64
+    {-# UNPACK #-} !Word64
+    {-# UNPACK #-} !Word64
+  deriving (Eq, Show, Generic)
+
 -- conversion -----------------------------------------------------------------
 
 to_integer :: Word256 -> Integer
@@ -40,6 +52,32 @@ to_word256 n =
       !w3 = fi ((n .>>. 192) .&. mask64)
   in  Word256 w0 w1 w2 w3
 
+-- for testing
+word512_to_integer :: Word512 -> Integer
+word512_to_integer (Word512 w0 w1 w2 w3 w4 w5 w6 w7) =
+      fi w7 .<<. 448
+  .|. fi w6 .<<. 384
+  .|. fi w5 .<<. 320
+  .|. fi w4 .<<. 256
+  .|. fi w3 .<<. 192
+  .|. fi w2 .<<. 128
+  .|. fi w1 .<<. 64
+  .|. fi w0
+
+-- for testing
+to_word512 :: Integer -> Word512
+to_word512 n =
+  let !mask64 = 2 ^ (64 :: Int) - 1
+      !w0 = fi (n .&. mask64)
+      !w1 = fi ((n .>>. 64) .&. mask64)
+      !w2 = fi ((n .>>. 128) .&. mask64)
+      !w3 = fi ((n .>>. 192) .&. mask64)
+      !w4 = fi ((n .>>. 256) .&. mask64)
+      !w5 = fi ((n .>>. 320) .&. mask64)
+      !w6 = fi ((n .>>. 384) .&. mask64)
+      !w7 = fi ((n .>>. 448) .&. mask64)
+  in  Word512 w0 w1 w2 w3 w4 w5 w6 w7
+
 -- addition, subtraction ------------------------------------------------------
 
 -- strict, unboxed pair of Word64
@@ -102,8 +140,8 @@ sub w0 w1 = d where
 
 -- multiplication -------------------------------------------------------------
 
--- note that this is available in a single MULX instruction on e.g.
--- x86_64 with BMI2
+-- this is available in a single MULX instruction on e.g. x86_64
+-- with BMI2
 --
 -- translated from Mul64 in go's math/bits package
 mul_c :: Word64 -> Word64 -> W64Pair
@@ -142,6 +180,7 @@ umul_step z x y c =
       !(W64P hi _)      = add_c hi_1 0 c_1
   in  W64P hi lo
 
+-- | Multiplication on 'Word256' values, with overflow.
 mul :: Word256 -> Word256 -> Word256
 mul (Word256 a0 a1 a2 a3) (Word256 b0 b1 b2 b3) =
   let !(W64P c0_0 s0) = mul_c a0 b0
@@ -156,3 +195,27 @@ mul (Word256 a0 a1 a2 a3) (Word256 b0 b1 b2 b3) =
       !s3 = a3 * b0 + a2 * b1 + a0 * b3 + a1 * b2 + c0_2 + c1_1 + c2
   in  Word256 s0 s1 s2 s3
 
+-- | Multiplication on 'Word256' values, to 'Word512'.
+mul_512 :: Word256 -> Word256 -> Word512
+mul_512 (Word256 x0 x1 x2 x3) (Word256 y0 y1 y2 y3) =
+  let !(W64P c4_0   r0) = mul_c x0 y0
+      !(W64P c4_1 r0_1) = umul_hop c4_0 x1 y0
+      !(W64P c4_2 r0_2) = umul_hop c4_1 x2 y0
+      !(W64P c4   r0_3) = umul_hop c4_2 x3 y0
+
+      !(W64P c5_0   r1) = umul_hop r0_1 x0 y1
+      !(W64P c5_1 r1_2) = umul_step r0_2 x1 y1 c5_0
+      !(W64P c5_2 r1_3) = umul_step r0_3 x2 y1 c5_1
+      !(W64P c5   r1_4) = umul_step c4 x3 y1 c5_2
+
+      !(W64P c6_0   r2) = umul_hop r1_2 x0 y2
+      !(W64P c6_1 r2_3) = umul_step r1_3 x1 y2 c6_0
+      !(W64P c6_2 r2_4) = umul_step r1_4 x2 y2 c6_1
+      !(W64P c6   r2_5) = umul_step c5 x3 y2 c6_2
+
+      !(W64P c7_0   r3) = umul_hop r2_3 x0 y3
+      !(W64P c7_1   r4) = umul_step r2_4 x1 y3 c7_0
+      !(W64P c7_2   r5) = umul_step r2_5 x2 y3 c7_1
+      !(W64P r7     r6) = umul_step c6 x3 y3 c7_2
+  in  Word512 r0 r1 r2 r3 r4 r5 r6 r7
+

	fixed Pure Haskell large fixed-width integers.
	git clone git://git.ppad.tech/fixed.git
	Log \| Files \| Refs \| README \| LICENSE