commit f87b6a482554680e197358ae21d3e44e729659dd
parent 9a67f66c8d0a9292aa5b86a620be6c4d8c7d66d2
Author: Jared Tobin <jared@jtobin.io>
Date: Fri, 26 Dec 2025 21:10:08 -0330
lib: fixed refactor
Diffstat:
5 files changed, 213 insertions(+), 57 deletions(-)
diff --git a/flake.lock b/flake.lock
@@ -65,6 +65,37 @@
"url": "git://git.ppad.tech/base16.git"
}
},
+ "ppad-fixed": {
+ "inputs": {
+ "flake-utils": [
+ "ppad-fixed",
+ "ppad-nixpkgs",
+ "flake-utils"
+ ],
+ "nixpkgs": [
+ "ppad-fixed",
+ "ppad-nixpkgs",
+ "nixpkgs"
+ ],
+ "ppad-nixpkgs": [
+ "ppad-nixpkgs"
+ ]
+ },
+ "locked": {
+ "lastModified": 1766767330,
+ "narHash": "sha256-YtJJJT/TwKxIFUHk5DCqLV64mtC1O5z6ncSbxcIct7E=",
+ "ref": "master",
+ "rev": "e9498b179a8c88328af5f0c1518870bd1b5dd42e",
+ "revCount": 268,
+ "type": "git",
+ "url": "git://git.ppad.tech/fixed.git"
+ },
+ "original": {
+ "ref": "master",
+ "type": "git",
+ "url": "git://git.ppad.tech/fixed.git"
+ }
+ },
"ppad-nixpkgs": {
"inputs": {
"flake-utils": "flake-utils",
@@ -96,6 +127,7 @@
"nixpkgs"
],
"ppad-base16": "ppad-base16",
+ "ppad-fixed": "ppad-fixed",
"ppad-nixpkgs": "ppad-nixpkgs"
}
},
diff --git a/flake.nix b/flake.nix
@@ -8,6 +8,12 @@
ref = "master";
inputs.ppad-nixpkgs.follows = "ppad-nixpkgs";
};
+ ppad-fixed = {
+ type = "git";
+ url = "git://git.ppad.tech/fixed.git";
+ ref = "master";
+ inputs.ppad-nixpkgs.follows = "ppad-nixpkgs";
+ };
ppad-nixpkgs = {
type = "git";
url = "git://git.ppad.tech/nixpkgs.git";
@@ -18,7 +24,7 @@
};
outputs = { self, nixpkgs, flake-utils, ppad-nixpkgs
- , ppad-base16
+ , ppad-base16, ppad-fixed
}:
flake-utils.lib.eachDefaultSystem (system:
let
@@ -26,10 +32,20 @@
pkgs = import nixpkgs { inherit system; };
hlib = pkgs.haskell.lib;
+ llvm = pkgs.llvmPackages_15.llvm;
+
+ fixed = ppad-fixed.packages.${system}.default;
+ fixed-llvm =
+ hlib.addBuildTools
+ (hlib.enableCabalFlag fixed "llvm")
+ [ llvm ];
hpkgs = pkgs.haskell.packages.ghc981.extend (new: old: {
- ${lib} = old.callCabal2nixWithOptions lib ./. "--enable-profiling" {};
ppad-base16 = ppad-base16.packages.${system}.default;
+ ppad-fixed = fixed-llvm;
+ ${lib} = new.callCabal2nixWithOptions lib ./. "--enable-profiling" {
+ ppad-fixed = new.ppad-fixed;
+ };
});
cc = pkgs.stdenv.cc;
@@ -47,10 +63,9 @@
buildInputs = [
cabal
cc
+ llvm
];
- inputsFrom = builtins.attrValues self.packages.${system};
-
doBenchmark = true;
shellHook = ''
@@ -59,6 +74,7 @@
echo "cc: $(${cc}/bin/cc --version)"
echo "ghc: $(${ghc}/bin/ghc --version)"
echo "cabal: $(${cabal}/bin/cabal --version)"
+ echo "llc: $(${llvm}/bin/llc --version | head -2 | tail -1)"
'';
};
}
diff --git a/lib/Crypto/MAC/Poly1305.hs b/lib/Crypto/MAC/Poly1305.hs
@@ -1,7 +1,9 @@
{-# OPTIONS_HADDOCK prune #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE MagicHash #-}
{-# LANGUAGE ViewPatterns #-}
+{-# LANGUAGE UnboxedTuples #-}
-- |
-- Module: Crypto.MAC.Poly1305
@@ -18,44 +20,113 @@ module Crypto.MAC.Poly1305 (
-- testing
, _poly1305_loop
- , _roll
+ , _roll16
) where
-import Data.Bits ((.&.), (.|.), (.<<.), (.>>.))
import qualified Data.ByteString as BS
import qualified Data.ByteString.Internal as BI
+import qualified Data.ByteString.Unsafe as BU
+import Data.Word (Word8)
+import Data.Word.Limb (Limb(..))
+import qualified Data.Word.Limb as L
+import Data.Word.Wider (Wider(..))
+import qualified Data.Word.Wider as W
+import qualified Foreign.Storable as Storable (pokeByteOff)
+import qualified GHC.Exts as Exts
+import qualified GHC.Word (Word8(..))
-fi :: (Integral a, Num b) => a -> b
-fi = fromIntegral
-{-# INLINE fi #-}
-
--- arbitrary-size little-endian bytestring decoding
-_roll :: BS.ByteString -> Integer
-_roll = BS.foldr alg 0 where
- alg (fi -> !b) !a = (a .<<. 8) .|. b
-{-# INLINE _roll #-}
-
--- little-endian bytestring encoding
-unroll :: Integer -> BS.ByteString
-unroll i = case i of
- 0 -> BS.singleton 0
- _ -> BS.unfoldr coalg i
+-- utilities ------------------------------------------------------------------
+
+-- convert a Word8 to a Limb
+limb :: Word8 -> Limb
+limb (GHC.Word.W8# (Exts.word8ToWord# -> w)) = Limb w
+{-# INLINABLE limb #-}
+
+-- convert a Limb to a Word8
+word8 :: Limb -> Word8
+word8 (Limb w) = GHC.Word.W8# (Exts.wordToWord8# w)
+{-# INLINABLE word8 #-}
+
+-- convert a Limb to a Word8 after right-shifting
+word8s :: Limb -> Exts.Int# -> Word8
+word8s l s =
+ let !(Limb w) = L.shr# l s
+ in GHC.Word.W8# (Exts.wordToWord8# w)
+{-# INLINABLE word8s #-}
+
+-- 128-bit little-endian bytestring decoding
+_roll16 :: BS.ByteString -> Wider
+_roll16 bs@(BI.PS _ _ l) =
+ let byte :: Int -> Limb
+ byte i
+ | i < l = limb (BU.unsafeIndex bs i)
+ | otherwise = Limb 0##
+ {-# INLINE byte #-}
+ !w0 = (byte 07 `L.shl#` 56#)
+ `L.or#` (byte 06 `L.shl#` 48#)
+ `L.or#` (byte 05 `L.shl#` 40#)
+ `L.or#` (byte 04 `L.shl#` 32#)
+ `L.or#` (byte 03 `L.shl#` 24#)
+ `L.or#` (byte 02 `L.shl#` 16#)
+ `L.or#` (byte 01 `L.shl#` 08#)
+ `L.or#` byte 00
+ !w1 = (byte 15 `L.shl#` 56#)
+ `L.or#` (byte 14 `L.shl#` 48#)
+ `L.or#` (byte 13 `L.shl#` 40#)
+ `L.or#` (byte 12 `L.shl#` 32#)
+ `L.or#` (byte 11 `L.shl#` 24#)
+ `L.or#` (byte 10 `L.shl#` 16#)
+ `L.or#` (byte 09 `L.shl#` 08#)
+ `L.or#` byte 08
+ in Wider (# w0, w1, Limb 0##, Limb 0## #)
+{-# INLINE _roll16 #-}
+
+-- 128-bit little-endian bytestring encoding
+unroll16 :: Wider -> BS.ByteString
+unroll16 (Wider (# w0, w1, _, _ #)) =
+ BI.unsafeCreate 16 $ \ptr -> do
+ -- w0
+ Storable.pokeByteOff ptr 00 (word8 w0)
+ Storable.pokeByteOff ptr 01 (word8s w0 08#)
+ Storable.pokeByteOff ptr 02 (word8s w0 16#)
+ Storable.pokeByteOff ptr 03 (word8s w0 24#)
+ Storable.pokeByteOff ptr 04 (word8s w0 32#)
+ Storable.pokeByteOff ptr 05 (word8s w0 40#)
+ Storable.pokeByteOff ptr 06 (word8s w0 48#)
+ Storable.pokeByteOff ptr 07 (word8s w0 56#)
+ -- w1
+ Storable.pokeByteOff ptr 08 (word8 w1)
+ Storable.pokeByteOff ptr 09 (word8s w1 08#)
+ Storable.pokeByteOff ptr 10 (word8s w1 16#)
+ Storable.pokeByteOff ptr 11 (word8s w1 24#)
+ Storable.pokeByteOff ptr 12 (word8s w1 32#)
+ Storable.pokeByteOff ptr 13 (word8s w1 40#)
+ Storable.pokeByteOff ptr 14 (word8s w1 48#)
+ Storable.pokeByteOff ptr 15 (word8s w1 56#)
+{-# INLINABLE unroll16 #-}
+
+-- set high bit for chunk of length l (max 16)
+set_hi :: Int -> Wider
+set_hi l
+ | l < 8 = W.shl_limb 1 (8 * l)
+ | l < 16 = Wider (# Limb 0##, L.shl# (Limb 1##) s, Limb 0##, Limb 0## #)
+ | otherwise = Wider (# Limb 0##, Limb 0##, Limb 1##, Limb 0## #)
where
- coalg = \case
- 0 -> Nothing
- m -> Just $! (fi m, m .>>. 8)
-{-# INLINE unroll #-}
-
--- little-endian bytestring encoding for 128-bit ints, right-padding
--- with zeros
-unroll16 :: Integer -> BS.ByteString
-unroll16 (unroll -> u@(BI.PS _ _ l))
- | l < 16 = u <> BS.replicate (16 - l) 0
- | otherwise = u
-{-# INLINE unroll16 #-}
-
-clamp :: Integer -> Integer
-clamp r = r .&. 0x0ffffffc0ffffffc0ffffffc0fffffff
+ !(Exts.I# s) = 8 * (l - 8)
+{-# INLINE set_hi #-}
+
+-- bespoke constant-time 130-bit right shift
+shr130 :: Wider -> Wider
+shr130 (Wider (# _, _, l2, l3 #)) =
+ let !r0 = L.or# (L.shr# l2 2#) (L.shl# l3 62#)
+ !r1 = L.shr# l3 2#
+ in Wider (# r0, r1, Limb 0##, Limb 0## #)
+{-# INLINE shr130 #-}
+
+-------------------------------------------------------------------------------
+
+clamp :: Wider -> Wider
+clamp r = r `W.and` 0x0ffffffc0ffffffc0ffffffc0fffffff
{-# INLINE clamp #-}
-- | Produce a Poly1305 MAC for the provided message, given the provided
@@ -75,20 +146,49 @@ mac
mac key@(BI.PS _ _ kl) msg
| kl /= 32 = Nothing
| otherwise =
- let (clamp . _roll -> r, _roll -> s) = BS.splitAt 16 key
+ let (clamp . _roll16 -> r, _roll16 -> s) = BS.splitAt 16 key
in pure (_poly1305_loop r s msg)
-_poly1305_loop :: Integer -> Integer -> BS.ByteString -> BS.ByteString
+-- p = 2^130 - 5
+--
+-- mask for the low 130 bits
+mask130 :: Wider
+mask130 = 0x3ffffffffffffffffffffffffffffffff
+{-# INLINE mask130 #-}
+
+-- partial reduction to [0, 2 ^ 131)
+reduce_partial :: Wider -> Wider
+reduce_partial x =
+ let !lo = x `W.and` mask130
+ !hi = shr130 x
+ in lo + 5 * hi
+{-# INLINE reduce_partial #-}
+
+-- [0, 2 ^ 131) -> [0, p)
+reduce_full :: Wider -> Wider
+reduce_full h =
+ let !lo = h `W.and` mask130
+ !hi = shr130 h
+ !h' = lo + 5 * hi
+ !h_5 = h' + 5
+ !reduced = h_5 `W.and` mask130
+ !carry = shr130 h_5
+ !gte = W.lt 0 carry
+ in W.select h' reduced gte
+{-# INLINE reduce_full #-}
+
+_poly1305_loop :: Wider -> Wider -> BS.ByteString -> BS.ByteString
_poly1305_loop !r !s !msg =
let loop !acc !bs = case BS.splitAt 16 bs of
(chunk@(BI.PS _ _ l), etc)
- | l == 0 -> BS.take 16 (unroll16 (acc + s))
+ | l == 0 ->
+ let !final = reduce_full (reduce_partial acc)
+ in unroll16 (final + s)
| otherwise ->
- let !n = _roll chunk .|. (0x01 .<<. (8 * l))
- !nacc = r * (acc + n) `rem` p
+ let !n = _roll16 chunk `W.or` set_hi l
+ !prod = r * (acc + n)
+ !nacc = reduce_partial (reduce_partial prod)
in loop nacc etc
in loop 0 msg
- where
- p = 1361129467683753853853498429727072845819 -- (1 << 130) - 5
{-# INLINE _poly1305_loop #-}
diff --git a/ppad-poly1305.cabal b/ppad-poly1305.cabal
@@ -14,6 +14,11 @@ description:
A pure Poly1305 message authentication code, per
[RFC8439](https://datatracker.ietf.org/doc/html/rfc8439).
+flag llvm
+ description: Use GHC's LLVM backend.
+ default: False
+ manual: True
+
source-repository head
type: git
location: git.ppad.tech/poly1305.git
@@ -23,11 +28,14 @@ library
hs-source-dirs: lib
ghc-options:
-Wall
+ if flag(llvm)
+ ghc-options: -fllvm -O2
exposed-modules:
Crypto.MAC.Poly1305
build-depends:
base >= 4.9 && < 5
, bytestring >= 0.9 && < 0.13
+ , ppad-fixed >= 0.1 && < 0.2
test-suite poly1305-tests
type: exitcode-stdio-1.0
diff --git a/test/Main.hs b/test/Main.hs
@@ -83,9 +83,9 @@ mac4 = H.testCase "mac (A.3 #4)" $ do
mac5 :: TestTree
mac5 = H.testCase "mac (A.3 #5)" $ do
- let Just (Poly1305._roll -> r) = B16.decode $
+ let Just (Poly1305._roll16 -> r) = B16.decode $
"02000000000000000000000000000000"
- Just (Poly1305._roll -> s) = B16.decode $
+ Just (Poly1305._roll16 -> s) = B16.decode $
"00000000000000000000000000000000"
Just msg = B16.decode
"ffffffffffffffffffffffffffffffff"
@@ -96,9 +96,9 @@ mac5 = H.testCase "mac (A.3 #5)" $ do
mac6 :: TestTree
mac6 = H.testCase "mac (A.3 #6)" $ do
- let Just (Poly1305._roll -> r) = B16.decode $
+ let Just (Poly1305._roll16 -> r) = B16.decode $
"02000000000000000000000000000000"
- Just (Poly1305._roll -> s) = B16.decode $
+ Just (Poly1305._roll16 -> s) = B16.decode $
"ffffffffffffffffffffffffffffffff"
Just msg = B16.decode
"02000000000000000000000000000000"
@@ -109,9 +109,9 @@ mac6 = H.testCase "mac (A.3 #6)" $ do
mac7 :: TestTree
mac7 = H.testCase "mac (A.3 #7)" $ do
- let Just (Poly1305._roll -> r) = B16.decode $
+ let Just (Poly1305._roll16 -> r) = B16.decode $
"01000000000000000000000000000000"
- Just (Poly1305._roll -> s) = B16.decode $
+ Just (Poly1305._roll16 -> s) = B16.decode $
"00000000000000000000000000000000"
Just msg = B16.decode
"fffffffffffffffffffffffffffffffff0ffffffffffffffffffffffffffffff11000000000000000000000000000000"
@@ -122,9 +122,9 @@ mac7 = H.testCase "mac (A.3 #7)" $ do
mac8 :: TestTree
mac8 = H.testCase "mac (A.3 #8)" $ do
- let Just (Poly1305._roll -> r) = B16.decode $
+ let Just (Poly1305._roll16 -> r) = B16.decode $
"01000000000000000000000000000000"
- Just (Poly1305._roll -> s) = B16.decode $
+ Just (Poly1305._roll16 -> s) = B16.decode $
"00000000000000000000000000000000"
Just msg = B16.decode
"fffffffffffffffffffffffffffffffffbfefefefefefefefefefefefefefefe01010101010101010101010101010101"
@@ -135,9 +135,9 @@ mac8 = H.testCase "mac (A.3 #8)" $ do
mac9 :: TestTree
mac9 = H.testCase "mac (A.3 #9)" $ do
- let Just (Poly1305._roll -> r) = B16.decode $
+ let Just (Poly1305._roll16 -> r) = B16.decode $
"02000000000000000000000000000000"
- Just (Poly1305._roll -> s) = B16.decode $
+ Just (Poly1305._roll16 -> s) = B16.decode $
"00000000000000000000000000000000"
Just msg = B16.decode
"fdffffffffffffffffffffffffffffff"
@@ -148,9 +148,9 @@ mac9 = H.testCase "mac (A.3 #9)" $ do
mac10 :: TestTree
mac10 = H.testCase "mac (A.3 #10)" $ do
- let Just (Poly1305._roll -> r) = B16.decode $
+ let Just (Poly1305._roll16 -> r) = B16.decode $
"01000000000000000400000000000000"
- Just (Poly1305._roll -> s) = B16.decode $
+ Just (Poly1305._roll16 -> s) = B16.decode $
"00000000000000000000000000000000"
Just msg = B16.decode
"e33594d7505e43b900000000000000003394d7505e4379cd01000000000000000000000000000000000000000000000001000000000000000000000000000000"
@@ -161,9 +161,9 @@ mac10 = H.testCase "mac (A.3 #10)" $ do
mac11 :: TestTree
mac11 = H.testCase "mac (A.3 #11)" $ do
- let Just (Poly1305._roll -> r) = B16.decode $
+ let Just (Poly1305._roll16 -> r) = B16.decode $
"01000000000000000400000000000000"
- Just (Poly1305._roll -> s) = B16.decode $
+ Just (Poly1305._roll16 -> s) = B16.decode $
"00000000000000000000000000000000"
Just msg = B16.decode
"e33594d7505e43b900000000000000003394d7505e4379cd010000000000000000000000000000000000000000000000"