poly1305

The Poly1305 message authentication code (docs.ppad.tech/poly1305).
git clone git://git.ppad.tech/poly1305.git
Log | Files | Refs | README | LICENSE

commit f87b6a482554680e197358ae21d3e44e729659dd
parent 9a67f66c8d0a9292aa5b86a620be6c4d8c7d66d2
Author: Jared Tobin <jared@jtobin.io>
Date:   Fri, 26 Dec 2025 21:10:08 -0330

lib: fixed refactor

Diffstat:
Mflake.lock | 32++++++++++++++++++++++++++++++++
Mflake.nix | 24++++++++++++++++++++----
Mlib/Crypto/MAC/Poly1305.hs | 178+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------
Mppad-poly1305.cabal | 8++++++++
Mtest/Main.hs | 28++++++++++++++--------------
5 files changed, 213 insertions(+), 57 deletions(-)

diff --git a/flake.lock b/flake.lock @@ -65,6 +65,37 @@ "url": "git://git.ppad.tech/base16.git" } }, + "ppad-fixed": { + "inputs": { + "flake-utils": [ + "ppad-fixed", + "ppad-nixpkgs", + "flake-utils" + ], + "nixpkgs": [ + "ppad-fixed", + "ppad-nixpkgs", + "nixpkgs" + ], + "ppad-nixpkgs": [ + "ppad-nixpkgs" + ] + }, + "locked": { + "lastModified": 1766767330, + "narHash": "sha256-YtJJJT/TwKxIFUHk5DCqLV64mtC1O5z6ncSbxcIct7E=", + "ref": "master", + "rev": "e9498b179a8c88328af5f0c1518870bd1b5dd42e", + "revCount": 268, + "type": "git", + "url": "git://git.ppad.tech/fixed.git" + }, + "original": { + "ref": "master", + "type": "git", + "url": "git://git.ppad.tech/fixed.git" + } + }, "ppad-nixpkgs": { "inputs": { "flake-utils": "flake-utils", @@ -96,6 +127,7 @@ "nixpkgs" ], "ppad-base16": "ppad-base16", + "ppad-fixed": "ppad-fixed", "ppad-nixpkgs": "ppad-nixpkgs" } }, diff --git a/flake.nix b/flake.nix @@ -8,6 +8,12 @@ ref = "master"; inputs.ppad-nixpkgs.follows = "ppad-nixpkgs"; }; + ppad-fixed = { + type = "git"; + url = "git://git.ppad.tech/fixed.git"; + ref = "master"; + inputs.ppad-nixpkgs.follows = "ppad-nixpkgs"; + }; ppad-nixpkgs = { type = "git"; url = "git://git.ppad.tech/nixpkgs.git"; @@ -18,7 +24,7 @@ }; outputs = { self, nixpkgs, flake-utils, ppad-nixpkgs - , ppad-base16 + , ppad-base16, ppad-fixed }: flake-utils.lib.eachDefaultSystem (system: let @@ -26,10 +32,20 @@ pkgs = import nixpkgs { inherit system; }; hlib = pkgs.haskell.lib; + llvm = pkgs.llvmPackages_15.llvm; + + fixed = ppad-fixed.packages.${system}.default; + fixed-llvm = + hlib.addBuildTools + (hlib.enableCabalFlag fixed "llvm") + [ llvm ]; hpkgs = pkgs.haskell.packages.ghc981.extend (new: old: { - ${lib} = old.callCabal2nixWithOptions lib ./. "--enable-profiling" {}; ppad-base16 = ppad-base16.packages.${system}.default; + ppad-fixed = fixed-llvm; + ${lib} = new.callCabal2nixWithOptions lib ./. "--enable-profiling" { + ppad-fixed = new.ppad-fixed; + }; }); cc = pkgs.stdenv.cc; @@ -47,10 +63,9 @@ buildInputs = [ cabal cc + llvm ]; - inputsFrom = builtins.attrValues self.packages.${system}; - doBenchmark = true; shellHook = '' @@ -59,6 +74,7 @@ echo "cc: $(${cc}/bin/cc --version)" echo "ghc: $(${ghc}/bin/ghc --version)" echo "cabal: $(${cabal}/bin/cabal --version)" + echo "llc: $(${llvm}/bin/llc --version | head -2 | tail -1)" ''; }; } diff --git a/lib/Crypto/MAC/Poly1305.hs b/lib/Crypto/MAC/Poly1305.hs @@ -1,7 +1,9 @@ {-# OPTIONS_HADDOCK prune #-} {-# LANGUAGE BangPatterns #-} {-# LANGUAGE LambdaCase #-} +{-# LANGUAGE MagicHash #-} {-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE UnboxedTuples #-} -- | -- Module: Crypto.MAC.Poly1305 @@ -18,44 +20,113 @@ module Crypto.MAC.Poly1305 ( -- testing , _poly1305_loop - , _roll + , _roll16 ) where -import Data.Bits ((.&.), (.|.), (.<<.), (.>>.)) import qualified Data.ByteString as BS import qualified Data.ByteString.Internal as BI +import qualified Data.ByteString.Unsafe as BU +import Data.Word (Word8) +import Data.Word.Limb (Limb(..)) +import qualified Data.Word.Limb as L +import Data.Word.Wider (Wider(..)) +import qualified Data.Word.Wider as W +import qualified Foreign.Storable as Storable (pokeByteOff) +import qualified GHC.Exts as Exts +import qualified GHC.Word (Word8(..)) -fi :: (Integral a, Num b) => a -> b -fi = fromIntegral -{-# INLINE fi #-} - --- arbitrary-size little-endian bytestring decoding -_roll :: BS.ByteString -> Integer -_roll = BS.foldr alg 0 where - alg (fi -> !b) !a = (a .<<. 8) .|. b -{-# INLINE _roll #-} - --- little-endian bytestring encoding -unroll :: Integer -> BS.ByteString -unroll i = case i of - 0 -> BS.singleton 0 - _ -> BS.unfoldr coalg i +-- utilities ------------------------------------------------------------------ + +-- convert a Word8 to a Limb +limb :: Word8 -> Limb +limb (GHC.Word.W8# (Exts.word8ToWord# -> w)) = Limb w +{-# INLINABLE limb #-} + +-- convert a Limb to a Word8 +word8 :: Limb -> Word8 +word8 (Limb w) = GHC.Word.W8# (Exts.wordToWord8# w) +{-# INLINABLE word8 #-} + +-- convert a Limb to a Word8 after right-shifting +word8s :: Limb -> Exts.Int# -> Word8 +word8s l s = + let !(Limb w) = L.shr# l s + in GHC.Word.W8# (Exts.wordToWord8# w) +{-# INLINABLE word8s #-} + +-- 128-bit little-endian bytestring decoding +_roll16 :: BS.ByteString -> Wider +_roll16 bs@(BI.PS _ _ l) = + let byte :: Int -> Limb + byte i + | i < l = limb (BU.unsafeIndex bs i) + | otherwise = Limb 0## + {-# INLINE byte #-} + !w0 = (byte 07 `L.shl#` 56#) + `L.or#` (byte 06 `L.shl#` 48#) + `L.or#` (byte 05 `L.shl#` 40#) + `L.or#` (byte 04 `L.shl#` 32#) + `L.or#` (byte 03 `L.shl#` 24#) + `L.or#` (byte 02 `L.shl#` 16#) + `L.or#` (byte 01 `L.shl#` 08#) + `L.or#` byte 00 + !w1 = (byte 15 `L.shl#` 56#) + `L.or#` (byte 14 `L.shl#` 48#) + `L.or#` (byte 13 `L.shl#` 40#) + `L.or#` (byte 12 `L.shl#` 32#) + `L.or#` (byte 11 `L.shl#` 24#) + `L.or#` (byte 10 `L.shl#` 16#) + `L.or#` (byte 09 `L.shl#` 08#) + `L.or#` byte 08 + in Wider (# w0, w1, Limb 0##, Limb 0## #) +{-# INLINE _roll16 #-} + +-- 128-bit little-endian bytestring encoding +unroll16 :: Wider -> BS.ByteString +unroll16 (Wider (# w0, w1, _, _ #)) = + BI.unsafeCreate 16 $ \ptr -> do + -- w0 + Storable.pokeByteOff ptr 00 (word8 w0) + Storable.pokeByteOff ptr 01 (word8s w0 08#) + Storable.pokeByteOff ptr 02 (word8s w0 16#) + Storable.pokeByteOff ptr 03 (word8s w0 24#) + Storable.pokeByteOff ptr 04 (word8s w0 32#) + Storable.pokeByteOff ptr 05 (word8s w0 40#) + Storable.pokeByteOff ptr 06 (word8s w0 48#) + Storable.pokeByteOff ptr 07 (word8s w0 56#) + -- w1 + Storable.pokeByteOff ptr 08 (word8 w1) + Storable.pokeByteOff ptr 09 (word8s w1 08#) + Storable.pokeByteOff ptr 10 (word8s w1 16#) + Storable.pokeByteOff ptr 11 (word8s w1 24#) + Storable.pokeByteOff ptr 12 (word8s w1 32#) + Storable.pokeByteOff ptr 13 (word8s w1 40#) + Storable.pokeByteOff ptr 14 (word8s w1 48#) + Storable.pokeByteOff ptr 15 (word8s w1 56#) +{-# INLINABLE unroll16 #-} + +-- set high bit for chunk of length l (max 16) +set_hi :: Int -> Wider +set_hi l + | l < 8 = W.shl_limb 1 (8 * l) + | l < 16 = Wider (# Limb 0##, L.shl# (Limb 1##) s, Limb 0##, Limb 0## #) + | otherwise = Wider (# Limb 0##, Limb 0##, Limb 1##, Limb 0## #) where - coalg = \case - 0 -> Nothing - m -> Just $! (fi m, m .>>. 8) -{-# INLINE unroll #-} - --- little-endian bytestring encoding for 128-bit ints, right-padding --- with zeros -unroll16 :: Integer -> BS.ByteString -unroll16 (unroll -> u@(BI.PS _ _ l)) - | l < 16 = u <> BS.replicate (16 - l) 0 - | otherwise = u -{-# INLINE unroll16 #-} - -clamp :: Integer -> Integer -clamp r = r .&. 0x0ffffffc0ffffffc0ffffffc0fffffff + !(Exts.I# s) = 8 * (l - 8) +{-# INLINE set_hi #-} + +-- bespoke constant-time 130-bit right shift +shr130 :: Wider -> Wider +shr130 (Wider (# _, _, l2, l3 #)) = + let !r0 = L.or# (L.shr# l2 2#) (L.shl# l3 62#) + !r1 = L.shr# l3 2# + in Wider (# r0, r1, Limb 0##, Limb 0## #) +{-# INLINE shr130 #-} + +------------------------------------------------------------------------------- + +clamp :: Wider -> Wider +clamp r = r `W.and` 0x0ffffffc0ffffffc0ffffffc0fffffff {-# INLINE clamp #-} -- | Produce a Poly1305 MAC for the provided message, given the provided @@ -75,20 +146,49 @@ mac mac key@(BI.PS _ _ kl) msg | kl /= 32 = Nothing | otherwise = - let (clamp . _roll -> r, _roll -> s) = BS.splitAt 16 key + let (clamp . _roll16 -> r, _roll16 -> s) = BS.splitAt 16 key in pure (_poly1305_loop r s msg) -_poly1305_loop :: Integer -> Integer -> BS.ByteString -> BS.ByteString +-- p = 2^130 - 5 +-- +-- mask for the low 130 bits +mask130 :: Wider +mask130 = 0x3ffffffffffffffffffffffffffffffff +{-# INLINE mask130 #-} + +-- partial reduction to [0, 2 ^ 131) +reduce_partial :: Wider -> Wider +reduce_partial x = + let !lo = x `W.and` mask130 + !hi = shr130 x + in lo + 5 * hi +{-# INLINE reduce_partial #-} + +-- [0, 2 ^ 131) -> [0, p) +reduce_full :: Wider -> Wider +reduce_full h = + let !lo = h `W.and` mask130 + !hi = shr130 h + !h' = lo + 5 * hi + !h_5 = h' + 5 + !reduced = h_5 `W.and` mask130 + !carry = shr130 h_5 + !gte = W.lt 0 carry + in W.select h' reduced gte +{-# INLINE reduce_full #-} + +_poly1305_loop :: Wider -> Wider -> BS.ByteString -> BS.ByteString _poly1305_loop !r !s !msg = let loop !acc !bs = case BS.splitAt 16 bs of (chunk@(BI.PS _ _ l), etc) - | l == 0 -> BS.take 16 (unroll16 (acc + s)) + | l == 0 -> + let !final = reduce_full (reduce_partial acc) + in unroll16 (final + s) | otherwise -> - let !n = _roll chunk .|. (0x01 .<<. (8 * l)) - !nacc = r * (acc + n) `rem` p + let !n = _roll16 chunk `W.or` set_hi l + !prod = r * (acc + n) + !nacc = reduce_partial (reduce_partial prod) in loop nacc etc in loop 0 msg - where - p = 1361129467683753853853498429727072845819 -- (1 << 130) - 5 {-# INLINE _poly1305_loop #-} diff --git a/ppad-poly1305.cabal b/ppad-poly1305.cabal @@ -14,6 +14,11 @@ description: A pure Poly1305 message authentication code, per [RFC8439](https://datatracker.ietf.org/doc/html/rfc8439). +flag llvm + description: Use GHC's LLVM backend. + default: False + manual: True + source-repository head type: git location: git.ppad.tech/poly1305.git @@ -23,11 +28,14 @@ library hs-source-dirs: lib ghc-options: -Wall + if flag(llvm) + ghc-options: -fllvm -O2 exposed-modules: Crypto.MAC.Poly1305 build-depends: base >= 4.9 && < 5 , bytestring >= 0.9 && < 0.13 + , ppad-fixed >= 0.1 && < 0.2 test-suite poly1305-tests type: exitcode-stdio-1.0 diff --git a/test/Main.hs b/test/Main.hs @@ -83,9 +83,9 @@ mac4 = H.testCase "mac (A.3 #4)" $ do mac5 :: TestTree mac5 = H.testCase "mac (A.3 #5)" $ do - let Just (Poly1305._roll -> r) = B16.decode $ + let Just (Poly1305._roll16 -> r) = B16.decode $ "02000000000000000000000000000000" - Just (Poly1305._roll -> s) = B16.decode $ + Just (Poly1305._roll16 -> s) = B16.decode $ "00000000000000000000000000000000" Just msg = B16.decode "ffffffffffffffffffffffffffffffff" @@ -96,9 +96,9 @@ mac5 = H.testCase "mac (A.3 #5)" $ do mac6 :: TestTree mac6 = H.testCase "mac (A.3 #6)" $ do - let Just (Poly1305._roll -> r) = B16.decode $ + let Just (Poly1305._roll16 -> r) = B16.decode $ "02000000000000000000000000000000" - Just (Poly1305._roll -> s) = B16.decode $ + Just (Poly1305._roll16 -> s) = B16.decode $ "ffffffffffffffffffffffffffffffff" Just msg = B16.decode "02000000000000000000000000000000" @@ -109,9 +109,9 @@ mac6 = H.testCase "mac (A.3 #6)" $ do mac7 :: TestTree mac7 = H.testCase "mac (A.3 #7)" $ do - let Just (Poly1305._roll -> r) = B16.decode $ + let Just (Poly1305._roll16 -> r) = B16.decode $ "01000000000000000000000000000000" - Just (Poly1305._roll -> s) = B16.decode $ + Just (Poly1305._roll16 -> s) = B16.decode $ "00000000000000000000000000000000" Just msg = B16.decode "fffffffffffffffffffffffffffffffff0ffffffffffffffffffffffffffffff11000000000000000000000000000000" @@ -122,9 +122,9 @@ mac7 = H.testCase "mac (A.3 #7)" $ do mac8 :: TestTree mac8 = H.testCase "mac (A.3 #8)" $ do - let Just (Poly1305._roll -> r) = B16.decode $ + let Just (Poly1305._roll16 -> r) = B16.decode $ "01000000000000000000000000000000" - Just (Poly1305._roll -> s) = B16.decode $ + Just (Poly1305._roll16 -> s) = B16.decode $ "00000000000000000000000000000000" Just msg = B16.decode "fffffffffffffffffffffffffffffffffbfefefefefefefefefefefefefefefe01010101010101010101010101010101" @@ -135,9 +135,9 @@ mac8 = H.testCase "mac (A.3 #8)" $ do mac9 :: TestTree mac9 = H.testCase "mac (A.3 #9)" $ do - let Just (Poly1305._roll -> r) = B16.decode $ + let Just (Poly1305._roll16 -> r) = B16.decode $ "02000000000000000000000000000000" - Just (Poly1305._roll -> s) = B16.decode $ + Just (Poly1305._roll16 -> s) = B16.decode $ "00000000000000000000000000000000" Just msg = B16.decode "fdffffffffffffffffffffffffffffff" @@ -148,9 +148,9 @@ mac9 = H.testCase "mac (A.3 #9)" $ do mac10 :: TestTree mac10 = H.testCase "mac (A.3 #10)" $ do - let Just (Poly1305._roll -> r) = B16.decode $ + let Just (Poly1305._roll16 -> r) = B16.decode $ "01000000000000000400000000000000" - Just (Poly1305._roll -> s) = B16.decode $ + Just (Poly1305._roll16 -> s) = B16.decode $ "00000000000000000000000000000000" Just msg = B16.decode "e33594d7505e43b900000000000000003394d7505e4379cd01000000000000000000000000000000000000000000000001000000000000000000000000000000" @@ -161,9 +161,9 @@ mac10 = H.testCase "mac (A.3 #10)" $ do mac11 :: TestTree mac11 = H.testCase "mac (A.3 #11)" $ do - let Just (Poly1305._roll -> r) = B16.decode $ + let Just (Poly1305._roll16 -> r) = B16.decode $ "01000000000000000400000000000000" - Just (Poly1305._roll -> s) = B16.decode $ + Just (Poly1305._roll16 -> s) = B16.decode $ "00000000000000000000000000000000" Just msg = B16.decode "e33594d7505e43b900000000000000003394d7505e4379cd010000000000000000000000000000000000000000000000"