commit 725e01232d29ad669cf47a9310778a36f9a1b9d8
parent f04dcb80e361dc63b0ea5f78f36d32e7c5a980c9
Author: Jared Tobin <jared@jtobin.io>
Date: Sun, 1 Feb 2026 15:03:03 +0400
lib: major refactoring
This amazing work was performed entirely by Opus, which, being told
"make the analogous changes I just made in the ppad-sha256 library,"
one-shotted the task.
Diffstat:
6 files changed, 1243 insertions(+), 554 deletions(-)
diff --git a/cbits/sha512_arm.c b/cbits/sha512_arm.c
@@ -52,11 +52,11 @@ static const uint64_t K[80] = {
* Process one 128-byte block using ARM SHA512 crypto instructions.
*
* state: pointer to 8 uint64_t words (a,b,c,d,e,f,g,h)
- * block: pointer to 128 bytes of message data
+ * block: pointer to 16 uint64_t words (already native endian)
*
* The state is updated in place.
*/
-void sha512_block_arm(uint64_t *state, const uint8_t *block) {
+void sha512_block_arm(uint64_t *state, const uint64_t *block) {
/* Load current hash state */
uint64x2_t ab = vld1q_u64(&state[0]);
uint64x2_t cd = vld1q_u64(&state[2]);
@@ -69,15 +69,15 @@ void sha512_block_arm(uint64_t *state, const uint8_t *block) {
uint64x2_t ef_orig = ef;
uint64x2_t gh_orig = gh;
- /* Load message and convert from big-endian */
- uint64x2_t m0 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(&block[0])));
- uint64x2_t m1 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(&block[16])));
- uint64x2_t m2 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(&block[32])));
- uint64x2_t m3 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(&block[48])));
- uint64x2_t m4 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(&block[64])));
- uint64x2_t m5 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(&block[80])));
- uint64x2_t m6 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(&block[96])));
- uint64x2_t m7 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(&block[112])));
+ /* Load message (already native endian) */
+ uint64x2_t m0 = vld1q_u64(&block[0]);
+ uint64x2_t m1 = vld1q_u64(&block[2]);
+ uint64x2_t m2 = vld1q_u64(&block[4]);
+ uint64x2_t m3 = vld1q_u64(&block[6]);
+ uint64x2_t m4 = vld1q_u64(&block[8]);
+ uint64x2_t m5 = vld1q_u64(&block[10]);
+ uint64x2_t m6 = vld1q_u64(&block[12]);
+ uint64x2_t m7 = vld1q_u64(&block[14]);
uint64x2_t tmp;
@@ -454,7 +454,7 @@ int sha512_arm_available(void) {
#else
/* Stub implementations when ARM SHA512 is not available */
-void sha512_block_arm(uint64_t *state, const uint8_t *block) {
+void sha512_block_arm(uint64_t *state, const uint64_t *block) {
(void)state;
(void)block;
/* Should never be called - use pure Haskell fallback */
diff --git a/flake.nix b/flake.nix
@@ -61,8 +61,6 @@
llvm
];
- inputsFrom = builtins.attrValues self.packages.${system};
-
doBenchmark = true;
shellHook = ''
diff --git a/lib/Crypto/Hash/SHA512.hs b/lib/Crypto/Hash/SHA512.hs
@@ -1,5 +1,9 @@
{-# OPTIONS_HADDOCK prune #-}
{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE PatternSynonyms #-}
+{-# LANGUAGE UnboxedTuples #-}
+{-# LANGUAGE UnliftedNewtypes #-}
-- |
-- Module: Crypto.Hash.SHA512
@@ -25,24 +29,29 @@ module Crypto.Hash.SHA512 (
, MAC(..)
, hmac
, Lazy.hmac_lazy
+
+ -- low-level specialized HMAC primitives
+ , _hmac_rr
+ , _hmac_rsb
) where
-import qualified Data.Bits as B
import qualified Data.ByteString as BS
import qualified Data.ByteString.Internal as BI
import qualified Data.ByteString.Unsafe as BU
-import Data.Word (Word64)
-import Crypto.Hash.SHA512.Arm
+import Data.Word (Word8, Word64)
+import Foreign.Ptr (Ptr)
+import qualified GHC.Exts as Exts
+import qualified Crypto.Hash.SHA512.Arm as Arm
import Crypto.Hash.SHA512.Internal
import qualified Crypto.Hash.SHA512.Lazy as Lazy
--- utils ---------------------------------------------------------------------
+-- utilities ------------------------------------------------------------------
fi :: (Integral a, Num b) => a -> b
fi = fromIntegral
{-# INLINE fi #-}
--- hash ----------------------------------------------------------------------
+-- hash -----------------------------------------------------------------------
-- | Compute a condensed representation of a strict bytestring via
-- SHA-512.
@@ -53,34 +62,41 @@ fi = fromIntegral
-- "<strict 512-bit message digest>"
hash :: BS.ByteString -> BS.ByteString
hash m
- | sha512_arm_available = hash_arm m
- | otherwise = cat (process m)
-
--- process a message, given the specified iv
-process_with :: Registers -> Word64 -> BS.ByteString -> Registers
-process_with acc0 el m@(BI.PS _ _ l) = finalize (go acc0 0) where
- go !acc !j
- | j + 128 <= l = go (block_hash acc (prepare_schedule (parse_block m j)))
- (j + 128)
- | otherwise = acc
-
- finalize !acc
- | len < 112 = block_hash acc (prepare_schedule (parse_block padded 0))
- | otherwise = block_hash
- (block_hash acc (prepare_schedule (parse_block padded 0)))
- (prepare_schedule (parse_block padded 128))
- where
- !remaining@(BI.PS _ _ len) = BU.unsafeDrop (l - l `rem` 128) m
- !padded = unsafe_padding remaining (el + fi l)
-
-process :: BS.ByteString -> Registers
-process = process_with iv 0
+ | Arm.sha512_arm_available = Arm.hash m
+ | otherwise = cat (_hash 0 (iv ()) m)
+{-# INLINABLE hash #-}
--- hmac ----------------------------------------------------------------------
+_hash
+ :: Word64 -- ^ extra prefix length for padding calculations
+ -> Registers -- ^ register state
+ -> BS.ByteString -- ^ input
+ -> Registers
+_hash el rs m@(BI.PS _ _ l) = do
+ let !state = _hash_blocks rs m
+ !fin@(BI.PS _ _ ll) = BU.unsafeDrop (l - l `rem` 128) m
+ !total = el + fi l
+ if ll < 112
+ then
+ let !ult = parse_pad1 fin total
+ in update state ult
+ else
+ let !(# pen, ult #) = parse_pad2 fin total
+ in update (update state pen) ult
+{-# INLINABLE _hash #-}
-data KeyAndLen = KeyAndLen
- {-# UNPACK #-} !BS.ByteString
- {-# UNPACK #-} !Int
+_hash_blocks
+ :: Registers -- ^ state
+ -> BS.ByteString -- ^ input
+ -> Registers
+_hash_blocks rs m@(BI.PS _ _ l) = loop rs 0 where
+ loop !acc !j
+ | j + 128 > l = acc
+ | otherwise =
+ let !nacc = update acc (parse m j)
+ in loop nacc (j + 128)
+{-# INLINABLE _hash_blocks #-}
+
+-- hmac ----------------------------------------------------------------------
-- | Produce a message authentication code for a strict bytestring,
-- based on the provided (strict, bytestring) key, via SHA-512.
@@ -92,22 +108,117 @@ data KeyAndLen = KeyAndLen
--
-- >>> hmac "strict bytestring key" "strict bytestring input"
-- "<strict 512-bit MAC>"
-hmac
- :: BS.ByteString -- ^ key
- -> BS.ByteString -- ^ text
- -> MAC
-hmac mk@(BI.PS _ _ l) text
- | sha512_arm_available =
- let !inner = hash_arm_with ipad 128 text
- in MAC (hash_arm (opad <> inner))
- | otherwise =
- let !ipad_state = block_hash iv (prepare_schedule (parse_block ipad 0))
- !inner = cat (process_with ipad_state 128 text)
- in MAC (hash (opad <> inner))
- where
- !step1 = k <> BS.replicate (128 - lk) 0x00
- !ipad = BS.map (B.xor 0x36) step1
- !opad = BS.map (B.xor 0x5C) step1
- !(KeyAndLen k lk)
- | l > 128 = KeyAndLen (hash mk) 64
- | otherwise = KeyAndLen mk l
+hmac :: BS.ByteString -> BS.ByteString -> MAC
+hmac k m
+ | Arm.sha512_arm_available = MAC (Arm.hmac k m)
+ | otherwise = MAC (cat (_hmac (prep_key k) m))
+{-# INLINABLE hmac #-}
+
+prep_key :: BS.ByteString -> Block
+prep_key k@(BI.PS _ _ l)
+ | l > 128 = parse_key (hash k)
+ | otherwise = parse_key k
+{-# INLINABLE prep_key #-}
+
+_hmac
+ :: Block -- ^ padded key
+ -> BS.ByteString -- ^ message
+ -> Registers
+_hmac k m =
+ let !rs0 = update (iv ()) (xor k (Exts.wordToWord64# 0x3636363636363636##))
+ !block = pad_registers_with_length (_hash 128 rs0 m)
+ !rs1 = update (iv ()) (xor k (Exts.wordToWord64# 0x5C5C5C5C5C5C5C5C##))
+ in update rs1 block
+{-# INLINABLE _hmac #-}
+
+-- the following functions are useful when we want to avoid allocating certain
+-- components of the HMAC key and message on the heap.
+
+-- Computes hmac(k, v) when k and v are Registers.
+--
+-- The 64-byte result is written to the destination pointer.
+_hmac_rr
+ :: Ptr Word64 -- ^ destination (8 Word64s)
+ -> Ptr Word64 -- ^ scratch block buffer (16 Word64s)
+ -> Registers -- ^ key
+ -> Registers -- ^ message
+ -> IO ()
+_hmac_rr rp bp k m
+ | Arm.sha512_arm_available = Arm._hmac_rr rp bp k m
+ | otherwise = do
+ let !key = pad_registers k
+ !block = pad_registers_with_length m
+ !rs = _hmac_bb key block
+ poke_registers rp rs
+{-# INLINABLE _hmac_rr #-}
+
+_hmac_bb
+ :: Block -- ^ key
+ -> Block -- ^ message
+ -> Registers
+_hmac_bb k m =
+ let !rs0 = update (iv ()) (xor k (Exts.wordToWord64# 0x3636363636363636##))
+ !rs1 = update rs0 m
+ !inner = pad_registers_with_length rs1
+ !rs2 = update (iv ()) (xor k (Exts.wordToWord64# 0x5C5C5C5C5C5C5C5C##))
+ in update rs2 inner
+{-# INLINABLE _hmac_bb #-}
+
+-- Calculate hmac(k, m) where m is the concatenation of v (registers), a
+-- separator byte, and a ByteString. This avoids allocating 'v' on the
+-- heap.
+--
+-- The 64-byte result is written to the destination pointer.
+_hmac_rsb
+ :: Ptr Word64 -- ^ destination pointer (8 x Word64)
+ -> Ptr Word64 -- ^ scratch block pointer (16 x Word64)
+ -> Registers -- ^ k
+ -> Registers -- ^ v
+ -> Word8 -- ^ separator byte
+ -> BS.ByteString -- ^ data
+ -> IO ()
+_hmac_rsb rp bp k v sep dat
+ | Arm.sha512_arm_available = Arm._hmac_rsb rp bp k v sep dat
+ | otherwise = do
+ let !key = pad_registers k
+ !rs0 = update (iv ()) (xor key (Exts.wordToWord64# 0x3636363636363636##))
+ !inner = _hash_vsb 128 rs0 v sep dat
+ !block = pad_registers_with_length inner
+ !rs1 = update (iv ()) (xor key (Exts.wordToWord64# 0x5C5C5C5C5C5C5C5C##))
+ !rs = update rs1 block
+ poke_registers rp rs
+{-# INLINABLE _hmac_rsb #-}
+
+-- hash(v || sep || dat) with a custom initial state and extra
+-- prefix length. used for producing a more specialized hmac.
+_hash_vsb
+ :: Word64 -- ^ extra prefix length
+ -> Registers -- ^ initial state
+ -> Registers -- ^ v
+ -> Word8 -- ^ sep
+ -> BS.ByteString -- ^ dat
+ -> Registers
+_hash_vsb el rs0 v sep dat@(BI.PS _ _ l)
+ | l >= 63 =
+ -- first block is complete
+ let !b0 = parse_vsb v sep dat
+ !rs1 = update rs0 b0
+ !rest = BU.unsafeDrop 63 dat
+ !rlen = l - 63
+ !rs2 = _hash_blocks rs1 rest
+ !flen = rlen `rem` 128
+ !fin = BU.unsafeDrop (rlen - flen) rest
+ !total = el + 65 + fi l
+ in if flen < 112
+ then update rs2 (parse_pad1 fin total)
+ else let !(# pen, ult #) = parse_pad2 fin total
+ in update (update rs2 pen) ult
+ | otherwise =
+ -- message < 128 bytes, goes straight to padding
+ let !total = el + 65 + fi l
+ in if 65 + l < 112
+ then update rs0 (parse_pad1_vsb v sep dat total)
+ else let !(# pen, ult #) = parse_pad2_vsb v sep dat total
+ in update (update rs0 pen) ult
+{-# INLINABLE _hash_vsb #-}
+
diff --git a/lib/Crypto/Hash/SHA512/Arm.hs b/lib/Crypto/Hash/SHA512/Arm.hs
@@ -1,5 +1,8 @@
{-# OPTIONS_HADDOCK hide #-}
{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE PatternSynonyms #-}
+{-# LANGUAGE UnboxedTuples #-}
-- |
-- Module: Crypto.Hash.SHA512.Arm
@@ -11,117 +14,254 @@
module Crypto.Hash.SHA512.Arm (
sha512_arm_available
- , hash_arm
- , hash_arm_with
+ , hash
+ , hmac
+ , _hmac_rr
+ , _hmac_rsb
) where
-import Control.Monad (unless, when)
-import qualified Data.Bits as B
import qualified Data.ByteString as BS
import qualified Data.ByteString.Internal as BI
import qualified Data.ByteString.Unsafe as BU
import Data.Word (Word8, Word64)
import Foreign.Marshal.Alloc (allocaBytes)
-import Foreign.Ptr (Ptr, plusPtr)
-import Foreign.Storable (poke, peek)
-import Crypto.Hash.SHA512.Internal (unsafe_padding)
-import System.IO.Unsafe (unsafePerformIO)
+import Foreign.Ptr (Ptr)
+import qualified GHC.Exts as Exts
+import qualified GHC.IO (IO(..))
+import qualified GHC.Ptr
+import Crypto.Hash.SHA512.Internal hiding (update)
+import System.IO.Unsafe (unsafeDupablePerformIO)
--- ffi -----------------------------------------------------------------------
+-- ffi ------------------------------------------------------------------------
foreign import ccall unsafe "sha512_block_arm"
- c_sha512_block :: Ptr Word64 -> Ptr Word8 -> IO ()
+ c_sha512_block :: Ptr Word64 -> Ptr Word64 -> IO ()
foreign import ccall unsafe "sha512_arm_available"
c_sha512_arm_available :: IO Int
--- utilities -----------------------------------------------------------------
+-- utilities ------------------------------------------------------------------
fi :: (Integral a, Num b) => a -> b
fi = fromIntegral
{-# INLINE fi #-}
+peek_registers
+ :: Ptr Word64
+ -> Registers
+peek_registers (GHC.Ptr.Ptr addr) = R
+ (Exts.indexWord64OffAddr# addr 0#)
+ (Exts.indexWord64OffAddr# addr 1#)
+ (Exts.indexWord64OffAddr# addr 2#)
+ (Exts.indexWord64OffAddr# addr 3#)
+ (Exts.indexWord64OffAddr# addr 4#)
+ (Exts.indexWord64OffAddr# addr 5#)
+ (Exts.indexWord64OffAddr# addr 6#)
+ (Exts.indexWord64OffAddr# addr 7#)
+{-# INLINE peek_registers #-}
+
+poke_block :: Ptr Word64 -> Block -> IO ()
+poke_block
+ (GHC.Ptr.Ptr addr)
+ (B w00 w01 w02 w03 w04 w05 w06 w07 w08 w09 w10 w11 w12 w13 w14 w15)
+ = GHC.IO.IO $ \s00 ->
+ case Exts.writeWord64OffAddr# addr 00# w00 s00 of { s01 ->
+ case Exts.writeWord64OffAddr# addr 01# w01 s01 of { s02 ->
+ case Exts.writeWord64OffAddr# addr 02# w02 s02 of { s03 ->
+ case Exts.writeWord64OffAddr# addr 03# w03 s03 of { s04 ->
+ case Exts.writeWord64OffAddr# addr 04# w04 s04 of { s05 ->
+ case Exts.writeWord64OffAddr# addr 05# w05 s05 of { s06 ->
+ case Exts.writeWord64OffAddr# addr 06# w06 s06 of { s07 ->
+ case Exts.writeWord64OffAddr# addr 07# w07 s07 of { s08 ->
+ case Exts.writeWord64OffAddr# addr 08# w08 s08 of { s09 ->
+ case Exts.writeWord64OffAddr# addr 09# w09 s09 of { s10 ->
+ case Exts.writeWord64OffAddr# addr 10# w10 s10 of { s11 ->
+ case Exts.writeWord64OffAddr# addr 11# w11 s11 of { s12 ->
+ case Exts.writeWord64OffAddr# addr 12# w12 s12 of { s13 ->
+ case Exts.writeWord64OffAddr# addr 13# w13 s13 of { s14 ->
+ case Exts.writeWord64OffAddr# addr 14# w14 s14 of { s15 ->
+ case Exts.writeWord64OffAddr# addr 15# w15 s15 of { s16 ->
+ (# s16, () #) }}}}}}}}}}}}}}}}
+{-# INLINE poke_block #-}
+
+-- update ---------------------------------------------------------------------
+
+update :: Ptr Word64 -> Ptr Word64 -> Block -> IO ()
+update rp bp block = do
+ poke_block bp block
+ c_sha512_block rp bp
+{-# INLINE update #-}
+
-- api -----------------------------------------------------------------------
+-- | Are ARM +sha512 extensions available?
sha512_arm_available :: Bool
-sha512_arm_available = unsafePerformIO c_sha512_arm_available /= 0
+sha512_arm_available = unsafeDupablePerformIO c_sha512_arm_available /= 0
{-# NOINLINE sha512_arm_available #-}
-hash_arm :: BS.ByteString -> BS.ByteString
-hash_arm = hash_arm_with mempty 0
-
--- | Hash with optional 128-byte prefix and extra length for padding.
-hash_arm_with
- :: BS.ByteString -- ^ optional 128-byte prefix (or empty)
- -> Word64 -- ^ extra length to add for padding
- -> BS.ByteString -- ^ message
+hash
+ :: BS.ByteString
-> BS.ByteString
-hash_arm_with prefix el m@(BI.PS fp off l) = unsafePerformIO $
- allocaBytes 64 $ \state -> do
- poke_iv state
- -- process prefix block if provided
- unless (BS.null prefix) $ do
- let BI.PS pfp poff _ = prefix
- BI.unsafeWithForeignPtr pfp $ \src ->
- c_sha512_block state (src `plusPtr` poff)
-
- go state 0
-
- let !remaining@(BI.PS _ _ rlen) = BU.unsafeDrop (l - l `rem` 128) m
- BI.PS padfp padoff _ = unsafe_padding remaining (el + fi l)
- BI.unsafeWithForeignPtr padfp $ \src -> do
- c_sha512_block state (src `plusPtr` padoff)
- when (rlen >= 112) $
- c_sha512_block state (src `plusPtr` (padoff + 128))
-
- read_state state
- where
- go !state !j
- | j + 128 <= l = do
- BI.unsafeWithForeignPtr fp $ \src ->
- c_sha512_block state (src `plusPtr` (off + j))
- go state (j + 128)
- | otherwise = pure ()
-
--- arm helpers ---------------------------------------------------------------
-
-poke_iv :: Ptr Word64 -> IO ()
-poke_iv !state = do
- poke state (0x6a09e667f3bcc908 :: Word64)
- poke (state `plusPtr` 8) (0xbb67ae8584caa73b :: Word64)
- poke (state `plusPtr` 16) (0x3c6ef372fe94f82b :: Word64)
- poke (state `plusPtr` 24) (0xa54ff53a5f1d36f1 :: Word64)
- poke (state `plusPtr` 32) (0x510e527fade682d1 :: Word64)
- poke (state `plusPtr` 40) (0x9b05688c2b3e6c1f :: Word64)
- poke (state `plusPtr` 48) (0x1f83d9abfb41bd6b :: Word64)
- poke (state `plusPtr` 56) (0x5be0cd19137e2179 :: Word64)
-
-read_state :: Ptr Word64 -> IO BS.ByteString
-read_state !state = BI.create 64 $ \out -> do
- h0 <- peek state :: IO Word64
- h1 <- peek (state `plusPtr` 8) :: IO Word64
- h2 <- peek (state `plusPtr` 16) :: IO Word64
- h3 <- peek (state `plusPtr` 24) :: IO Word64
- h4 <- peek (state `plusPtr` 32) :: IO Word64
- h5 <- peek (state `plusPtr` 40) :: IO Word64
- h6 <- peek (state `plusPtr` 48) :: IO Word64
- h7 <- peek (state `plusPtr` 56) :: IO Word64
- poke_word64be out 0 h0
- poke_word64be out 8 h1
- poke_word64be out 16 h2
- poke_word64be out 24 h3
- poke_word64be out 32 h4
- poke_word64be out 40 h5
- poke_word64be out 48 h6
- poke_word64be out 56 h7
-
-poke_word64be :: Ptr Word8 -> Int -> Word64 -> IO ()
-poke_word64be !p !off !w = do
- poke (p `plusPtr` off) (fi (w `B.unsafeShiftR` 56) :: Word8)
- poke (p `plusPtr` (off + 1)) (fi (w `B.unsafeShiftR` 48) :: Word8)
- poke (p `plusPtr` (off + 2)) (fi (w `B.unsafeShiftR` 40) :: Word8)
- poke (p `plusPtr` (off + 3)) (fi (w `B.unsafeShiftR` 32) :: Word8)
- poke (p `plusPtr` (off + 4)) (fi (w `B.unsafeShiftR` 24) :: Word8)
- poke (p `plusPtr` (off + 5)) (fi (w `B.unsafeShiftR` 16) :: Word8)
- poke (p `plusPtr` (off + 6)) (fi (w `B.unsafeShiftR` 8) :: Word8)
- poke (p `plusPtr` (off + 7)) (fi w :: Word8)
+hash m = unsafeDupablePerformIO $
+ allocaBytes 64 $ \rp ->
+ allocaBytes 128 $ \bp -> do
+ poke_registers rp (iv ())
+ _hash rp bp 0 m
+ let !rs = peek_registers rp
+ pure (cat rs)
+
+_hash
+ :: Ptr Word64 -- ^ register state
+ -> Ptr Word64 -- ^ block state
+ -> Word64 -- ^ extra prefix length, for padding calculation
+ -> BS.ByteString -- ^ input
+ -> IO ()
+_hash rp bp el m@(BI.PS _ _ l) = do
+ hash_blocks rp bp m
+ let !fin@(BI.PS _ _ ll) = BU.unsafeDrop (l - l `rem` 128) m
+ !total = el + fi l
+ if ll < 112
+ then do
+ let !ult = parse_pad1 fin total
+ update rp bp ult
+ else do
+ let !(# pen, ult #) = parse_pad2 fin total
+ update rp bp pen
+ update rp bp ult
+{-# INLINABLE _hash #-}
+
+hash_blocks
+ :: Ptr Word64 -- ^ register state
+ -> Ptr Word64 -- ^ block state
+ -> BS.ByteString -- ^ input
+ -> IO ()
+hash_blocks rp bp m@(BI.PS _ _ l) = loop 0 where
+ loop !j
+ | j + 128 > l = pure ()
+ | otherwise = do
+ let !block = parse m j
+ update rp bp block
+ loop (j + 128)
+{-# INLINE hash_blocks #-}
+
+-- hmac -----------------------------------------------------------------------
+
+hmac :: BS.ByteString -> BS.ByteString -> BS.ByteString
+hmac k m = unsafeDupablePerformIO $
+ allocaBytes 64 $ \rp ->
+ allocaBytes 128 $ \bp -> do
+ _hmac rp bp (prep_key k) m
+ pure (cat (peek_registers rp))
+
+prep_key :: BS.ByteString -> Block
+prep_key k@(BI.PS _ _ l)
+ | l > 128 = parse_key (hash k)
+ | otherwise = parse_key k
+{-# INLINABLE prep_key #-}
+
+-- assume padded key as block.
+_hmac
+ :: Ptr Word64 -- ^ register state
+ -> Ptr Word64 -- ^ block state
+ -> Block -- ^ padded key
+ -> BS.ByteString -- ^ message
+ -> IO ()
+_hmac rp bp k m = do
+ poke_registers rp (iv ())
+ update rp bp (xor k (Exts.wordToWord64# 0x3636363636363636##))
+ _hash rp bp 128 m
+ let !block = pad_registers_with_length (peek_registers rp)
+ poke_registers rp (iv ())
+ update rp bp (xor k (Exts.wordToWord64# 0x5C5C5C5C5C5C5C5C##))
+ update rp bp block
+{-# NOINLINE _hmac #-}
+
+_hmac_rr
+ :: Ptr Word64 -- ^ register state
+ -> Ptr Word64 -- ^ block state
+ -> Registers -- ^ key
+ -> Registers -- ^ message
+ -> IO ()
+_hmac_rr rp bp k m = do
+ let !key = pad_registers k
+ !block = pad_registers_with_length m
+ _hmac_bb rp bp key block
+{-# INLINABLE _hmac_rr #-}
+
+_hmac_bb
+ :: Ptr Word64 -- ^ register state
+ -> Ptr Word64 -- ^ block state
+ -> Block -- ^ padded key
+ -> Block -- ^ padded message
+ -> IO ()
+_hmac_bb rp bp k m = do
+ poke_registers rp (iv ())
+ update rp bp (xor k (Exts.wordToWord64# 0x3636363636363636##))
+ update rp bp m
+ let !inner = pad_registers_with_length (peek_registers rp)
+ poke_registers rp (iv ())
+ update rp bp (xor k (Exts.wordToWord64# 0x5C5C5C5C5C5C5C5C##))
+ update rp bp inner
+{-# INLINABLE _hmac_bb #-}
+
+-- | HMAC(key, v || sep || data) using ARM crypto extensions.
+-- Writes result to destination pointer.
+_hmac_rsb
+ :: Ptr Word64 -- ^ destination (8 Word64s)
+ -> Ptr Word64 -- ^ scratch block buffer (16 Word64s)
+ -> Registers -- ^ key
+ -> Registers -- ^ v
+ -> Word8 -- ^ separator byte
+ -> BS.ByteString -- ^ data
+ -> IO ()
+_hmac_rsb rp bp k v sep dat = do
+ poke_registers rp (iv ())
+ let !key = pad_registers k
+ update rp bp (xor key (Exts.wordToWord64# 0x3636363636363636##))
+ _hash_vsb rp bp 128 v sep dat
+ let !inner = pad_registers_with_length (peek_registers rp)
+ poke_registers rp (iv ())
+ update rp bp (xor key (Exts.wordToWord64# 0x5C5C5C5C5C5C5C5C##))
+ update rp bp inner
+{-# INLINABLE _hmac_rsb #-}
+
+-- | Hash (v || sep || dat) with ARM crypto extensions.
+-- Assumes register state already initialized at rp.
+_hash_vsb
+ :: Ptr Word64 -- ^ register state
+ -> Ptr Word64 -- ^ block buffer
+ -> Word64 -- ^ extra prefix length
+ -> Registers -- ^ v
+ -> Word8 -- ^ sep
+ -> BS.ByteString -- ^ dat
+ -> IO ()
+_hash_vsb rp bp el v sep dat@(BI.PS _ _ l)
+ | l >= 63 = do
+ -- first block is complete: v || sep || dat[0:63]
+ let !b0 = parse_vsb v sep dat
+ update rp bp b0
+ -- hash remaining complete blocks from dat[63:]
+ let !rest = BU.unsafeDrop 63 dat
+ !restLen = l - 63
+ hash_blocks rp bp rest
+ -- handle final padding
+ let !finLen = restLen `rem` 128
+ !fin = BU.unsafeDrop (restLen - finLen) rest
+ !total = el + 65 + fi l
+ if finLen < 112
+ then update rp bp (parse_pad1 fin total)
+ else do
+ let !(# pen, ult #) = parse_pad2 fin total
+ update rp bp pen
+ update rp bp ult
+ | otherwise = do
+ -- message < 128 bytes total, straight to padding
+ let !total = el + 65 + fi l
+ if 65 + l < 112
+ then update rp bp (parse_pad1_vsb v sep dat total)
+ else do
+ let !(# pen, ult #) = parse_pad2_vsb v sep dat total
+ update rp bp pen
+ update rp bp ult
+{-# INLINABLE _hash_vsb #-}
+
diff --git a/lib/Crypto/Hash/SHA512/Internal.hs b/lib/Crypto/Hash/SHA512/Internal.hs
@@ -2,7 +2,11 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
-{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE PatternSynonyms #-}
+{-# LANGUAGE UnboxedTuples #-}
+{-# LANGUAGE UnliftedNewtypes #-}
+{-# LANGUAGE ViewPatterns #-}
-- |
-- Module: Crypto.Hash.SHA512.Internal
@@ -13,40 +17,51 @@
-- SHA-512 internals.
module Crypto.Hash.SHA512.Internal (
- Registers(..)
- , Block(..)
- , Schedule(..)
-
+ -- * Types
+ Block(B, ..)
+ , Registers(R, ..)
, MAC(..)
- , iv
- , block_hash
- , prepare_schedule
- , parse_block
+ -- * Parsing
+ , parse
+ , parse_pad1
+ , parse_pad2
+
+ -- * Serializing
, cat
- , unsafe_hash_alg
- , unsafe_parse
- , unsafe_padding
+ , cat_into
+
+ -- * Hash function internals
+ , update
+ , iv
+
+ -- * HMAC utilities
+ , pad_registers
+ , pad_registers_with_length
+ , xor
+ , parse_key
- , WSPair(..)
- , unsafe_word64be
- , unsafe_parseWsPair
+ -- * HMAC-DRBG utilities
+ , parse_vsb
+ , parse_pad1_vsb
+ , parse_pad2_vsb
+
+ -- * Pointer-based IO utilities
+ , poke_registers
) where
import qualified Data.Bits as B
-import Data.Bits ((.|.), (.&.))
import qualified Data.ByteString as BS
-import qualified Data.ByteString.Builder as BSB
-import qualified Data.ByteString.Builder.Extra as BE
import qualified Data.ByteString.Internal as BI
import qualified Data.ByteString.Unsafe as BU
import Data.Word (Word8, Word64)
-import Foreign.ForeignPtr (plusForeignPtr)
-import Foreign.Marshal.Utils (copyBytes, fillBytes)
-import Foreign.Ptr (Ptr, plusPtr)
-import Foreign.Storable (poke)
+import qualified GHC.IO (IO(..))
+import GHC.Ptr (Ptr(..))
+import GHC.Exts (Int#)
+import qualified GHC.Exts as Exts
+import qualified GHC.Word (Word64(..), Word8(..))
--- MAC type ------------------------------------------------------------------
+-- types ----------------------------------------------------------------------
-- | A message authentication code.
--
@@ -71,139 +86,52 @@ instance Eq MAC where
| la /= lb = False
| otherwise = BS.foldl' (B..|.) 0 (BS.packZipWith B.xor a b) == 0
--- preliminary utils ---------------------------------------------------------
-
-fi :: (Integral a, Num b) => a -> b
-fi = fromIntegral
-{-# INLINE fi #-}
-
--- parse strict ByteString in BE order to Word64
+-- | SHA512 block.
+newtype Block = Block
+ (# Exts.Word64#, Exts.Word64#, Exts.Word64#, Exts.Word64#
+ , Exts.Word64#, Exts.Word64#, Exts.Word64#, Exts.Word64#
+ , Exts.Word64#, Exts.Word64#, Exts.Word64#, Exts.Word64#
+ , Exts.Word64#, Exts.Word64#, Exts.Word64#, Exts.Word64#
+ #)
+
+pattern B
+ :: Exts.Word64# -> Exts.Word64# -> Exts.Word64# -> Exts.Word64#
+ -> Exts.Word64# -> Exts.Word64# -> Exts.Word64# -> Exts.Word64#
+ -> Exts.Word64# -> Exts.Word64# -> Exts.Word64# -> Exts.Word64#
+ -> Exts.Word64# -> Exts.Word64# -> Exts.Word64# -> Exts.Word64#
+ -> Block
+pattern B w00 w01 w02 w03 w04 w05 w06 w07 w08 w09 w10 w11 w12 w13 w14 w15 =
+ Block
+ (# w00, w01, w02, w03, w04, w05, w06, w07
+ , w08, w09, w10, w11, w12, w13, w14, w15
+ #)
+{-# COMPLETE B #-}
+
+-- | SHA512 state.
+newtype Registers = Registers
+ (# Exts.Word64#, Exts.Word64#, Exts.Word64#, Exts.Word64#
+ , Exts.Word64#, Exts.Word64#, Exts.Word64#, Exts.Word64#
+ #)
+
+pattern R
+ :: Exts.Word64# -> Exts.Word64# -> Exts.Word64# -> Exts.Word64#
+ -> Exts.Word64# -> Exts.Word64# -> Exts.Word64# -> Exts.Word64#
+ -> Registers
+pattern R w00 w01 w02 w03 w04 w05 w06 w07 = Registers
+ (# w00, w01, w02, w03
+ , w04, w05, w06, w07
+ #)
+{-# COMPLETE R #-}
+
+-- parsing (nonfinal input) ---------------------------------------------------
+
+-- | Given a bytestring and offset, parse a full block.
--
--- invariant:
--- the input bytestring is at least 64 bits in length
-unsafe_word64be :: BS.ByteString -> Word64
-unsafe_word64be s =
- (fi (s `BU.unsafeIndex` 0) `B.unsafeShiftL` 56) .|.
- (fi (s `BU.unsafeIndex` 1) `B.unsafeShiftL` 48) .|.
- (fi (s `BU.unsafeIndex` 2) `B.unsafeShiftL` 40) .|.
- (fi (s `BU.unsafeIndex` 3) `B.unsafeShiftL` 32) .|.
- (fi (s `BU.unsafeIndex` 4) `B.unsafeShiftL` 24) .|.
- (fi (s `BU.unsafeIndex` 5) `B.unsafeShiftL` 16) .|.
- (fi (s `BU.unsafeIndex` 6) `B.unsafeShiftL` 8) .|.
- (fi (s `BU.unsafeIndex` 7) )
-{-# INLINE unsafe_word64be #-}
-
-data WSPair = WSPair {-# UNPACK #-} !Word64 {-# UNPACK #-} !BS.ByteString
-
--- variant of Data.ByteString.splitAt that behaves like an incremental
--- Word64 parser
---
--- invariant:
--- the input bytestring is at least 64 bits in length
-unsafe_parseWsPair :: BS.ByteString -> WSPair
-unsafe_parseWsPair (BI.BS x l) =
- WSPair (unsafe_word64be (BI.BS x 8)) (BI.BS (plusForeignPtr x 8) (l - 8))
-{-# INLINE unsafe_parseWsPair #-}
-
--- builder realization strategy
-
-to_strict_small :: BSB.Builder -> BS.ByteString
-to_strict_small = BS.toStrict . BE.toLazyByteStringWith
- (BE.safeStrategy 128 BE.smallChunkSize) mempty
-
--- functions and constants ---------------------------------------------------
--- https://datatracker.ietf.org/doc/html/rfc6234#section-5.1
-
-ch :: Word64 -> Word64 -> Word64 -> Word64
-ch x y z = (x .&. y) `B.xor` (B.complement x .&. z)
-{-# INLINE ch #-}
-
-maj :: Word64 -> Word64 -> Word64 -> Word64
-maj x y z = (x .&. (y .|. z)) .|. (y .&. z)
-{-# INLINE maj #-}
-
-bsig0 :: Word64 -> Word64
-bsig0 x = B.rotateR x 28 `B.xor` B.rotateR x 34 `B.xor` B.rotateR x 39
-{-# INLINE bsig0 #-}
-
-bsig1 :: Word64 -> Word64
-bsig1 x = B.rotateR x 14 `B.xor` B.rotateR x 18 `B.xor` B.rotateR x 41
-{-# INLINE bsig1 #-}
-
-ssig0 :: Word64 -> Word64
-ssig0 x = B.rotateR x 1 `B.xor` B.rotateR x 8 `B.xor` B.unsafeShiftR x 7
-{-# INLINE ssig0 #-}
-
-ssig1 :: Word64 -> Word64
-ssig1 x = B.rotateR x 19 `B.xor` B.rotateR x 61 `B.xor` B.unsafeShiftR x 6
-{-# INLINE ssig1 #-}
-
-data Schedule = Schedule {
- w00 :: !Word64, w01 :: !Word64, w02 :: !Word64, w03 :: !Word64
- , w04 :: !Word64, w05 :: !Word64, w06 :: !Word64, w07 :: !Word64
- , w08 :: !Word64, w09 :: !Word64, w10 :: !Word64, w11 :: !Word64
- , w12 :: !Word64, w13 :: !Word64, w14 :: !Word64, w15 :: !Word64
- , w16 :: !Word64, w17 :: !Word64, w18 :: !Word64, w19 :: !Word64
- , w20 :: !Word64, w21 :: !Word64, w22 :: !Word64, w23 :: !Word64
- , w24 :: !Word64, w25 :: !Word64, w26 :: !Word64, w27 :: !Word64
- , w28 :: !Word64, w29 :: !Word64, w30 :: !Word64, w31 :: !Word64
- , w32 :: !Word64, w33 :: !Word64, w34 :: !Word64, w35 :: !Word64
- , w36 :: !Word64, w37 :: !Word64, w38 :: !Word64, w39 :: !Word64
- , w40 :: !Word64, w41 :: !Word64, w42 :: !Word64, w43 :: !Word64
- , w44 :: !Word64, w45 :: !Word64, w46 :: !Word64, w47 :: !Word64
- , w48 :: !Word64, w49 :: !Word64, w50 :: !Word64, w51 :: !Word64
- , w52 :: !Word64, w53 :: !Word64, w54 :: !Word64, w55 :: !Word64
- , w56 :: !Word64, w57 :: !Word64, w58 :: !Word64, w59 :: !Word64
- , w60 :: !Word64, w61 :: !Word64, w62 :: !Word64, w63 :: !Word64
- , w64 :: !Word64, w65 :: !Word64, w66 :: !Word64, w67 :: !Word64
- , w68 :: !Word64, w69 :: !Word64, w70 :: !Word64, w71 :: !Word64
- , w72 :: !Word64, w73 :: !Word64, w74 :: !Word64, w75 :: !Word64
- , w76 :: !Word64, w77 :: !Word64, w78 :: !Word64, w79 :: !Word64
- }
-
--- initialization ------------------------------------------------------------
--- https://datatracker.ietf.org/doc/html/rfc6234#section-6.1
-
-data Registers = Registers {
- h0 :: !Word64, h1 :: !Word64, h2 :: !Word64, h3 :: !Word64
- , h4 :: !Word64, h5 :: !Word64, h6 :: !Word64, h7 :: !Word64
- }
-
--- first 64 bits of the fractional parts of the square roots of the
--- first eight primes
-iv :: Registers
-iv = Registers
- 0x6a09e667f3bcc908 0xbb67ae8584caa73b 0x3c6ef372fe94f82b 0xa54ff53a5f1d36f1
- 0x510e527fade682d1 0x9b05688c2b3e6c1f 0x1f83d9abfb41bd6b 0x5be0cd19137e2179
-
--- processing ----------------------------------------------------------------
--- https://datatracker.ietf.org/doc/html/rfc6234#section-6.2
-
-data Block = Block {
- m00 :: !Word64, m01 :: !Word64, m02 :: !Word64, m03 :: !Word64
- , m04 :: !Word64, m05 :: !Word64, m06 :: !Word64, m07 :: !Word64
- , m08 :: !Word64, m09 :: !Word64, m10 :: !Word64, m11 :: !Word64
- , m12 :: !Word64, m13 :: !Word64, m14 :: !Word64, m15 :: !Word64
- }
-
--- given a bytestring and offset, parse word64. length not checked.
-word64be :: BS.ByteString -> Int -> Word64
-word64be bs off =
- (fi (bs `BU.unsafeIndex` off) `B.unsafeShiftL` 56) .|.
- (fi (bs `BU.unsafeIndex` (off + 1)) `B.unsafeShiftL` 48) .|.
- (fi (bs `BU.unsafeIndex` (off + 2)) `B.unsafeShiftL` 40) .|.
- (fi (bs `BU.unsafeIndex` (off + 3)) `B.unsafeShiftL` 32) .|.
- (fi (bs `BU.unsafeIndex` (off + 4)) `B.unsafeShiftL` 24) .|.
- (fi (bs `BU.unsafeIndex` (off + 5)) `B.unsafeShiftL` 16) .|.
- (fi (bs `BU.unsafeIndex` (off + 6)) `B.unsafeShiftL` 8) .|.
- (fi (bs `BU.unsafeIndex` (off + 7)))
-{-# INLINE word64be #-}
-
--- given a bytestring and block offset, parse block. length not checked.
-parse_block :: BS.ByteString -> Int -> Block
-parse_block bs m = Block
+-- The length of the input is not checked.
+parse :: BS.ByteString -> Int -> Block
+parse bs m = B
(word64be bs m)
- (word64be bs (m + 8))
+ (word64be bs (m + 08))
(word64be bs (m + 16))
(word64be bs (m + 24))
(word64be bs (m + 32))
@@ -218,240 +146,761 @@ parse_block bs m = Block
(word64be bs (m + 104))
(word64be bs (m + 112))
(word64be bs (m + 120))
-{-# INLINE parse_block #-}
+{-# INLINE parse #-}
--- parse strict bytestring to block
+-- | Parse the 64-bit word encoded at the given offset.
--
--- invariant:
--- the input bytestring is exactly 1024 bits long
-unsafe_parse :: BS.ByteString -> Block
-unsafe_parse bs =
- let !(WSPair m00 t00) = unsafe_parseWsPair bs
- !(WSPair m01 t01) = unsafe_parseWsPair t00
- !(WSPair m02 t02) = unsafe_parseWsPair t01
- !(WSPair m03 t03) = unsafe_parseWsPair t02
- !(WSPair m04 t04) = unsafe_parseWsPair t03
- !(WSPair m05 t05) = unsafe_parseWsPair t04
- !(WSPair m06 t06) = unsafe_parseWsPair t05
- !(WSPair m07 t07) = unsafe_parseWsPair t06
- !(WSPair m08 t08) = unsafe_parseWsPair t07
- !(WSPair m09 t09) = unsafe_parseWsPair t08
- !(WSPair m10 t10) = unsafe_parseWsPair t09
- !(WSPair m11 t11) = unsafe_parseWsPair t10
- !(WSPair m12 t12) = unsafe_parseWsPair t11
- !(WSPair m13 t13) = unsafe_parseWsPair t12
- !(WSPair m14 t14) = unsafe_parseWsPair t13
- !(WSPair m15 _) = unsafe_parseWsPair t14
- in Block {..}
-
--- RFC 6234 6.2 step 1
-prepare_schedule :: Block -> Schedule
-prepare_schedule Block {..} = Schedule {..} where
- w00 = m00; w01 = m01; w02 = m02; w03 = m03
- w04 = m04; w05 = m05; w06 = m06; w07 = m07
- w08 = m08; w09 = m09; w10 = m10; w11 = m11
- w12 = m12; w13 = m13; w14 = m14; w15 = m15
- w16 = ssig1 w14 + w09 + ssig0 w01 + w00
- w17 = ssig1 w15 + w10 + ssig0 w02 + w01
- w18 = ssig1 w16 + w11 + ssig0 w03 + w02
- w19 = ssig1 w17 + w12 + ssig0 w04 + w03
- w20 = ssig1 w18 + w13 + ssig0 w05 + w04
- w21 = ssig1 w19 + w14 + ssig0 w06 + w05
- w22 = ssig1 w20 + w15 + ssig0 w07 + w06
- w23 = ssig1 w21 + w16 + ssig0 w08 + w07
- w24 = ssig1 w22 + w17 + ssig0 w09 + w08
- w25 = ssig1 w23 + w18 + ssig0 w10 + w09
- w26 = ssig1 w24 + w19 + ssig0 w11 + w10
- w27 = ssig1 w25 + w20 + ssig0 w12 + w11
- w28 = ssig1 w26 + w21 + ssig0 w13 + w12
- w29 = ssig1 w27 + w22 + ssig0 w14 + w13
- w30 = ssig1 w28 + w23 + ssig0 w15 + w14
- w31 = ssig1 w29 + w24 + ssig0 w16 + w15
- w32 = ssig1 w30 + w25 + ssig0 w17 + w16
- w33 = ssig1 w31 + w26 + ssig0 w18 + w17
- w34 = ssig1 w32 + w27 + ssig0 w19 + w18
- w35 = ssig1 w33 + w28 + ssig0 w20 + w19
- w36 = ssig1 w34 + w29 + ssig0 w21 + w20
- w37 = ssig1 w35 + w30 + ssig0 w22 + w21
- w38 = ssig1 w36 + w31 + ssig0 w23 + w22
- w39 = ssig1 w37 + w32 + ssig0 w24 + w23
- w40 = ssig1 w38 + w33 + ssig0 w25 + w24
- w41 = ssig1 w39 + w34 + ssig0 w26 + w25
- w42 = ssig1 w40 + w35 + ssig0 w27 + w26
- w43 = ssig1 w41 + w36 + ssig0 w28 + w27
- w44 = ssig1 w42 + w37 + ssig0 w29 + w28
- w45 = ssig1 w43 + w38 + ssig0 w30 + w29
- w46 = ssig1 w44 + w39 + ssig0 w31 + w30
- w47 = ssig1 w45 + w40 + ssig0 w32 + w31
- w48 = ssig1 w46 + w41 + ssig0 w33 + w32
- w49 = ssig1 w47 + w42 + ssig0 w34 + w33
- w50 = ssig1 w48 + w43 + ssig0 w35 + w34
- w51 = ssig1 w49 + w44 + ssig0 w36 + w35
- w52 = ssig1 w50 + w45 + ssig0 w37 + w36
- w53 = ssig1 w51 + w46 + ssig0 w38 + w37
- w54 = ssig1 w52 + w47 + ssig0 w39 + w38
- w55 = ssig1 w53 + w48 + ssig0 w40 + w39
- w56 = ssig1 w54 + w49 + ssig0 w41 + w40
- w57 = ssig1 w55 + w50 + ssig0 w42 + w41
- w58 = ssig1 w56 + w51 + ssig0 w43 + w42
- w59 = ssig1 w57 + w52 + ssig0 w44 + w43
- w60 = ssig1 w58 + w53 + ssig0 w45 + w44
- w61 = ssig1 w59 + w54 + ssig0 w46 + w45
- w62 = ssig1 w60 + w55 + ssig0 w47 + w46
- w63 = ssig1 w61 + w56 + ssig0 w48 + w47
- w64 = ssig1 w62 + w57 + ssig0 w49 + w48
- w65 = ssig1 w63 + w58 + ssig0 w50 + w49
- w66 = ssig1 w64 + w59 + ssig0 w51 + w50
- w67 = ssig1 w65 + w60 + ssig0 w52 + w51
- w68 = ssig1 w66 + w61 + ssig0 w53 + w52
- w69 = ssig1 w67 + w62 + ssig0 w54 + w53
- w70 = ssig1 w68 + w63 + ssig0 w55 + w54
- w71 = ssig1 w69 + w64 + ssig0 w56 + w55
- w72 = ssig1 w70 + w65 + ssig0 w57 + w56
- w73 = ssig1 w71 + w66 + ssig0 w58 + w57
- w74 = ssig1 w72 + w67 + ssig0 w59 + w58
- w75 = ssig1 w73 + w68 + ssig0 w60 + w59
- w76 = ssig1 w74 + w69 + ssig0 w61 + w60
- w77 = ssig1 w75 + w70 + ssig0 w62 + w61
- w78 = ssig1 w76 + w71 + ssig0 w63 + w62
- w79 = ssig1 w77 + w72 + ssig0 w64 + w63
-
--- RFC 6234 6.2 steps 2, 3, 4
-block_hash :: Registers -> Schedule -> Registers
-block_hash r00@Registers {..} Schedule {..} =
- -- constants are the first 64 bits of the fractional parts of the
- -- cube roots of the first eighty prime numbers
- let r01 = step r00 0x428a2f98d728ae22 w00
- r02 = step r01 0x7137449123ef65cd w01
- r03 = step r02 0xb5c0fbcfec4d3b2f w02
- r04 = step r03 0xe9b5dba58189dbbc w03
- r05 = step r04 0x3956c25bf348b538 w04
- r06 = step r05 0x59f111f1b605d019 w05
- r07 = step r06 0x923f82a4af194f9b w06
- r08 = step r07 0xab1c5ed5da6d8118 w07
- r09 = step r08 0xd807aa98a3030242 w08
- r10 = step r09 0x12835b0145706fbe w09
- r11 = step r10 0x243185be4ee4b28c w10
- r12 = step r11 0x550c7dc3d5ffb4e2 w11
- r13 = step r12 0x72be5d74f27b896f w12
- r14 = step r13 0x80deb1fe3b1696b1 w13
- r15 = step r14 0x9bdc06a725c71235 w14
- r16 = step r15 0xc19bf174cf692694 w15
- r17 = step r16 0xe49b69c19ef14ad2 w16
- r18 = step r17 0xefbe4786384f25e3 w17
- r19 = step r18 0x0fc19dc68b8cd5b5 w18
- r20 = step r19 0x240ca1cc77ac9c65 w19
- r21 = step r20 0x2de92c6f592b0275 w20
- r22 = step r21 0x4a7484aa6ea6e483 w21
- r23 = step r22 0x5cb0a9dcbd41fbd4 w22
- r24 = step r23 0x76f988da831153b5 w23
- r25 = step r24 0x983e5152ee66dfab w24
- r26 = step r25 0xa831c66d2db43210 w25
- r27 = step r26 0xb00327c898fb213f w26
- r28 = step r27 0xbf597fc7beef0ee4 w27
- r29 = step r28 0xc6e00bf33da88fc2 w28
- r30 = step r29 0xd5a79147930aa725 w29
- r31 = step r30 0x06ca6351e003826f w30
- r32 = step r31 0x142929670a0e6e70 w31
- r33 = step r32 0x27b70a8546d22ffc w32
- r34 = step r33 0x2e1b21385c26c926 w33
- r35 = step r34 0x4d2c6dfc5ac42aed w34
- r36 = step r35 0x53380d139d95b3df w35
- r37 = step r36 0x650a73548baf63de w36
- r38 = step r37 0x766a0abb3c77b2a8 w37
- r39 = step r38 0x81c2c92e47edaee6 w38
- r40 = step r39 0x92722c851482353b w39
- r41 = step r40 0xa2bfe8a14cf10364 w40
- r42 = step r41 0xa81a664bbc423001 w41
- r43 = step r42 0xc24b8b70d0f89791 w42
- r44 = step r43 0xc76c51a30654be30 w43
- r45 = step r44 0xd192e819d6ef5218 w44
- r46 = step r45 0xd69906245565a910 w45
- r47 = step r46 0xf40e35855771202a w46
- r48 = step r47 0x106aa07032bbd1b8 w47
- r49 = step r48 0x19a4c116b8d2d0c8 w48
- r50 = step r49 0x1e376c085141ab53 w49
- r51 = step r50 0x2748774cdf8eeb99 w50
- r52 = step r51 0x34b0bcb5e19b48a8 w51
- r53 = step r52 0x391c0cb3c5c95a63 w52
- r54 = step r53 0x4ed8aa4ae3418acb w53
- r55 = step r54 0x5b9cca4f7763e373 w54
- r56 = step r55 0x682e6ff3d6b2b8a3 w55
- r57 = step r56 0x748f82ee5defb2fc w56
- r58 = step r57 0x78a5636f43172f60 w57
- r59 = step r58 0x84c87814a1f0ab72 w58
- r60 = step r59 0x8cc702081a6439ec w59
- r61 = step r60 0x90befffa23631e28 w60
- r62 = step r61 0xa4506cebde82bde9 w61
- r63 = step r62 0xbef9a3f7b2c67915 w62
- r64 = step r63 0xc67178f2e372532b w63
- r65 = step r64 0xca273eceea26619c w64
- r66 = step r65 0xd186b8c721c0c207 w65
- r67 = step r66 0xeada7dd6cde0eb1e w66
- r68 = step r67 0xf57d4f7fee6ed178 w67
- r69 = step r68 0x06f067aa72176fba w68
- r70 = step r69 0x0a637dc5a2c898a6 w69
- r71 = step r70 0x113f9804bef90dae w70
- r72 = step r71 0x1b710b35131c471b w71
- r73 = step r72 0x28db77f523047d84 w72
- r74 = step r73 0x32caab7b40c72493 w73
- r75 = step r74 0x3c9ebe0a15c9bebc w74
- r76 = step r75 0x431d67c49c100d4c w75
- r77 = step r76 0x4cc5d4becb3e42b6 w76
- r78 = step r77 0x597f299cfc657e2a w77
- r79 = step r78 0x5fcb6fab3ad6faec w78
- r80 = step r79 0x6c44198c4a475817 w79
- !(Registers a b c d e f g h) = r80
- in Registers
- (a + h0) (b + h1) (c + h2) (d + h3)
- (e + h4) (f + h5) (g + h6) (h + h7)
-
-step :: Registers -> Word64 -> Word64 -> Registers
-step (Registers a b c d e f g h) k w =
- let t1 = h + bsig1 e + ch e f g + k + w
- t2 = bsig0 a + maj a b c
- in Registers (t1 + t2) a b c (d + t1) e f g
-{-# INLINE step #-}
-
--- RFC 6234 6.2 block pipeline
---
--- invariant:
--- the input bytestring is exactly 1024 bits in length
-unsafe_hash_alg :: Registers -> BS.ByteString -> Registers
-unsafe_hash_alg rs bs = block_hash rs (prepare_schedule (unsafe_parse bs))
+-- The length of the input is not checked.
+word64be :: BS.ByteString -> Int -> Exts.Word64#
+word64be bs m =
+ let !(GHC.Word.W8# r0) = BU.unsafeIndex bs m
+ !(GHC.Word.W8# r1) = BU.unsafeIndex bs (m + 1)
+ !(GHC.Word.W8# r2) = BU.unsafeIndex bs (m + 2)
+ !(GHC.Word.W8# r3) = BU.unsafeIndex bs (m + 3)
+ !(GHC.Word.W8# r4) = BU.unsafeIndex bs (m + 4)
+ !(GHC.Word.W8# r5) = BU.unsafeIndex bs (m + 5)
+ !(GHC.Word.W8# r6) = BU.unsafeIndex bs (m + 6)
+ !(GHC.Word.W8# r7) = BU.unsafeIndex bs (m + 7)
+ !w0 = Exts.word8ToWord# r0
+ !w1 = Exts.word8ToWord# r1
+ !w2 = Exts.word8ToWord# r2
+ !w3 = Exts.word8ToWord# r3
+ !w4 = Exts.word8ToWord# r4
+ !w5 = Exts.word8ToWord# r5
+ !w6 = Exts.word8ToWord# r6
+ !w7 = Exts.word8ToWord# r7
+ !s0 = Exts.uncheckedShiftL# w0 56#
+ !s1 = Exts.uncheckedShiftL# w1 48#
+ !s2 = Exts.uncheckedShiftL# w2 40#
+ !s3 = Exts.uncheckedShiftL# w3 32#
+ !s4 = Exts.uncheckedShiftL# w4 24#
+ !s5 = Exts.uncheckedShiftL# w5 16#
+ !s6 = Exts.uncheckedShiftL# w6 8#
+ in Exts.wordToWord64#
+ (s0 `Exts.or#` s1 `Exts.or#` s2 `Exts.or#` s3 `Exts.or#`
+ s4 `Exts.or#` s5 `Exts.or#` s6 `Exts.or#` w7)
+{-# INLINE word64be #-}
--- register concatenation
-cat :: Registers -> BS.ByteString
-cat Registers {..} = to_strict_small $
- BSB.word64BE h0 <> BSB.word64BE h1 <> BSB.word64BE h2 <> BSB.word64BE h3
- <> BSB.word64BE h4 <> BSB.word64BE h5 <> BSB.word64BE h6 <> BSB.word64BE h7
-
--- RFC 6234 4.1 message padding
-unsafe_padding :: BS.ByteString -> Word64 -> BS.ByteString
-unsafe_padding (BI.PS fp off r) len
- | r < 112 = BI.unsafeCreate 128 $ \p -> do
- BI.unsafeWithForeignPtr fp $ \src ->
- copyBytes p (src `plusPtr` off) r
- poke (p `plusPtr` r) (0x80 :: Word8)
- fillBytes (p `plusPtr` (r + 1)) 0 (111 - r)
- poke_word64be (p `plusPtr` 112) 0
- poke_word64be (p `plusPtr` 120) (len * 8)
- | otherwise = BI.unsafeCreate 256 $ \p -> do
- BI.unsafeWithForeignPtr fp $ \src ->
- copyBytes p (src `plusPtr` off) r
- poke (p `plusPtr` r) (0x80 :: Word8)
- fillBytes (p `plusPtr` (r + 1)) 0 (127 - r)
- fillBytes (p `plusPtr` 128) 0 112
- poke_word64be (p `plusPtr` 240) 0
- poke_word64be (p `plusPtr` 248) (len * 8)
+-- parsing (final input) ------------------------------------------------------
+
+-- | Parse the final chunk of an input message, assuming it is less than
+-- 112 bytes in length (unchecked!).
+--
+-- Returns one block consisting of the chunk and padding.
+parse_pad1
+ :: BS.ByteString -- ^ final input chunk (< 112 bytes)
+ -> Word64 -- ^ length of all input
+ -> Block -- ^ resulting block
+parse_pad1 bs l =
+ let !bits = l * 8
+ !(GHC.Word.W64# llo) = bits
+ in B (w64_at bs 000) (w64_at bs 008) (w64_at bs 016) (w64_at bs 024)
+ (w64_at bs 032) (w64_at bs 040) (w64_at bs 048) (w64_at bs 056)
+ (w64_at bs 064) (w64_at bs 072) (w64_at bs 080) (w64_at bs 088)
+ (w64_at bs 096) (w64_at bs 104) (Exts.wordToWord64# 0##) llo
+{-# INLINABLE parse_pad1 #-}
+
+-- | Parse the final chunk of an input message, assuming it is at least 112
+-- bytes in length (unchecked!).
+--
+-- Returns two blocks consisting of the chunk and padding.
+parse_pad2
+ :: BS.ByteString -- ^ final input chunk (>= 112 bytes)
+ -> Word64 -- ^ length of all input
+ -> (# Block, Block #) -- ^ resulting blocks
+parse_pad2 bs l =
+ let !bits = l * 8
+ !z = Exts.wordToWord64# 0##
+ !(GHC.Word.W64# llo) = bits
+ !block0 = B
+ (w64_at bs 000) (w64_at bs 008) (w64_at bs 016) (w64_at bs 024)
+ (w64_at bs 032) (w64_at bs 040) (w64_at bs 048) (w64_at bs 056)
+ (w64_at bs 064) (w64_at bs 072) (w64_at bs 080) (w64_at bs 088)
+ (w64_at bs 096) (w64_at bs 104) (w64_at bs 112) (w64_at bs 120)
+ !block1 = B z z z z z z z z z z z z z z z llo
+ in (# block0, block1 #)
+{-# INLINABLE parse_pad2 #-}
+
+-- | Return the byte at offset 'i', or a padding separator or zero byte
+-- beyond the input bounds, as an unboxed word.
+w8_as_w64_at
+ :: BS.ByteString -- ^ input chunk
+ -> Int -- ^ offset
+ -> Exts.Word#
+w8_as_w64_at bs@(BI.PS _ _ l) i = case compare i l of
+ LT -> let !(GHC.Word.W8# w) = BU.unsafeIndex bs i
+ in Exts.word8ToWord# w
+ EQ -> 0x80##
+ _ -> 0x00##
+{-# INLINE w8_as_w64_at #-}
+
+-- | Return the 64-bit word encoded by eight consecutive bytes at the
+-- provided offset.
+w64_at
+ :: BS.ByteString
+ -> Int
+ -> Exts.Word64#
+w64_at bs i =
+ let !w0 = w8_as_w64_at bs i `Exts.uncheckedShiftL#` 56#
+ !w1 = w8_as_w64_at bs (i + 1) `Exts.uncheckedShiftL#` 48#
+ !w2 = w8_as_w64_at bs (i + 2) `Exts.uncheckedShiftL#` 40#
+ !w3 = w8_as_w64_at bs (i + 3) `Exts.uncheckedShiftL#` 32#
+ !w4 = w8_as_w64_at bs (i + 4) `Exts.uncheckedShiftL#` 24#
+ !w5 = w8_as_w64_at bs (i + 5) `Exts.uncheckedShiftL#` 16#
+ !w6 = w8_as_w64_at bs (i + 6) `Exts.uncheckedShiftL#` 08#
+ !w7 = w8_as_w64_at bs (i + 7)
+ in Exts.wordToWord64#
+ (w0 `Exts.or#` w1 `Exts.or#` w2 `Exts.or#` w3 `Exts.or#`
+ w4 `Exts.or#` w5 `Exts.or#` w6 `Exts.or#` w7)
+{-# INLINE w64_at #-}
+
+-- update ---------------------------------------------------------------------
+
+-- | Update register state, given new input block.
+update :: Registers -> Block -> Registers
+update
+ (R h0 h1 h2 h3 h4 h5 h6 h7)
+ (B b00 b01 b02 b03 b04 b05 b06 b07 b08 b09 b10 b11 b12 b13 b14 b15)
+ =
+ let -- message schedule
+ !w00 = b00; !w01 = b01; !w02 = b02; !w03 = b03
+ !w04 = b04; !w05 = b05; !w06 = b06; !w07 = b07
+ !w08 = b08; !w09 = b09; !w10 = b10; !w11 = b11
+ !w12 = b12; !w13 = b13; !w14 = b14; !w15 = b15
+ !w16 = ssig1# w14 `p` w09 `p` ssig0# w01 `p` w00
+ !w17 = ssig1# w15 `p` w10 `p` ssig0# w02 `p` w01
+ !w18 = ssig1# w16 `p` w11 `p` ssig0# w03 `p` w02
+ !w19 = ssig1# w17 `p` w12 `p` ssig0# w04 `p` w03
+ !w20 = ssig1# w18 `p` w13 `p` ssig0# w05 `p` w04
+ !w21 = ssig1# w19 `p` w14 `p` ssig0# w06 `p` w05
+ !w22 = ssig1# w20 `p` w15 `p` ssig0# w07 `p` w06
+ !w23 = ssig1# w21 `p` w16 `p` ssig0# w08 `p` w07
+ !w24 = ssig1# w22 `p` w17 `p` ssig0# w09 `p` w08
+ !w25 = ssig1# w23 `p` w18 `p` ssig0# w10 `p` w09
+ !w26 = ssig1# w24 `p` w19 `p` ssig0# w11 `p` w10
+ !w27 = ssig1# w25 `p` w20 `p` ssig0# w12 `p` w11
+ !w28 = ssig1# w26 `p` w21 `p` ssig0# w13 `p` w12
+ !w29 = ssig1# w27 `p` w22 `p` ssig0# w14 `p` w13
+ !w30 = ssig1# w28 `p` w23 `p` ssig0# w15 `p` w14
+ !w31 = ssig1# w29 `p` w24 `p` ssig0# w16 `p` w15
+ !w32 = ssig1# w30 `p` w25 `p` ssig0# w17 `p` w16
+ !w33 = ssig1# w31 `p` w26 `p` ssig0# w18 `p` w17
+ !w34 = ssig1# w32 `p` w27 `p` ssig0# w19 `p` w18
+ !w35 = ssig1# w33 `p` w28 `p` ssig0# w20 `p` w19
+ !w36 = ssig1# w34 `p` w29 `p` ssig0# w21 `p` w20
+ !w37 = ssig1# w35 `p` w30 `p` ssig0# w22 `p` w21
+ !w38 = ssig1# w36 `p` w31 `p` ssig0# w23 `p` w22
+ !w39 = ssig1# w37 `p` w32 `p` ssig0# w24 `p` w23
+ !w40 = ssig1# w38 `p` w33 `p` ssig0# w25 `p` w24
+ !w41 = ssig1# w39 `p` w34 `p` ssig0# w26 `p` w25
+ !w42 = ssig1# w40 `p` w35 `p` ssig0# w27 `p` w26
+ !w43 = ssig1# w41 `p` w36 `p` ssig0# w28 `p` w27
+ !w44 = ssig1# w42 `p` w37 `p` ssig0# w29 `p` w28
+ !w45 = ssig1# w43 `p` w38 `p` ssig0# w30 `p` w29
+ !w46 = ssig1# w44 `p` w39 `p` ssig0# w31 `p` w30
+ !w47 = ssig1# w45 `p` w40 `p` ssig0# w32 `p` w31
+ !w48 = ssig1# w46 `p` w41 `p` ssig0# w33 `p` w32
+ !w49 = ssig1# w47 `p` w42 `p` ssig0# w34 `p` w33
+ !w50 = ssig1# w48 `p` w43 `p` ssig0# w35 `p` w34
+ !w51 = ssig1# w49 `p` w44 `p` ssig0# w36 `p` w35
+ !w52 = ssig1# w50 `p` w45 `p` ssig0# w37 `p` w36
+ !w53 = ssig1# w51 `p` w46 `p` ssig0# w38 `p` w37
+ !w54 = ssig1# w52 `p` w47 `p` ssig0# w39 `p` w38
+ !w55 = ssig1# w53 `p` w48 `p` ssig0# w40 `p` w39
+ !w56 = ssig1# w54 `p` w49 `p` ssig0# w41 `p` w40
+ !w57 = ssig1# w55 `p` w50 `p` ssig0# w42 `p` w41
+ !w58 = ssig1# w56 `p` w51 `p` ssig0# w43 `p` w42
+ !w59 = ssig1# w57 `p` w52 `p` ssig0# w44 `p` w43
+ !w60 = ssig1# w58 `p` w53 `p` ssig0# w45 `p` w44
+ !w61 = ssig1# w59 `p` w54 `p` ssig0# w46 `p` w45
+ !w62 = ssig1# w60 `p` w55 `p` ssig0# w47 `p` w46
+ !w63 = ssig1# w61 `p` w56 `p` ssig0# w48 `p` w47
+ !w64 = ssig1# w62 `p` w57 `p` ssig0# w49 `p` w48
+ !w65 = ssig1# w63 `p` w58 `p` ssig0# w50 `p` w49
+ !w66 = ssig1# w64 `p` w59 `p` ssig0# w51 `p` w50
+ !w67 = ssig1# w65 `p` w60 `p` ssig0# w52 `p` w51
+ !w68 = ssig1# w66 `p` w61 `p` ssig0# w53 `p` w52
+ !w69 = ssig1# w67 `p` w62 `p` ssig0# w54 `p` w53
+ !w70 = ssig1# w68 `p` w63 `p` ssig0# w55 `p` w54
+ !w71 = ssig1# w69 `p` w64 `p` ssig0# w56 `p` w55
+ !w72 = ssig1# w70 `p` w65 `p` ssig0# w57 `p` w56
+ !w73 = ssig1# w71 `p` w66 `p` ssig0# w58 `p` w57
+ !w74 = ssig1# w72 `p` w67 `p` ssig0# w59 `p` w58
+ !w75 = ssig1# w73 `p` w68 `p` ssig0# w60 `p` w59
+ !w76 = ssig1# w74 `p` w69 `p` ssig0# w61 `p` w60
+ !w77 = ssig1# w75 `p` w70 `p` ssig0# w62 `p` w61
+ !w78 = ssig1# w76 `p` w71 `p` ssig0# w63 `p` w62
+ !w79 = ssig1# w77 `p` w72 `p` ssig0# w64 `p` w63
+
+ -- rounds (constants are cube roots of first 80 primes)
+ !(R s00a s00b s00c s00d s00e s00f s00g s00h) =
+ step# h0 h1 h2 h3 h4 h5 h6 h7 (k 0x428a2f98d728ae22##) w00
+ !(R s01a s01b s01c s01d s01e s01f s01g s01h) =
+ step# s00a s00b s00c s00d s00e s00f s00g s00h
+ (k 0x7137449123ef65cd##) w01
+ !(R s02a s02b s02c s02d s02e s02f s02g s02h) =
+ step# s01a s01b s01c s01d s01e s01f s01g s01h
+ (k 0xb5c0fbcfec4d3b2f##) w02
+ !(R s03a s03b s03c s03d s03e s03f s03g s03h) =
+ step# s02a s02b s02c s02d s02e s02f s02g s02h
+ (k 0xe9b5dba58189dbbc##) w03
+ !(R s04a s04b s04c s04d s04e s04f s04g s04h) =
+ step# s03a s03b s03c s03d s03e s03f s03g s03h
+ (k 0x3956c25bf348b538##) w04
+ !(R s05a s05b s05c s05d s05e s05f s05g s05h) =
+ step# s04a s04b s04c s04d s04e s04f s04g s04h
+ (k 0x59f111f1b605d019##) w05
+ !(R s06a s06b s06c s06d s06e s06f s06g s06h) =
+ step# s05a s05b s05c s05d s05e s05f s05g s05h
+ (k 0x923f82a4af194f9b##) w06
+ !(R s07a s07b s07c s07d s07e s07f s07g s07h) =
+ step# s06a s06b s06c s06d s06e s06f s06g s06h
+ (k 0xab1c5ed5da6d8118##) w07
+ !(R s08a s08b s08c s08d s08e s08f s08g s08h) =
+ step# s07a s07b s07c s07d s07e s07f s07g s07h
+ (k 0xd807aa98a3030242##) w08
+ !(R s09a s09b s09c s09d s09e s09f s09g s09h) =
+ step# s08a s08b s08c s08d s08e s08f s08g s08h
+ (k 0x12835b0145706fbe##) w09
+ !(R s10a s10b s10c s10d s10e s10f s10g s10h) =
+ step# s09a s09b s09c s09d s09e s09f s09g s09h
+ (k 0x243185be4ee4b28c##) w10
+ !(R s11a s11b s11c s11d s11e s11f s11g s11h) =
+ step# s10a s10b s10c s10d s10e s10f s10g s10h
+ (k 0x550c7dc3d5ffb4e2##) w11
+ !(R s12a s12b s12c s12d s12e s12f s12g s12h) =
+ step# s11a s11b s11c s11d s11e s11f s11g s11h
+ (k 0x72be5d74f27b896f##) w12
+ !(R s13a s13b s13c s13d s13e s13f s13g s13h) =
+ step# s12a s12b s12c s12d s12e s12f s12g s12h
+ (k 0x80deb1fe3b1696b1##) w13
+ !(R s14a s14b s14c s14d s14e s14f s14g s14h) =
+ step# s13a s13b s13c s13d s13e s13f s13g s13h
+ (k 0x9bdc06a725c71235##) w14
+ !(R s15a s15b s15c s15d s15e s15f s15g s15h) =
+ step# s14a s14b s14c s14d s14e s14f s14g s14h
+ (k 0xc19bf174cf692694##) w15
+ !(R s16a s16b s16c s16d s16e s16f s16g s16h) =
+ step# s15a s15b s15c s15d s15e s15f s15g s15h
+ (k 0xe49b69c19ef14ad2##) w16
+ !(R s17a s17b s17c s17d s17e s17f s17g s17h) =
+ step# s16a s16b s16c s16d s16e s16f s16g s16h
+ (k 0xefbe4786384f25e3##) w17
+ !(R s18a s18b s18c s18d s18e s18f s18g s18h) =
+ step# s17a s17b s17c s17d s17e s17f s17g s17h
+ (k 0x0fc19dc68b8cd5b5##) w18
+ !(R s19a s19b s19c s19d s19e s19f s19g s19h) =
+ step# s18a s18b s18c s18d s18e s18f s18g s18h
+ (k 0x240ca1cc77ac9c65##) w19
+ !(R s20a s20b s20c s20d s20e s20f s20g s20h) =
+ step# s19a s19b s19c s19d s19e s19f s19g s19h
+ (k 0x2de92c6f592b0275##) w20
+ !(R s21a s21b s21c s21d s21e s21f s21g s21h) =
+ step# s20a s20b s20c s20d s20e s20f s20g s20h
+ (k 0x4a7484aa6ea6e483##) w21
+ !(R s22a s22b s22c s22d s22e s22f s22g s22h) =
+ step# s21a s21b s21c s21d s21e s21f s21g s21h
+ (k 0x5cb0a9dcbd41fbd4##) w22
+ !(R s23a s23b s23c s23d s23e s23f s23g s23h) =
+ step# s22a s22b s22c s22d s22e s22f s22g s22h
+ (k 0x76f988da831153b5##) w23
+ !(R s24a s24b s24c s24d s24e s24f s24g s24h) =
+ step# s23a s23b s23c s23d s23e s23f s23g s23h
+ (k 0x983e5152ee66dfab##) w24
+ !(R s25a s25b s25c s25d s25e s25f s25g s25h) =
+ step# s24a s24b s24c s24d s24e s24f s24g s24h
+ (k 0xa831c66d2db43210##) w25
+ !(R s26a s26b s26c s26d s26e s26f s26g s26h) =
+ step# s25a s25b s25c s25d s25e s25f s25g s25h
+ (k 0xb00327c898fb213f##) w26
+ !(R s27a s27b s27c s27d s27e s27f s27g s27h) =
+ step# s26a s26b s26c s26d s26e s26f s26g s26h
+ (k 0xbf597fc7beef0ee4##) w27
+ !(R s28a s28b s28c s28d s28e s28f s28g s28h) =
+ step# s27a s27b s27c s27d s27e s27f s27g s27h
+ (k 0xc6e00bf33da88fc2##) w28
+ !(R s29a s29b s29c s29d s29e s29f s29g s29h) =
+ step# s28a s28b s28c s28d s28e s28f s28g s28h
+ (k 0xd5a79147930aa725##) w29
+ !(R s30a s30b s30c s30d s30e s30f s30g s30h) =
+ step# s29a s29b s29c s29d s29e s29f s29g s29h
+ (k 0x06ca6351e003826f##) w30
+ !(R s31a s31b s31c s31d s31e s31f s31g s31h) =
+ step# s30a s30b s30c s30d s30e s30f s30g s30h
+ (k 0x142929670a0e6e70##) w31
+ !(R s32a s32b s32c s32d s32e s32f s32g s32h) =
+ step# s31a s31b s31c s31d s31e s31f s31g s31h
+ (k 0x27b70a8546d22ffc##) w32
+ !(R s33a s33b s33c s33d s33e s33f s33g s33h) =
+ step# s32a s32b s32c s32d s32e s32f s32g s32h
+ (k 0x2e1b21385c26c926##) w33
+ !(R s34a s34b s34c s34d s34e s34f s34g s34h) =
+ step# s33a s33b s33c s33d s33e s33f s33g s33h
+ (k 0x4d2c6dfc5ac42aed##) w34
+ !(R s35a s35b s35c s35d s35e s35f s35g s35h) =
+ step# s34a s34b s34c s34d s34e s34f s34g s34h
+ (k 0x53380d139d95b3df##) w35
+ !(R s36a s36b s36c s36d s36e s36f s36g s36h) =
+ step# s35a s35b s35c s35d s35e s35f s35g s35h
+ (k 0x650a73548baf63de##) w36
+ !(R s37a s37b s37c s37d s37e s37f s37g s37h) =
+ step# s36a s36b s36c s36d s36e s36f s36g s36h
+ (k 0x766a0abb3c77b2a8##) w37
+ !(R s38a s38b s38c s38d s38e s38f s38g s38h) =
+ step# s37a s37b s37c s37d s37e s37f s37g s37h
+ (k 0x81c2c92e47edaee6##) w38
+ !(R s39a s39b s39c s39d s39e s39f s39g s39h) =
+ step# s38a s38b s38c s38d s38e s38f s38g s38h
+ (k 0x92722c851482353b##) w39
+ !(R s40a s40b s40c s40d s40e s40f s40g s40h) =
+ step# s39a s39b s39c s39d s39e s39f s39g s39h
+ (k 0xa2bfe8a14cf10364##) w40
+ !(R s41a s41b s41c s41d s41e s41f s41g s41h) =
+ step# s40a s40b s40c s40d s40e s40f s40g s40h
+ (k 0xa81a664bbc423001##) w41
+ !(R s42a s42b s42c s42d s42e s42f s42g s42h) =
+ step# s41a s41b s41c s41d s41e s41f s41g s41h
+ (k 0xc24b8b70d0f89791##) w42
+ !(R s43a s43b s43c s43d s43e s43f s43g s43h) =
+ step# s42a s42b s42c s42d s42e s42f s42g s42h
+ (k 0xc76c51a30654be30##) w43
+ !(R s44a s44b s44c s44d s44e s44f s44g s44h) =
+ step# s43a s43b s43c s43d s43e s43f s43g s43h
+ (k 0xd192e819d6ef5218##) w44
+ !(R s45a s45b s45c s45d s45e s45f s45g s45h) =
+ step# s44a s44b s44c s44d s44e s44f s44g s44h
+ (k 0xd69906245565a910##) w45
+ !(R s46a s46b s46c s46d s46e s46f s46g s46h) =
+ step# s45a s45b s45c s45d s45e s45f s45g s45h
+ (k 0xf40e35855771202a##) w46
+ !(R s47a s47b s47c s47d s47e s47f s47g s47h) =
+ step# s46a s46b s46c s46d s46e s46f s46g s46h
+ (k 0x106aa07032bbd1b8##) w47
+ !(R s48a s48b s48c s48d s48e s48f s48g s48h) =
+ step# s47a s47b s47c s47d s47e s47f s47g s47h
+ (k 0x19a4c116b8d2d0c8##) w48
+ !(R s49a s49b s49c s49d s49e s49f s49g s49h) =
+ step# s48a s48b s48c s48d s48e s48f s48g s48h
+ (k 0x1e376c085141ab53##) w49
+ !(R s50a s50b s50c s50d s50e s50f s50g s50h) =
+ step# s49a s49b s49c s49d s49e s49f s49g s49h
+ (k 0x2748774cdf8eeb99##) w50
+ !(R s51a s51b s51c s51d s51e s51f s51g s51h) =
+ step# s50a s50b s50c s50d s50e s50f s50g s50h
+ (k 0x34b0bcb5e19b48a8##) w51
+ !(R s52a s52b s52c s52d s52e s52f s52g s52h) =
+ step# s51a s51b s51c s51d s51e s51f s51g s51h
+ (k 0x391c0cb3c5c95a63##) w52
+ !(R s53a s53b s53c s53d s53e s53f s53g s53h) =
+ step# s52a s52b s52c s52d s52e s52f s52g s52h
+ (k 0x4ed8aa4ae3418acb##) w53
+ !(R s54a s54b s54c s54d s54e s54f s54g s54h) =
+ step# s53a s53b s53c s53d s53e s53f s53g s53h
+ (k 0x5b9cca4f7763e373##) w54
+ !(R s55a s55b s55c s55d s55e s55f s55g s55h) =
+ step# s54a s54b s54c s54d s54e s54f s54g s54h
+ (k 0x682e6ff3d6b2b8a3##) w55
+ !(R s56a s56b s56c s56d s56e s56f s56g s56h) =
+ step# s55a s55b s55c s55d s55e s55f s55g s55h
+ (k 0x748f82ee5defb2fc##) w56
+ !(R s57a s57b s57c s57d s57e s57f s57g s57h) =
+ step# s56a s56b s56c s56d s56e s56f s56g s56h
+ (k 0x78a5636f43172f60##) w57
+ !(R s58a s58b s58c s58d s58e s58f s58g s58h) =
+ step# s57a s57b s57c s57d s57e s57f s57g s57h
+ (k 0x84c87814a1f0ab72##) w58
+ !(R s59a s59b s59c s59d s59e s59f s59g s59h) =
+ step# s58a s58b s58c s58d s58e s58f s58g s58h
+ (k 0x8cc702081a6439ec##) w59
+ !(R s60a s60b s60c s60d s60e s60f s60g s60h) =
+ step# s59a s59b s59c s59d s59e s59f s59g s59h
+ (k 0x90befffa23631e28##) w60
+ !(R s61a s61b s61c s61d s61e s61f s61g s61h) =
+ step# s60a s60b s60c s60d s60e s60f s60g s60h
+ (k 0xa4506cebde82bde9##) w61
+ !(R s62a s62b s62c s62d s62e s62f s62g s62h) =
+ step# s61a s61b s61c s61d s61e s61f s61g s61h
+ (k 0xbef9a3f7b2c67915##) w62
+ !(R s63a s63b s63c s63d s63e s63f s63g s63h) =
+ step# s62a s62b s62c s62d s62e s62f s62g s62h
+ (k 0xc67178f2e372532b##) w63
+ !(R s64a s64b s64c s64d s64e s64f s64g s64h) =
+ step# s63a s63b s63c s63d s63e s63f s63g s63h
+ (k 0xca273eceea26619c##) w64
+ !(R s65a s65b s65c s65d s65e s65f s65g s65h) =
+ step# s64a s64b s64c s64d s64e s64f s64g s64h
+ (k 0xd186b8c721c0c207##) w65
+ !(R s66a s66b s66c s66d s66e s66f s66g s66h) =
+ step# s65a s65b s65c s65d s65e s65f s65g s65h
+ (k 0xeada7dd6cde0eb1e##) w66
+ !(R s67a s67b s67c s67d s67e s67f s67g s67h) =
+ step# s66a s66b s66c s66d s66e s66f s66g s66h
+ (k 0xf57d4f7fee6ed178##) w67
+ !(R s68a s68b s68c s68d s68e s68f s68g s68h) =
+ step# s67a s67b s67c s67d s67e s67f s67g s67h
+ (k 0x06f067aa72176fba##) w68
+ !(R s69a s69b s69c s69d s69e s69f s69g s69h) =
+ step# s68a s68b s68c s68d s68e s68f s68g s68h
+ (k 0x0a637dc5a2c898a6##) w69
+ !(R s70a s70b s70c s70d s70e s70f s70g s70h) =
+ step# s69a s69b s69c s69d s69e s69f s69g s69h
+ (k 0x113f9804bef90dae##) w70
+ !(R s71a s71b s71c s71d s71e s71f s71g s71h) =
+ step# s70a s70b s70c s70d s70e s70f s70g s70h
+ (k 0x1b710b35131c471b##) w71
+ !(R s72a s72b s72c s72d s72e s72f s72g s72h) =
+ step# s71a s71b s71c s71d s71e s71f s71g s71h
+ (k 0x28db77f523047d84##) w72
+ !(R s73a s73b s73c s73d s73e s73f s73g s73h) =
+ step# s72a s72b s72c s72d s72e s72f s72g s72h
+ (k 0x32caab7b40c72493##) w73
+ !(R s74a s74b s74c s74d s74e s74f s74g s74h) =
+ step# s73a s73b s73c s73d s73e s73f s73g s73h
+ (k 0x3c9ebe0a15c9bebc##) w74
+ !(R s75a s75b s75c s75d s75e s75f s75g s75h) =
+ step# s74a s74b s74c s74d s74e s74f s74g s74h
+ (k 0x431d67c49c100d4c##) w75
+ !(R s76a s76b s76c s76d s76e s76f s76g s76h) =
+ step# s75a s75b s75c s75d s75e s75f s75g s75h
+ (k 0x4cc5d4becb3e42b6##) w76
+ !(R s77a s77b s77c s77d s77e s77f s77g s77h) =
+ step# s76a s76b s76c s76d s76e s76f s76g s76h
+ (k 0x597f299cfc657e2a##) w77
+ !(R s78a s78b s78c s78d s78e s78f s78g s78h) =
+ step# s77a s77b s77c s77d s77e s77f s77g s77h
+ (k 0x5fcb6fab3ad6faec##) w78
+ !(R s79a s79b s79c s79d s79e s79f s79g s79h) =
+ step# s78a s78b s78c s78d s78e s78f s78g s78h
+ (k 0x6c44198c4a475817##) w79
+ in R (h0 `p` s79a) (h1 `p` s79b) (h2 `p` s79c) (h3 `p` s79d)
+ (h4 `p` s79e) (h5 `p` s79f) (h6 `p` s79g) (h7 `p` s79h)
where
- poke_word64be :: Ptr Word8 -> Word64 -> IO ()
- poke_word64be p w = do
- poke p (fi (w `B.unsafeShiftR` 56) :: Word8)
- poke (p `plusPtr` 1) (fi (w `B.unsafeShiftR` 48) :: Word8)
- poke (p `plusPtr` 2) (fi (w `B.unsafeShiftR` 40) :: Word8)
- poke (p `plusPtr` 3) (fi (w `B.unsafeShiftR` 32) :: Word8)
- poke (p `plusPtr` 4) (fi (w `B.unsafeShiftR` 24) :: Word8)
- poke (p `plusPtr` 5) (fi (w `B.unsafeShiftR` 16) :: Word8)
- poke (p `plusPtr` 6) (fi (w `B.unsafeShiftR` 8) :: Word8)
- poke (p `plusPtr` 7) (fi w :: Word8)
+ p = Exts.plusWord64#
+ {-# INLINE p #-}
+ k :: Exts.Word# -> Exts.Word64#
+ k = Exts.wordToWord64#
+ {-# INLINE k #-}
+
+-- rotate right
+rotr# :: Exts.Word64# -> Int# -> Exts.Word64#
+rotr# x n =
+ Exts.uncheckedShiftRL64# x n `Exts.or64#`
+ Exts.uncheckedShiftL64# x (64# Exts.-# n)
+{-# INLINE rotr# #-}
+
+-- logical right shift
+shr# :: Exts.Word64# -> Int# -> Exts.Word64#
+shr# = Exts.uncheckedShiftRL64#
+{-# INLINE shr# #-}
+
+-- ch(x, y, z) = (x & y) ^ (~x & z)
+ch# :: Exts.Word64# -> Exts.Word64# -> Exts.Word64# -> Exts.Word64#
+ch# x y z =
+ (x `Exts.and64#` y) `Exts.xor64#`
+ (Exts.not64# x `Exts.and64#` z)
+{-# INLINE ch# #-}
+
+-- maj(x, y, z) = (x & (y | z)) | (y & z)
+maj# :: Exts.Word64# -> Exts.Word64# -> Exts.Word64# -> Exts.Word64#
+maj# x y z =
+ (x `Exts.and64#` (y `Exts.or64#` z)) `Exts.or64#`
+ (y `Exts.and64#` z)
+{-# INLINE maj# #-}
+
+-- big sigma 0: rotr28 ^ rotr34 ^ rotr39
+bsig0# :: Exts.Word64# -> Exts.Word64#
+bsig0# x =
+ rotr# x 28# `Exts.xor64#` rotr# x 34# `Exts.xor64#` rotr# x 39#
+{-# INLINE bsig0# #-}
+
+-- big sigma 1: rotr14 ^ rotr18 ^ rotr41
+bsig1# :: Exts.Word64# -> Exts.Word64#
+bsig1# x =
+ rotr# x 14# `Exts.xor64#` rotr# x 18# `Exts.xor64#` rotr# x 41#
+{-# INLINE bsig1# #-}
+
+-- small sigma 0: rotr1 ^ rotr8 ^ shr7
+ssig0# :: Exts.Word64# -> Exts.Word64#
+ssig0# x =
+ rotr# x 1# `Exts.xor64#` rotr# x 8# `Exts.xor64#` shr# x 7#
+{-# INLINE ssig0# #-}
+
+-- small sigma 1: rotr19 ^ rotr61 ^ shr6
+ssig1# :: Exts.Word64# -> Exts.Word64#
+ssig1# x =
+ rotr# x 19# `Exts.xor64#` rotr# x 61# `Exts.xor64#` shr# x 6#
+{-# INLINE ssig1# #-}
+
+-- round step
+step#
+ :: Exts.Word64# -> Exts.Word64# -> Exts.Word64# -> Exts.Word64#
+ -> Exts.Word64# -> Exts.Word64# -> Exts.Word64# -> Exts.Word64#
+ -> Exts.Word64# -> Exts.Word64#
+ -> Registers
+step# a b c d e f g h k w =
+ let !t1 = h
+ `Exts.plusWord64#` bsig1# e
+ `Exts.plusWord64#` ch# e f g
+ `Exts.plusWord64#` k
+ `Exts.plusWord64#` w
+ !t2 = bsig0# a `Exts.plusWord64#` maj# a b c
+ in R (t1 `Exts.plusWord64#` t2) a b c (d `Exts.plusWord64#` t1) e f g
+{-# INLINE step# #-}
+
+-- initial register state; first 64 bits of the fractional parts of the
+-- square roots of the first eight primes
+iv :: () -> Registers
+iv _ = R
+ (Exts.wordToWord64# 0x6a09e667f3bcc908##)
+ (Exts.wordToWord64# 0xbb67ae8584caa73b##)
+ (Exts.wordToWord64# 0x3c6ef372fe94f82b##)
+ (Exts.wordToWord64# 0xa54ff53a5f1d36f1##)
+ (Exts.wordToWord64# 0x510e527fade682d1##)
+ (Exts.wordToWord64# 0x9b05688c2b3e6c1f##)
+ (Exts.wordToWord64# 0x1f83d9abfb41bd6b##)
+ (Exts.wordToWord64# 0x5be0cd19137e2179##)
+
+-- serializing ----------------------------------------------------------------
+
+-- | Concat SHA512 state into a ByteString.
+cat :: Registers -> BS.ByteString
+cat rs = BI.unsafeCreate 64 (cat_into rs)
+{-# INLINABLE cat #-}
+
+-- | Serialize SHA512 state to a pointer (big-endian).
+cat_into :: Registers -> Ptr Word8 -> IO ()
+cat_into (R h0 h1 h2 h3 h4 h5 h6 h7) (Ptr addr) = GHC.IO.IO $ \s0 ->
+ case poke64be addr 00# h0 s0 of { s1 ->
+ case poke64be addr 08# h1 s1 of { s2 ->
+ case poke64be addr 16# h2 s2 of { s3 ->
+ case poke64be addr 24# h3 s3 of { s4 ->
+ case poke64be addr 32# h4 s4 of { s5 ->
+ case poke64be addr 40# h5 s5 of { s6 ->
+ case poke64be addr 48# h6 s6 of { s7 ->
+ case poke64be addr 56# h7 s7 of { s8 ->
+ (# s8, () #)
+ }}}}}}}}
+{-# INLINE cat_into #-}
+
+poke64be
+ :: Exts.Addr#
+ -> Int#
+ -> Exts.Word64#
+ -> Exts.State# Exts.RealWorld
+ -> Exts.State# Exts.RealWorld
+poke64be a off w s0 =
+ case Exts.writeWord8OffAddr# a off (byte# w 56#) s0 of { s1 ->
+ case Exts.writeWord8OffAddr# a (off Exts.+# 1#) (byte# w 48#) s1 of { s2 ->
+ case Exts.writeWord8OffAddr# a (off Exts.+# 2#) (byte# w 40#) s2 of { s3 ->
+ case Exts.writeWord8OffAddr# a (off Exts.+# 3#) (byte# w 32#) s3 of { s4 ->
+ case Exts.writeWord8OffAddr# a (off Exts.+# 4#) (byte# w 24#) s4 of { s5 ->
+ case Exts.writeWord8OffAddr# a (off Exts.+# 5#) (byte# w 16#) s5 of { s6 ->
+ case Exts.writeWord8OffAddr# a (off Exts.+# 6#) (byte# w 8#) s6 of { s7 ->
+ Exts.writeWord8OffAddr# a (off Exts.+# 7#) (byte# w 0#) s7
+ }}}}}}}
+{-# INLINE poke64be #-}
+
+byte# :: Exts.Word64# -> Int# -> Exts.Word8#
+byte# w n = Exts.wordToWord8#
+ (Exts.word64ToWord# (Exts.uncheckedShiftRL64# w n))
+{-# INLINE byte# #-}
+
+-- | Write register state to a pointer (native endian Word64s).
+poke_registers :: Ptr Word64 -> Registers -> IO ()
+poke_registers (Ptr addr) (R w0 w1 w2 w3 w4 w5 w6 w7) = GHC.IO.IO $ \s0 ->
+ case Exts.writeWord64OffAddr# addr 0# w0 s0 of { s1 ->
+ case Exts.writeWord64OffAddr# addr 1# w1 s1 of { s2 ->
+ case Exts.writeWord64OffAddr# addr 2# w2 s2 of { s3 ->
+ case Exts.writeWord64OffAddr# addr 3# w3 s3 of { s4 ->
+ case Exts.writeWord64OffAddr# addr 4# w4 s4 of { s5 ->
+ case Exts.writeWord64OffAddr# addr 5# w5 s5 of { s6 ->
+ case Exts.writeWord64OffAddr# addr 6# w6 s6 of { s7 ->
+ case Exts.writeWord64OffAddr# addr 7# w7 s7 of { s8 ->
+ (# s8, () #) }}}}}}}}
+{-# INLINE poke_registers #-}
+
+-- hmac utilities -------------------------------------------------------------
+
+-- pad registers to block
+pad_registers :: Registers -> Block
+pad_registers (R w0 w1 w2 w3 w4 w5 w6 w7) = B
+ w0 w1 w2 w3 w4 w5 w6 w7
+ (Exts.wordToWord64# 0##) (Exts.wordToWord64# 0##) (Exts.wordToWord64# 0##)
+ (Exts.wordToWord64# 0##) (Exts.wordToWord64# 0##) (Exts.wordToWord64# 0##)
+ (Exts.wordToWord64# 0##) (Exts.wordToWord64# 0##)
+{-# INLINE pad_registers #-}
+
+-- pad registers to block, using padding separator and augmented length
+-- (assumes existence of a leading block)
+-- length = (128 + 64) * 8 = 1536 = 0x600
+pad_registers_with_length :: Registers -> Block
+pad_registers_with_length (R h0 h1 h2 h3 h4 h5 h6 h7) = B
+ h0 h1 h2 h3 h4 h5 h6 h7 -- inner hash
+ (Exts.wordToWord64# 0x8000000000000000##) -- padding separator
+ (Exts.wordToWord64# 0x0000000000000000##)
+ (Exts.wordToWord64# 0x0000000000000000##)
+ (Exts.wordToWord64# 0x0000000000000000##)
+ (Exts.wordToWord64# 0x0000000000000000##)
+ (Exts.wordToWord64# 0x0000000000000000##)
+ (Exts.wordToWord64# 0x0000000000000000##) -- high 64 bits of length
+ (Exts.wordToWord64# 0x0000000000000600##) -- low 64 bits of length
+{-# INLINABLE pad_registers_with_length #-}
+
+xor :: Block -> Exts.Word64# -> Block
+xor (B w00 w01 w02 w03 w04 w05 w06 w07 w08 w09 w10 w11 w12 w13 w14 w15) b = B
+ (Exts.xor64# w00 b)
+ (Exts.xor64# w01 b)
+ (Exts.xor64# w02 b)
+ (Exts.xor64# w03 b)
+ (Exts.xor64# w04 b)
+ (Exts.xor64# w05 b)
+ (Exts.xor64# w06 b)
+ (Exts.xor64# w07 b)
+ (Exts.xor64# w08 b)
+ (Exts.xor64# w09 b)
+ (Exts.xor64# w10 b)
+ (Exts.xor64# w11 b)
+ (Exts.xor64# w12 b)
+ (Exts.xor64# w13 b)
+ (Exts.xor64# w14 b)
+ (Exts.xor64# w15 b)
+{-# INLINE xor #-}
+
+parse_key :: BS.ByteString -> Block
+parse_key bs = B
+ (w64_zero bs 000) (w64_zero bs 008) (w64_zero bs 016) (w64_zero bs 024)
+ (w64_zero bs 032) (w64_zero bs 040) (w64_zero bs 048) (w64_zero bs 056)
+ (w64_zero bs 064) (w64_zero bs 072) (w64_zero bs 080) (w64_zero bs 088)
+ (w64_zero bs 096) (w64_zero bs 104) (w64_zero bs 112) (w64_zero bs 120)
+{-# INLINE parse_key #-}
+
+-- read big-endian Word64#, zero-padding beyond input length
+w64_zero :: BS.ByteString -> Int -> Exts.Word64#
+w64_zero bs i =
+ let !w0 = w8_zero bs i `Exts.uncheckedShiftL#` 56#
+ !w1 = w8_zero bs (i + 1) `Exts.uncheckedShiftL#` 48#
+ !w2 = w8_zero bs (i + 2) `Exts.uncheckedShiftL#` 40#
+ !w3 = w8_zero bs (i + 3) `Exts.uncheckedShiftL#` 32#
+ !w4 = w8_zero bs (i + 4) `Exts.uncheckedShiftL#` 24#
+ !w5 = w8_zero bs (i + 5) `Exts.uncheckedShiftL#` 16#
+ !w6 = w8_zero bs (i + 6) `Exts.uncheckedShiftL#` 08#
+ !w7 = w8_zero bs (i + 7)
+ in Exts.wordToWord64#
+ (w0 `Exts.or#` w1 `Exts.or#` w2 `Exts.or#` w3 `Exts.or#`
+ w4 `Exts.or#` w5 `Exts.or#` w6 `Exts.or#` w7)
+{-# INLINE w64_zero #-}
+
+-- read byte as Word#, returning zero beyond input length
+w8_zero :: BS.ByteString -> Int -> Exts.Word#
+w8_zero bs@(BI.PS _ _ l) i
+ | i < l = let !(GHC.Word.W8# w) = BU.unsafeIndex bs i
+ in Exts.word8ToWord# w
+ | otherwise = 0##
+{-# INLINE w8_zero #-}
+
+-- hmac-drbg utilities --------------------------------------------------------
+
+-- | Parse first complete block from v || sep || dat[0:63].
+--
+-- Requires len(dat) >= 63.
+parse_vsb :: Registers -> Word8 -> BS.ByteString -> Block
+parse_vsb (R v0 v1 v2 v3 v4 v5 v6 v7) (GHC.Word.W8# sep) dat =
+ let !(GHC.Word.W8# b0) = BU.unsafeIndex dat 0
+ !(GHC.Word.W8# b1) = BU.unsafeIndex dat 1
+ !(GHC.Word.W8# b2) = BU.unsafeIndex dat 2
+ !(GHC.Word.W8# b3) = BU.unsafeIndex dat 3
+ !(GHC.Word.W8# b4) = BU.unsafeIndex dat 4
+ !(GHC.Word.W8# b5) = BU.unsafeIndex dat 5
+ !(GHC.Word.W8# b6) = BU.unsafeIndex dat 6
+ !w08 =
+ Exts.uncheckedShiftL# (Exts.word8ToWord# sep) 56#
+ `Exts.or#`
+ Exts.uncheckedShiftL# (Exts.word8ToWord# b0) 48#
+ `Exts.or#`
+ Exts.uncheckedShiftL# (Exts.word8ToWord# b1) 40#
+ `Exts.or#`
+ Exts.uncheckedShiftL# (Exts.word8ToWord# b2) 32#
+ `Exts.or#`
+ Exts.uncheckedShiftL# (Exts.word8ToWord# b3) 24#
+ `Exts.or#`
+ Exts.uncheckedShiftL# (Exts.word8ToWord# b4) 16#
+ `Exts.or#`
+ Exts.uncheckedShiftL# (Exts.word8ToWord# b5) 8#
+ `Exts.or#`
+ Exts.word8ToWord# b6
+ in B v0 v1 v2 v3 v4 v5 v6 v7
+ (Exts.wordToWord64# w08)
+ (word64be dat 07) (word64be dat 15) (word64be dat 23)
+ (word64be dat 31) (word64be dat 39) (word64be dat 47) (word64be dat 55)
+{-# INLINE parse_vsb #-}
+
+-- | Parse single padding block from v || sep || dat.
+--
+-- Requires (65 + len(dat)) < 112.
+parse_pad1_vsb :: Registers -> Word8 -> BS.ByteString -> Word64 -> Block
+parse_pad1_vsb (R v0 v1 v2 v3 v4 v5 v6 v7) sep dat total =
+ let !bits = total * 8
+ !(GHC.Word.W64# llo) = bits
+ in B v0 v1 v2 v3 v4 v5 v6 v7
+ (w64_sdp sep dat 064) (w64_sdp sep dat 072)
+ (w64_sdp sep dat 080) (w64_sdp sep dat 088)
+ (w64_sdp sep dat 096) (w64_sdp sep dat 104)
+ (Exts.wordToWord64# 0##) llo
+{-# INLINABLE parse_pad1_vsb #-}
+
+-- | Parse two padding blocks from v || sep || dat.
+--
+-- Requires 112 <= (65 + len(dat)) < 128.
+parse_pad2_vsb
+ :: Registers -> Word8 -> BS.ByteString -> Word64 -> (# Block, Block #)
+parse_pad2_vsb (R v0 v1 v2 v3 v4 v5 v6 v7) sep dat total =
+ let !bits = total * 8
+ !z = Exts.wordToWord64# 0##
+ !(GHC.Word.W64# llo) = bits
+ !b0 = B v0 v1 v2 v3 v4 v5 v6 v7
+ (w64_sdp sep dat 064) (w64_sdp sep dat 072)
+ (w64_sdp sep dat 080) (w64_sdp sep dat 088)
+ (w64_sdp sep dat 096) (w64_sdp sep dat 104)
+ (w64_sdp sep dat 112) (w64_sdp sep dat 120)
+ !b1 = B z z z z z z z z z z z z z z z llo
+ in (# b0, b1 #)
+{-# INLINABLE parse_pad2_vsb #-}
+
+-- Read Word64 at offset i (>= 64) from (sep || dat || 0x80 || zeros).
+w64_sdp :: Word8 -> BS.ByteString -> Int -> Exts.Word64#
+w64_sdp sep dat i =
+ let !(GHC.Word.W8# a) = byte_sdp sep dat i
+ !(GHC.Word.W8# b) = byte_sdp sep dat (i + 1)
+ !(GHC.Word.W8# c) = byte_sdp sep dat (i + 2)
+ !(GHC.Word.W8# d) = byte_sdp sep dat (i + 3)
+ !(GHC.Word.W8# e) = byte_sdp sep dat (i + 4)
+ !(GHC.Word.W8# f) = byte_sdp sep dat (i + 5)
+ !(GHC.Word.W8# g) = byte_sdp sep dat (i + 6)
+ !(GHC.Word.W8# h) = byte_sdp sep dat (i + 7)
+ in Exts.wordToWord64#
+ (Exts.uncheckedShiftL# (Exts.word8ToWord# a) 56#
+ `Exts.or#`
+ Exts.uncheckedShiftL# (Exts.word8ToWord# b) 48#
+ `Exts.or#`
+ Exts.uncheckedShiftL# (Exts.word8ToWord# c) 40#
+ `Exts.or#`
+ Exts.uncheckedShiftL# (Exts.word8ToWord# d) 32#
+ `Exts.or#`
+ Exts.uncheckedShiftL# (Exts.word8ToWord# e) 24#
+ `Exts.or#`
+ Exts.uncheckedShiftL# (Exts.word8ToWord# f) 16#
+ `Exts.or#`
+ Exts.uncheckedShiftL# (Exts.word8ToWord# g) 8#
+ `Exts.or#`
+ Exts.word8ToWord# h)
+{-# INLINE w64_sdp #-}
+
+-- Read byte at offset i (>= 64) from (sep || dat || 0x80 || zeros).
+byte_sdp :: Word8 -> BS.ByteString -> Int -> Word8
+byte_sdp sep dat@(BI.PS _ _ l) i
+ | i == 64 = sep
+ | i < 65 + l = BU.unsafeIndex dat (i - 65)
+ | i == 65 + l = 0x80
+ | otherwise = 0x00
+{-# INLINE byte_sdp #-}
+
diff --git a/lib/Crypto/Hash/SHA512/Lazy.hs b/lib/Crypto/Hash/SHA512/Lazy.hs
@@ -32,9 +32,6 @@ import qualified Data.ByteString.Lazy.Internal as BLI
import Data.Word (Word64)
import Foreign.ForeignPtr (plusForeignPtr)
--- preliminary utils
-
--- keystroke saver
fi :: (Integral a, Num b) => a -> b
fi = fromIntegral
{-# INLINE fi #-}
@@ -109,20 +106,16 @@ pad_lazy (BL.toChunks -> m) = BL.fromChunks (walk 0 m) where
-- >>> hash_lazy "lazy bytestring input"
-- "<strict 512-bit message digest>"
hash_lazy :: BL.ByteString -> BS.ByteString
-hash_lazy bl = cat (go iv (pad_lazy bl)) where
+hash_lazy bl = cat (go (iv ()) (pad_lazy bl)) where
go :: Registers -> BL.ByteString -> Registers
go !acc bs
| BL.null bs = acc
| otherwise = case splitAt128 bs of
- SLPair c r -> go (unsafe_hash_alg acc c) r
+ SLPair c r -> go (update acc (parse c 0)) r
-- HMAC -----------------------------------------------------------------------
-- https://datatracker.ietf.org/doc/html/rfc2104#section-2
-data KeyAndLen = KeyAndLen
- {-# UNPACK #-} !BS.ByteString
- {-# UNPACK #-} !Int
-
-- | Produce a message authentication code for a lazy bytestring, based
-- on the provided (strict, bytestring) key, via SHA-512.
--
@@ -146,12 +139,12 @@ hmac_lazy mk@(BI.PS _ _ l) text =
step6 = step5 <> step4
in MAC (hash step6)
where
- hash bs = cat (go iv (pad bs)) where
+ hash bs = cat (go (iv ()) (pad bs)) where
go :: Registers -> BS.ByteString -> Registers
go !acc b
| BS.null b = acc
| otherwise = case unsafe_splitAt 128 b of
- SSPair c r -> go (unsafe_hash_alg acc c) r
+ SSPair c r -> go (update acc (parse c 0)) r
pad m@(BI.PS _ _ (fi -> len))
| len < 256 = to_strict_small padded
@@ -177,6 +170,4 @@ hmac_lazy mk@(BI.PS _ _ l) text =
| j == 0 = acc
| otherwise = loop8 (pred j) (acc <> BSB.word8 0x00)
- !(KeyAndLen k lk)
- | l > 128 = KeyAndLen (hash mk) 64
- | otherwise = KeyAndLen mk l
+ !(k, lk) = if l > 128 then (hash mk, 64) else (mk, l)