commit 1320bcbd71f27616015c8d3c7cd860460731ee45
parent 3710c8d144c461b5842c9f207b33d506b8a05083
Author: Jared Tobin <jared@jtobin.io>
Date: Mon, 12 Jan 2026 13:41:25 +0400
lib: large-scaling refactoring
Diffstat:
5 files changed, 941 insertions(+), 350 deletions(-)
diff --git a/cbits/sha256_arm.c b/cbits/sha256_arm.c
@@ -28,11 +28,11 @@ static const uint32_t K[64] = {
* Process one 64-byte block using ARM SHA256 crypto instructions.
*
* state: pointer to 8 uint32_t words (a,b,c,d,e,f,g,h)
- * block: pointer to 64 bytes of message data
+ * block: pointer to 16 uint32_t words (already native endian)
*
* The state is updated in place.
*/
-void sha256_block_arm(uint32_t *state, const uint8_t *block) {
+void sha256_block_arm(uint32_t *state, const uint32_t *block) {
/* Load current hash state */
uint32x4_t abcd = vld1q_u32(&state[0]);
uint32x4_t efgh = vld1q_u32(&state[4]);
@@ -41,11 +41,11 @@ void sha256_block_arm(uint32_t *state, const uint8_t *block) {
uint32x4_t abcd_orig = abcd;
uint32x4_t efgh_orig = efgh;
- /* Load message and convert from big-endian */
- uint32x4_t m0 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(&block[0])));
- uint32x4_t m1 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(&block[16])));
- uint32x4_t m2 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(&block[32])));
- uint32x4_t m3 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(&block[48])));
+ /* Load message (already native endian) */
+ uint32x4_t m0 = vld1q_u32(&block[0]);
+ uint32x4_t m1 = vld1q_u32(&block[4]);
+ uint32x4_t m2 = vld1q_u32(&block[8]);
+ uint32x4_t m3 = vld1q_u32(&block[12]);
uint32x4_t tmp, tmp2;
@@ -174,7 +174,7 @@ int sha256_arm_available(void) {
#else
/* Stub implementations when ARM SHA2 is not available */
-void sha256_block_arm(uint32_t *state, const uint8_t *block) {
+void sha256_block_arm(uint32_t *state, const uint32_t *block) {
(void)state;
(void)block;
/* Should never be called - use pure Haskell fallback */
diff --git a/lib/Crypto/Hash/SHA256.hs b/lib/Crypto/Hash/SHA256.hs
@@ -29,25 +29,33 @@ module Crypto.Hash.SHA256 (
, MAC(..)
, hmac
, Lazy.hmac_lazy
+
+ -- low-level specialized primitives
+ , _hmac_rr
+ , _hmac_rm
+ , _hmac_rsb
+
+ -- pointer-based IO functions (for HMAC-DRBG)
+ , hmac_rr_unsafe
+ , hmac_rsb_unsafe
) where
-import qualified Data.Bits as B
import qualified Data.ByteString as BS
import qualified Data.ByteString.Internal as BI
import qualified Data.ByteString.Unsafe as BU
-import Data.Word (Word64)
-import Crypto.Hash.SHA256.Arm
+import Data.Word (Word8, Word32, Word64)
+import Foreign.Ptr (Ptr)
+import qualified GHC.Exts as Exts
+import qualified Crypto.Hash.SHA256.Arm as Arm
import Crypto.Hash.SHA256.Internal
import qualified Crypto.Hash.SHA256.Lazy as Lazy
--- utils ---------------------------------------------------------------------
+-- utilities ------------------------------------------------------------------
fi :: (Integral a, Num b) => a -> b
fi = fromIntegral
{-# INLINE fi #-}
--- hash ----------------------------------------------------------------------
-
-- | Compute a condensed representation of a strict bytestring via
-- SHA-256.
--
@@ -57,60 +65,192 @@ fi = fromIntegral
-- "<strict 256-bit message digest>"
hash :: BS.ByteString -> BS.ByteString
hash m
- | sha256_arm_available = hash_arm m
- | otherwise = cat (process m)
-
--- process a message, given the specified iv
-process_with :: Registers -> Word64 -> BS.ByteString -> Registers
-process_with acc0 el m@(BI.PS _ _ l) = finalize (go acc0 0) where
- go !acc !j
- | j + 64 <= l = go (block_hash acc (parse_block m j)) (j + 64)
- | otherwise = acc
-
- finalize !acc
- | len < 56 = block_hash acc (parse_block padded 0)
- | otherwise = block_hash
- (block_hash acc (parse_block padded 0))
- (parse_block padded 64)
- where
- !remaining@(BI.PS _ _ len) = BU.unsafeDrop (l - l `rem` 64) m
- !padded = unsafe_padding remaining (el + fi l)
-
-process :: BS.ByteString -> Registers
-process = process_with (iv ()) 0
+ | Arm.sha256_arm_available = Arm.hash m
+ | otherwise = cat (_hash 0 (iv ()) m)
--- hmac ----------------------------------------------------------------------
+_hash
+ :: Word64 -- ^ extra prefix length for padding calculations
+ -> Registers -- ^ register state
+ -> BS.ByteString -- ^ input
+ -> Registers
+_hash el rs m@(BI.PS _ _ l) = do
+ let !state = _hash_blocks rs m
+ !fin@(BI.PS _ _ ll) = BU.unsafeDrop (l - l `rem` 64) m
+ !total = el + fi l
+ if ll < 56
+ then
+ let !ult = parse_pad1 fin total
+ in update state ult
+ else
+ let !(# pen, ult #) = parse_pad2 fin total
+ in update (update state pen) ult
+{-# INLINABLE _hash #-}
-data KeyAndLen = KeyAndLen
- {-# UNPACK #-} !BS.ByteString
- {-# UNPACK #-} !Int
+_hash_blocks
+ :: Registers -- ^ state
+ -> BS.ByteString -- ^ input
+ -> Registers
+_hash_blocks rs m@(BI.PS _ _ l) = loop rs 0 where
+ loop !acc !j
+ | j + 64 > l = acc
+ | otherwise =
+ let !nacc = update acc (parse m j)
+ in loop nacc (j + 64)
+{-# INLINABLE _hash_blocks #-}
--- | Produce a message authentication code for a strict bytestring,
--- based on the provided (strict, bytestring) key, via SHA-256.
---
--- The 256-bit MAC is returned as a strict bytestring.
+-- hmac ----------------------------------------------------------------------
+
+-- | Compute a condensed representation of a strict bytestring via
+-- SHA-256.
--
--- Per RFC 2104, the key /should/ be a minimum of 32 bytes long. Keys
--- exceeding 64 bytes in length will first be hashed (via SHA-256).
+-- The 256-bit output digest is returned as a strict bytestring.
--
--- >>> hmac "strict bytestring key" "strict bytestring input"
--- "<strict 256-bit MAC>"
-hmac
- :: BS.ByteString -- ^ key
- -> BS.ByteString -- ^ text
- -> MAC
-hmac mk@(BI.PS _ _ l) text
- | sha256_arm_available =
- let !inner = hash_arm_with ipad 64 text
- in MAC (hash_arm (opad <> inner))
- | otherwise =
- let !ipad_state = block_hash (iv ()) (parse_block ipad 0)
- !inner = cat (process_with ipad_state 64 text)
- in MAC (hash (opad <> inner))
- where
- !step1 = k <> BS.replicate (64 - lk) 0x00
- !ipad = BS.map (B.xor 0x36) step1
- !opad = BS.map (B.xor 0x5C) step1
- !(KeyAndLen k lk)
- | l > 64 = KeyAndLen (hash mk) 32
- | otherwise = KeyAndLen mk l
+-- >>> hash "strict bytestring input"
+-- "<strict 256-bit message digest>"
+hmac :: BS.ByteString -> BS.ByteString -> MAC
+hmac k m
+ | Arm.sha256_arm_available = MAC (Arm.hmac k m)
+ | otherwise = MAC (cat (_hmac (prep_key k) m))
+
+prep_key :: BS.ByteString -> Block
+prep_key k@(BI.PS _ _ l)
+ | l > 64 = parse_key (hash k)
+ | otherwise = parse_key k
+{-# INLINABLE prep_key #-}
+
+_hmac
+ :: Block -- ^ padded key
+ -> BS.ByteString -- ^ message
+ -> Registers
+_hmac k m =
+ let !rs0 = update (iv ()) (xor k (Exts.wordToWord32# 0x36363636##))
+ !block = pad_registers_with_length (_hash 64 rs0 m)
+ !rs1 = update (iv ()) (xor k (Exts.wordToWord32# 0x5C5C5C5C##))
+ in update rs1 block
+{-# INLINABLE _hmac #-}
+
+-- XX these need testing
+
+_hmac_rm
+ :: Registers -- ^ key
+ -> BS.ByteString -- ^ message
+ -> Registers
+_hmac_rm k m
+ | Arm.sha256_arm_available = Arm.hmac_rm k m
+ | otherwise =
+ let !key = pad_registers k
+ in _hmac key m
+{-# INLINABLE _hmac_rm #-}
+
+_hmac_rr
+ :: Registers -- ^ key
+ -> Registers -- ^ message
+ -> Registers
+_hmac_rr k m
+ | Arm.sha256_arm_available = Arm.hmac_rr k m
+ | otherwise =
+ let !key = pad_registers k
+ !block = pad_registers_with_length m
+ in _hmac_bb key block
+{-# INLINABLE _hmac_rr #-}
+
+_hmac_bb
+ :: Block -- ^ key
+ -> Block -- ^ message
+ -> Registers
+_hmac_bb k m =
+ let !rs0 = update (iv ()) (xor k (Exts.wordToWord32# 0x36363636##))
+ !rs1 = update rs0 m
+ !inner = pad_registers_with_length rs1
+ !rs2 = update (iv ()) (xor k (Exts.wordToWord32# 0x5C5C5C5C##))
+ in update rs2 inner
+{-# INLINABLE _hmac_bb #-}
+
+-- | HMAC for message (v || sep || dat), avoiding concatenation allocation.
+_hmac_rsb
+ :: Registers -- ^ key
+ -> Registers -- ^ v (32 bytes)
+ -> Word8 -- ^ separator byte
+ -> BS.ByteString -- ^ data
+ -> Registers
+_hmac_rsb k v sep dat
+ -- XX add Arm.hmac_rsb when available
+ -- | Arm.sha256_arm_available = Arm.hmac_rsb k v sep dat
+ | otherwise =
+ let !key = pad_registers k
+ !rs0 = update (iv ()) (xor key (Exts.wordToWord32# 0x36363636##))
+ !inner = hash_vsb 64 rs0 v sep dat
+ !block = pad_registers_with_length inner
+ !rs1 = update (iv ()) (xor key (Exts.wordToWord32# 0x5C5C5C5C##))
+ in update rs1 block
+{-# INLINABLE _hmac_rsb #-}
+
+-- Hash (v || sep || dat) with initial state and extra prefix length.
+hash_vsb
+ :: Word64 -- ^ extra prefix length
+ -> Registers -- ^ initial state
+ -> Registers -- ^ v
+ -> Word8 -- ^ sep
+ -> BS.ByteString -- ^ dat
+ -> Registers
+hash_vsb el rs0 v sep dat@(BI.PS _ _ l)
+ | l >= 31 =
+ -- first block is complete
+ let !b0 = parse_vsb v sep dat
+ !rs1 = update rs0 b0
+ !rest = BU.unsafeDrop 31 dat
+ !restLen = l - 31
+ !rs2 = _hash_blocks rs1 rest
+ !finLen = restLen `rem` 64
+ !fin = BU.unsafeDrop (restLen - finLen) rest
+ !total = el + 33 + fi l
+ in if finLen < 56
+ then update rs2 (parse_pad1 fin total)
+ else let !(# pen, ult #) = parse_pad2 fin total
+ in update (update rs2 pen) ult
+ | otherwise =
+ -- message < 64 bytes, goes straight to padding
+ let !total = el + 33 + fi l
+ in if 33 + l < 56
+ then update rs0 (parse_pad1_vsb v sep dat total)
+ else let !(# pen, ult #) = parse_pad2_vsb v sep dat total
+ in update (update rs0 pen) ult
+{-# INLINABLE hash_vsb #-}
+
+-- pointer-based IO functions ------------------------------------------------
+
+-- | HMAC(key, message) where both are register-sized.
+-- Writes 32-byte result to destination pointer.
+-- Uses ARM crypto extensions if available, otherwise software fallback.
+hmac_rr_unsafe
+ :: Ptr Word32 -- ^ destination (8 Word32s)
+ -> Ptr Word32 -- ^ scratch block buffer (16 Word32s)
+ -> Registers -- ^ key
+ -> Registers -- ^ message
+ -> IO ()
+hmac_rr_unsafe rp bp k m
+ | Arm.sha256_arm_available = Arm._hmac_rr rp bp k m
+ | otherwise = do
+ let !key = pad_registers k
+ !block = pad_registers_with_length m
+ !rs = _hmac_bb key block
+ poke_registers rp rs
+{-# INLINABLE hmac_rr_unsafe #-}
+
+-- | HMAC(key, v || sep || data).
+-- Writes 32-byte result to destination pointer.
+-- Uses ARM crypto extensions if available, otherwise software fallback.
+hmac_rsb_unsafe
+ :: Ptr Word32 -- ^ destination (8 Word32s)
+ -> Ptr Word32 -- ^ scratch block buffer (16 Word32s)
+ -> Registers -- ^ key
+ -> Registers -- ^ v
+ -> Word8 -- ^ separator byte
+ -> BS.ByteString -- ^ data
+ -> IO ()
+hmac_rsb_unsafe rp bp k v sep dat
+ | Arm.sha256_arm_available = Arm._hmac_rsb rp bp k v sep dat
+ | otherwise = do
+ let !rs = _hmac_rsb k v sep dat
+ poke_registers rp rs
+{-# INLINABLE hmac_rsb_unsafe #-}
diff --git a/lib/Crypto/Hash/SHA256/Arm.hs b/lib/Crypto/Hash/SHA256/Arm.hs
@@ -1,5 +1,8 @@
{-# OPTIONS_HADDOCK hide #-}
{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE PatternSynonyms #-}
+{-# LANGUAGE UnboxedTuples #-}
-- |
-- Module: Crypto.Hash.SHA256.Arm
@@ -9,115 +12,303 @@
--
-- ARM crypto extension support for SHA-256.
-module Crypto.Hash.SHA256.Arm (
- sha256_arm_available
- , hash_arm
- , hash_arm_with
- ) where
+module Crypto.Hash.SHA256.Arm -- (
+ -- sha256_arm_available
+ --, hash
+ --, hash_with
+ --, hmac
+ --) where
+ where
-import Control.Monad (unless, when)
-import qualified Data.Bits as B
import qualified Data.ByteString as BS
import qualified Data.ByteString.Internal as BI
import qualified Data.ByteString.Unsafe as BU
import Data.Word (Word8, Word32, Word64)
import Foreign.Marshal.Alloc (allocaBytes)
-import Foreign.Ptr (Ptr, plusPtr)
-import Foreign.Storable (poke, peek)
-import Crypto.Hash.SHA256.Internal (unsafe_padding)
-import System.IO.Unsafe (unsafePerformIO)
+import Foreign.Ptr (Ptr)
+import qualified GHC.Exts as Exts
+import qualified GHC.IO (IO(..))
+import qualified GHC.Ptr
+import Crypto.Hash.SHA256.Internal hiding (update)
+import System.IO.Unsafe (unsafeDupablePerformIO)
--- ffi -----------------------------------------------------------------------
+-- ffi ------------------------------------------------------------------------
foreign import ccall unsafe "sha256_block_arm"
- c_sha256_block :: Ptr Word32 -> Ptr Word8 -> IO ()
+ c_sha256_block :: Ptr Word32 -> Ptr Word32 -> IO ()
foreign import ccall unsafe "sha256_arm_available"
c_sha256_arm_available :: IO Int
--- utilities -----------------------------------------------------------------
+-- utilities ------------------------------------------------------------------
fi :: (Integral a, Num b) => a -> b
fi = fromIntegral
{-# INLINE fi #-}
+
+peek_registers
+ :: Ptr Word32
+ -> Registers
+peek_registers (GHC.Ptr.Ptr addr) = R
+ (Exts.indexWord32OffAddr# addr 0#)
+ (Exts.indexWord32OffAddr# addr 1#)
+ (Exts.indexWord32OffAddr# addr 2#)
+ (Exts.indexWord32OffAddr# addr 3#)
+ (Exts.indexWord32OffAddr# addr 4#)
+ (Exts.indexWord32OffAddr# addr 5#)
+ (Exts.indexWord32OffAddr# addr 6#)
+ (Exts.indexWord32OffAddr# addr 7#)
+{-# INLINE peek_registers #-}
+
+poke_block :: Ptr Word32 -> Block -> IO ()
+poke_block
+ (GHC.Ptr.Ptr addr)
+ (B w00 w01 w02 w03 w04 w05 w06 w07 w08 w09 w10 w11 w12 w13 w14 w15)
+ = GHC.IO.IO $ \s00 ->
+ case Exts.writeWord32OffAddr# addr 00# w00 s00 of { s01 ->
+ case Exts.writeWord32OffAddr# addr 01# w01 s01 of { s02 ->
+ case Exts.writeWord32OffAddr# addr 02# w02 s02 of { s03 ->
+ case Exts.writeWord32OffAddr# addr 03# w03 s03 of { s04 ->
+ case Exts.writeWord32OffAddr# addr 04# w04 s04 of { s05 ->
+ case Exts.writeWord32OffAddr# addr 05# w05 s05 of { s06 ->
+ case Exts.writeWord32OffAddr# addr 06# w06 s06 of { s07 ->
+ case Exts.writeWord32OffAddr# addr 07# w07 s07 of { s08 ->
+ case Exts.writeWord32OffAddr# addr 08# w08 s08 of { s09 ->
+ case Exts.writeWord32OffAddr# addr 09# w09 s09 of { s10 ->
+ case Exts.writeWord32OffAddr# addr 10# w10 s10 of { s11 ->
+ case Exts.writeWord32OffAddr# addr 11# w11 s11 of { s12 ->
+ case Exts.writeWord32OffAddr# addr 12# w12 s12 of { s13 ->
+ case Exts.writeWord32OffAddr# addr 13# w13 s13 of { s14 ->
+ case Exts.writeWord32OffAddr# addr 14# w14 s14 of { s15 ->
+ case Exts.writeWord32OffAddr# addr 15# w15 s15 of { s16 ->
+ (# s16, () #) }}}}}}}}}}}}}}}}
+{-# INLINE poke_block #-}
+
+-- update ---------------------------------------------------------------------
+
+update :: Ptr Word32 -> Ptr Word32 -> Block -> IO ()
+update rp bp block = do
+ poke_block bp block
+ c_sha256_block rp bp
+{-# INLINE update #-}
+
+data BoxedRegisters = BoxedRegisters !Registers
+
+update_pure :: Registers -> Block -> Registers
+update_pure r b =
+ let !(BoxedRegisters rs) = _update r b
+ in rs
+{-# INLINE update_pure #-}
+
+_update :: Registers -> Block -> BoxedRegisters
+_update r b = unsafeDupablePerformIO $
+ allocaBytes 32 $ \rp ->
+ allocaBytes 64 $ \bp -> do
+ poke_registers rp r
+ poke_block bp b
+ c_sha256_block rp bp
+ pure (BoxedRegisters (peek_registers rp))
+{-# INLINE _update #-}
+
-- api -----------------------------------------------------------------------
+-- | Are ARM +sha2 extensions available?
sha256_arm_available :: Bool
-sha256_arm_available = unsafePerformIO c_sha256_arm_available /= 0
+sha256_arm_available = unsafeDupablePerformIO c_sha256_arm_available /= 0
{-# NOINLINE sha256_arm_available #-}
-hash_arm :: BS.ByteString -> BS.ByteString
-hash_arm = hash_arm_with mempty 0
-
--- | Hash with optional 64-byte prefix and extra length for padding.
-hash_arm_with
- :: BS.ByteString -- ^ optional 64-byte prefix (or empty)
- -> Word64 -- ^ extra length to add for padding
- -> BS.ByteString -- ^ message
+hash
+ :: BS.ByteString
-> BS.ByteString
-hash_arm_with prefix el m@(BI.PS fp off l) = unsafePerformIO $
- allocaBytes 32 $ \state -> do
- poke_iv state
- -- process prefix block if provided
- unless (BS.null prefix) $ do
- let BI.PS pfp poff _ = prefix
- BI.unsafeWithForeignPtr pfp $ \src ->
- c_sha256_block state (src `plusPtr` poff)
-
- go state 0
-
- let !remaining@(BI.PS _ _ rlen) = BU.unsafeDrop (l - l `rem` 64) m
- BI.PS padfp padoff _ = unsafe_padding remaining (el + fi l)
- BI.unsafeWithForeignPtr padfp $ \src -> do
- c_sha256_block state (src `plusPtr` padoff)
- when (rlen >= 56) $
- c_sha256_block state (src `plusPtr` (padoff + 64))
-
- read_state state
- where
- go !state !j
- | j + 64 <= l = do
- BI.unsafeWithForeignPtr fp $ \src ->
- c_sha256_block state (src `plusPtr` (off + j))
- go state (j + 64)
- | otherwise = pure ()
-
--- arm helpers ---------------------------------------------------------------
-
-poke_iv :: Ptr Word32 -> IO ()
-poke_iv !state = do
- poke state (0x6a09e667 :: Word32)
- poke (state `plusPtr` 4) (0xbb67ae85 :: Word32)
- poke (state `plusPtr` 8) (0x3c6ef372 :: Word32)
- poke (state `plusPtr` 12) (0xa54ff53a :: Word32)
- poke (state `plusPtr` 16) (0x510e527f :: Word32)
- poke (state `plusPtr` 20) (0x9b05688c :: Word32)
- poke (state `plusPtr` 24) (0x1f83d9ab :: Word32)
- poke (state `plusPtr` 28) (0x5be0cd19 :: Word32)
-
-read_state :: Ptr Word32 -> IO BS.ByteString
-read_state !state = BI.create 32 $ \out -> do
- h0 <- peek state :: IO Word32
- h1 <- peek (state `plusPtr` 4) :: IO Word32
- h2 <- peek (state `plusPtr` 8) :: IO Word32
- h3 <- peek (state `plusPtr` 12) :: IO Word32
- h4 <- peek (state `plusPtr` 16) :: IO Word32
- h5 <- peek (state `plusPtr` 20) :: IO Word32
- h6 <- peek (state `plusPtr` 24) :: IO Word32
- h7 <- peek (state `plusPtr` 28) :: IO Word32
- poke_word32be out 0 h0
- poke_word32be out 4 h1
- poke_word32be out 8 h2
- poke_word32be out 12 h3
- poke_word32be out 16 h4
- poke_word32be out 20 h5
- poke_word32be out 24 h6
- poke_word32be out 28 h7
-
-poke_word32be :: Ptr Word8 -> Int -> Word32 -> IO ()
-poke_word32be !p !off !w = do
- poke (p `plusPtr` off) (fi (w `B.unsafeShiftR` 24) :: Word8)
- poke (p `plusPtr` (off + 1)) (fi (w `B.unsafeShiftR` 16) :: Word8)
- poke (p `plusPtr` (off + 2)) (fi (w `B.unsafeShiftR` 8) :: Word8)
- poke (p `plusPtr` (off + 3)) (fi w :: Word8)
+hash m = unsafeDupablePerformIO $
+ allocaBytes 32 $ \rp ->
+ allocaBytes 64 $ \bp -> do
+ poke_registers rp (iv ())
+ _hash rp bp 0 m
+ let !rs = peek_registers rp
+ pure (cat rs)
+
+_hash
+ :: Ptr Word32 -- ^ register state
+ -> Ptr Word32 -- ^ block state
+ -> Word64 -- ^ extra prefix length, for padding calculation
+ -> BS.ByteString -- ^ input
+ -> IO ()
+_hash rp bp el m@(BI.PS _ _ l) = do
+ hash_blocks rp bp m
+ let !fin@(BI.PS _ _ ll) = BU.unsafeDrop (l - l `rem` 64) m
+ !total = el + fi l
+ if ll < 56
+ then do
+ let !ult = parse_pad1 fin total
+ update rp bp ult
+ else do
+ let !(# pen, ult #) = parse_pad2 fin total
+ update rp bp pen
+ update rp bp ult
+{-# INLINABLE _hash #-}
+
+hash_blocks
+ :: Ptr Word32 -- ^ register state
+ -> Ptr Word32 -- ^ block state
+ -> BS.ByteString -- ^ input
+ -> IO ()
+hash_blocks rp bp m@(BI.PS _ _ l) = loop 0 where
+ loop !j
+ | j + 64 > l = pure ()
+ | otherwise = do
+ let !block = parse m j
+ update rp bp block
+ loop (j + 64)
+{-# INLINE hash_blocks #-}
+
+-- hmac ------------------------------------------------------------------------
+
+hmac :: BS.ByteString -> BS.ByteString -> BS.ByteString
+hmac k m = unsafeDupablePerformIO $
+ allocaBytes 32 $ \rp ->
+ allocaBytes 64 $ \bp -> do
+ _hmac rp bp (prep_key k) m
+ pure (cat (peek_registers rp))
+
+prep_key :: BS.ByteString -> Block
+prep_key k@(BI.PS _ _ l)
+ | l > 64 = parse_key (hash k)
+ | otherwise = parse_key k
+{-# INLINABLE prep_key #-}
+
+-- assume padded key as block.
+_hmac
+ :: Ptr Word32 -- ^ register state
+ -> Ptr Word32 -- ^ block state
+ -> Block -- ^ padded key
+ -> BS.ByteString -- ^ message
+ -> IO ()
+_hmac rp bp k m = do
+ poke_registers rp (iv ())
+ update rp bp (xor k (Exts.wordToWord32# 0x36363636##))
+ _hash rp bp 64 m
+ let !block = pad_registers_with_length (peek_registers rp)
+ poke_registers rp (iv ())
+ update rp bp (xor k (Exts.wordToWord32# 0x5C5C5C5C##))
+ update rp bp block
+{-# INLINABLE _hmac #-}
+
+_hmac_rr
+ :: Ptr Word32 -- ^ register state
+ -> Ptr Word32 -- ^ block state
+ -> Registers -- ^ key
+ -> Registers -- ^ message
+ -> IO ()
+_hmac_rr rp bp k m = do
+ let !key = pad_registers k
+ !block = pad_registers_with_length m
+ _hmac_bb rp bp key block
+{-# INLINABLE _hmac_rr #-}
+
+_hmac_bb
+ :: Ptr Word32 -- ^ register state
+ -> Ptr Word32 -- ^ block state
+ -> Block -- ^ padded key
+ -> Block -- ^ padded message
+ -> IO ()
+_hmac_bb rp bp k m = do
+ poke_registers rp (iv ())
+ update rp bp (xor k (Exts.wordToWord32# 0x36363636##))
+ update rp bp m
+ let !inner = pad_registers_with_length (peek_registers rp)
+ poke_registers rp (iv ())
+ update rp bp (xor k (Exts.wordToWord32# 0x5C5C5C5C##))
+ update rp bp inner
+{-# INLINABLE _hmac_bb #-}
+
+hmac_rm :: Registers -> BS.ByteString -> Registers
+hmac_rm k m =
+ let !(BoxedRegisters rs) = _hmac_rm k m
+ in rs
+{-# INLINABLE hmac_rm #-}
+
+_hmac_rm :: Registers -> BS.ByteString -> BoxedRegisters
+_hmac_rm k m = unsafeDupablePerformIO $
+ allocaBytes 32 $ \rp ->
+ allocaBytes 64 $ \bp -> do
+ let !key = pad_registers k
+ _hmac rp bp key m
+ pure (BoxedRegisters (peek_registers rp))
+{-# INLINABLE _hmac_rm #-}
+
+hmac_rr :: Registers -> Registers -> Registers
+hmac_rr k m =
+ let !(BoxedRegisters rs) = _hmac_rr_pure k m
+ in rs
+{-# INLINABLE hmac_rr #-}
+
+_hmac_rr_pure :: Registers -> Registers -> BoxedRegisters
+_hmac_rr_pure k m = unsafeDupablePerformIO $
+ allocaBytes 32 $ \rp ->
+ allocaBytes 64 $ \bp -> do
+ _hmac_rr rp bp k m
+ pure (BoxedRegisters (peek_registers rp))
+{-# INLINABLE _hmac_rr_pure #-}
+
+-- | HMAC(key, v || sep || data) using ARM crypto extensions.
+-- Writes result to destination pointer.
+_hmac_rsb
+ :: Ptr Word32 -- ^ destination (8 Word32s)
+ -> Ptr Word32 -- ^ scratch block buffer (16 Word32s)
+ -> Registers -- ^ key
+ -> Registers -- ^ v
+ -> Word8 -- ^ separator byte
+ -> BS.ByteString -- ^ data
+ -> IO ()
+_hmac_rsb rp bp k v sep dat = do
+ poke_registers rp (iv ())
+ let !key = pad_registers k
+ update rp bp (xor key (Exts.wordToWord32# 0x36363636##))
+ _hash_vsb rp bp 64 v sep dat
+ let !inner = pad_registers_with_length (peek_registers rp)
+ poke_registers rp (iv ())
+ update rp bp (xor key (Exts.wordToWord32# 0x5C5C5C5C##))
+ update rp bp inner
+{-# INLINABLE _hmac_rsb #-}
+
+-- | Hash (v || sep || dat) with ARM crypto extensions.
+-- Assumes register state already initialized at rp.
+_hash_vsb
+ :: Ptr Word32 -- ^ register state
+ -> Ptr Word32 -- ^ block buffer
+ -> Word64 -- ^ extra prefix length
+ -> Registers -- ^ v
+ -> Word8 -- ^ sep
+ -> BS.ByteString -- ^ dat
+ -> IO ()
+_hash_vsb rp bp el v sep dat@(BI.PS _ _ l)
+ | l >= 31 = do
+ -- first block is complete: v || sep || dat[0:31]
+ let !b0 = parse_vsb v sep dat
+ update rp bp b0
+ -- hash remaining complete blocks from dat[31:]
+ let !rest = BU.unsafeDrop 31 dat
+ !restLen = l - 31
+ hash_blocks rp bp rest
+ -- handle final padding
+ let !finLen = restLen `rem` 64
+ !fin = BU.unsafeDrop (restLen - finLen) rest
+ !total = el + 33 + fi l
+ if finLen < 56
+ then update rp bp (parse_pad1 fin total)
+ else do
+ let !(# pen, ult #) = parse_pad2 fin total
+ update rp bp pen
+ update rp bp ult
+ | otherwise = do
+ -- message < 64 bytes total, straight to padding
+ let !total = el + 33 + fi l
+ if 33 + l < 56
+ then update rp bp (parse_pad1_vsb v sep dat total)
+ else do
+ let !(# pen, ult #) = parse_pad2_vsb v sep dat total
+ update rp bp pen
+ update rp bp ult
+{-# INLINABLE _hash_vsb #-}
diff --git a/lib/Crypto/Hash/SHA256/Internal.hs b/lib/Crypto/Hash/SHA256/Internal.hs
@@ -6,6 +6,7 @@
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE UnboxedTuples #-}
{-# LANGUAGE UnliftedNewtypes #-}
+{-# LANGUAGE ViewPatterns #-}
-- |
-- Module: Crypto.Hash.SHA256.Internal
@@ -16,34 +17,52 @@
-- SHA-256 internals.
module Crypto.Hash.SHA256.Internal (
- Block(..)
- , pattern B
- , Registers(..)
- , pattern R
-
+ -- * Types
+ Block(B, ..)
+ , Registers(R, ..)
, MAC(..)
- , iv
- , block_hash
+ -- * Parsing
+ , parse
+ , parse_pad1
+ , parse_pad2
+
+ -- * Serializing
, cat
+ , cat_into
+ , cat_into32
+
+ -- * Hash function internals
+ , update
+ , iv
+
+ -- * HMAC utilities
+ , pad_registers
+ , pad_registers_with_length
+ , xor
+ , parse_key
- , word32be
- , parse_block
- , unsafe_hash_alg
- , unsafe_padding
+ -- * HMAC-DRBG utilities
+ , parse_vsb
+ , parse_pad1_vsb
+ , parse_pad2_vsb
+
+ -- * Pointer-based IO utilities
+ , poke_registers
) where
import qualified Data.Bits as B
import qualified Data.ByteString as BS
import qualified Data.ByteString.Internal as BI
import qualified Data.ByteString.Unsafe as BU
-import Data.Word (Word8, Word64)
-import Foreign.Marshal.Utils (copyBytes, fillBytes)
-import Foreign.Ptr (Ptr, plusPtr)
-import Foreign.Storable (poke)
+import Data.Word (Word8, Word32, Word64)
+import qualified GHC.IO (IO(..))
+import GHC.Ptr (Ptr(..))
import GHC.Exts (Int#)
import qualified GHC.Exts as Exts
-import qualified GHC.Word (Word8(..))
+import qualified GHC.Word (Word32(..), Word8(..))
+
+-- types ----------------------------------------------------------------------
-- | A message authentication code.
--
@@ -68,8 +87,7 @@ instance Eq MAC where
| la /= lb = False
| otherwise = BS.foldl' (B..|.) 0 (BS.packZipWith B.xor a b) == 0
--- https://datatracker.ietf.org/doc/html/rfc6234
-
+-- | SHA256 block.
newtype Block = Block
(# Exts.Word32#, Exts.Word32#, Exts.Word32#, Exts.Word32#
, Exts.Word32#, Exts.Word32#, Exts.Word32#, Exts.Word32#
@@ -85,13 +103,12 @@ pattern B
-> Block
pattern B w00 w01 w02 w03 w04 w05 w06 w07 w08 w09 w10 w11 w12 w13 w14 w15 =
Block
- (# w00, w01, w02, w03
- , w04, w05, w06, w07
- , w08, w09, w10, w11
- , w12, w13, w14, w15
+ (# w00, w01, w02, w03, w04, w05, w06, w07
+ , w08, w09, w10, w11, w12, w13, w14, w15
#)
{-# COMPLETE B #-}
+-- | SHA256 state.
newtype Registers = Registers
(# Exts.Word32#, Exts.Word32#, Exts.Word32#, Exts.Word32#
, Exts.Word32#, Exts.Word32#, Exts.Word32#, Exts.Word32#
@@ -101,32 +118,25 @@ pattern R
:: Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32#
-> Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32#
-> Registers
-pattern R w00 w01 w02 w03 w04 w05 w06 w07 =
- Registers
- (# w00, w01, w02, w03
- , w04, w05, w06, w07
- #)
+pattern R w00 w01 w02 w03 w04 w05 w06 w07 = Registers
+ (# w00, w01, w02, w03
+ , w04, w05, w06, w07
+ #)
{-# COMPLETE R #-}
--- given a bytestring and offset, parse word32. length not checked.
-word32be :: BS.ByteString -> Int -> Exts.Word32#
-word32be bs m =
- let !(GHC.Word.W8# ra) = BU.unsafeIndex bs m
- !(GHC.Word.W8# rb) = BU.unsafeIndex bs (m + 1)
- !(GHC.Word.W8# rc) = BU.unsafeIndex bs (m + 2)
- !(GHC.Word.W8# rd) = BU.unsafeIndex bs (m + 3)
- !a = Exts.wordToWord32# (Exts.word8ToWord# ra)
- !b = Exts.wordToWord32# (Exts.word8ToWord# rb)
- !c = Exts.wordToWord32# (Exts.word8ToWord# rc)
- !d = Exts.wordToWord32# (Exts.word8ToWord# rd)
- !sa = Exts.uncheckedShiftLWord32# a 24#
- !sb = Exts.uncheckedShiftLWord32# b 16#
- !sc = Exts.uncheckedShiftLWord32# c 08#
- in sa `Exts.orWord32#` sb `Exts.orWord32#` sc `Exts.orWord32#` d
-{-# INLINE word32be #-}
+-- utilities ------------------------------------------------------------------
+
+fi :: (Integral a, Num b) => a -> b
+fi = fromIntegral
+{-# INLINE fi #-}
-parse_block :: BS.ByteString -> Int -> Block
-parse_block bs m = B
+-- parsing (nonfinal input) ---------------------------------------------------
+
+-- | Given a bytestring and offset, parse a full block.
+--
+-- The length of the input is not checked.
+parse :: BS.ByteString -> Int -> Block
+parse bs m = B
(word32be bs m)
(word32be bs (m + 04))
(word32be bs (m + 08))
@@ -143,88 +153,101 @@ parse_block bs m = B
(word32be bs (m + 52))
(word32be bs (m + 56))
(word32be bs (m + 60))
-{-# INLINE parse_block #-}
-
--- rotate right
-rotr# :: Exts.Word32# -> Int# -> Exts.Word32#
-rotr# x n =
- Exts.uncheckedShiftRLWord32# x n `Exts.orWord32#`
- Exts.uncheckedShiftLWord32# x (32# Exts.-# n)
-{-# INLINE rotr# #-}
+{-# INLINE parse #-}
--- logical right shift
-shr# :: Exts.Word32# -> Int# -> Exts.Word32#
-shr# = Exts.uncheckedShiftRLWord32#
-{-# INLINE shr# #-}
-
--- ch(x, y, z) = (x & y) ^ (~x & z)
-ch# :: Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32#
-ch# x y z =
- (x `Exts.andWord32#` y) `Exts.xorWord32#`
- (Exts.notWord32# x `Exts.andWord32#` z)
-{-# INLINE ch# #-}
-
--- maj(x, y, z) = (x & (y | z)) | (y & z)
-maj# :: Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32#
-maj# x y z =
- (x `Exts.andWord32#` (y `Exts.orWord32#` z)) `Exts.orWord32#`
- (y `Exts.andWord32#` z)
-{-# INLINE maj# #-}
-
--- big sigma 0: rotr2 ^ rotr13 ^ rotr22
-bsig0# :: Exts.Word32# -> Exts.Word32#
-bsig0# x =
- rotr# x 2# `Exts.xorWord32#` rotr# x 13# `Exts.xorWord32#` rotr# x 22#
-{-# INLINE bsig0# #-}
-
--- big sigma 1: rotr6 ^ rotr11 ^ rotr25
-bsig1# :: Exts.Word32# -> Exts.Word32#
-bsig1# x =
- rotr# x 6# `Exts.xorWord32#` rotr# x 11# `Exts.xorWord32#` rotr# x 25#
-{-# INLINE bsig1# #-}
-
--- small sigma 0: rotr7 ^ rotr18 ^ shr3
-ssig0# :: Exts.Word32# -> Exts.Word32#
-ssig0# x =
- rotr# x 7# `Exts.xorWord32#` rotr# x 18# `Exts.xorWord32#` shr# x 3#
-{-# INLINE ssig0# #-}
-
--- small sigma 1: rotr17 ^ rotr19 ^ shr10
-ssig1# :: Exts.Word32# -> Exts.Word32#
-ssig1# x =
- rotr# x 17# `Exts.xorWord32#` rotr# x 19# `Exts.xorWord32#` shr# x 10#
-{-# INLINE ssig1# #-}
+-- | Parse the 32-bit word encoded at the given ofset.
+--
+-- The length of the input is not checked.
+word32be :: BS.ByteString -> Int -> Exts.Word32#
+word32be bs m =
+ let !(GHC.Word.W8# ra) = BU.unsafeIndex bs m
+ !(GHC.Word.W8# rb) = BU.unsafeIndex bs (m + 1)
+ !(GHC.Word.W8# rc) = BU.unsafeIndex bs (m + 2)
+ !(GHC.Word.W8# rd) = BU.unsafeIndex bs (m + 3)
+ !a = Exts.wordToWord32# (Exts.word8ToWord# ra)
+ !b = Exts.wordToWord32# (Exts.word8ToWord# rb)
+ !c = Exts.wordToWord32# (Exts.word8ToWord# rc)
+ !d = Exts.wordToWord32# (Exts.word8ToWord# rd)
+ !sa = Exts.uncheckedShiftLWord32# a 24#
+ !sb = Exts.uncheckedShiftLWord32# b 16#
+ !sc = Exts.uncheckedShiftLWord32# c 08#
+ in sa `Exts.orWord32#` sb `Exts.orWord32#` sc `Exts.orWord32#` d
+{-# INLINE word32be #-}
--- round step
-step#
- :: Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32#
- -> Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32#
- -> Exts.Word32# -> Exts.Word32#
- -> Registers
-step# a b c d e f g h k w =
- let !t1 = h
- `Exts.plusWord32#` bsig1# e
- `Exts.plusWord32#` ch# e f g
- `Exts.plusWord32#` k
- `Exts.plusWord32#` w
- !t2 = bsig0# a `Exts.plusWord32#` maj# a b c
- in R (t1 `Exts.plusWord32#` t2) a b c (d `Exts.plusWord32#` t1) e f g
-{-# INLINE step# #-}
+-- parsing (final input) ------------------------------------------------------
--- first 32 bits of the fractional parts of the square roots of the
--- first eight primes
-iv :: () -> Registers
-iv _ = R (Exts.wordToWord32# 0x6a09e667##)
- (Exts.wordToWord32# 0xbb67ae85##)
- (Exts.wordToWord32# 0x3c6ef372##)
- (Exts.wordToWord32# 0xa54ff53a##)
- (Exts.wordToWord32# 0x510e527f##)
- (Exts.wordToWord32# 0x9b05688c##)
- (Exts.wordToWord32# 0x1f83d9ab##)
- (Exts.wordToWord32# 0x5be0cd19##)
-
-block_hash :: Registers -> Block -> Registers
-block_hash
+-- | Parse the final chunk of an input message, assuming it is less than
+-- 56 bytes in length (unchecked!).
+--
+-- Returns one block consisting of the chunk and padding.
+parse_pad1
+ :: BS.ByteString -- ^ final input chunk (< 56 bytes)
+ -> Word64 -- ^ length of all input
+ -> Block -- ^ resulting block
+parse_pad1 bs l =
+ let !bits = l * 8
+ !(GHC.Word.W32# lhi) = fi (bits `B.unsafeShiftR` 32)
+ !(GHC.Word.W32# llo) = fi bits
+ in B (w32_at bs 00) (w32_at bs 04) (w32_at bs 08) (w32_at bs 12)
+ (w32_at bs 16) (w32_at bs 20) (w32_at bs 24) (w32_at bs 28)
+ (w32_at bs 32) (w32_at bs 36) (w32_at bs 40) (w32_at bs 44)
+ (w32_at bs 48) (w32_at bs 52) lhi llo
+{-# INLINABLE parse_pad1 #-}
+
+-- | Parse the final chunk of an input message, assuming it is at least 56
+-- bytes in length (unchecked!).
+--
+-- Returns two blocks consisting of the chunk and padding.
+parse_pad2
+ :: BS.ByteString -- ^ final input chunk (>= 56 bytes)
+ -> Word64 -- ^ length of all input
+ -> (# Block, Block #) -- ^ resulting blocks
+parse_pad2 bs l =
+ let !bits = l * 8
+ !z = Exts.wordToWord32# 0##
+ !(GHC.Word.W32# lhi) = fi (bits `B.unsafeShiftR` 32)
+ !(GHC.Word.W32# llo) = fi bits
+ !block0 = B
+ (w32_at bs 00) (w32_at bs 04) (w32_at bs 08) (w32_at bs 12)
+ (w32_at bs 16) (w32_at bs 20) (w32_at bs 24) (w32_at bs 28)
+ (w32_at bs 32) (w32_at bs 36) (w32_at bs 40) (w32_at bs 44)
+ (w32_at bs 48) (w32_at bs 52) (w32_at bs 56) (w32_at bs 60)
+ !block1 = B z z z z z z z z z z z z z z lhi llo
+ in (# block0, block1 #)
+{-# INLINABLE parse_pad2 #-}
+
+-- | Return the byte at offset 'i', or a padding separator or zero byte
+-- beyond the input bounds, as an unboxed 32-bit word.
+w8_as_w32_at
+ :: BS.ByteString -- ^ input chunk
+ -> Int -- ^ offset
+ -> Exts.Word32#
+w8_as_w32_at bs@(BI.PS _ _ l) i = Exts.wordToWord32# $ case compare i l of
+ LT -> let !(GHC.Word.W8# w) = BU.unsafeIndex bs i
+ in Exts.word8ToWord# w
+ EQ -> 0x80##
+ _ -> 0x00##
+{-# INLINE w8_as_w32_at #-}
+
+-- | Return the 32-bit word encoded by four consecutive bytes at the
+-- provided offset.
+w32_at
+ :: BS.ByteString
+ -> Int
+ -> Exts.Word32#
+w32_at bs i =
+ let !wa = w8_as_w32_at bs i `Exts.uncheckedShiftLWord32#` 24#
+ !wb = w8_as_w32_at bs (i + 1) `Exts.uncheckedShiftLWord32#` 16#
+ !wc = w8_as_w32_at bs (i + 2) `Exts.uncheckedShiftLWord32#` 08#
+ !wd = w8_as_w32_at bs (i + 3)
+ in wa `Exts.orWord32#` wb `Exts.orWord32#` wc `Exts.orWord32#` wd
+{-# INLINE w32_at #-}
+
+-- update ---------------------------------------------------------------------
+
+-- | Update register state, given new input block.
+update :: Registers -> Block -> Registers
+update
(R h0 h1 h2 h3 h4 h5 h6 h7)
(B b00 b01 b02 b03 b04 b05 b06 b07 b08 b09 b10 b11 b12 b13 b14 b15)
=
@@ -282,7 +305,7 @@ block_hash
!w62 = ssig1# w60 `p` w55 `p` ssig0# w47 `p` w46
!w63 = ssig1# w61 `p` w56 `p` ssig0# w48 `p` w47
- -- rounds (cube roots of first 64 primes)
+ -- rounds (constants are cube roots of first 64 primes)
!(R s00a s00b s00c s00d s00e s00f s00g s00h) =
step# h0 h1 h2 h3 h4 h5 h6 h7 (k 0x428a2f98##) w00
!(R s01a s01b s01c s01d s01e s01f s01g s01h) =
@@ -420,65 +443,311 @@ block_hash
k = Exts.wordToWord32#
{-# INLINE k #-}
--- RFC 6234 6.2 block pipeline
---
--- invariant:
--- the input bytestring is exactly 512 bits in length
-unsafe_hash_alg :: Registers -> BS.ByteString -> Registers
-unsafe_hash_alg rs bs = block_hash rs (parse_block bs 0)
+-- rotate right
+rotr# :: Exts.Word32# -> Int# -> Exts.Word32#
+rotr# x n =
+ Exts.uncheckedShiftRLWord32# x n `Exts.orWord32#`
+ Exts.uncheckedShiftLWord32# x (32# Exts.-# n)
+{-# INLINE rotr# #-}
--- register concatenation
+-- logical right shift
+shr# :: Exts.Word32# -> Int# -> Exts.Word32#
+shr# = Exts.uncheckedShiftRLWord32#
+{-# INLINE shr# #-}
+
+-- ch(x, y, z) = (x & y) ^ (~x & z)
+ch# :: Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32#
+ch# x y z =
+ (x `Exts.andWord32#` y) `Exts.xorWord32#`
+ (Exts.notWord32# x `Exts.andWord32#` z)
+{-# INLINE ch# #-}
+
+-- maj(x, y, z) = (x & (y | z)) | (y & z)
+maj# :: Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32#
+maj# x y z =
+ (x `Exts.andWord32#` (y `Exts.orWord32#` z)) `Exts.orWord32#`
+ (y `Exts.andWord32#` z)
+{-# INLINE maj# #-}
+
+-- big sigma 0: rotr2 ^ rotr13 ^ rotr22
+bsig0# :: Exts.Word32# -> Exts.Word32#
+bsig0# x =
+ rotr# x 2# `Exts.xorWord32#` rotr# x 13# `Exts.xorWord32#` rotr# x 22#
+{-# INLINE bsig0# #-}
+
+-- big sigma 1: rotr6 ^ rotr11 ^ rotr25
+bsig1# :: Exts.Word32# -> Exts.Word32#
+bsig1# x =
+ rotr# x 6# `Exts.xorWord32#` rotr# x 11# `Exts.xorWord32#` rotr# x 25#
+{-# INLINE bsig1# #-}
+
+-- small sigma 0: rotr7 ^ rotr18 ^ shr3
+ssig0# :: Exts.Word32# -> Exts.Word32#
+ssig0# x =
+ rotr# x 7# `Exts.xorWord32#` rotr# x 18# `Exts.xorWord32#` shr# x 3#
+{-# INLINE ssig0# #-}
+
+-- small sigma 1: rotr17 ^ rotr19 ^ shr10
+ssig1# :: Exts.Word32# -> Exts.Word32#
+ssig1# x =
+ rotr# x 17# `Exts.xorWord32#` rotr# x 19# `Exts.xorWord32#` shr# x 10#
+{-# INLINE ssig1# #-}
+
+-- round step
+step#
+ :: Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32#
+ -> Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32#
+ -> Exts.Word32# -> Exts.Word32#
+ -> Registers
+step# a b c d e f g h k w =
+ let !t1 = h
+ `Exts.plusWord32#` bsig1# e
+ `Exts.plusWord32#` ch# e f g
+ `Exts.plusWord32#` k
+ `Exts.plusWord32#` w
+ !t2 = bsig0# a `Exts.plusWord32#` maj# a b c
+ in R (t1 `Exts.plusWord32#` t2) a b c (d `Exts.plusWord32#` t1) e f g
+{-# INLINE step# #-}
+
+-- initial register state; first 32 bits of the fractional parts of the
+-- square roots of the first eight primes
+iv :: () -> Registers
+iv _ = R
+ (Exts.wordToWord32# 0x6a09e667##)
+ (Exts.wordToWord32# 0xbb67ae85##)
+ (Exts.wordToWord32# 0x3c6ef372##)
+ (Exts.wordToWord32# 0xa54ff53a##)
+ (Exts.wordToWord32# 0x510e527f##)
+ (Exts.wordToWord32# 0x9b05688c##)
+ (Exts.wordToWord32# 0x1f83d9ab##)
+ (Exts.wordToWord32# 0x5be0cd19##)
+
+-- serializing ----------------------------------------------------------------
+
+-- | Concat SHA256 state into a ByteString.
cat :: Registers -> BS.ByteString
-cat (R h0 h1 h2 h3 h4 h5 h6 h7) = BI.unsafeCreate 32 $ \ptr -> do
- poke32be ptr 0 h0
- poke32be ptr 4 h1
- poke32be ptr 8 h2
- poke32be ptr 12 h3
- poke32be ptr 16 h4
- poke32be ptr 20 h5
- poke32be ptr 24 h6
- poke32be ptr 28 h7
- where
- poke32be :: Ptr Word8 -> Int -> Exts.Word32# -> IO ()
- poke32be p off w = do
- poke (p `plusPtr` off) (byte w 24#)
- poke (p `plusPtr` (off + 1)) (byte w 16#)
- poke (p `plusPtr` (off + 2)) (byte w 8#)
- poke (p `plusPtr` (off + 3)) (byte w 0#)
-
- byte :: Exts.Word32# -> Int# -> Word8
- byte w n = GHC.Word.W8# (Exts.wordToWord8#
- (Exts.word32ToWord# (Exts.uncheckedShiftRLWord32# w n)))
-
--- keystroke saver
-fi :: (Integral a, Num b) => a -> b
-fi = fromIntegral
-{-# INLINE fi #-}
+cat rs = BI.unsafeCreate 32 (cat_into rs)
+{-# INLINABLE cat #-}
+
+-- | Serialize SHA256 state to a pointer of word32's (big-endian).
+cat_into32 :: Registers -> Ptr Word32 -> IO ()
+cat_into32 (R h0 h1 h2 h3 h4 h5 h6 h7) (Ptr addr) = GHC.IO.IO $ \s0 ->
+ case Exts.writeWord32OffAddr# addr 0# h0 s0 of { s1 ->
+ case Exts.writeWord32OffAddr# addr 1# h1 s1 of { s2 ->
+ case Exts.writeWord32OffAddr# addr 2# h2 s2 of { s3 ->
+ case Exts.writeWord32OffAddr# addr 3# h3 s3 of { s4 ->
+ case Exts.writeWord32OffAddr# addr 4# h4 s4 of { s5 ->
+ case Exts.writeWord32OffAddr# addr 5# h5 s5 of { s6 ->
+ case Exts.writeWord32OffAddr# addr 6# h6 s6 of { s7 ->
+ case Exts.writeWord32OffAddr# addr 7# h7 s7 of { s8 ->
+ (# s8, () #)
+ }}}}}}}}
+{-# INLINE cat_into32 #-}
+
+-- | Serialize SHA256 state to a pointer (big-endian).
+cat_into :: Registers -> Ptr Word8 -> IO ()
+cat_into (R h0 h1 h2 h3 h4 h5 h6 h7) (Ptr addr) = GHC.IO.IO $ \s0 ->
+ case poke32be addr 00# h0 s0 of { s1 ->
+ case poke32be addr 04# h1 s1 of { s2 ->
+ case poke32be addr 08# h2 s2 of { s3 ->
+ case poke32be addr 12# h3 s3 of { s4 ->
+ case poke32be addr 16# h4 s4 of { s5 ->
+ case poke32be addr 20# h5 s5 of { s6 ->
+ case poke32be addr 24# h6 s6 of { s7 ->
+ case poke32be addr 28# h7 s7 of { s8 ->
+ (# s8, () #)
+ }}}}}}}}
+{-# INLINE cat_into #-}
+
+poke32be
+ :: Exts.Addr#
+ -> Int#
+ -> Exts.Word32#
+ -> Exts.State# Exts.RealWorld
+ -> Exts.State# Exts.RealWorld
+poke32be a off w s0 =
+ case Exts.writeWord8OffAddr# a off (byte# w 24#) s0 of { s1 ->
+ case Exts.writeWord8OffAddr# a (off Exts.+# 1#) (byte# w 16#) s1 of { s2 ->
+ case Exts.writeWord8OffAddr# a (off Exts.+# 2#) (byte# w 8#) s2 of { s3 ->
+ Exts.writeWord8OffAddr# a (off Exts.+# 3#) (byte# w 0#) s3
+ }}}
+{-# INLINE poke32be #-}
+
+byte# :: Exts.Word32# -> Int# -> Exts.Word8#
+byte# w n = Exts.wordToWord8#
+ (Exts.word32ToWord# (Exts.uncheckedShiftRLWord32# w n))
+{-# INLINE byte# #-}
+
+-- | Write register state to a pointer (native endian Word32s).
+poke_registers :: Ptr Word32 -> Registers -> IO ()
+poke_registers (Ptr addr) (R w0 w1 w2 w3 w4 w5 w6 w7) = GHC.IO.IO $ \s0 ->
+ case Exts.writeWord32OffAddr# addr 0# w0 s0 of { s1 ->
+ case Exts.writeWord32OffAddr# addr 1# w1 s1 of { s2 ->
+ case Exts.writeWord32OffAddr# addr 2# w2 s2 of { s3 ->
+ case Exts.writeWord32OffAddr# addr 3# w3 s3 of { s4 ->
+ case Exts.writeWord32OffAddr# addr 4# w4 s4 of { s5 ->
+ case Exts.writeWord32OffAddr# addr 5# w5 s5 of { s6 ->
+ case Exts.writeWord32OffAddr# addr 6# w6 s6 of { s7 ->
+ case Exts.writeWord32OffAddr# addr 7# w7 s7 of { s8 ->
+ (# s8, () #) }}}}}}}}
+{-# INLINE poke_registers #-}
+
+-- hmac utilities -------------------------------------------------------------
+
+-- pad registers to block
+pad_registers :: Registers -> Block
+pad_registers (R w0 w1 w2 w3 w4 w5 w6 w7) = B
+ w0 w1 w2 w3 w4 w5 w6 w7
+ (Exts.wordToWord32# 0##) (Exts.wordToWord32# 0##) (Exts.wordToWord32# 0##)
+ (Exts.wordToWord32# 0##) (Exts.wordToWord32# 0##) (Exts.wordToWord32# 0##)
+ (Exts.wordToWord32# 0##) (Exts.wordToWord32# 0##)
+{-# INLINE pad_registers #-}
+
+-- pad registers to block, using padding separator and augmented length
+-- (assumes existence of a leading block)
+pad_registers_with_length :: Registers -> Block
+pad_registers_with_length (R h0 h1 h2 h3 h4 h5 h6 h7) = B
+ h0 h1 h2 h3 h4 h5 h6 h7 -- inner hash
+ (Exts.wordToWord32# 0x80000000##) -- padding separator
+ (Exts.wordToWord32# 0x00000000##)
+ (Exts.wordToWord32# 0x00000000##)
+ (Exts.wordToWord32# 0x00000000##)
+ (Exts.wordToWord32# 0x00000000##)
+ (Exts.wordToWord32# 0x00000000##)
+ (Exts.wordToWord32# 0x00000000##) -- high 32 bits of length
+ (Exts.wordToWord32# 0x00000300##) -- low 32 bits of length
+{-# INLINABLE pad_registers_with_length #-}
+
+xor :: Block -> Exts.Word32# -> Block
+xor (B w00 w01 w02 w03 w04 w05 w06 w07 w08 w09 w10 w11 w12 w13 w14 w15) b = B
+ (Exts.xorWord32# w00 b)
+ (Exts.xorWord32# w01 b)
+ (Exts.xorWord32# w02 b)
+ (Exts.xorWord32# w03 b)
+ (Exts.xorWord32# w04 b)
+ (Exts.xorWord32# w05 b)
+ (Exts.xorWord32# w06 b)
+ (Exts.xorWord32# w07 b)
+ (Exts.xorWord32# w08 b)
+ (Exts.xorWord32# w09 b)
+ (Exts.xorWord32# w10 b)
+ (Exts.xorWord32# w11 b)
+ (Exts.xorWord32# w12 b)
+ (Exts.xorWord32# w13 b)
+ (Exts.xorWord32# w14 b)
+ (Exts.xorWord32# w15 b)
+{-# INLINE xor #-}
+
+parse_key :: BS.ByteString -> Block
+parse_key bs = B
+ (w32_zero bs 0) (w32_zero bs 4) (w32_zero bs 8) (w32_zero bs 12)
+ (w32_zero bs 16) (w32_zero bs 20) (w32_zero bs 24) (w32_zero bs 28)
+ (w32_zero bs 32) (w32_zero bs 36) (w32_zero bs 40) (w32_zero bs 44)
+ (w32_zero bs 48) (w32_zero bs 52) (w32_zero bs 56) (w32_zero bs 60)
+{-# INLINE parse_key #-}
+
+-- read big-endian Word32#, zero-padding beyond input length
+w32_zero :: BS.ByteString -> Int -> Exts.Word32#
+w32_zero bs i =
+ let !wa = w8_zero bs i `Exts.uncheckedShiftLWord32#` 24#
+ !wb = w8_zero bs (i + 1) `Exts.uncheckedShiftLWord32#` 16#
+ !wc = w8_zero bs (i + 2) `Exts.uncheckedShiftLWord32#` 08#
+ !wd = w8_zero bs (i + 3)
+ in wa `Exts.orWord32#` wb `Exts.orWord32#` wc `Exts.orWord32#` wd
+{-# INLINE w32_zero #-}
+
+-- read byte as Word32#, returning zero beyond input length
+w8_zero :: BS.ByteString -> Int -> Exts.Word32#
+w8_zero bs@(BI.PS _ _ l) i
+ | i < l = let !(GHC.Word.W8# w) = BU.unsafeIndex bs i
+ in Exts.wordToWord32# (Exts.word8ToWord# w)
+ | otherwise = Exts.wordToWord32# 0##
+{-# INLINE w8_zero #-}
+
+-- hmac-drbg utilities --------------------------------------------------------
+
+-- | Parse first complete block from v || sep || dat[0:31].
+--
+-- Requires len(dat) >= 31.
+parse_vsb :: Registers -> Word8 -> BS.ByteString -> Block
+parse_vsb (R v0 v1 v2 v3 v4 v5 v6 v7) (GHC.Word.W8# sep) dat =
+ let !(GHC.Word.W8# b0) = BU.unsafeIndex dat 0
+ !(GHC.Word.W8# b1) = BU.unsafeIndex dat 1
+ !(GHC.Word.W8# b2) = BU.unsafeIndex dat 2
+ !w08 =
+ Exts.uncheckedShiftLWord32# (w8_w32 sep) 24#
+ `Exts.orWord32#`
+ Exts.uncheckedShiftLWord32# (w8_w32 b0) 16#
+ `Exts.orWord32#`
+ Exts.uncheckedShiftLWord32# (w8_w32 b1) 8#
+ `Exts.orWord32#`
+ w8_w32 b2
+ in B v0 v1 v2 v3 v4 v5 v6 v7
+ w08
+ (word32be dat 3) (word32be dat 7) (word32be dat 11)
+ (word32be dat 15) (word32be dat 19) (word32be dat 23) (word32be dat 27)
+{-# INLINE parse_vsb #-}
+
+-- | Parse single padding block from v || sep || dat.
+--
+-- Requires (33 + len(dat)) < 56.
+parse_pad1_vsb :: Registers -> Word8 -> BS.ByteString -> Word64 -> Block
+parse_pad1_vsb (R v0 v1 v2 v3 v4 v5 v6 v7) sep dat total =
+ let !bits = total * 8
+ !(GHC.Word.W32# lhi) = fi (bits `B.unsafeShiftR` 32)
+ !(GHC.Word.W32# llo) = fi bits
+ in B v0 v1 v2 v3 v4 v5 v6 v7
+ (w32_sdp sep dat 32) (w32_sdp sep dat 36)
+ (w32_sdp sep dat 40) (w32_sdp sep dat 44)
+ (w32_sdp sep dat 48) (w32_sdp sep dat 52)
+ lhi llo
+{-# INLINABLE parse_pad1_vsb #-}
+
+-- | Parse two padding blocks from v || sep || dat.
+--
+-- Requires 56 <= (33 + len(dat)) < 64.
+parse_pad2_vsb
+ :: Registers -> Word8 -> BS.ByteString -> Word64 -> (# Block, Block #)
+parse_pad2_vsb (R v0 v1 v2 v3 v4 v5 v6 v7) sep dat total =
+ let !bits = total * 8
+ !z = Exts.wordToWord32# 0##
+ !(GHC.Word.W32# lhi) = fi (bits `B.unsafeShiftR` 32)
+ !(GHC.Word.W32# llo) = fi bits
+ !b0 = B v0 v1 v2 v3 v4 v5 v6 v7
+ (w32_sdp sep dat 32) (w32_sdp sep dat 36)
+ (w32_sdp sep dat 40) (w32_sdp sep dat 44)
+ (w32_sdp sep dat 48) (w32_sdp sep dat 52)
+ (w32_sdp sep dat 56) (w32_sdp sep dat 60)
+ !b1 = B z z z z z z z z z z z z z z lhi llo
+ in (# b0, b1 #)
+{-# INLINABLE parse_pad2_vsb #-}
+
+-- Read Word32 at offset i (>= 32) from (sep || dat || 0x80 || zeros).
+w32_sdp :: Word8 -> BS.ByteString -> Int -> Exts.Word32#
+w32_sdp sep dat i =
+ let !(GHC.Word.W8# a) = byte_sdp sep dat i
+ !(GHC.Word.W8# b) = byte_sdp sep dat (i + 1)
+ !(GHC.Word.W8# c) = byte_sdp sep dat (i + 2)
+ !(GHC.Word.W8# d) = byte_sdp sep dat (i + 3)
+ in Exts.uncheckedShiftLWord32# (w8_w32 a) 24#
+ `Exts.orWord32#`
+ Exts.uncheckedShiftLWord32# (w8_w32 b) 16#
+ `Exts.orWord32#`
+ Exts.uncheckedShiftLWord32# (w8_w32 c) 8#
+ `Exts.orWord32#`
+ w8_w32 d
+{-# INLINE w32_sdp #-}
+
+-- Read byte at offset i (>= 32) from (sep || dat || 0x80 || zeros).
+byte_sdp :: Word8 -> BS.ByteString -> Int -> Word8
+byte_sdp sep dat@(BI.PS _ _ l) i
+ | i == 32 = sep
+ | i < 33 + l = BU.unsafeIndex dat (i - 33)
+ | i == 33 + l = 0x80
+ | otherwise = 0x00
+{-# INLINE byte_sdp #-}
+
+w8_w32 :: Exts.Word8# -> Exts.Word32#
+w8_w32 w = Exts.wordToWord32# (Exts.word8ToWord# w)
+{-# INLINE w8_w32 #-}
--- RFC 6234 4.1 message padding
-unsafe_padding :: BS.ByteString -> Word64 -> BS.ByteString
-unsafe_padding (BI.PS fp off r) len
- | r < 56 = BI.unsafeCreate 64 $ \p -> do
- BI.unsafeWithForeignPtr fp $ \src ->
- copyBytes p (src `plusPtr` off) r
- poke (p `plusPtr` r) (0x80 :: Word8)
- fillBytes (p `plusPtr` (r + 1)) 0 (55 - r)
- poke_word64be (p `plusPtr` 56) (len * 8)
- | otherwise = BI.unsafeCreate 128 $ \p -> do
- BI.unsafeWithForeignPtr fp $ \src ->
- copyBytes p (src `plusPtr` off) r
- poke (p `plusPtr` r) (0x80 :: Word8)
- fillBytes (p `plusPtr` (r + 1)) 0 (63 - r)
- fillBytes (p `plusPtr` 64) 0 56
- poke_word64be (p `plusPtr` 120) (len * 8)
- where
- poke_word64be :: Ptr Word8 -> Word64 -> IO ()
- poke_word64be p w = do
- poke p (fi (w `B.unsafeShiftR` 56) :: Word8)
- poke (p `plusPtr` 1) (fi (w `B.unsafeShiftR` 48) :: Word8)
- poke (p `plusPtr` 2) (fi (w `B.unsafeShiftR` 40) :: Word8)
- poke (p `plusPtr` 3) (fi (w `B.unsafeShiftR` 32) :: Word8)
- poke (p `plusPtr` 4) (fi (w `B.unsafeShiftR` 24) :: Word8)
- poke (p `plusPtr` 5) (fi (w `B.unsafeShiftR` 16) :: Word8)
- poke (p `plusPtr` 6) (fi (w `B.unsafeShiftR` 8) :: Word8)
- poke (p `plusPtr` 7) (fi w :: Word8)
diff --git a/lib/Crypto/Hash/SHA256/Lazy.hs b/lib/Crypto/Hash/SHA256/Lazy.hs
@@ -32,9 +32,6 @@ import Data.Word (Word64)
import Foreign.ForeignPtr (plusForeignPtr)
import Crypto.Hash.SHA256.Internal
--- preliminary utils
-
--- keystroke saver
fi :: (Integral a, Num b) => a -> b
fi = fromIntegral
{-# INLINE fi #-}
@@ -114,15 +111,11 @@ hash_lazy bl = cat (go (iv ()) (pad_lazy bl)) where
go !acc bs
| BL.null bs = acc
| otherwise = case splitAt64 bs of
- SLPair c r -> go (unsafe_hash_alg acc c) r
+ SLPair c r -> go (update acc (parse c 0)) r
-- HMAC -----------------------------------------------------------------------
-- https://datatracker.ietf.org/doc/html/rfc2104#section-2
-data KeyAndLen = KeyAndLen
- {-# UNPACK #-} !BS.ByteString
- {-# UNPACK #-} !Int
-
-- | Produce a message authentication code for a lazy bytestring, based
-- on the provided (strict, bytestring) key, via SHA-256.
--
@@ -151,7 +144,7 @@ hmac_lazy mk@(BI.PS _ _ l) text =
go !acc b
| BS.null b = acc
| otherwise = case unsafe_splitAt 64 b of
- SSPair c r -> go (unsafe_hash_alg acc c) r
+ SSPair c r -> go (update acc (parse c 0)) r
pad m@(BI.PS _ _ (fi -> len))
| len < 128 = to_strict_small padded
@@ -176,6 +169,4 @@ hmac_lazy mk@(BI.PS _ _ l) text =
| j == 0 = acc
| otherwise = loop8 (pred j) (acc <> BSB.word8 0x00)
- !(KeyAndLen k lk)
- | l > 64 = KeyAndLen (hash mk) 32
- | otherwise = KeyAndLen mk l
+ !(k, lk) = if l > 64 then (hash mk, 32) else (mk, l)