sha256

Pure Haskell SHA-256, HMAC-SHA256 (docs.ppad.tech/sha256).
git clone git://git.ppad.tech/sha256.git
Log | Files | Refs | README | LICENSE

commit 1320bcbd71f27616015c8d3c7cd860460731ee45
parent 3710c8d144c461b5842c9f207b33d506b8a05083
Author: Jared Tobin <jared@jtobin.io>
Date:   Mon, 12 Jan 2026 13:41:25 +0400

lib: large-scaling refactoring

Diffstat:
Mcbits/sha256_arm.c | 16++++++++--------
Mlib/Crypto/Hash/SHA256.hs | 256+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------
Mlib/Crypto/Hash/SHA256/Arm.hs | 367++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------
Mlib/Crypto/Hash/SHA256/Internal.hs | 637++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------
Mlib/Crypto/Hash/SHA256/Lazy.hs | 15+++------------
5 files changed, 941 insertions(+), 350 deletions(-)

diff --git a/cbits/sha256_arm.c b/cbits/sha256_arm.c @@ -28,11 +28,11 @@ static const uint32_t K[64] = { * Process one 64-byte block using ARM SHA256 crypto instructions. * * state: pointer to 8 uint32_t words (a,b,c,d,e,f,g,h) - * block: pointer to 64 bytes of message data + * block: pointer to 16 uint32_t words (already native endian) * * The state is updated in place. */ -void sha256_block_arm(uint32_t *state, const uint8_t *block) { +void sha256_block_arm(uint32_t *state, const uint32_t *block) { /* Load current hash state */ uint32x4_t abcd = vld1q_u32(&state[0]); uint32x4_t efgh = vld1q_u32(&state[4]); @@ -41,11 +41,11 @@ void sha256_block_arm(uint32_t *state, const uint8_t *block) { uint32x4_t abcd_orig = abcd; uint32x4_t efgh_orig = efgh; - /* Load message and convert from big-endian */ - uint32x4_t m0 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(&block[0]))); - uint32x4_t m1 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(&block[16]))); - uint32x4_t m2 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(&block[32]))); - uint32x4_t m3 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(&block[48]))); + /* Load message (already native endian) */ + uint32x4_t m0 = vld1q_u32(&block[0]); + uint32x4_t m1 = vld1q_u32(&block[4]); + uint32x4_t m2 = vld1q_u32(&block[8]); + uint32x4_t m3 = vld1q_u32(&block[12]); uint32x4_t tmp, tmp2; @@ -174,7 +174,7 @@ int sha256_arm_available(void) { #else /* Stub implementations when ARM SHA2 is not available */ -void sha256_block_arm(uint32_t *state, const uint8_t *block) { +void sha256_block_arm(uint32_t *state, const uint32_t *block) { (void)state; (void)block; /* Should never be called - use pure Haskell fallback */ diff --git a/lib/Crypto/Hash/SHA256.hs b/lib/Crypto/Hash/SHA256.hs @@ -29,25 +29,33 @@ module Crypto.Hash.SHA256 ( , MAC(..) , hmac , Lazy.hmac_lazy + + -- low-level specialized primitives + , _hmac_rr + , _hmac_rm + , _hmac_rsb + + -- pointer-based IO functions (for HMAC-DRBG) + , hmac_rr_unsafe + , hmac_rsb_unsafe ) where -import qualified Data.Bits as B import qualified Data.ByteString as BS import qualified Data.ByteString.Internal as BI import qualified Data.ByteString.Unsafe as BU -import Data.Word (Word64) -import Crypto.Hash.SHA256.Arm +import Data.Word (Word8, Word32, Word64) +import Foreign.Ptr (Ptr) +import qualified GHC.Exts as Exts +import qualified Crypto.Hash.SHA256.Arm as Arm import Crypto.Hash.SHA256.Internal import qualified Crypto.Hash.SHA256.Lazy as Lazy --- utils --------------------------------------------------------------------- +-- utilities ------------------------------------------------------------------ fi :: (Integral a, Num b) => a -> b fi = fromIntegral {-# INLINE fi #-} --- hash ---------------------------------------------------------------------- - -- | Compute a condensed representation of a strict bytestring via -- SHA-256. -- @@ -57,60 +65,192 @@ fi = fromIntegral -- "<strict 256-bit message digest>" hash :: BS.ByteString -> BS.ByteString hash m - | sha256_arm_available = hash_arm m - | otherwise = cat (process m) - --- process a message, given the specified iv -process_with :: Registers -> Word64 -> BS.ByteString -> Registers -process_with acc0 el m@(BI.PS _ _ l) = finalize (go acc0 0) where - go !acc !j - | j + 64 <= l = go (block_hash acc (parse_block m j)) (j + 64) - | otherwise = acc - - finalize !acc - | len < 56 = block_hash acc (parse_block padded 0) - | otherwise = block_hash - (block_hash acc (parse_block padded 0)) - (parse_block padded 64) - where - !remaining@(BI.PS _ _ len) = BU.unsafeDrop (l - l `rem` 64) m - !padded = unsafe_padding remaining (el + fi l) - -process :: BS.ByteString -> Registers -process = process_with (iv ()) 0 + | Arm.sha256_arm_available = Arm.hash m + | otherwise = cat (_hash 0 (iv ()) m) --- hmac ---------------------------------------------------------------------- +_hash + :: Word64 -- ^ extra prefix length for padding calculations + -> Registers -- ^ register state + -> BS.ByteString -- ^ input + -> Registers +_hash el rs m@(BI.PS _ _ l) = do + let !state = _hash_blocks rs m + !fin@(BI.PS _ _ ll) = BU.unsafeDrop (l - l `rem` 64) m + !total = el + fi l + if ll < 56 + then + let !ult = parse_pad1 fin total + in update state ult + else + let !(# pen, ult #) = parse_pad2 fin total + in update (update state pen) ult +{-# INLINABLE _hash #-} -data KeyAndLen = KeyAndLen - {-# UNPACK #-} !BS.ByteString - {-# UNPACK #-} !Int +_hash_blocks + :: Registers -- ^ state + -> BS.ByteString -- ^ input + -> Registers +_hash_blocks rs m@(BI.PS _ _ l) = loop rs 0 where + loop !acc !j + | j + 64 > l = acc + | otherwise = + let !nacc = update acc (parse m j) + in loop nacc (j + 64) +{-# INLINABLE _hash_blocks #-} --- | Produce a message authentication code for a strict bytestring, --- based on the provided (strict, bytestring) key, via SHA-256. --- --- The 256-bit MAC is returned as a strict bytestring. +-- hmac ---------------------------------------------------------------------- + +-- | Compute a condensed representation of a strict bytestring via +-- SHA-256. -- --- Per RFC 2104, the key /should/ be a minimum of 32 bytes long. Keys --- exceeding 64 bytes in length will first be hashed (via SHA-256). +-- The 256-bit output digest is returned as a strict bytestring. -- --- >>> hmac "strict bytestring key" "strict bytestring input" --- "<strict 256-bit MAC>" -hmac - :: BS.ByteString -- ^ key - -> BS.ByteString -- ^ text - -> MAC -hmac mk@(BI.PS _ _ l) text - | sha256_arm_available = - let !inner = hash_arm_with ipad 64 text - in MAC (hash_arm (opad <> inner)) - | otherwise = - let !ipad_state = block_hash (iv ()) (parse_block ipad 0) - !inner = cat (process_with ipad_state 64 text) - in MAC (hash (opad <> inner)) - where - !step1 = k <> BS.replicate (64 - lk) 0x00 - !ipad = BS.map (B.xor 0x36) step1 - !opad = BS.map (B.xor 0x5C) step1 - !(KeyAndLen k lk) - | l > 64 = KeyAndLen (hash mk) 32 - | otherwise = KeyAndLen mk l +-- >>> hash "strict bytestring input" +-- "<strict 256-bit message digest>" +hmac :: BS.ByteString -> BS.ByteString -> MAC +hmac k m + | Arm.sha256_arm_available = MAC (Arm.hmac k m) + | otherwise = MAC (cat (_hmac (prep_key k) m)) + +prep_key :: BS.ByteString -> Block +prep_key k@(BI.PS _ _ l) + | l > 64 = parse_key (hash k) + | otherwise = parse_key k +{-# INLINABLE prep_key #-} + +_hmac + :: Block -- ^ padded key + -> BS.ByteString -- ^ message + -> Registers +_hmac k m = + let !rs0 = update (iv ()) (xor k (Exts.wordToWord32# 0x36363636##)) + !block = pad_registers_with_length (_hash 64 rs0 m) + !rs1 = update (iv ()) (xor k (Exts.wordToWord32# 0x5C5C5C5C##)) + in update rs1 block +{-# INLINABLE _hmac #-} + +-- XX these need testing + +_hmac_rm + :: Registers -- ^ key + -> BS.ByteString -- ^ message + -> Registers +_hmac_rm k m + | Arm.sha256_arm_available = Arm.hmac_rm k m + | otherwise = + let !key = pad_registers k + in _hmac key m +{-# INLINABLE _hmac_rm #-} + +_hmac_rr + :: Registers -- ^ key + -> Registers -- ^ message + -> Registers +_hmac_rr k m + | Arm.sha256_arm_available = Arm.hmac_rr k m + | otherwise = + let !key = pad_registers k + !block = pad_registers_with_length m + in _hmac_bb key block +{-# INLINABLE _hmac_rr #-} + +_hmac_bb + :: Block -- ^ key + -> Block -- ^ message + -> Registers +_hmac_bb k m = + let !rs0 = update (iv ()) (xor k (Exts.wordToWord32# 0x36363636##)) + !rs1 = update rs0 m + !inner = pad_registers_with_length rs1 + !rs2 = update (iv ()) (xor k (Exts.wordToWord32# 0x5C5C5C5C##)) + in update rs2 inner +{-# INLINABLE _hmac_bb #-} + +-- | HMAC for message (v || sep || dat), avoiding concatenation allocation. +_hmac_rsb + :: Registers -- ^ key + -> Registers -- ^ v (32 bytes) + -> Word8 -- ^ separator byte + -> BS.ByteString -- ^ data + -> Registers +_hmac_rsb k v sep dat + -- XX add Arm.hmac_rsb when available + -- | Arm.sha256_arm_available = Arm.hmac_rsb k v sep dat + | otherwise = + let !key = pad_registers k + !rs0 = update (iv ()) (xor key (Exts.wordToWord32# 0x36363636##)) + !inner = hash_vsb 64 rs0 v sep dat + !block = pad_registers_with_length inner + !rs1 = update (iv ()) (xor key (Exts.wordToWord32# 0x5C5C5C5C##)) + in update rs1 block +{-# INLINABLE _hmac_rsb #-} + +-- Hash (v || sep || dat) with initial state and extra prefix length. +hash_vsb + :: Word64 -- ^ extra prefix length + -> Registers -- ^ initial state + -> Registers -- ^ v + -> Word8 -- ^ sep + -> BS.ByteString -- ^ dat + -> Registers +hash_vsb el rs0 v sep dat@(BI.PS _ _ l) + | l >= 31 = + -- first block is complete + let !b0 = parse_vsb v sep dat + !rs1 = update rs0 b0 + !rest = BU.unsafeDrop 31 dat + !restLen = l - 31 + !rs2 = _hash_blocks rs1 rest + !finLen = restLen `rem` 64 + !fin = BU.unsafeDrop (restLen - finLen) rest + !total = el + 33 + fi l + in if finLen < 56 + then update rs2 (parse_pad1 fin total) + else let !(# pen, ult #) = parse_pad2 fin total + in update (update rs2 pen) ult + | otherwise = + -- message < 64 bytes, goes straight to padding + let !total = el + 33 + fi l + in if 33 + l < 56 + then update rs0 (parse_pad1_vsb v sep dat total) + else let !(# pen, ult #) = parse_pad2_vsb v sep dat total + in update (update rs0 pen) ult +{-# INLINABLE hash_vsb #-} + +-- pointer-based IO functions ------------------------------------------------ + +-- | HMAC(key, message) where both are register-sized. +-- Writes 32-byte result to destination pointer. +-- Uses ARM crypto extensions if available, otherwise software fallback. +hmac_rr_unsafe + :: Ptr Word32 -- ^ destination (8 Word32s) + -> Ptr Word32 -- ^ scratch block buffer (16 Word32s) + -> Registers -- ^ key + -> Registers -- ^ message + -> IO () +hmac_rr_unsafe rp bp k m + | Arm.sha256_arm_available = Arm._hmac_rr rp bp k m + | otherwise = do + let !key = pad_registers k + !block = pad_registers_with_length m + !rs = _hmac_bb key block + poke_registers rp rs +{-# INLINABLE hmac_rr_unsafe #-} + +-- | HMAC(key, v || sep || data). +-- Writes 32-byte result to destination pointer. +-- Uses ARM crypto extensions if available, otherwise software fallback. +hmac_rsb_unsafe + :: Ptr Word32 -- ^ destination (8 Word32s) + -> Ptr Word32 -- ^ scratch block buffer (16 Word32s) + -> Registers -- ^ key + -> Registers -- ^ v + -> Word8 -- ^ separator byte + -> BS.ByteString -- ^ data + -> IO () +hmac_rsb_unsafe rp bp k v sep dat + | Arm.sha256_arm_available = Arm._hmac_rsb rp bp k v sep dat + | otherwise = do + let !rs = _hmac_rsb k v sep dat + poke_registers rp rs +{-# INLINABLE hmac_rsb_unsafe #-} diff --git a/lib/Crypto/Hash/SHA256/Arm.hs b/lib/Crypto/Hash/SHA256/Arm.hs @@ -1,5 +1,8 @@ {-# OPTIONS_HADDOCK hide #-} {-# LANGUAGE BangPatterns #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE PatternSynonyms #-} +{-# LANGUAGE UnboxedTuples #-} -- | -- Module: Crypto.Hash.SHA256.Arm @@ -9,115 +12,303 @@ -- -- ARM crypto extension support for SHA-256. -module Crypto.Hash.SHA256.Arm ( - sha256_arm_available - , hash_arm - , hash_arm_with - ) where +module Crypto.Hash.SHA256.Arm -- ( + -- sha256_arm_available + --, hash + --, hash_with + --, hmac + --) where + where -import Control.Monad (unless, when) -import qualified Data.Bits as B import qualified Data.ByteString as BS import qualified Data.ByteString.Internal as BI import qualified Data.ByteString.Unsafe as BU import Data.Word (Word8, Word32, Word64) import Foreign.Marshal.Alloc (allocaBytes) -import Foreign.Ptr (Ptr, plusPtr) -import Foreign.Storable (poke, peek) -import Crypto.Hash.SHA256.Internal (unsafe_padding) -import System.IO.Unsafe (unsafePerformIO) +import Foreign.Ptr (Ptr) +import qualified GHC.Exts as Exts +import qualified GHC.IO (IO(..)) +import qualified GHC.Ptr +import Crypto.Hash.SHA256.Internal hiding (update) +import System.IO.Unsafe (unsafeDupablePerformIO) --- ffi ----------------------------------------------------------------------- +-- ffi ------------------------------------------------------------------------ foreign import ccall unsafe "sha256_block_arm" - c_sha256_block :: Ptr Word32 -> Ptr Word8 -> IO () + c_sha256_block :: Ptr Word32 -> Ptr Word32 -> IO () foreign import ccall unsafe "sha256_arm_available" c_sha256_arm_available :: IO Int --- utilities ----------------------------------------------------------------- +-- utilities ------------------------------------------------------------------ fi :: (Integral a, Num b) => a -> b fi = fromIntegral {-# INLINE fi #-} + +peek_registers + :: Ptr Word32 + -> Registers +peek_registers (GHC.Ptr.Ptr addr) = R + (Exts.indexWord32OffAddr# addr 0#) + (Exts.indexWord32OffAddr# addr 1#) + (Exts.indexWord32OffAddr# addr 2#) + (Exts.indexWord32OffAddr# addr 3#) + (Exts.indexWord32OffAddr# addr 4#) + (Exts.indexWord32OffAddr# addr 5#) + (Exts.indexWord32OffAddr# addr 6#) + (Exts.indexWord32OffAddr# addr 7#) +{-# INLINE peek_registers #-} + +poke_block :: Ptr Word32 -> Block -> IO () +poke_block + (GHC.Ptr.Ptr addr) + (B w00 w01 w02 w03 w04 w05 w06 w07 w08 w09 w10 w11 w12 w13 w14 w15) + = GHC.IO.IO $ \s00 -> + case Exts.writeWord32OffAddr# addr 00# w00 s00 of { s01 -> + case Exts.writeWord32OffAddr# addr 01# w01 s01 of { s02 -> + case Exts.writeWord32OffAddr# addr 02# w02 s02 of { s03 -> + case Exts.writeWord32OffAddr# addr 03# w03 s03 of { s04 -> + case Exts.writeWord32OffAddr# addr 04# w04 s04 of { s05 -> + case Exts.writeWord32OffAddr# addr 05# w05 s05 of { s06 -> + case Exts.writeWord32OffAddr# addr 06# w06 s06 of { s07 -> + case Exts.writeWord32OffAddr# addr 07# w07 s07 of { s08 -> + case Exts.writeWord32OffAddr# addr 08# w08 s08 of { s09 -> + case Exts.writeWord32OffAddr# addr 09# w09 s09 of { s10 -> + case Exts.writeWord32OffAddr# addr 10# w10 s10 of { s11 -> + case Exts.writeWord32OffAddr# addr 11# w11 s11 of { s12 -> + case Exts.writeWord32OffAddr# addr 12# w12 s12 of { s13 -> + case Exts.writeWord32OffAddr# addr 13# w13 s13 of { s14 -> + case Exts.writeWord32OffAddr# addr 14# w14 s14 of { s15 -> + case Exts.writeWord32OffAddr# addr 15# w15 s15 of { s16 -> + (# s16, () #) }}}}}}}}}}}}}}}} +{-# INLINE poke_block #-} + +-- update --------------------------------------------------------------------- + +update :: Ptr Word32 -> Ptr Word32 -> Block -> IO () +update rp bp block = do + poke_block bp block + c_sha256_block rp bp +{-# INLINE update #-} + +data BoxedRegisters = BoxedRegisters !Registers + +update_pure :: Registers -> Block -> Registers +update_pure r b = + let !(BoxedRegisters rs) = _update r b + in rs +{-# INLINE update_pure #-} + +_update :: Registers -> Block -> BoxedRegisters +_update r b = unsafeDupablePerformIO $ + allocaBytes 32 $ \rp -> + allocaBytes 64 $ \bp -> do + poke_registers rp r + poke_block bp b + c_sha256_block rp bp + pure (BoxedRegisters (peek_registers rp)) +{-# INLINE _update #-} + -- api ----------------------------------------------------------------------- +-- | Are ARM +sha2 extensions available? sha256_arm_available :: Bool -sha256_arm_available = unsafePerformIO c_sha256_arm_available /= 0 +sha256_arm_available = unsafeDupablePerformIO c_sha256_arm_available /= 0 {-# NOINLINE sha256_arm_available #-} -hash_arm :: BS.ByteString -> BS.ByteString -hash_arm = hash_arm_with mempty 0 - --- | Hash with optional 64-byte prefix and extra length for padding. -hash_arm_with - :: BS.ByteString -- ^ optional 64-byte prefix (or empty) - -> Word64 -- ^ extra length to add for padding - -> BS.ByteString -- ^ message +hash + :: BS.ByteString -> BS.ByteString -hash_arm_with prefix el m@(BI.PS fp off l) = unsafePerformIO $ - allocaBytes 32 $ \state -> do - poke_iv state - -- process prefix block if provided - unless (BS.null prefix) $ do - let BI.PS pfp poff _ = prefix - BI.unsafeWithForeignPtr pfp $ \src -> - c_sha256_block state (src `plusPtr` poff) - - go state 0 - - let !remaining@(BI.PS _ _ rlen) = BU.unsafeDrop (l - l `rem` 64) m - BI.PS padfp padoff _ = unsafe_padding remaining (el + fi l) - BI.unsafeWithForeignPtr padfp $ \src -> do - c_sha256_block state (src `plusPtr` padoff) - when (rlen >= 56) $ - c_sha256_block state (src `plusPtr` (padoff + 64)) - - read_state state - where - go !state !j - | j + 64 <= l = do - BI.unsafeWithForeignPtr fp $ \src -> - c_sha256_block state (src `plusPtr` (off + j)) - go state (j + 64) - | otherwise = pure () - --- arm helpers --------------------------------------------------------------- - -poke_iv :: Ptr Word32 -> IO () -poke_iv !state = do - poke state (0x6a09e667 :: Word32) - poke (state `plusPtr` 4) (0xbb67ae85 :: Word32) - poke (state `plusPtr` 8) (0x3c6ef372 :: Word32) - poke (state `plusPtr` 12) (0xa54ff53a :: Word32) - poke (state `plusPtr` 16) (0x510e527f :: Word32) - poke (state `plusPtr` 20) (0x9b05688c :: Word32) - poke (state `plusPtr` 24) (0x1f83d9ab :: Word32) - poke (state `plusPtr` 28) (0x5be0cd19 :: Word32) - -read_state :: Ptr Word32 -> IO BS.ByteString -read_state !state = BI.create 32 $ \out -> do - h0 <- peek state :: IO Word32 - h1 <- peek (state `plusPtr` 4) :: IO Word32 - h2 <- peek (state `plusPtr` 8) :: IO Word32 - h3 <- peek (state `plusPtr` 12) :: IO Word32 - h4 <- peek (state `plusPtr` 16) :: IO Word32 - h5 <- peek (state `plusPtr` 20) :: IO Word32 - h6 <- peek (state `plusPtr` 24) :: IO Word32 - h7 <- peek (state `plusPtr` 28) :: IO Word32 - poke_word32be out 0 h0 - poke_word32be out 4 h1 - poke_word32be out 8 h2 - poke_word32be out 12 h3 - poke_word32be out 16 h4 - poke_word32be out 20 h5 - poke_word32be out 24 h6 - poke_word32be out 28 h7 - -poke_word32be :: Ptr Word8 -> Int -> Word32 -> IO () -poke_word32be !p !off !w = do - poke (p `plusPtr` off) (fi (w `B.unsafeShiftR` 24) :: Word8) - poke (p `plusPtr` (off + 1)) (fi (w `B.unsafeShiftR` 16) :: Word8) - poke (p `plusPtr` (off + 2)) (fi (w `B.unsafeShiftR` 8) :: Word8) - poke (p `plusPtr` (off + 3)) (fi w :: Word8) +hash m = unsafeDupablePerformIO $ + allocaBytes 32 $ \rp -> + allocaBytes 64 $ \bp -> do + poke_registers rp (iv ()) + _hash rp bp 0 m + let !rs = peek_registers rp + pure (cat rs) + +_hash + :: Ptr Word32 -- ^ register state + -> Ptr Word32 -- ^ block state + -> Word64 -- ^ extra prefix length, for padding calculation + -> BS.ByteString -- ^ input + -> IO () +_hash rp bp el m@(BI.PS _ _ l) = do + hash_blocks rp bp m + let !fin@(BI.PS _ _ ll) = BU.unsafeDrop (l - l `rem` 64) m + !total = el + fi l + if ll < 56 + then do + let !ult = parse_pad1 fin total + update rp bp ult + else do + let !(# pen, ult #) = parse_pad2 fin total + update rp bp pen + update rp bp ult +{-# INLINABLE _hash #-} + +hash_blocks + :: Ptr Word32 -- ^ register state + -> Ptr Word32 -- ^ block state + -> BS.ByteString -- ^ input + -> IO () +hash_blocks rp bp m@(BI.PS _ _ l) = loop 0 where + loop !j + | j + 64 > l = pure () + | otherwise = do + let !block = parse m j + update rp bp block + loop (j + 64) +{-# INLINE hash_blocks #-} + +-- hmac ------------------------------------------------------------------------ + +hmac :: BS.ByteString -> BS.ByteString -> BS.ByteString +hmac k m = unsafeDupablePerformIO $ + allocaBytes 32 $ \rp -> + allocaBytes 64 $ \bp -> do + _hmac rp bp (prep_key k) m + pure (cat (peek_registers rp)) + +prep_key :: BS.ByteString -> Block +prep_key k@(BI.PS _ _ l) + | l > 64 = parse_key (hash k) + | otherwise = parse_key k +{-# INLINABLE prep_key #-} + +-- assume padded key as block. +_hmac + :: Ptr Word32 -- ^ register state + -> Ptr Word32 -- ^ block state + -> Block -- ^ padded key + -> BS.ByteString -- ^ message + -> IO () +_hmac rp bp k m = do + poke_registers rp (iv ()) + update rp bp (xor k (Exts.wordToWord32# 0x36363636##)) + _hash rp bp 64 m + let !block = pad_registers_with_length (peek_registers rp) + poke_registers rp (iv ()) + update rp bp (xor k (Exts.wordToWord32# 0x5C5C5C5C##)) + update rp bp block +{-# INLINABLE _hmac #-} + +_hmac_rr + :: Ptr Word32 -- ^ register state + -> Ptr Word32 -- ^ block state + -> Registers -- ^ key + -> Registers -- ^ message + -> IO () +_hmac_rr rp bp k m = do + let !key = pad_registers k + !block = pad_registers_with_length m + _hmac_bb rp bp key block +{-# INLINABLE _hmac_rr #-} + +_hmac_bb + :: Ptr Word32 -- ^ register state + -> Ptr Word32 -- ^ block state + -> Block -- ^ padded key + -> Block -- ^ padded message + -> IO () +_hmac_bb rp bp k m = do + poke_registers rp (iv ()) + update rp bp (xor k (Exts.wordToWord32# 0x36363636##)) + update rp bp m + let !inner = pad_registers_with_length (peek_registers rp) + poke_registers rp (iv ()) + update rp bp (xor k (Exts.wordToWord32# 0x5C5C5C5C##)) + update rp bp inner +{-# INLINABLE _hmac_bb #-} + +hmac_rm :: Registers -> BS.ByteString -> Registers +hmac_rm k m = + let !(BoxedRegisters rs) = _hmac_rm k m + in rs +{-# INLINABLE hmac_rm #-} + +_hmac_rm :: Registers -> BS.ByteString -> BoxedRegisters +_hmac_rm k m = unsafeDupablePerformIO $ + allocaBytes 32 $ \rp -> + allocaBytes 64 $ \bp -> do + let !key = pad_registers k + _hmac rp bp key m + pure (BoxedRegisters (peek_registers rp)) +{-# INLINABLE _hmac_rm #-} + +hmac_rr :: Registers -> Registers -> Registers +hmac_rr k m = + let !(BoxedRegisters rs) = _hmac_rr_pure k m + in rs +{-# INLINABLE hmac_rr #-} + +_hmac_rr_pure :: Registers -> Registers -> BoxedRegisters +_hmac_rr_pure k m = unsafeDupablePerformIO $ + allocaBytes 32 $ \rp -> + allocaBytes 64 $ \bp -> do + _hmac_rr rp bp k m + pure (BoxedRegisters (peek_registers rp)) +{-# INLINABLE _hmac_rr_pure #-} + +-- | HMAC(key, v || sep || data) using ARM crypto extensions. +-- Writes result to destination pointer. +_hmac_rsb + :: Ptr Word32 -- ^ destination (8 Word32s) + -> Ptr Word32 -- ^ scratch block buffer (16 Word32s) + -> Registers -- ^ key + -> Registers -- ^ v + -> Word8 -- ^ separator byte + -> BS.ByteString -- ^ data + -> IO () +_hmac_rsb rp bp k v sep dat = do + poke_registers rp (iv ()) + let !key = pad_registers k + update rp bp (xor key (Exts.wordToWord32# 0x36363636##)) + _hash_vsb rp bp 64 v sep dat + let !inner = pad_registers_with_length (peek_registers rp) + poke_registers rp (iv ()) + update rp bp (xor key (Exts.wordToWord32# 0x5C5C5C5C##)) + update rp bp inner +{-# INLINABLE _hmac_rsb #-} + +-- | Hash (v || sep || dat) with ARM crypto extensions. +-- Assumes register state already initialized at rp. +_hash_vsb + :: Ptr Word32 -- ^ register state + -> Ptr Word32 -- ^ block buffer + -> Word64 -- ^ extra prefix length + -> Registers -- ^ v + -> Word8 -- ^ sep + -> BS.ByteString -- ^ dat + -> IO () +_hash_vsb rp bp el v sep dat@(BI.PS _ _ l) + | l >= 31 = do + -- first block is complete: v || sep || dat[0:31] + let !b0 = parse_vsb v sep dat + update rp bp b0 + -- hash remaining complete blocks from dat[31:] + let !rest = BU.unsafeDrop 31 dat + !restLen = l - 31 + hash_blocks rp bp rest + -- handle final padding + let !finLen = restLen `rem` 64 + !fin = BU.unsafeDrop (restLen - finLen) rest + !total = el + 33 + fi l + if finLen < 56 + then update rp bp (parse_pad1 fin total) + else do + let !(# pen, ult #) = parse_pad2 fin total + update rp bp pen + update rp bp ult + | otherwise = do + -- message < 64 bytes total, straight to padding + let !total = el + 33 + fi l + if 33 + l < 56 + then update rp bp (parse_pad1_vsb v sep dat total) + else do + let !(# pen, ult #) = parse_pad2_vsb v sep dat total + update rp bp pen + update rp bp ult +{-# INLINABLE _hash_vsb #-} diff --git a/lib/Crypto/Hash/SHA256/Internal.hs b/lib/Crypto/Hash/SHA256/Internal.hs @@ -6,6 +6,7 @@ {-# LANGUAGE PatternSynonyms #-} {-# LANGUAGE UnboxedTuples #-} {-# LANGUAGE UnliftedNewtypes #-} +{-# LANGUAGE ViewPatterns #-} -- | -- Module: Crypto.Hash.SHA256.Internal @@ -16,34 +17,52 @@ -- SHA-256 internals. module Crypto.Hash.SHA256.Internal ( - Block(..) - , pattern B - , Registers(..) - , pattern R - + -- * Types + Block(B, ..) + , Registers(R, ..) , MAC(..) - , iv - , block_hash + -- * Parsing + , parse + , parse_pad1 + , parse_pad2 + + -- * Serializing , cat + , cat_into + , cat_into32 + + -- * Hash function internals + , update + , iv + + -- * HMAC utilities + , pad_registers + , pad_registers_with_length + , xor + , parse_key - , word32be - , parse_block - , unsafe_hash_alg - , unsafe_padding + -- * HMAC-DRBG utilities + , parse_vsb + , parse_pad1_vsb + , parse_pad2_vsb + + -- * Pointer-based IO utilities + , poke_registers ) where import qualified Data.Bits as B import qualified Data.ByteString as BS import qualified Data.ByteString.Internal as BI import qualified Data.ByteString.Unsafe as BU -import Data.Word (Word8, Word64) -import Foreign.Marshal.Utils (copyBytes, fillBytes) -import Foreign.Ptr (Ptr, plusPtr) -import Foreign.Storable (poke) +import Data.Word (Word8, Word32, Word64) +import qualified GHC.IO (IO(..)) +import GHC.Ptr (Ptr(..)) import GHC.Exts (Int#) import qualified GHC.Exts as Exts -import qualified GHC.Word (Word8(..)) +import qualified GHC.Word (Word32(..), Word8(..)) + +-- types ---------------------------------------------------------------------- -- | A message authentication code. -- @@ -68,8 +87,7 @@ instance Eq MAC where | la /= lb = False | otherwise = BS.foldl' (B..|.) 0 (BS.packZipWith B.xor a b) == 0 --- https://datatracker.ietf.org/doc/html/rfc6234 - +-- | SHA256 block. newtype Block = Block (# Exts.Word32#, Exts.Word32#, Exts.Word32#, Exts.Word32# , Exts.Word32#, Exts.Word32#, Exts.Word32#, Exts.Word32# @@ -85,13 +103,12 @@ pattern B -> Block pattern B w00 w01 w02 w03 w04 w05 w06 w07 w08 w09 w10 w11 w12 w13 w14 w15 = Block - (# w00, w01, w02, w03 - , w04, w05, w06, w07 - , w08, w09, w10, w11 - , w12, w13, w14, w15 + (# w00, w01, w02, w03, w04, w05, w06, w07 + , w08, w09, w10, w11, w12, w13, w14, w15 #) {-# COMPLETE B #-} +-- | SHA256 state. newtype Registers = Registers (# Exts.Word32#, Exts.Word32#, Exts.Word32#, Exts.Word32# , Exts.Word32#, Exts.Word32#, Exts.Word32#, Exts.Word32# @@ -101,32 +118,25 @@ pattern R :: Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Registers -pattern R w00 w01 w02 w03 w04 w05 w06 w07 = - Registers - (# w00, w01, w02, w03 - , w04, w05, w06, w07 - #) +pattern R w00 w01 w02 w03 w04 w05 w06 w07 = Registers + (# w00, w01, w02, w03 + , w04, w05, w06, w07 + #) {-# COMPLETE R #-} --- given a bytestring and offset, parse word32. length not checked. -word32be :: BS.ByteString -> Int -> Exts.Word32# -word32be bs m = - let !(GHC.Word.W8# ra) = BU.unsafeIndex bs m - !(GHC.Word.W8# rb) = BU.unsafeIndex bs (m + 1) - !(GHC.Word.W8# rc) = BU.unsafeIndex bs (m + 2) - !(GHC.Word.W8# rd) = BU.unsafeIndex bs (m + 3) - !a = Exts.wordToWord32# (Exts.word8ToWord# ra) - !b = Exts.wordToWord32# (Exts.word8ToWord# rb) - !c = Exts.wordToWord32# (Exts.word8ToWord# rc) - !d = Exts.wordToWord32# (Exts.word8ToWord# rd) - !sa = Exts.uncheckedShiftLWord32# a 24# - !sb = Exts.uncheckedShiftLWord32# b 16# - !sc = Exts.uncheckedShiftLWord32# c 08# - in sa `Exts.orWord32#` sb `Exts.orWord32#` sc `Exts.orWord32#` d -{-# INLINE word32be #-} +-- utilities ------------------------------------------------------------------ + +fi :: (Integral a, Num b) => a -> b +fi = fromIntegral +{-# INLINE fi #-} -parse_block :: BS.ByteString -> Int -> Block -parse_block bs m = B +-- parsing (nonfinal input) --------------------------------------------------- + +-- | Given a bytestring and offset, parse a full block. +-- +-- The length of the input is not checked. +parse :: BS.ByteString -> Int -> Block +parse bs m = B (word32be bs m) (word32be bs (m + 04)) (word32be bs (m + 08)) @@ -143,88 +153,101 @@ parse_block bs m = B (word32be bs (m + 52)) (word32be bs (m + 56)) (word32be bs (m + 60)) -{-# INLINE parse_block #-} - --- rotate right -rotr# :: Exts.Word32# -> Int# -> Exts.Word32# -rotr# x n = - Exts.uncheckedShiftRLWord32# x n `Exts.orWord32#` - Exts.uncheckedShiftLWord32# x (32# Exts.-# n) -{-# INLINE rotr# #-} +{-# INLINE parse #-} --- logical right shift -shr# :: Exts.Word32# -> Int# -> Exts.Word32# -shr# = Exts.uncheckedShiftRLWord32# -{-# INLINE shr# #-} - --- ch(x, y, z) = (x & y) ^ (~x & z) -ch# :: Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32# -ch# x y z = - (x `Exts.andWord32#` y) `Exts.xorWord32#` - (Exts.notWord32# x `Exts.andWord32#` z) -{-# INLINE ch# #-} - --- maj(x, y, z) = (x & (y | z)) | (y & z) -maj# :: Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32# -maj# x y z = - (x `Exts.andWord32#` (y `Exts.orWord32#` z)) `Exts.orWord32#` - (y `Exts.andWord32#` z) -{-# INLINE maj# #-} - --- big sigma 0: rotr2 ^ rotr13 ^ rotr22 -bsig0# :: Exts.Word32# -> Exts.Word32# -bsig0# x = - rotr# x 2# `Exts.xorWord32#` rotr# x 13# `Exts.xorWord32#` rotr# x 22# -{-# INLINE bsig0# #-} - --- big sigma 1: rotr6 ^ rotr11 ^ rotr25 -bsig1# :: Exts.Word32# -> Exts.Word32# -bsig1# x = - rotr# x 6# `Exts.xorWord32#` rotr# x 11# `Exts.xorWord32#` rotr# x 25# -{-# INLINE bsig1# #-} - --- small sigma 0: rotr7 ^ rotr18 ^ shr3 -ssig0# :: Exts.Word32# -> Exts.Word32# -ssig0# x = - rotr# x 7# `Exts.xorWord32#` rotr# x 18# `Exts.xorWord32#` shr# x 3# -{-# INLINE ssig0# #-} - --- small sigma 1: rotr17 ^ rotr19 ^ shr10 -ssig1# :: Exts.Word32# -> Exts.Word32# -ssig1# x = - rotr# x 17# `Exts.xorWord32#` rotr# x 19# `Exts.xorWord32#` shr# x 10# -{-# INLINE ssig1# #-} +-- | Parse the 32-bit word encoded at the given ofset. +-- +-- The length of the input is not checked. +word32be :: BS.ByteString -> Int -> Exts.Word32# +word32be bs m = + let !(GHC.Word.W8# ra) = BU.unsafeIndex bs m + !(GHC.Word.W8# rb) = BU.unsafeIndex bs (m + 1) + !(GHC.Word.W8# rc) = BU.unsafeIndex bs (m + 2) + !(GHC.Word.W8# rd) = BU.unsafeIndex bs (m + 3) + !a = Exts.wordToWord32# (Exts.word8ToWord# ra) + !b = Exts.wordToWord32# (Exts.word8ToWord# rb) + !c = Exts.wordToWord32# (Exts.word8ToWord# rc) + !d = Exts.wordToWord32# (Exts.word8ToWord# rd) + !sa = Exts.uncheckedShiftLWord32# a 24# + !sb = Exts.uncheckedShiftLWord32# b 16# + !sc = Exts.uncheckedShiftLWord32# c 08# + in sa `Exts.orWord32#` sb `Exts.orWord32#` sc `Exts.orWord32#` d +{-# INLINE word32be #-} --- round step -step# - :: Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32# - -> Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32# - -> Exts.Word32# -> Exts.Word32# - -> Registers -step# a b c d e f g h k w = - let !t1 = h - `Exts.plusWord32#` bsig1# e - `Exts.plusWord32#` ch# e f g - `Exts.plusWord32#` k - `Exts.plusWord32#` w - !t2 = bsig0# a `Exts.plusWord32#` maj# a b c - in R (t1 `Exts.plusWord32#` t2) a b c (d `Exts.plusWord32#` t1) e f g -{-# INLINE step# #-} +-- parsing (final input) ------------------------------------------------------ --- first 32 bits of the fractional parts of the square roots of the --- first eight primes -iv :: () -> Registers -iv _ = R (Exts.wordToWord32# 0x6a09e667##) - (Exts.wordToWord32# 0xbb67ae85##) - (Exts.wordToWord32# 0x3c6ef372##) - (Exts.wordToWord32# 0xa54ff53a##) - (Exts.wordToWord32# 0x510e527f##) - (Exts.wordToWord32# 0x9b05688c##) - (Exts.wordToWord32# 0x1f83d9ab##) - (Exts.wordToWord32# 0x5be0cd19##) - -block_hash :: Registers -> Block -> Registers -block_hash +-- | Parse the final chunk of an input message, assuming it is less than +-- 56 bytes in length (unchecked!). +-- +-- Returns one block consisting of the chunk and padding. +parse_pad1 + :: BS.ByteString -- ^ final input chunk (< 56 bytes) + -> Word64 -- ^ length of all input + -> Block -- ^ resulting block +parse_pad1 bs l = + let !bits = l * 8 + !(GHC.Word.W32# lhi) = fi (bits `B.unsafeShiftR` 32) + !(GHC.Word.W32# llo) = fi bits + in B (w32_at bs 00) (w32_at bs 04) (w32_at bs 08) (w32_at bs 12) + (w32_at bs 16) (w32_at bs 20) (w32_at bs 24) (w32_at bs 28) + (w32_at bs 32) (w32_at bs 36) (w32_at bs 40) (w32_at bs 44) + (w32_at bs 48) (w32_at bs 52) lhi llo +{-# INLINABLE parse_pad1 #-} + +-- | Parse the final chunk of an input message, assuming it is at least 56 +-- bytes in length (unchecked!). +-- +-- Returns two blocks consisting of the chunk and padding. +parse_pad2 + :: BS.ByteString -- ^ final input chunk (>= 56 bytes) + -> Word64 -- ^ length of all input + -> (# Block, Block #) -- ^ resulting blocks +parse_pad2 bs l = + let !bits = l * 8 + !z = Exts.wordToWord32# 0## + !(GHC.Word.W32# lhi) = fi (bits `B.unsafeShiftR` 32) + !(GHC.Word.W32# llo) = fi bits + !block0 = B + (w32_at bs 00) (w32_at bs 04) (w32_at bs 08) (w32_at bs 12) + (w32_at bs 16) (w32_at bs 20) (w32_at bs 24) (w32_at bs 28) + (w32_at bs 32) (w32_at bs 36) (w32_at bs 40) (w32_at bs 44) + (w32_at bs 48) (w32_at bs 52) (w32_at bs 56) (w32_at bs 60) + !block1 = B z z z z z z z z z z z z z z lhi llo + in (# block0, block1 #) +{-# INLINABLE parse_pad2 #-} + +-- | Return the byte at offset 'i', or a padding separator or zero byte +-- beyond the input bounds, as an unboxed 32-bit word. +w8_as_w32_at + :: BS.ByteString -- ^ input chunk + -> Int -- ^ offset + -> Exts.Word32# +w8_as_w32_at bs@(BI.PS _ _ l) i = Exts.wordToWord32# $ case compare i l of + LT -> let !(GHC.Word.W8# w) = BU.unsafeIndex bs i + in Exts.word8ToWord# w + EQ -> 0x80## + _ -> 0x00## +{-# INLINE w8_as_w32_at #-} + +-- | Return the 32-bit word encoded by four consecutive bytes at the +-- provided offset. +w32_at + :: BS.ByteString + -> Int + -> Exts.Word32# +w32_at bs i = + let !wa = w8_as_w32_at bs i `Exts.uncheckedShiftLWord32#` 24# + !wb = w8_as_w32_at bs (i + 1) `Exts.uncheckedShiftLWord32#` 16# + !wc = w8_as_w32_at bs (i + 2) `Exts.uncheckedShiftLWord32#` 08# + !wd = w8_as_w32_at bs (i + 3) + in wa `Exts.orWord32#` wb `Exts.orWord32#` wc `Exts.orWord32#` wd +{-# INLINE w32_at #-} + +-- update --------------------------------------------------------------------- + +-- | Update register state, given new input block. +update :: Registers -> Block -> Registers +update (R h0 h1 h2 h3 h4 h5 h6 h7) (B b00 b01 b02 b03 b04 b05 b06 b07 b08 b09 b10 b11 b12 b13 b14 b15) = @@ -282,7 +305,7 @@ block_hash !w62 = ssig1# w60 `p` w55 `p` ssig0# w47 `p` w46 !w63 = ssig1# w61 `p` w56 `p` ssig0# w48 `p` w47 - -- rounds (cube roots of first 64 primes) + -- rounds (constants are cube roots of first 64 primes) !(R s00a s00b s00c s00d s00e s00f s00g s00h) = step# h0 h1 h2 h3 h4 h5 h6 h7 (k 0x428a2f98##) w00 !(R s01a s01b s01c s01d s01e s01f s01g s01h) = @@ -420,65 +443,311 @@ block_hash k = Exts.wordToWord32# {-# INLINE k #-} --- RFC 6234 6.2 block pipeline --- --- invariant: --- the input bytestring is exactly 512 bits in length -unsafe_hash_alg :: Registers -> BS.ByteString -> Registers -unsafe_hash_alg rs bs = block_hash rs (parse_block bs 0) +-- rotate right +rotr# :: Exts.Word32# -> Int# -> Exts.Word32# +rotr# x n = + Exts.uncheckedShiftRLWord32# x n `Exts.orWord32#` + Exts.uncheckedShiftLWord32# x (32# Exts.-# n) +{-# INLINE rotr# #-} --- register concatenation +-- logical right shift +shr# :: Exts.Word32# -> Int# -> Exts.Word32# +shr# = Exts.uncheckedShiftRLWord32# +{-# INLINE shr# #-} + +-- ch(x, y, z) = (x & y) ^ (~x & z) +ch# :: Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32# +ch# x y z = + (x `Exts.andWord32#` y) `Exts.xorWord32#` + (Exts.notWord32# x `Exts.andWord32#` z) +{-# INLINE ch# #-} + +-- maj(x, y, z) = (x & (y | z)) | (y & z) +maj# :: Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32# +maj# x y z = + (x `Exts.andWord32#` (y `Exts.orWord32#` z)) `Exts.orWord32#` + (y `Exts.andWord32#` z) +{-# INLINE maj# #-} + +-- big sigma 0: rotr2 ^ rotr13 ^ rotr22 +bsig0# :: Exts.Word32# -> Exts.Word32# +bsig0# x = + rotr# x 2# `Exts.xorWord32#` rotr# x 13# `Exts.xorWord32#` rotr# x 22# +{-# INLINE bsig0# #-} + +-- big sigma 1: rotr6 ^ rotr11 ^ rotr25 +bsig1# :: Exts.Word32# -> Exts.Word32# +bsig1# x = + rotr# x 6# `Exts.xorWord32#` rotr# x 11# `Exts.xorWord32#` rotr# x 25# +{-# INLINE bsig1# #-} + +-- small sigma 0: rotr7 ^ rotr18 ^ shr3 +ssig0# :: Exts.Word32# -> Exts.Word32# +ssig0# x = + rotr# x 7# `Exts.xorWord32#` rotr# x 18# `Exts.xorWord32#` shr# x 3# +{-# INLINE ssig0# #-} + +-- small sigma 1: rotr17 ^ rotr19 ^ shr10 +ssig1# :: Exts.Word32# -> Exts.Word32# +ssig1# x = + rotr# x 17# `Exts.xorWord32#` rotr# x 19# `Exts.xorWord32#` shr# x 10# +{-# INLINE ssig1# #-} + +-- round step +step# + :: Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32# + -> Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32# + -> Exts.Word32# -> Exts.Word32# + -> Registers +step# a b c d e f g h k w = + let !t1 = h + `Exts.plusWord32#` bsig1# e + `Exts.plusWord32#` ch# e f g + `Exts.plusWord32#` k + `Exts.plusWord32#` w + !t2 = bsig0# a `Exts.plusWord32#` maj# a b c + in R (t1 `Exts.plusWord32#` t2) a b c (d `Exts.plusWord32#` t1) e f g +{-# INLINE step# #-} + +-- initial register state; first 32 bits of the fractional parts of the +-- square roots of the first eight primes +iv :: () -> Registers +iv _ = R + (Exts.wordToWord32# 0x6a09e667##) + (Exts.wordToWord32# 0xbb67ae85##) + (Exts.wordToWord32# 0x3c6ef372##) + (Exts.wordToWord32# 0xa54ff53a##) + (Exts.wordToWord32# 0x510e527f##) + (Exts.wordToWord32# 0x9b05688c##) + (Exts.wordToWord32# 0x1f83d9ab##) + (Exts.wordToWord32# 0x5be0cd19##) + +-- serializing ---------------------------------------------------------------- + +-- | Concat SHA256 state into a ByteString. cat :: Registers -> BS.ByteString -cat (R h0 h1 h2 h3 h4 h5 h6 h7) = BI.unsafeCreate 32 $ \ptr -> do - poke32be ptr 0 h0 - poke32be ptr 4 h1 - poke32be ptr 8 h2 - poke32be ptr 12 h3 - poke32be ptr 16 h4 - poke32be ptr 20 h5 - poke32be ptr 24 h6 - poke32be ptr 28 h7 - where - poke32be :: Ptr Word8 -> Int -> Exts.Word32# -> IO () - poke32be p off w = do - poke (p `plusPtr` off) (byte w 24#) - poke (p `plusPtr` (off + 1)) (byte w 16#) - poke (p `plusPtr` (off + 2)) (byte w 8#) - poke (p `plusPtr` (off + 3)) (byte w 0#) - - byte :: Exts.Word32# -> Int# -> Word8 - byte w n = GHC.Word.W8# (Exts.wordToWord8# - (Exts.word32ToWord# (Exts.uncheckedShiftRLWord32# w n))) - --- keystroke saver -fi :: (Integral a, Num b) => a -> b -fi = fromIntegral -{-# INLINE fi #-} +cat rs = BI.unsafeCreate 32 (cat_into rs) +{-# INLINABLE cat #-} + +-- | Serialize SHA256 state to a pointer of word32's (big-endian). +cat_into32 :: Registers -> Ptr Word32 -> IO () +cat_into32 (R h0 h1 h2 h3 h4 h5 h6 h7) (Ptr addr) = GHC.IO.IO $ \s0 -> + case Exts.writeWord32OffAddr# addr 0# h0 s0 of { s1 -> + case Exts.writeWord32OffAddr# addr 1# h1 s1 of { s2 -> + case Exts.writeWord32OffAddr# addr 2# h2 s2 of { s3 -> + case Exts.writeWord32OffAddr# addr 3# h3 s3 of { s4 -> + case Exts.writeWord32OffAddr# addr 4# h4 s4 of { s5 -> + case Exts.writeWord32OffAddr# addr 5# h5 s5 of { s6 -> + case Exts.writeWord32OffAddr# addr 6# h6 s6 of { s7 -> + case Exts.writeWord32OffAddr# addr 7# h7 s7 of { s8 -> + (# s8, () #) + }}}}}}}} +{-# INLINE cat_into32 #-} + +-- | Serialize SHA256 state to a pointer (big-endian). +cat_into :: Registers -> Ptr Word8 -> IO () +cat_into (R h0 h1 h2 h3 h4 h5 h6 h7) (Ptr addr) = GHC.IO.IO $ \s0 -> + case poke32be addr 00# h0 s0 of { s1 -> + case poke32be addr 04# h1 s1 of { s2 -> + case poke32be addr 08# h2 s2 of { s3 -> + case poke32be addr 12# h3 s3 of { s4 -> + case poke32be addr 16# h4 s4 of { s5 -> + case poke32be addr 20# h5 s5 of { s6 -> + case poke32be addr 24# h6 s6 of { s7 -> + case poke32be addr 28# h7 s7 of { s8 -> + (# s8, () #) + }}}}}}}} +{-# INLINE cat_into #-} + +poke32be + :: Exts.Addr# + -> Int# + -> Exts.Word32# + -> Exts.State# Exts.RealWorld + -> Exts.State# Exts.RealWorld +poke32be a off w s0 = + case Exts.writeWord8OffAddr# a off (byte# w 24#) s0 of { s1 -> + case Exts.writeWord8OffAddr# a (off Exts.+# 1#) (byte# w 16#) s1 of { s2 -> + case Exts.writeWord8OffAddr# a (off Exts.+# 2#) (byte# w 8#) s2 of { s3 -> + Exts.writeWord8OffAddr# a (off Exts.+# 3#) (byte# w 0#) s3 + }}} +{-# INLINE poke32be #-} + +byte# :: Exts.Word32# -> Int# -> Exts.Word8# +byte# w n = Exts.wordToWord8# + (Exts.word32ToWord# (Exts.uncheckedShiftRLWord32# w n)) +{-# INLINE byte# #-} + +-- | Write register state to a pointer (native endian Word32s). +poke_registers :: Ptr Word32 -> Registers -> IO () +poke_registers (Ptr addr) (R w0 w1 w2 w3 w4 w5 w6 w7) = GHC.IO.IO $ \s0 -> + case Exts.writeWord32OffAddr# addr 0# w0 s0 of { s1 -> + case Exts.writeWord32OffAddr# addr 1# w1 s1 of { s2 -> + case Exts.writeWord32OffAddr# addr 2# w2 s2 of { s3 -> + case Exts.writeWord32OffAddr# addr 3# w3 s3 of { s4 -> + case Exts.writeWord32OffAddr# addr 4# w4 s4 of { s5 -> + case Exts.writeWord32OffAddr# addr 5# w5 s5 of { s6 -> + case Exts.writeWord32OffAddr# addr 6# w6 s6 of { s7 -> + case Exts.writeWord32OffAddr# addr 7# w7 s7 of { s8 -> + (# s8, () #) }}}}}}}} +{-# INLINE poke_registers #-} + +-- hmac utilities ------------------------------------------------------------- + +-- pad registers to block +pad_registers :: Registers -> Block +pad_registers (R w0 w1 w2 w3 w4 w5 w6 w7) = B + w0 w1 w2 w3 w4 w5 w6 w7 + (Exts.wordToWord32# 0##) (Exts.wordToWord32# 0##) (Exts.wordToWord32# 0##) + (Exts.wordToWord32# 0##) (Exts.wordToWord32# 0##) (Exts.wordToWord32# 0##) + (Exts.wordToWord32# 0##) (Exts.wordToWord32# 0##) +{-# INLINE pad_registers #-} + +-- pad registers to block, using padding separator and augmented length +-- (assumes existence of a leading block) +pad_registers_with_length :: Registers -> Block +pad_registers_with_length (R h0 h1 h2 h3 h4 h5 h6 h7) = B + h0 h1 h2 h3 h4 h5 h6 h7 -- inner hash + (Exts.wordToWord32# 0x80000000##) -- padding separator + (Exts.wordToWord32# 0x00000000##) + (Exts.wordToWord32# 0x00000000##) + (Exts.wordToWord32# 0x00000000##) + (Exts.wordToWord32# 0x00000000##) + (Exts.wordToWord32# 0x00000000##) + (Exts.wordToWord32# 0x00000000##) -- high 32 bits of length + (Exts.wordToWord32# 0x00000300##) -- low 32 bits of length +{-# INLINABLE pad_registers_with_length #-} + +xor :: Block -> Exts.Word32# -> Block +xor (B w00 w01 w02 w03 w04 w05 w06 w07 w08 w09 w10 w11 w12 w13 w14 w15) b = B + (Exts.xorWord32# w00 b) + (Exts.xorWord32# w01 b) + (Exts.xorWord32# w02 b) + (Exts.xorWord32# w03 b) + (Exts.xorWord32# w04 b) + (Exts.xorWord32# w05 b) + (Exts.xorWord32# w06 b) + (Exts.xorWord32# w07 b) + (Exts.xorWord32# w08 b) + (Exts.xorWord32# w09 b) + (Exts.xorWord32# w10 b) + (Exts.xorWord32# w11 b) + (Exts.xorWord32# w12 b) + (Exts.xorWord32# w13 b) + (Exts.xorWord32# w14 b) + (Exts.xorWord32# w15 b) +{-# INLINE xor #-} + +parse_key :: BS.ByteString -> Block +parse_key bs = B + (w32_zero bs 0) (w32_zero bs 4) (w32_zero bs 8) (w32_zero bs 12) + (w32_zero bs 16) (w32_zero bs 20) (w32_zero bs 24) (w32_zero bs 28) + (w32_zero bs 32) (w32_zero bs 36) (w32_zero bs 40) (w32_zero bs 44) + (w32_zero bs 48) (w32_zero bs 52) (w32_zero bs 56) (w32_zero bs 60) +{-# INLINE parse_key #-} + +-- read big-endian Word32#, zero-padding beyond input length +w32_zero :: BS.ByteString -> Int -> Exts.Word32# +w32_zero bs i = + let !wa = w8_zero bs i `Exts.uncheckedShiftLWord32#` 24# + !wb = w8_zero bs (i + 1) `Exts.uncheckedShiftLWord32#` 16# + !wc = w8_zero bs (i + 2) `Exts.uncheckedShiftLWord32#` 08# + !wd = w8_zero bs (i + 3) + in wa `Exts.orWord32#` wb `Exts.orWord32#` wc `Exts.orWord32#` wd +{-# INLINE w32_zero #-} + +-- read byte as Word32#, returning zero beyond input length +w8_zero :: BS.ByteString -> Int -> Exts.Word32# +w8_zero bs@(BI.PS _ _ l) i + | i < l = let !(GHC.Word.W8# w) = BU.unsafeIndex bs i + in Exts.wordToWord32# (Exts.word8ToWord# w) + | otherwise = Exts.wordToWord32# 0## +{-# INLINE w8_zero #-} + +-- hmac-drbg utilities -------------------------------------------------------- + +-- | Parse first complete block from v || sep || dat[0:31]. +-- +-- Requires len(dat) >= 31. +parse_vsb :: Registers -> Word8 -> BS.ByteString -> Block +parse_vsb (R v0 v1 v2 v3 v4 v5 v6 v7) (GHC.Word.W8# sep) dat = + let !(GHC.Word.W8# b0) = BU.unsafeIndex dat 0 + !(GHC.Word.W8# b1) = BU.unsafeIndex dat 1 + !(GHC.Word.W8# b2) = BU.unsafeIndex dat 2 + !w08 = + Exts.uncheckedShiftLWord32# (w8_w32 sep) 24# + `Exts.orWord32#` + Exts.uncheckedShiftLWord32# (w8_w32 b0) 16# + `Exts.orWord32#` + Exts.uncheckedShiftLWord32# (w8_w32 b1) 8# + `Exts.orWord32#` + w8_w32 b2 + in B v0 v1 v2 v3 v4 v5 v6 v7 + w08 + (word32be dat 3) (word32be dat 7) (word32be dat 11) + (word32be dat 15) (word32be dat 19) (word32be dat 23) (word32be dat 27) +{-# INLINE parse_vsb #-} + +-- | Parse single padding block from v || sep || dat. +-- +-- Requires (33 + len(dat)) < 56. +parse_pad1_vsb :: Registers -> Word8 -> BS.ByteString -> Word64 -> Block +parse_pad1_vsb (R v0 v1 v2 v3 v4 v5 v6 v7) sep dat total = + let !bits = total * 8 + !(GHC.Word.W32# lhi) = fi (bits `B.unsafeShiftR` 32) + !(GHC.Word.W32# llo) = fi bits + in B v0 v1 v2 v3 v4 v5 v6 v7 + (w32_sdp sep dat 32) (w32_sdp sep dat 36) + (w32_sdp sep dat 40) (w32_sdp sep dat 44) + (w32_sdp sep dat 48) (w32_sdp sep dat 52) + lhi llo +{-# INLINABLE parse_pad1_vsb #-} + +-- | Parse two padding blocks from v || sep || dat. +-- +-- Requires 56 <= (33 + len(dat)) < 64. +parse_pad2_vsb + :: Registers -> Word8 -> BS.ByteString -> Word64 -> (# Block, Block #) +parse_pad2_vsb (R v0 v1 v2 v3 v4 v5 v6 v7) sep dat total = + let !bits = total * 8 + !z = Exts.wordToWord32# 0## + !(GHC.Word.W32# lhi) = fi (bits `B.unsafeShiftR` 32) + !(GHC.Word.W32# llo) = fi bits + !b0 = B v0 v1 v2 v3 v4 v5 v6 v7 + (w32_sdp sep dat 32) (w32_sdp sep dat 36) + (w32_sdp sep dat 40) (w32_sdp sep dat 44) + (w32_sdp sep dat 48) (w32_sdp sep dat 52) + (w32_sdp sep dat 56) (w32_sdp sep dat 60) + !b1 = B z z z z z z z z z z z z z z lhi llo + in (# b0, b1 #) +{-# INLINABLE parse_pad2_vsb #-} + +-- Read Word32 at offset i (>= 32) from (sep || dat || 0x80 || zeros). +w32_sdp :: Word8 -> BS.ByteString -> Int -> Exts.Word32# +w32_sdp sep dat i = + let !(GHC.Word.W8# a) = byte_sdp sep dat i + !(GHC.Word.W8# b) = byte_sdp sep dat (i + 1) + !(GHC.Word.W8# c) = byte_sdp sep dat (i + 2) + !(GHC.Word.W8# d) = byte_sdp sep dat (i + 3) + in Exts.uncheckedShiftLWord32# (w8_w32 a) 24# + `Exts.orWord32#` + Exts.uncheckedShiftLWord32# (w8_w32 b) 16# + `Exts.orWord32#` + Exts.uncheckedShiftLWord32# (w8_w32 c) 8# + `Exts.orWord32#` + w8_w32 d +{-# INLINE w32_sdp #-} + +-- Read byte at offset i (>= 32) from (sep || dat || 0x80 || zeros). +byte_sdp :: Word8 -> BS.ByteString -> Int -> Word8 +byte_sdp sep dat@(BI.PS _ _ l) i + | i == 32 = sep + | i < 33 + l = BU.unsafeIndex dat (i - 33) + | i == 33 + l = 0x80 + | otherwise = 0x00 +{-# INLINE byte_sdp #-} + +w8_w32 :: Exts.Word8# -> Exts.Word32# +w8_w32 w = Exts.wordToWord32# (Exts.word8ToWord# w) +{-# INLINE w8_w32 #-} --- RFC 6234 4.1 message padding -unsafe_padding :: BS.ByteString -> Word64 -> BS.ByteString -unsafe_padding (BI.PS fp off r) len - | r < 56 = BI.unsafeCreate 64 $ \p -> do - BI.unsafeWithForeignPtr fp $ \src -> - copyBytes p (src `plusPtr` off) r - poke (p `plusPtr` r) (0x80 :: Word8) - fillBytes (p `plusPtr` (r + 1)) 0 (55 - r) - poke_word64be (p `plusPtr` 56) (len * 8) - | otherwise = BI.unsafeCreate 128 $ \p -> do - BI.unsafeWithForeignPtr fp $ \src -> - copyBytes p (src `plusPtr` off) r - poke (p `plusPtr` r) (0x80 :: Word8) - fillBytes (p `plusPtr` (r + 1)) 0 (63 - r) - fillBytes (p `plusPtr` 64) 0 56 - poke_word64be (p `plusPtr` 120) (len * 8) - where - poke_word64be :: Ptr Word8 -> Word64 -> IO () - poke_word64be p w = do - poke p (fi (w `B.unsafeShiftR` 56) :: Word8) - poke (p `plusPtr` 1) (fi (w `B.unsafeShiftR` 48) :: Word8) - poke (p `plusPtr` 2) (fi (w `B.unsafeShiftR` 40) :: Word8) - poke (p `plusPtr` 3) (fi (w `B.unsafeShiftR` 32) :: Word8) - poke (p `plusPtr` 4) (fi (w `B.unsafeShiftR` 24) :: Word8) - poke (p `plusPtr` 5) (fi (w `B.unsafeShiftR` 16) :: Word8) - poke (p `plusPtr` 6) (fi (w `B.unsafeShiftR` 8) :: Word8) - poke (p `plusPtr` 7) (fi w :: Word8) diff --git a/lib/Crypto/Hash/SHA256/Lazy.hs b/lib/Crypto/Hash/SHA256/Lazy.hs @@ -32,9 +32,6 @@ import Data.Word (Word64) import Foreign.ForeignPtr (plusForeignPtr) import Crypto.Hash.SHA256.Internal --- preliminary utils - --- keystroke saver fi :: (Integral a, Num b) => a -> b fi = fromIntegral {-# INLINE fi #-} @@ -114,15 +111,11 @@ hash_lazy bl = cat (go (iv ()) (pad_lazy bl)) where go !acc bs | BL.null bs = acc | otherwise = case splitAt64 bs of - SLPair c r -> go (unsafe_hash_alg acc c) r + SLPair c r -> go (update acc (parse c 0)) r -- HMAC ----------------------------------------------------------------------- -- https://datatracker.ietf.org/doc/html/rfc2104#section-2 -data KeyAndLen = KeyAndLen - {-# UNPACK #-} !BS.ByteString - {-# UNPACK #-} !Int - -- | Produce a message authentication code for a lazy bytestring, based -- on the provided (strict, bytestring) key, via SHA-256. -- @@ -151,7 +144,7 @@ hmac_lazy mk@(BI.PS _ _ l) text = go !acc b | BS.null b = acc | otherwise = case unsafe_splitAt 64 b of - SSPair c r -> go (unsafe_hash_alg acc c) r + SSPair c r -> go (update acc (parse c 0)) r pad m@(BI.PS _ _ (fi -> len)) | len < 128 = to_strict_small padded @@ -176,6 +169,4 @@ hmac_lazy mk@(BI.PS _ _ l) text = | j == 0 = acc | otherwise = loop8 (pred j) (acc <> BSB.word8 0x00) - !(KeyAndLen k lk) - | l > 64 = KeyAndLen (hash mk) 32 - | otherwise = KeyAndLen mk l + !(k, lk) = if l > 64 then (hash mk, 32) else (mk, l)