sha256

Pure Haskell SHA-256, HMAC-SHA256 as specified by RFC's 6234 and 2104.
git clone git://git.ppad.tech/sha256.git
Log | Files | Refs | README | LICENSE

commit d1bea758e16402da7a9875cda7c81ecda82447ff
parent ff08521ffe124aa922f46fde544396af98393fa4
Author: Jared Tobin <jared@jtobin.io>
Date:   Thu, 12 Sep 2024 17:11:10 +0400

lib: unlifting experiments

This commit (which I will immediately revert) illustrates another sha256
function, 'u_hash_lazy', which uses unlifted internals. It eliminates
allocation entirely in certain areas, though time is mostly unaffected.

An accompanying binary, 'hash-large', is useful for profiling. Use

  cabal build hash-large --enable-profiling

to build it, and then run the binary with the desired RTS flags (e.g.
-s, -p, -hT) to profile things accordingly.

Diffstat:
M.gitignore | 1+
Mbench/Main.hs | 1+
Mlib/Crypto/Hash/SHA256.hs | 597+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Msrc/HashLarge.hs | 6+++---
Mtest/Main.hs | 12++++++++++++
5 files changed, 614 insertions(+), 3 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -1,2 +1,3 @@ dist-newstyle/ ppad-sha256-hash-large.dat +hash-large* diff --git a/bench/Main.hs b/bench/Main.hs @@ -18,6 +18,7 @@ suite = env setup $ \ ~(bs, bl) -> bgroup "SHA256 (32B input)" [ bench "hash" $ whnf SHA256.hash bs , bench "hash_lazy" $ whnf SHA256.hash_lazy bl + , bench "u_hash_lazy" $ whnf SHA256.u_hash_lazy bl ] , bgroup "HMAC-SHA256 (32B input)" [ bench "hmac" $ whnf (SHA256.hmac "key") bs diff --git a/lib/Crypto/Hash/SHA256.hs b/lib/Crypto/Hash/SHA256.hs @@ -1,7 +1,10 @@ {-# OPTIONS_GHC -funbox-small-strict-fields #-} {-# LANGUAGE BangPatterns #-} +{-# LANGUAGE ExtendedLiterals #-} {-# LANGUAGE LambdaCase #-} +{-# LANGUAGE MagicHash #-} {-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE UnboxedTuples #-} {-# LANGUAGE ViewPatterns #-} -- | @@ -20,6 +23,8 @@ module Crypto.Hash.SHA256 ( hash , hash_lazy + , u_hash_lazy + -- * SHA256-based MAC functions , hmac , hmac_lazy @@ -33,6 +38,7 @@ import qualified Data.ByteString.Lazy as BL import qualified Data.ByteString.Unsafe as BU import qualified Data.List as L import Data.Word (Word32, Word64) +import GHC.Exts -- preliminary utils @@ -50,6 +56,7 @@ blocks_lazy s = loop where | BL.null bs = [] | otherwise = case BL.splitAt (fi s) bs of (c, r) -> BL.toStrict c : loop r +{-# SCC blocks_lazy #-} -- verbatim from Data.Binary word32be :: BS.ByteString -> Word32 @@ -60,6 +67,35 @@ word32be s = (fromIntegral (s `BU.unsafeIndex` 3)) {-# INLINE word32be #-} +-- unlifted Word32 bit twiddling (see GHC.Internal.Word) + +(.&.#) :: Word32# -> Word32# -> Word32# +x# .&.# y# = wordToWord32# ((word32ToWord# x#) `and#` (word32ToWord# y#)) + +(.^.#) :: Word32# -> Word32# -> Word32# +x# .^.# y# = wordToWord32# ((word32ToWord# x#) `xor#` (word32ToWord# y#)) + +complement# :: Word32# -> Word32# +complement# x# = wordToWord32# (not# (word32ToWord# x#)) + +rotate# :: Word32# -> Int# -> Word32# +rotate# x# i# + | isTrue# (i'# ==# 0#) = x# + | otherwise = wordToWord32# $ + ((word32ToWord# x#) `uncheckedShiftL#` i'#) + `or#` ((word32ToWord# x#) `uncheckedShiftRL#` (32# -# i'#)) + where + !i'# = word2Int# (int2Word# i# `and#` 31##) +{-# INLINE rotate# #-} + +rotateR# :: Word32# -> Int# -> Word32# +rotateR# x# i# = x# `rotate#` (negateInt# i#) +{-# INLINE rotateR# #-} + +unsafeShiftR# :: Word32# -> Int# -> Word32# +unsafeShiftR# x# i# = wordToWord32# ((word32ToWord# x#) `uncheckedShiftRL#` i#) +{-# INLINE unsafeShiftR# #-} + -- message padding and parsing -- https://datatracker.ietf.org/doc/html/rfc6234#section-4.1 @@ -89,6 +125,7 @@ pad_lazy (BL.toChunks -> m) = BL.fromChunks (walk 0 m) where | otherwise = let nacc = bs <> BSB.word8 0x00 in padding l (pred k) nacc +{-# SCC pad_lazy #-} -- functions and constants used -- https://datatracker.ietf.org/doc/html/rfc6234#section-5.1 @@ -111,6 +148,24 @@ ssig0 x = B.rotateR x 7 `B.xor` B.rotateR x 18 `B.xor` B.unsafeShiftR x 3 ssig1 :: Word32 -> Word32 ssig1 x = B.rotateR x 17 `B.xor` B.rotateR x 19 `B.xor` B.unsafeShiftR x 10 +u_ch :: Word32# -> Word32# -> Word32# -> Word32# +u_ch x# y# z# = (x# .&.# y#) .^.# (complement# x# .&.# z#) + +u_maj :: Word32# -> Word32# -> Word32# -> Word32# +u_maj x# y# z# = (x# .&.# y#) .^.# (x# .&.# z#) .^.# (y# .&.# z#) + +u_bsig0 :: Word32# -> Word32# +u_bsig0 x# = rotateR# x# 2# .^.# rotateR# x# 13# .^.# rotateR# x# 22# + +u_bsig1 :: Word32# -> Word32# +u_bsig1 x# = rotateR# x# 6# .^.# rotateR# x# 11# .^.# rotateR# x# 25# + +u_ssig0 :: Word32# -> Word32# +u_ssig0 x# = rotateR# x# 7# .^.# rotateR# x# 18# .^.# unsafeShiftR# x# 3# + +u_ssig1 :: Word32# -> Word32# +u_ssig1 x# = rotateR# x# 17# .^.# rotateR# x# 19# .^.# unsafeShiftR# x# 10# + data Schedule = Schedule { w00 :: !Word32, w01 :: !Word32, w02 :: !Word32, w03 :: !Word32 , w04 :: !Word32, w05 :: !Word32, w06 :: !Word32, w07 :: !Word32 @@ -130,6 +185,73 @@ data Schedule = Schedule { , w60 :: !Word32, w61 :: !Word32, w62 :: !Word32, w63 :: !Word32 } deriving (Eq, Show) +type Sd = (# + Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + , Word32# + #) + choose_w :: Schedule -> Int -> Word32 choose_w s = \case 0 -> w00 s; 1 -> w01 s; 2 -> w02 s; 3 -> w03 s @@ -150,6 +272,140 @@ choose_w s = \case 60 -> w60 s; 61 -> w61 s; 62 -> w62 s; 63 -> w63 s _ -> error "ppad-sha256: internal error (invalid schedule index)" +u_choose_w :: Sd -> Int# -> Word32# +u_choose_w s i# = + case s of + (# w00# + , w01# + , w02# + , w03# + , w04# + , w05# + , w06# + , w07# + , w08# + , w09# + , w10# + , w11# + , w12# + , w13# + , w14# + , w15# + , w16# + , w17# + , w18# + , w19# + , w20# + , w21# + , w22# + , w23# + , w24# + , w25# + , w26# + , w27# + , w28# + , w29# + , w30# + , w31# + , w32# + , w33# + , w34# + , w35# + , w36# + , w37# + , w38# + , w39# + , w40# + , w41# + , w42# + , w43# + , w44# + , w45# + , w46# + , w47# + , w48# + , w49# + , w50# + , w51# + , w52# + , w53# + , w54# + , w55# + , w56# + , w57# + , w58# + , w59# + , w60# + , w61# + , w62# + , w63# #) + | isTrue# (i# ==# 0#) -> w00# + | isTrue# (i# ==# 1#) -> w01# + | isTrue# (i# ==# 2#) -> w02# + | isTrue# (i# ==# 3#) -> w03# + | isTrue# (i# ==# 4#) -> w04# + | isTrue# (i# ==# 5#) -> w05# + | isTrue# (i# ==# 6#) -> w06# + | isTrue# (i# ==# 7#) -> w07# + | isTrue# (i# ==# 8#) -> w08# + | isTrue# (i# ==# 9#) -> w09# + | isTrue# (i# ==# 10#) -> w10# + | isTrue# (i# ==# 11#) -> w11# + | isTrue# (i# ==# 12#) -> w12# + | isTrue# (i# ==# 13#) -> w13# + | isTrue# (i# ==# 14#) -> w14# + | isTrue# (i# ==# 15#) -> w15# + | isTrue# (i# ==# 16#) -> w16# + | isTrue# (i# ==# 17#) -> w17# + | isTrue# (i# ==# 18#) -> w18# + | isTrue# (i# ==# 19#) -> w19# + | isTrue# (i# ==# 20#) -> w20# + | isTrue# (i# ==# 21#) -> w21# + | isTrue# (i# ==# 22#) -> w22# + | isTrue# (i# ==# 23#) -> w23# + | isTrue# (i# ==# 24#) -> w24# + | isTrue# (i# ==# 25#) -> w25# + | isTrue# (i# ==# 26#) -> w26# + | isTrue# (i# ==# 27#) -> w27# + | isTrue# (i# ==# 28#) -> w28# + | isTrue# (i# ==# 29#) -> w29# + | isTrue# (i# ==# 30#) -> w30# + | isTrue# (i# ==# 31#) -> w31# + | isTrue# (i# ==# 32#) -> w32# + | isTrue# (i# ==# 33#) -> w33# + | isTrue# (i# ==# 34#) -> w34# + | isTrue# (i# ==# 35#) -> w35# + | isTrue# (i# ==# 36#) -> w36# + | isTrue# (i# ==# 37#) -> w37# + | isTrue# (i# ==# 38#) -> w38# + | isTrue# (i# ==# 39#) -> w39# + | isTrue# (i# ==# 40#) -> w40# + | isTrue# (i# ==# 41#) -> w41# + | isTrue# (i# ==# 42#) -> w42# + | isTrue# (i# ==# 43#) -> w43# + | isTrue# (i# ==# 44#) -> w44# + | isTrue# (i# ==# 45#) -> w45# + | isTrue# (i# ==# 46#) -> w46# + | isTrue# (i# ==# 47#) -> w47# + | isTrue# (i# ==# 48#) -> w48# + | isTrue# (i# ==# 49#) -> w49# + | isTrue# (i# ==# 50#) -> w50# + | isTrue# (i# ==# 51#) -> w51# + | isTrue# (i# ==# 52#) -> w52# + | isTrue# (i# ==# 53#) -> w53# + | isTrue# (i# ==# 54#) -> w54# + | isTrue# (i# ==# 55#) -> w55# + | isTrue# (i# ==# 56#) -> w56# + | isTrue# (i# ==# 57#) -> w57# + | isTrue# (i# ==# 58#) -> w58# + | isTrue# (i# ==# 59#) -> w59# + | isTrue# (i# ==# 60#) -> w60# + | isTrue# (i# ==# 61#) -> w61# + | isTrue# (i# ==# 62#) -> w62# + | isTrue# (i# ==# 63#) -> w63# + | otherwise -> error "boom" +{-# SCC u_choose_w #-} + -- k0-k63 are the first 32 bits of the fractional parts of the cube -- roots of the first sixty-four prime numbers choose_k :: Int -> Word32 @@ -172,6 +428,75 @@ choose_k = \case 60 -> 0x90befffa; 61 -> 0xa4506ceb; 62 -> 0xbef9a3f7; 63 -> 0xc67178f2 _ -> error "ppad-sha256: internal error (invalid constant index)" +u_choose_k :: Int# -> Word32# +u_choose_k i# + | isTrue# (i# ==# 0#) = 0x428a2f98#Word32 + | isTrue# (i# ==# 1#) = 0x71374491#Word32 + | isTrue# (i# ==# 2#) = 0xb5c0fbcf#Word32 + | isTrue# (i# ==# 3#) = 0xe9b5dba5#Word32 + | isTrue# (i# ==# 4#) = 0x3956c25b#Word32 + | isTrue# (i# ==# 5#) = 0x59f111f1#Word32 + | isTrue# (i# ==# 6#) = 0x923f82a4#Word32 + | isTrue# (i# ==# 7#) = 0xab1c5ed5#Word32 + | isTrue# (i# ==# 8#) = 0xd807aa98#Word32 + | isTrue# (i# ==# 9#) = 0x12835b01#Word32 + | isTrue# (i# ==# 10#) = 0x243185be#Word32 + | isTrue# (i# ==# 11#) = 0x550c7dc3#Word32 + | isTrue# (i# ==# 12#) = 0x72be5d74#Word32 + | isTrue# (i# ==# 13#) = 0x80deb1fe#Word32 + | isTrue# (i# ==# 14#) = 0x9bdc06a7#Word32 + | isTrue# (i# ==# 15#) = 0xc19bf174#Word32 + | isTrue# (i# ==# 16#) = 0xe49b69c1#Word32 + | isTrue# (i# ==# 17#) = 0xefbe4786#Word32 + | isTrue# (i# ==# 18#) = 0x0fc19dc6#Word32 + | isTrue# (i# ==# 19#) = 0x240ca1cc#Word32 + | isTrue# (i# ==# 20#) = 0x2de92c6f#Word32 + | isTrue# (i# ==# 21#) = 0x4a7484aa#Word32 + | isTrue# (i# ==# 22#) = 0x5cb0a9dc#Word32 + | isTrue# (i# ==# 23#) = 0x76f988da#Word32 + | isTrue# (i# ==# 24#) = 0x983e5152#Word32 + | isTrue# (i# ==# 25#) = 0xa831c66d#Word32 + | isTrue# (i# ==# 26#) = 0xb00327c8#Word32 + | isTrue# (i# ==# 27#) = 0xbf597fc7#Word32 + | isTrue# (i# ==# 28#) = 0xc6e00bf3#Word32 + | isTrue# (i# ==# 29#) = 0xd5a79147#Word32 + | isTrue# (i# ==# 30#) = 0x06ca6351#Word32 + | isTrue# (i# ==# 31#) = 0x14292967#Word32 + | isTrue# (i# ==# 32#) = 0x27b70a85#Word32 + | isTrue# (i# ==# 33#) = 0x2e1b2138#Word32 + | isTrue# (i# ==# 34#) = 0x4d2c6dfc#Word32 + | isTrue# (i# ==# 35#) = 0x53380d13#Word32 + | isTrue# (i# ==# 36#) = 0x650a7354#Word32 + | isTrue# (i# ==# 37#) = 0x766a0abb#Word32 + | isTrue# (i# ==# 38#) = 0x81c2c92e#Word32 + | isTrue# (i# ==# 39#) = 0x92722c85#Word32 + | isTrue# (i# ==# 40#) = 0xa2bfe8a1#Word32 + | isTrue# (i# ==# 41#) = 0xa81a664b#Word32 + | isTrue# (i# ==# 42#) = 0xc24b8b70#Word32 + | isTrue# (i# ==# 43#) = 0xc76c51a3#Word32 + | isTrue# (i# ==# 44#) = 0xd192e819#Word32 + | isTrue# (i# ==# 45#) = 0xd6990624#Word32 + | isTrue# (i# ==# 46#) = 0xf40e3585#Word32 + | isTrue# (i# ==# 47#) = 0x106aa070#Word32 + | isTrue# (i# ==# 48#) = 0x19a4c116#Word32 + | isTrue# (i# ==# 49#) = 0x1e376c08#Word32 + | isTrue# (i# ==# 50#) = 0x2748774c#Word32 + | isTrue# (i# ==# 51#) = 0x34b0bcb5#Word32 + | isTrue# (i# ==# 52#) = 0x391c0cb3#Word32 + | isTrue# (i# ==# 53#) = 0x4ed8aa4a#Word32 + | isTrue# (i# ==# 54#) = 0x5b9cca4f#Word32 + | isTrue# (i# ==# 55#) = 0x682e6ff3#Word32 + | isTrue# (i# ==# 56#) = 0x748f82ee#Word32 + | isTrue# (i# ==# 57#) = 0x78a5636f#Word32 + | isTrue# (i# ==# 58#) = 0x84c87814#Word32 + | isTrue# (i# ==# 59#) = 0x8cc70208#Word32 + | isTrue# (i# ==# 60#) = 0x90befffa#Word32 + | isTrue# (i# ==# 61#) = 0xa4506ceb#Word32 + | isTrue# (i# ==# 62#) = 0xbef9a3f7#Word32 + | isTrue# (i# ==# 63#) = 0xc67178f2#Word32 + | otherwise = error "ppad-sha256: internal error (invalid constant index)" +{-# SCC u_choose_k #-} + -- initialization -- https://datatracker.ietf.org/doc/html/rfc6234#section-6.1 @@ -187,6 +512,9 @@ iv = Registers 0x6a09e667 0xbb67ae85 0x3c6ef372 0xa54ff53a 0x510e527f 0x9b05688c 0x1f83d9ab 0x5be0cd19 +type Rs = (# Word32#, Word32#, Word32#, Word32# + , Word32#, Word32#, Word32#, Word32# #) + -- processing -- https://datatracker.ietf.org/doc/html/rfc6234#section-6.2 @@ -197,6 +525,12 @@ data Block = Block { , m12 :: !Word32, m13 :: !Word32, m14 :: !Word32, m15 :: !Word32 } deriving (Eq, Show) +type Bl = (# + Word32#, Word32#, Word32#, Word32# + , Word32#, Word32#, Word32#, Word32# + , Word32#, Word32#, Word32#, Word32# + , Word32#, Word32#, Word32#, Word32# #) + -- parse a 512-bit block into sixteen 32-bit words parse :: BS.ByteString -> Block parse bs = @@ -220,6 +554,199 @@ parse bs = then Block {..} else error "ppad-sha256: internal error (bytes remaining)" +unW32 :: Word32 -> Word32# +unW32 (fi -> i) = case i of + I# i# -> wordToWord32# (int2Word# i#) + +u_parse :: BS.ByteString -> Bl +u_parse bs = + let (word32be -> m00, t00) = BS.splitAt 4 bs + (word32be -> m01, t01) = BS.splitAt 4 t00 + (word32be -> m02, t02) = BS.splitAt 4 t01 + (word32be -> m03, t03) = BS.splitAt 4 t02 + (word32be -> m04, t04) = BS.splitAt 4 t03 + (word32be -> m05, t05) = BS.splitAt 4 t04 + (word32be -> m06, t06) = BS.splitAt 4 t05 + (word32be -> m07, t07) = BS.splitAt 4 t06 + (word32be -> m08, t08) = BS.splitAt 4 t07 + (word32be -> m09, t09) = BS.splitAt 4 t08 + (word32be -> m10, t10) = BS.splitAt 4 t09 + (word32be -> m11, t11) = BS.splitAt 4 t10 + (word32be -> m12, t12) = BS.splitAt 4 t11 + (word32be -> m13, t13) = BS.splitAt 4 t12 + (word32be -> m14, t14) = BS.splitAt 4 t13 + (word32be -> m15, t15) = BS.splitAt 4 t14 + in if BS.null t15 + then (# unW32 m00 + , unW32 m01 + , unW32 m02 + , unW32 m03 + , unW32 m04 + , unW32 m05 + , unW32 m06 + , unW32 m07 + , unW32 m08 + , unW32 m09 + , unW32 m10 + , unW32 m11 + , unW32 m12 + , unW32 m13 + , unW32 m14 + , unW32 m15 #) + else error "ppad-sha256: internal error (bytes remaining)" +{-# SCC u_parse #-} + +p32# :: Word32# -> Word32# -> Word32# +p32# = plusWord32# + +u_prepare_schedule :: Bl -> Sd +u_prepare_schedule b = case b of + (# m00# + , m01# + , m02# + , m03# + , m04# + , m05# + , m06# + , m07# + , m08# + , m09# + , m10# + , m11# + , m12# + , m13# + , m14# + , m15# #) -> + let w00# = m00# + w01# = m01# + w02# = m02# + w03# = m03# + w04# = m04# + w05# = m05# + w06# = m06# + w07# = m07# + w08# = m08# + w09# = m09# + w10# = m10# + w11# = m11# + w12# = m12# + w13# = m13# + w14# = m14# + w15# = m15# + w16# = p32# (u_ssig1 w14#) (p32# w09# (p32# (u_ssig0 w01#) w00#)) + w17# = p32# (u_ssig1 w15#) (p32# w10# (p32# (u_ssig0 w02#) w01#)) + w18# = p32# (u_ssig1 w16#) (p32# w11# (p32# (u_ssig0 w03#) w02#)) + w19# = p32# (u_ssig1 w17#) (p32# w12# (p32# (u_ssig0 w04#) w03#)) + w20# = p32# (u_ssig1 w18#) (p32# w13# (p32# (u_ssig0 w05#) w04#)) + w21# = p32# (u_ssig1 w19#) (p32# w14# (p32# (u_ssig0 w06#) w05#)) + w22# = p32# (u_ssig1 w20#) (p32# w15# (p32# (u_ssig0 w07#) w06#)) + w23# = p32# (u_ssig1 w21#) (p32# w16# (p32# (u_ssig0 w08#) w07#)) + w24# = p32# (u_ssig1 w22#) (p32# w17# (p32# (u_ssig0 w09#) w08#)) + w25# = p32# (u_ssig1 w23#) (p32# w18# (p32# (u_ssig0 w10#) w09#)) + w26# = p32# (u_ssig1 w24#) (p32# w19# (p32# (u_ssig0 w11#) w10#)) + w27# = p32# (u_ssig1 w25#) (p32# w20# (p32# (u_ssig0 w12#) w11#)) + w28# = p32# (u_ssig1 w26#) (p32# w21# (p32# (u_ssig0 w13#) w12#)) + w29# = p32# (u_ssig1 w27#) (p32# w22# (p32# (u_ssig0 w14#) w13#)) + w30# = p32# (u_ssig1 w28#) (p32# w23# (p32# (u_ssig0 w15#) w14#)) + w31# = p32# (u_ssig1 w29#) (p32# w24# (p32# (u_ssig0 w16#) w15#)) + w32# = p32# (u_ssig1 w30#) (p32# w25# (p32# (u_ssig0 w17#) w16#)) + w33# = p32# (u_ssig1 w31#) (p32# w26# (p32# (u_ssig0 w18#) w17#)) + w34# = p32# (u_ssig1 w32#) (p32# w27# (p32# (u_ssig0 w19#) w18#)) + w35# = p32# (u_ssig1 w33#) (p32# w28# (p32# (u_ssig0 w20#) w19#)) + w36# = p32# (u_ssig1 w34#) (p32# w29# (p32# (u_ssig0 w21#) w20#)) + w37# = p32# (u_ssig1 w35#) (p32# w30# (p32# (u_ssig0 w22#) w21#)) + w38# = p32# (u_ssig1 w36#) (p32# w31# (p32# (u_ssig0 w23#) w22#)) + w39# = p32# (u_ssig1 w37#) (p32# w32# (p32# (u_ssig0 w24#) w23#)) + w40# = p32# (u_ssig1 w38#) (p32# w33# (p32# (u_ssig0 w25#) w24#)) + w41# = p32# (u_ssig1 w39#) (p32# w34# (p32# (u_ssig0 w26#) w25#)) + w42# = p32# (u_ssig1 w40#) (p32# w35# (p32# (u_ssig0 w27#) w26#)) + w43# = p32# (u_ssig1 w41#) (p32# w36# (p32# (u_ssig0 w28#) w27#)) + w44# = p32# (u_ssig1 w42#) (p32# w37# (p32# (u_ssig0 w29#) w28#)) + w45# = p32# (u_ssig1 w43#) (p32# w38# (p32# (u_ssig0 w30#) w29#)) + w46# = p32# (u_ssig1 w44#) (p32# w39# (p32# (u_ssig0 w31#) w30#)) + w47# = p32# (u_ssig1 w45#) (p32# w40# (p32# (u_ssig0 w32#) w31#)) + w48# = p32# (u_ssig1 w46#) (p32# w41# (p32# (u_ssig0 w33#) w32#)) + w49# = p32# (u_ssig1 w47#) (p32# w42# (p32# (u_ssig0 w34#) w33#)) + w50# = p32# (u_ssig1 w48#) (p32# w43# (p32# (u_ssig0 w35#) w34#)) + w51# = p32# (u_ssig1 w49#) (p32# w44# (p32# (u_ssig0 w36#) w35#)) + w52# = p32# (u_ssig1 w50#) (p32# w45# (p32# (u_ssig0 w37#) w36#)) + w53# = p32# (u_ssig1 w51#) (p32# w46# (p32# (u_ssig0 w38#) w37#)) + w54# = p32# (u_ssig1 w52#) (p32# w47# (p32# (u_ssig0 w39#) w38#)) + w55# = p32# (u_ssig1 w53#) (p32# w48# (p32# (u_ssig0 w40#) w39#)) + w56# = p32# (u_ssig1 w54#) (p32# w49# (p32# (u_ssig0 w41#) w40#)) + w57# = p32# (u_ssig1 w55#) (p32# w50# (p32# (u_ssig0 w42#) w41#)) + w58# = p32# (u_ssig1 w56#) (p32# w51# (p32# (u_ssig0 w43#) w42#)) + w59# = p32# (u_ssig1 w57#) (p32# w52# (p32# (u_ssig0 w44#) w43#)) + w60# = p32# (u_ssig1 w58#) (p32# w53# (p32# (u_ssig0 w45#) w44#)) + w61# = p32# (u_ssig1 w59#) (p32# w54# (p32# (u_ssig0 w46#) w45#)) + w62# = p32# (u_ssig1 w60#) (p32# w55# (p32# (u_ssig0 w47#) w46#)) + w63# = p32# (u_ssig1 w61#) (p32# w56# (p32# (u_ssig0 w48#) w47#)) + in (# w00# + , w01# + , w02# + , w03# + , w04# + , w05# + , w06# + , w07# + , w08# + , w09# + , w10# + , w11# + , w12# + , w13# + , w14# + , w15# + , w16# + , w17# + , w18# + , w19# + , w20# + , w21# + , w22# + , w23# + , w24# + , w25# + , w26# + , w27# + , w28# + , w29# + , w30# + , w31# + , w32# + , w33# + , w34# + , w35# + , w36# + , w37# + , w38# + , w39# + , w40# + , w41# + , w42# + , w43# + , w44# + , w45# + , w46# + , w47# + , w48# + , w49# + , w50# + , w51# + , w52# + , w53# + , w54# + , w55# + , w56# + , w57# + , w58# + , w59# + , w60# + , w61# + , w62# + , w63# #) +{-# SCC u_prepare_schedule #-} + -- RFC 6234 6.2 step 1 prepare_schedule :: Block -> Schedule prepare_schedule Block {..} = Schedule {..} where @@ -302,10 +829,45 @@ block_hash r@Registers {..} s = loop 0 r where nacc = Registers (t1 + t2) a b c (d + t1) e f g in loop (succ t) nacc +u_block_hash :: Rs -> Sd -> Rs +u_block_hash r@(# h0#, h1#, h2#, h3#, h4#, h5#, h6#, h7# #) s# = loop 0# r where + loop t# !(# a#, b#, c#, d#, e#, f#, g#, h# #) + | isTrue# (t# ==# 64#) = (# + plusWord32# a# h0# + , plusWord32# b# h1# + , plusWord32# c# h2# + , plusWord32# d# h3# + , plusWord32# e# h4# + , plusWord32# f# h5# + , plusWord32# g# h6# + , plusWord32# h# h7# + #) + | otherwise = + let t1# = {-# SCC t1 #-} + plusWord32# h# + (plusWord32# (u_bsig1 e#) + (plusWord32# (u_ch e# f# g#) + (plusWord32# (u_choose_k t#) (u_choose_w s# t#)))) + t2# = {-# SCC t2 #-} plusWord32# (u_bsig0 a#) (u_maj a# b# c#) + nacc = (# plusWord32# t1# t2# + , a# + , b# + , c# + , plusWord32# d# t1# + , e# + , f# + , g# + #) + in loop (t# +# 1#) nacc +{-# SCC u_block_hash #-} + -- RFC 6234 6.2 block pipeline hash_alg :: Registers -> BS.ByteString -> Registers hash_alg rs = block_hash rs . prepare_schedule . parse +u_hash_alg :: Rs -> BS.ByteString -> Rs +u_hash_alg rs# bs = u_block_hash rs# (u_prepare_schedule (u_parse bs)) + -- register concatenation cat :: Registers -> BS.ByteString cat Registers {..} = BL.toStrict . BSB.toLazyByteString $ mconcat [ @@ -319,6 +881,23 @@ cat Registers {..} = BL.toStrict . BSB.toLazyByteString $ mconcat [ , BSB.word32BE h7 ] +lw32 :: Word32# -> Word32 +lw32 w = fi i + where i = I# (word2Int# (word32ToWord# w)) + +u_cat :: Rs -> BS.ByteString +u_cat (# h0#, h1#, h2#, h3#, h4#, h5#, h6#, h7# #) = + BL.toStrict . BSB.toLazyByteString $ mconcat [ + BSB.word32BE (lw32 h0#) + , BSB.word32BE (lw32 h1#) + , BSB.word32BE (lw32 h2#) + , BSB.word32BE (lw32 h3#) + , BSB.word32BE (lw32 h4#) + , BSB.word32BE (lw32 h5#) + , BSB.word32BE (lw32 h6#) + , BSB.word32BE (lw32 h7#) + ] + -- | Compute a condensed representation of a strict bytestring via -- SHA-256. -- @@ -347,6 +926,24 @@ hash_lazy = . blocks_lazy 64 . pad_lazy +u_hash_lazy :: BL.ByteString -> BS.ByteString +u_hash_lazy bl = + u_cat (go u_hash_alg r_iv (pad_lazy bl)) + where + r_iv :: Rs + r_iv = + (# 0x6a09e667#Word32, 0xbb67ae85#Word32, 0x3c6ef372#Word32, 0xa54ff53a#Word32 + , 0x510e527f#Word32, 0x9b05688c#Word32, 0x1f83d9ab#Word32, 0x5be0cd19#Word32 + #) + + go :: (Rs -> BS.ByteString -> Rs) -> Rs -> BL.ByteString -> Rs + go alg !acc# bs + | BL.null bs = acc# + | otherwise = case BL.splitAt 64 bs of + (c, r) -> go alg (alg acc# (BL.toStrict c)) r +{-# SCC u_hash_lazy #-} + + -- HMAC -- https://datatracker.ietf.org/doc/html/rfc2104#section-2 diff --git a/src/HashLarge.hs b/src/HashLarge.hs @@ -20,12 +20,12 @@ main = do hash :: IO () hash = do - input <- BL.readFile "ppad-sha256-large.dat" - let digest = B16.encode $ SHA256.hash_lazy input + input <- BL.readFile "ppad-sha256-hash-large.dat" + let digest = B16.encode $ SHA256.u_hash_lazy input print digest make :: IO () -make = BL.writeFile "ppad-sha256-large.dat" big_input where +make = BL.writeFile "ppad-sha256-hash-large.dat" big_input where big_input :: BL.ByteString big_input = go (16777216 :: Int) mempty where go j acc diff --git a/test/Main.hs b/test/Main.hs @@ -134,6 +134,13 @@ unit_tests = testGroup "ppad-sha256" [ , cmp_hash_lazy "hv3" hv3_put hv3_pec , cmp_hash_lazy "hv4" hv4_put hv4_pec ] + , testGroup "u_hash_lazy" [ + cmp_u_hash_lazy "hv0" hv0_put hv0_pec + , cmp_u_hash_lazy "hv1" hv1_put hv1_pec + , cmp_u_hash_lazy "hv2" hv2_put hv2_pec + , cmp_u_hash_lazy "hv3" hv3_put hv3_pec + , cmp_u_hash_lazy "hv4" hv4_put hv4_pec + ] -- uncomment me to run (slow, ~40s) -- -- , testGroup "hash_lazy (1GB input)" [ @@ -190,6 +197,11 @@ cmp_hash_lazy msg (BL.fromStrict -> put) pec = testCase msg $ do let out = B16.encode (SHA256.hash_lazy put) assertEqual mempty pec out +cmp_u_hash_lazy :: String -> BS.ByteString -> BS.ByteString -> TestTree +cmp_u_hash_lazy msg (BL.fromStrict -> put) pec = testCase msg $ do + let out = B16.encode (SHA256.u_hash_lazy put) + assertEqual mempty pec out + cmp_hmac :: String -> BS.ByteString -> BS.ByteString -> BS.ByteString -> TestTree cmp_hmac msg key put pec = testCase msg $ do