commit d1bea758e16402da7a9875cda7c81ecda82447ff
parent ff08521ffe124aa922f46fde544396af98393fa4
Author: Jared Tobin <jared@jtobin.io>
Date: Thu, 12 Sep 2024 17:11:10 +0400
lib: unlifting experiments
This commit (which I will immediately revert) illustrates another sha256
function, 'u_hash_lazy', which uses unlifted internals. It eliminates
allocation entirely in certain areas, though time is mostly unaffected.
An accompanying binary, 'hash-large', is useful for profiling. Use
cabal build hash-large --enable-profiling
to build it, and then run the binary with the desired RTS flags (e.g.
-s, -p, -hT) to profile things accordingly.
Diffstat:
5 files changed, 614 insertions(+), 3 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,3 @@
dist-newstyle/
ppad-sha256-hash-large.dat
+hash-large*
diff --git a/bench/Main.hs b/bench/Main.hs
@@ -18,6 +18,7 @@ suite = env setup $ \ ~(bs, bl) ->
bgroup "SHA256 (32B input)" [
bench "hash" $ whnf SHA256.hash bs
, bench "hash_lazy" $ whnf SHA256.hash_lazy bl
+ , bench "u_hash_lazy" $ whnf SHA256.u_hash_lazy bl
]
, bgroup "HMAC-SHA256 (32B input)" [
bench "hmac" $ whnf (SHA256.hmac "key") bs
diff --git a/lib/Crypto/Hash/SHA256.hs b/lib/Crypto/Hash/SHA256.hs
@@ -1,7 +1,10 @@
{-# OPTIONS_GHC -funbox-small-strict-fields #-}
{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE ExtendedLiterals #-}
{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE MagicHash #-}
{-# LANGUAGE RecordWildCards #-}
+{-# LANGUAGE UnboxedTuples #-}
{-# LANGUAGE ViewPatterns #-}
-- |
@@ -20,6 +23,8 @@ module Crypto.Hash.SHA256 (
hash
, hash_lazy
+ , u_hash_lazy
+
-- * SHA256-based MAC functions
, hmac
, hmac_lazy
@@ -33,6 +38,7 @@ import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString.Unsafe as BU
import qualified Data.List as L
import Data.Word (Word32, Word64)
+import GHC.Exts
-- preliminary utils
@@ -50,6 +56,7 @@ blocks_lazy s = loop where
| BL.null bs = []
| otherwise = case BL.splitAt (fi s) bs of
(c, r) -> BL.toStrict c : loop r
+{-# SCC blocks_lazy #-}
-- verbatim from Data.Binary
word32be :: BS.ByteString -> Word32
@@ -60,6 +67,35 @@ word32be s =
(fromIntegral (s `BU.unsafeIndex` 3))
{-# INLINE word32be #-}
+-- unlifted Word32 bit twiddling (see GHC.Internal.Word)
+
+(.&.#) :: Word32# -> Word32# -> Word32#
+x# .&.# y# = wordToWord32# ((word32ToWord# x#) `and#` (word32ToWord# y#))
+
+(.^.#) :: Word32# -> Word32# -> Word32#
+x# .^.# y# = wordToWord32# ((word32ToWord# x#) `xor#` (word32ToWord# y#))
+
+complement# :: Word32# -> Word32#
+complement# x# = wordToWord32# (not# (word32ToWord# x#))
+
+rotate# :: Word32# -> Int# -> Word32#
+rotate# x# i#
+ | isTrue# (i'# ==# 0#) = x#
+ | otherwise = wordToWord32# $
+ ((word32ToWord# x#) `uncheckedShiftL#` i'#)
+ `or#` ((word32ToWord# x#) `uncheckedShiftRL#` (32# -# i'#))
+ where
+ !i'# = word2Int# (int2Word# i# `and#` 31##)
+{-# INLINE rotate# #-}
+
+rotateR# :: Word32# -> Int# -> Word32#
+rotateR# x# i# = x# `rotate#` (negateInt# i#)
+{-# INLINE rotateR# #-}
+
+unsafeShiftR# :: Word32# -> Int# -> Word32#
+unsafeShiftR# x# i# = wordToWord32# ((word32ToWord# x#) `uncheckedShiftRL#` i#)
+{-# INLINE unsafeShiftR# #-}
+
-- message padding and parsing
-- https://datatracker.ietf.org/doc/html/rfc6234#section-4.1
@@ -89,6 +125,7 @@ pad_lazy (BL.toChunks -> m) = BL.fromChunks (walk 0 m) where
| otherwise =
let nacc = bs <> BSB.word8 0x00
in padding l (pred k) nacc
+{-# SCC pad_lazy #-}
-- functions and constants used
-- https://datatracker.ietf.org/doc/html/rfc6234#section-5.1
@@ -111,6 +148,24 @@ ssig0 x = B.rotateR x 7 `B.xor` B.rotateR x 18 `B.xor` B.unsafeShiftR x 3
ssig1 :: Word32 -> Word32
ssig1 x = B.rotateR x 17 `B.xor` B.rotateR x 19 `B.xor` B.unsafeShiftR x 10
+u_ch :: Word32# -> Word32# -> Word32# -> Word32#
+u_ch x# y# z# = (x# .&.# y#) .^.# (complement# x# .&.# z#)
+
+u_maj :: Word32# -> Word32# -> Word32# -> Word32#
+u_maj x# y# z# = (x# .&.# y#) .^.# (x# .&.# z#) .^.# (y# .&.# z#)
+
+u_bsig0 :: Word32# -> Word32#
+u_bsig0 x# = rotateR# x# 2# .^.# rotateR# x# 13# .^.# rotateR# x# 22#
+
+u_bsig1 :: Word32# -> Word32#
+u_bsig1 x# = rotateR# x# 6# .^.# rotateR# x# 11# .^.# rotateR# x# 25#
+
+u_ssig0 :: Word32# -> Word32#
+u_ssig0 x# = rotateR# x# 7# .^.# rotateR# x# 18# .^.# unsafeShiftR# x# 3#
+
+u_ssig1 :: Word32# -> Word32#
+u_ssig1 x# = rotateR# x# 17# .^.# rotateR# x# 19# .^.# unsafeShiftR# x# 10#
+
data Schedule = Schedule {
w00 :: !Word32, w01 :: !Word32, w02 :: !Word32, w03 :: !Word32
, w04 :: !Word32, w05 :: !Word32, w06 :: !Word32, w07 :: !Word32
@@ -130,6 +185,73 @@ data Schedule = Schedule {
, w60 :: !Word32, w61 :: !Word32, w62 :: !Word32, w63 :: !Word32
} deriving (Eq, Show)
+type Sd = (#
+ Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ , Word32#
+ #)
+
choose_w :: Schedule -> Int -> Word32
choose_w s = \case
0 -> w00 s; 1 -> w01 s; 2 -> w02 s; 3 -> w03 s
@@ -150,6 +272,140 @@ choose_w s = \case
60 -> w60 s; 61 -> w61 s; 62 -> w62 s; 63 -> w63 s
_ -> error "ppad-sha256: internal error (invalid schedule index)"
+u_choose_w :: Sd -> Int# -> Word32#
+u_choose_w s i# =
+ case s of
+ (# w00#
+ , w01#
+ , w02#
+ , w03#
+ , w04#
+ , w05#
+ , w06#
+ , w07#
+ , w08#
+ , w09#
+ , w10#
+ , w11#
+ , w12#
+ , w13#
+ , w14#
+ , w15#
+ , w16#
+ , w17#
+ , w18#
+ , w19#
+ , w20#
+ , w21#
+ , w22#
+ , w23#
+ , w24#
+ , w25#
+ , w26#
+ , w27#
+ , w28#
+ , w29#
+ , w30#
+ , w31#
+ , w32#
+ , w33#
+ , w34#
+ , w35#
+ , w36#
+ , w37#
+ , w38#
+ , w39#
+ , w40#
+ , w41#
+ , w42#
+ , w43#
+ , w44#
+ , w45#
+ , w46#
+ , w47#
+ , w48#
+ , w49#
+ , w50#
+ , w51#
+ , w52#
+ , w53#
+ , w54#
+ , w55#
+ , w56#
+ , w57#
+ , w58#
+ , w59#
+ , w60#
+ , w61#
+ , w62#
+ , w63# #)
+ | isTrue# (i# ==# 0#) -> w00#
+ | isTrue# (i# ==# 1#) -> w01#
+ | isTrue# (i# ==# 2#) -> w02#
+ | isTrue# (i# ==# 3#) -> w03#
+ | isTrue# (i# ==# 4#) -> w04#
+ | isTrue# (i# ==# 5#) -> w05#
+ | isTrue# (i# ==# 6#) -> w06#
+ | isTrue# (i# ==# 7#) -> w07#
+ | isTrue# (i# ==# 8#) -> w08#
+ | isTrue# (i# ==# 9#) -> w09#
+ | isTrue# (i# ==# 10#) -> w10#
+ | isTrue# (i# ==# 11#) -> w11#
+ | isTrue# (i# ==# 12#) -> w12#
+ | isTrue# (i# ==# 13#) -> w13#
+ | isTrue# (i# ==# 14#) -> w14#
+ | isTrue# (i# ==# 15#) -> w15#
+ | isTrue# (i# ==# 16#) -> w16#
+ | isTrue# (i# ==# 17#) -> w17#
+ | isTrue# (i# ==# 18#) -> w18#
+ | isTrue# (i# ==# 19#) -> w19#
+ | isTrue# (i# ==# 20#) -> w20#
+ | isTrue# (i# ==# 21#) -> w21#
+ | isTrue# (i# ==# 22#) -> w22#
+ | isTrue# (i# ==# 23#) -> w23#
+ | isTrue# (i# ==# 24#) -> w24#
+ | isTrue# (i# ==# 25#) -> w25#
+ | isTrue# (i# ==# 26#) -> w26#
+ | isTrue# (i# ==# 27#) -> w27#
+ | isTrue# (i# ==# 28#) -> w28#
+ | isTrue# (i# ==# 29#) -> w29#
+ | isTrue# (i# ==# 30#) -> w30#
+ | isTrue# (i# ==# 31#) -> w31#
+ | isTrue# (i# ==# 32#) -> w32#
+ | isTrue# (i# ==# 33#) -> w33#
+ | isTrue# (i# ==# 34#) -> w34#
+ | isTrue# (i# ==# 35#) -> w35#
+ | isTrue# (i# ==# 36#) -> w36#
+ | isTrue# (i# ==# 37#) -> w37#
+ | isTrue# (i# ==# 38#) -> w38#
+ | isTrue# (i# ==# 39#) -> w39#
+ | isTrue# (i# ==# 40#) -> w40#
+ | isTrue# (i# ==# 41#) -> w41#
+ | isTrue# (i# ==# 42#) -> w42#
+ | isTrue# (i# ==# 43#) -> w43#
+ | isTrue# (i# ==# 44#) -> w44#
+ | isTrue# (i# ==# 45#) -> w45#
+ | isTrue# (i# ==# 46#) -> w46#
+ | isTrue# (i# ==# 47#) -> w47#
+ | isTrue# (i# ==# 48#) -> w48#
+ | isTrue# (i# ==# 49#) -> w49#
+ | isTrue# (i# ==# 50#) -> w50#
+ | isTrue# (i# ==# 51#) -> w51#
+ | isTrue# (i# ==# 52#) -> w52#
+ | isTrue# (i# ==# 53#) -> w53#
+ | isTrue# (i# ==# 54#) -> w54#
+ | isTrue# (i# ==# 55#) -> w55#
+ | isTrue# (i# ==# 56#) -> w56#
+ | isTrue# (i# ==# 57#) -> w57#
+ | isTrue# (i# ==# 58#) -> w58#
+ | isTrue# (i# ==# 59#) -> w59#
+ | isTrue# (i# ==# 60#) -> w60#
+ | isTrue# (i# ==# 61#) -> w61#
+ | isTrue# (i# ==# 62#) -> w62#
+ | isTrue# (i# ==# 63#) -> w63#
+ | otherwise -> error "boom"
+{-# SCC u_choose_w #-}
+
-- k0-k63 are the first 32 bits of the fractional parts of the cube
-- roots of the first sixty-four prime numbers
choose_k :: Int -> Word32
@@ -172,6 +428,75 @@ choose_k = \case
60 -> 0x90befffa; 61 -> 0xa4506ceb; 62 -> 0xbef9a3f7; 63 -> 0xc67178f2
_ -> error "ppad-sha256: internal error (invalid constant index)"
+u_choose_k :: Int# -> Word32#
+u_choose_k i#
+ | isTrue# (i# ==# 0#) = 0x428a2f98#Word32
+ | isTrue# (i# ==# 1#) = 0x71374491#Word32
+ | isTrue# (i# ==# 2#) = 0xb5c0fbcf#Word32
+ | isTrue# (i# ==# 3#) = 0xe9b5dba5#Word32
+ | isTrue# (i# ==# 4#) = 0x3956c25b#Word32
+ | isTrue# (i# ==# 5#) = 0x59f111f1#Word32
+ | isTrue# (i# ==# 6#) = 0x923f82a4#Word32
+ | isTrue# (i# ==# 7#) = 0xab1c5ed5#Word32
+ | isTrue# (i# ==# 8#) = 0xd807aa98#Word32
+ | isTrue# (i# ==# 9#) = 0x12835b01#Word32
+ | isTrue# (i# ==# 10#) = 0x243185be#Word32
+ | isTrue# (i# ==# 11#) = 0x550c7dc3#Word32
+ | isTrue# (i# ==# 12#) = 0x72be5d74#Word32
+ | isTrue# (i# ==# 13#) = 0x80deb1fe#Word32
+ | isTrue# (i# ==# 14#) = 0x9bdc06a7#Word32
+ | isTrue# (i# ==# 15#) = 0xc19bf174#Word32
+ | isTrue# (i# ==# 16#) = 0xe49b69c1#Word32
+ | isTrue# (i# ==# 17#) = 0xefbe4786#Word32
+ | isTrue# (i# ==# 18#) = 0x0fc19dc6#Word32
+ | isTrue# (i# ==# 19#) = 0x240ca1cc#Word32
+ | isTrue# (i# ==# 20#) = 0x2de92c6f#Word32
+ | isTrue# (i# ==# 21#) = 0x4a7484aa#Word32
+ | isTrue# (i# ==# 22#) = 0x5cb0a9dc#Word32
+ | isTrue# (i# ==# 23#) = 0x76f988da#Word32
+ | isTrue# (i# ==# 24#) = 0x983e5152#Word32
+ | isTrue# (i# ==# 25#) = 0xa831c66d#Word32
+ | isTrue# (i# ==# 26#) = 0xb00327c8#Word32
+ | isTrue# (i# ==# 27#) = 0xbf597fc7#Word32
+ | isTrue# (i# ==# 28#) = 0xc6e00bf3#Word32
+ | isTrue# (i# ==# 29#) = 0xd5a79147#Word32
+ | isTrue# (i# ==# 30#) = 0x06ca6351#Word32
+ | isTrue# (i# ==# 31#) = 0x14292967#Word32
+ | isTrue# (i# ==# 32#) = 0x27b70a85#Word32
+ | isTrue# (i# ==# 33#) = 0x2e1b2138#Word32
+ | isTrue# (i# ==# 34#) = 0x4d2c6dfc#Word32
+ | isTrue# (i# ==# 35#) = 0x53380d13#Word32
+ | isTrue# (i# ==# 36#) = 0x650a7354#Word32
+ | isTrue# (i# ==# 37#) = 0x766a0abb#Word32
+ | isTrue# (i# ==# 38#) = 0x81c2c92e#Word32
+ | isTrue# (i# ==# 39#) = 0x92722c85#Word32
+ | isTrue# (i# ==# 40#) = 0xa2bfe8a1#Word32
+ | isTrue# (i# ==# 41#) = 0xa81a664b#Word32
+ | isTrue# (i# ==# 42#) = 0xc24b8b70#Word32
+ | isTrue# (i# ==# 43#) = 0xc76c51a3#Word32
+ | isTrue# (i# ==# 44#) = 0xd192e819#Word32
+ | isTrue# (i# ==# 45#) = 0xd6990624#Word32
+ | isTrue# (i# ==# 46#) = 0xf40e3585#Word32
+ | isTrue# (i# ==# 47#) = 0x106aa070#Word32
+ | isTrue# (i# ==# 48#) = 0x19a4c116#Word32
+ | isTrue# (i# ==# 49#) = 0x1e376c08#Word32
+ | isTrue# (i# ==# 50#) = 0x2748774c#Word32
+ | isTrue# (i# ==# 51#) = 0x34b0bcb5#Word32
+ | isTrue# (i# ==# 52#) = 0x391c0cb3#Word32
+ | isTrue# (i# ==# 53#) = 0x4ed8aa4a#Word32
+ | isTrue# (i# ==# 54#) = 0x5b9cca4f#Word32
+ | isTrue# (i# ==# 55#) = 0x682e6ff3#Word32
+ | isTrue# (i# ==# 56#) = 0x748f82ee#Word32
+ | isTrue# (i# ==# 57#) = 0x78a5636f#Word32
+ | isTrue# (i# ==# 58#) = 0x84c87814#Word32
+ | isTrue# (i# ==# 59#) = 0x8cc70208#Word32
+ | isTrue# (i# ==# 60#) = 0x90befffa#Word32
+ | isTrue# (i# ==# 61#) = 0xa4506ceb#Word32
+ | isTrue# (i# ==# 62#) = 0xbef9a3f7#Word32
+ | isTrue# (i# ==# 63#) = 0xc67178f2#Word32
+ | otherwise = error "ppad-sha256: internal error (invalid constant index)"
+{-# SCC u_choose_k #-}
+
-- initialization
-- https://datatracker.ietf.org/doc/html/rfc6234#section-6.1
@@ -187,6 +512,9 @@ iv = Registers
0x6a09e667 0xbb67ae85 0x3c6ef372 0xa54ff53a
0x510e527f 0x9b05688c 0x1f83d9ab 0x5be0cd19
+type Rs = (# Word32#, Word32#, Word32#, Word32#
+ , Word32#, Word32#, Word32#, Word32# #)
+
-- processing
-- https://datatracker.ietf.org/doc/html/rfc6234#section-6.2
@@ -197,6 +525,12 @@ data Block = Block {
, m12 :: !Word32, m13 :: !Word32, m14 :: !Word32, m15 :: !Word32
} deriving (Eq, Show)
+type Bl = (#
+ Word32#, Word32#, Word32#, Word32#
+ , Word32#, Word32#, Word32#, Word32#
+ , Word32#, Word32#, Word32#, Word32#
+ , Word32#, Word32#, Word32#, Word32# #)
+
-- parse a 512-bit block into sixteen 32-bit words
parse :: BS.ByteString -> Block
parse bs =
@@ -220,6 +554,199 @@ parse bs =
then Block {..}
else error "ppad-sha256: internal error (bytes remaining)"
+unW32 :: Word32 -> Word32#
+unW32 (fi -> i) = case i of
+ I# i# -> wordToWord32# (int2Word# i#)
+
+u_parse :: BS.ByteString -> Bl
+u_parse bs =
+ let (word32be -> m00, t00) = BS.splitAt 4 bs
+ (word32be -> m01, t01) = BS.splitAt 4 t00
+ (word32be -> m02, t02) = BS.splitAt 4 t01
+ (word32be -> m03, t03) = BS.splitAt 4 t02
+ (word32be -> m04, t04) = BS.splitAt 4 t03
+ (word32be -> m05, t05) = BS.splitAt 4 t04
+ (word32be -> m06, t06) = BS.splitAt 4 t05
+ (word32be -> m07, t07) = BS.splitAt 4 t06
+ (word32be -> m08, t08) = BS.splitAt 4 t07
+ (word32be -> m09, t09) = BS.splitAt 4 t08
+ (word32be -> m10, t10) = BS.splitAt 4 t09
+ (word32be -> m11, t11) = BS.splitAt 4 t10
+ (word32be -> m12, t12) = BS.splitAt 4 t11
+ (word32be -> m13, t13) = BS.splitAt 4 t12
+ (word32be -> m14, t14) = BS.splitAt 4 t13
+ (word32be -> m15, t15) = BS.splitAt 4 t14
+ in if BS.null t15
+ then (# unW32 m00
+ , unW32 m01
+ , unW32 m02
+ , unW32 m03
+ , unW32 m04
+ , unW32 m05
+ , unW32 m06
+ , unW32 m07
+ , unW32 m08
+ , unW32 m09
+ , unW32 m10
+ , unW32 m11
+ , unW32 m12
+ , unW32 m13
+ , unW32 m14
+ , unW32 m15 #)
+ else error "ppad-sha256: internal error (bytes remaining)"
+{-# SCC u_parse #-}
+
+p32# :: Word32# -> Word32# -> Word32#
+p32# = plusWord32#
+
+u_prepare_schedule :: Bl -> Sd
+u_prepare_schedule b = case b of
+ (# m00#
+ , m01#
+ , m02#
+ , m03#
+ , m04#
+ , m05#
+ , m06#
+ , m07#
+ , m08#
+ , m09#
+ , m10#
+ , m11#
+ , m12#
+ , m13#
+ , m14#
+ , m15# #) ->
+ let w00# = m00#
+ w01# = m01#
+ w02# = m02#
+ w03# = m03#
+ w04# = m04#
+ w05# = m05#
+ w06# = m06#
+ w07# = m07#
+ w08# = m08#
+ w09# = m09#
+ w10# = m10#
+ w11# = m11#
+ w12# = m12#
+ w13# = m13#
+ w14# = m14#
+ w15# = m15#
+ w16# = p32# (u_ssig1 w14#) (p32# w09# (p32# (u_ssig0 w01#) w00#))
+ w17# = p32# (u_ssig1 w15#) (p32# w10# (p32# (u_ssig0 w02#) w01#))
+ w18# = p32# (u_ssig1 w16#) (p32# w11# (p32# (u_ssig0 w03#) w02#))
+ w19# = p32# (u_ssig1 w17#) (p32# w12# (p32# (u_ssig0 w04#) w03#))
+ w20# = p32# (u_ssig1 w18#) (p32# w13# (p32# (u_ssig0 w05#) w04#))
+ w21# = p32# (u_ssig1 w19#) (p32# w14# (p32# (u_ssig0 w06#) w05#))
+ w22# = p32# (u_ssig1 w20#) (p32# w15# (p32# (u_ssig0 w07#) w06#))
+ w23# = p32# (u_ssig1 w21#) (p32# w16# (p32# (u_ssig0 w08#) w07#))
+ w24# = p32# (u_ssig1 w22#) (p32# w17# (p32# (u_ssig0 w09#) w08#))
+ w25# = p32# (u_ssig1 w23#) (p32# w18# (p32# (u_ssig0 w10#) w09#))
+ w26# = p32# (u_ssig1 w24#) (p32# w19# (p32# (u_ssig0 w11#) w10#))
+ w27# = p32# (u_ssig1 w25#) (p32# w20# (p32# (u_ssig0 w12#) w11#))
+ w28# = p32# (u_ssig1 w26#) (p32# w21# (p32# (u_ssig0 w13#) w12#))
+ w29# = p32# (u_ssig1 w27#) (p32# w22# (p32# (u_ssig0 w14#) w13#))
+ w30# = p32# (u_ssig1 w28#) (p32# w23# (p32# (u_ssig0 w15#) w14#))
+ w31# = p32# (u_ssig1 w29#) (p32# w24# (p32# (u_ssig0 w16#) w15#))
+ w32# = p32# (u_ssig1 w30#) (p32# w25# (p32# (u_ssig0 w17#) w16#))
+ w33# = p32# (u_ssig1 w31#) (p32# w26# (p32# (u_ssig0 w18#) w17#))
+ w34# = p32# (u_ssig1 w32#) (p32# w27# (p32# (u_ssig0 w19#) w18#))
+ w35# = p32# (u_ssig1 w33#) (p32# w28# (p32# (u_ssig0 w20#) w19#))
+ w36# = p32# (u_ssig1 w34#) (p32# w29# (p32# (u_ssig0 w21#) w20#))
+ w37# = p32# (u_ssig1 w35#) (p32# w30# (p32# (u_ssig0 w22#) w21#))
+ w38# = p32# (u_ssig1 w36#) (p32# w31# (p32# (u_ssig0 w23#) w22#))
+ w39# = p32# (u_ssig1 w37#) (p32# w32# (p32# (u_ssig0 w24#) w23#))
+ w40# = p32# (u_ssig1 w38#) (p32# w33# (p32# (u_ssig0 w25#) w24#))
+ w41# = p32# (u_ssig1 w39#) (p32# w34# (p32# (u_ssig0 w26#) w25#))
+ w42# = p32# (u_ssig1 w40#) (p32# w35# (p32# (u_ssig0 w27#) w26#))
+ w43# = p32# (u_ssig1 w41#) (p32# w36# (p32# (u_ssig0 w28#) w27#))
+ w44# = p32# (u_ssig1 w42#) (p32# w37# (p32# (u_ssig0 w29#) w28#))
+ w45# = p32# (u_ssig1 w43#) (p32# w38# (p32# (u_ssig0 w30#) w29#))
+ w46# = p32# (u_ssig1 w44#) (p32# w39# (p32# (u_ssig0 w31#) w30#))
+ w47# = p32# (u_ssig1 w45#) (p32# w40# (p32# (u_ssig0 w32#) w31#))
+ w48# = p32# (u_ssig1 w46#) (p32# w41# (p32# (u_ssig0 w33#) w32#))
+ w49# = p32# (u_ssig1 w47#) (p32# w42# (p32# (u_ssig0 w34#) w33#))
+ w50# = p32# (u_ssig1 w48#) (p32# w43# (p32# (u_ssig0 w35#) w34#))
+ w51# = p32# (u_ssig1 w49#) (p32# w44# (p32# (u_ssig0 w36#) w35#))
+ w52# = p32# (u_ssig1 w50#) (p32# w45# (p32# (u_ssig0 w37#) w36#))
+ w53# = p32# (u_ssig1 w51#) (p32# w46# (p32# (u_ssig0 w38#) w37#))
+ w54# = p32# (u_ssig1 w52#) (p32# w47# (p32# (u_ssig0 w39#) w38#))
+ w55# = p32# (u_ssig1 w53#) (p32# w48# (p32# (u_ssig0 w40#) w39#))
+ w56# = p32# (u_ssig1 w54#) (p32# w49# (p32# (u_ssig0 w41#) w40#))
+ w57# = p32# (u_ssig1 w55#) (p32# w50# (p32# (u_ssig0 w42#) w41#))
+ w58# = p32# (u_ssig1 w56#) (p32# w51# (p32# (u_ssig0 w43#) w42#))
+ w59# = p32# (u_ssig1 w57#) (p32# w52# (p32# (u_ssig0 w44#) w43#))
+ w60# = p32# (u_ssig1 w58#) (p32# w53# (p32# (u_ssig0 w45#) w44#))
+ w61# = p32# (u_ssig1 w59#) (p32# w54# (p32# (u_ssig0 w46#) w45#))
+ w62# = p32# (u_ssig1 w60#) (p32# w55# (p32# (u_ssig0 w47#) w46#))
+ w63# = p32# (u_ssig1 w61#) (p32# w56# (p32# (u_ssig0 w48#) w47#))
+ in (# w00#
+ , w01#
+ , w02#
+ , w03#
+ , w04#
+ , w05#
+ , w06#
+ , w07#
+ , w08#
+ , w09#
+ , w10#
+ , w11#
+ , w12#
+ , w13#
+ , w14#
+ , w15#
+ , w16#
+ , w17#
+ , w18#
+ , w19#
+ , w20#
+ , w21#
+ , w22#
+ , w23#
+ , w24#
+ , w25#
+ , w26#
+ , w27#
+ , w28#
+ , w29#
+ , w30#
+ , w31#
+ , w32#
+ , w33#
+ , w34#
+ , w35#
+ , w36#
+ , w37#
+ , w38#
+ , w39#
+ , w40#
+ , w41#
+ , w42#
+ , w43#
+ , w44#
+ , w45#
+ , w46#
+ , w47#
+ , w48#
+ , w49#
+ , w50#
+ , w51#
+ , w52#
+ , w53#
+ , w54#
+ , w55#
+ , w56#
+ , w57#
+ , w58#
+ , w59#
+ , w60#
+ , w61#
+ , w62#
+ , w63# #)
+{-# SCC u_prepare_schedule #-}
+
-- RFC 6234 6.2 step 1
prepare_schedule :: Block -> Schedule
prepare_schedule Block {..} = Schedule {..} where
@@ -302,10 +829,45 @@ block_hash r@Registers {..} s = loop 0 r where
nacc = Registers (t1 + t2) a b c (d + t1) e f g
in loop (succ t) nacc
+u_block_hash :: Rs -> Sd -> Rs
+u_block_hash r@(# h0#, h1#, h2#, h3#, h4#, h5#, h6#, h7# #) s# = loop 0# r where
+ loop t# !(# a#, b#, c#, d#, e#, f#, g#, h# #)
+ | isTrue# (t# ==# 64#) = (#
+ plusWord32# a# h0#
+ , plusWord32# b# h1#
+ , plusWord32# c# h2#
+ , plusWord32# d# h3#
+ , plusWord32# e# h4#
+ , plusWord32# f# h5#
+ , plusWord32# g# h6#
+ , plusWord32# h# h7#
+ #)
+ | otherwise =
+ let t1# = {-# SCC t1 #-}
+ plusWord32# h#
+ (plusWord32# (u_bsig1 e#)
+ (plusWord32# (u_ch e# f# g#)
+ (plusWord32# (u_choose_k t#) (u_choose_w s# t#))))
+ t2# = {-# SCC t2 #-} plusWord32# (u_bsig0 a#) (u_maj a# b# c#)
+ nacc = (# plusWord32# t1# t2#
+ , a#
+ , b#
+ , c#
+ , plusWord32# d# t1#
+ , e#
+ , f#
+ , g#
+ #)
+ in loop (t# +# 1#) nacc
+{-# SCC u_block_hash #-}
+
-- RFC 6234 6.2 block pipeline
hash_alg :: Registers -> BS.ByteString -> Registers
hash_alg rs = block_hash rs . prepare_schedule . parse
+u_hash_alg :: Rs -> BS.ByteString -> Rs
+u_hash_alg rs# bs = u_block_hash rs# (u_prepare_schedule (u_parse bs))
+
-- register concatenation
cat :: Registers -> BS.ByteString
cat Registers {..} = BL.toStrict . BSB.toLazyByteString $ mconcat [
@@ -319,6 +881,23 @@ cat Registers {..} = BL.toStrict . BSB.toLazyByteString $ mconcat [
, BSB.word32BE h7
]
+lw32 :: Word32# -> Word32
+lw32 w = fi i
+ where i = I# (word2Int# (word32ToWord# w))
+
+u_cat :: Rs -> BS.ByteString
+u_cat (# h0#, h1#, h2#, h3#, h4#, h5#, h6#, h7# #) =
+ BL.toStrict . BSB.toLazyByteString $ mconcat [
+ BSB.word32BE (lw32 h0#)
+ , BSB.word32BE (lw32 h1#)
+ , BSB.word32BE (lw32 h2#)
+ , BSB.word32BE (lw32 h3#)
+ , BSB.word32BE (lw32 h4#)
+ , BSB.word32BE (lw32 h5#)
+ , BSB.word32BE (lw32 h6#)
+ , BSB.word32BE (lw32 h7#)
+ ]
+
-- | Compute a condensed representation of a strict bytestring via
-- SHA-256.
--
@@ -347,6 +926,24 @@ hash_lazy =
. blocks_lazy 64
. pad_lazy
+u_hash_lazy :: BL.ByteString -> BS.ByteString
+u_hash_lazy bl =
+ u_cat (go u_hash_alg r_iv (pad_lazy bl))
+ where
+ r_iv :: Rs
+ r_iv =
+ (# 0x6a09e667#Word32, 0xbb67ae85#Word32, 0x3c6ef372#Word32, 0xa54ff53a#Word32
+ , 0x510e527f#Word32, 0x9b05688c#Word32, 0x1f83d9ab#Word32, 0x5be0cd19#Word32
+ #)
+
+ go :: (Rs -> BS.ByteString -> Rs) -> Rs -> BL.ByteString -> Rs
+ go alg !acc# bs
+ | BL.null bs = acc#
+ | otherwise = case BL.splitAt 64 bs of
+ (c, r) -> go alg (alg acc# (BL.toStrict c)) r
+{-# SCC u_hash_lazy #-}
+
+
-- HMAC
-- https://datatracker.ietf.org/doc/html/rfc2104#section-2
diff --git a/src/HashLarge.hs b/src/HashLarge.hs
@@ -20,12 +20,12 @@ main = do
hash :: IO ()
hash = do
- input <- BL.readFile "ppad-sha256-large.dat"
- let digest = B16.encode $ SHA256.hash_lazy input
+ input <- BL.readFile "ppad-sha256-hash-large.dat"
+ let digest = B16.encode $ SHA256.u_hash_lazy input
print digest
make :: IO ()
-make = BL.writeFile "ppad-sha256-large.dat" big_input where
+make = BL.writeFile "ppad-sha256-hash-large.dat" big_input where
big_input :: BL.ByteString
big_input = go (16777216 :: Int) mempty where
go j acc
diff --git a/test/Main.hs b/test/Main.hs
@@ -134,6 +134,13 @@ unit_tests = testGroup "ppad-sha256" [
, cmp_hash_lazy "hv3" hv3_put hv3_pec
, cmp_hash_lazy "hv4" hv4_put hv4_pec
]
+ , testGroup "u_hash_lazy" [
+ cmp_u_hash_lazy "hv0" hv0_put hv0_pec
+ , cmp_u_hash_lazy "hv1" hv1_put hv1_pec
+ , cmp_u_hash_lazy "hv2" hv2_put hv2_pec
+ , cmp_u_hash_lazy "hv3" hv3_put hv3_pec
+ , cmp_u_hash_lazy "hv4" hv4_put hv4_pec
+ ]
-- uncomment me to run (slow, ~40s)
--
-- , testGroup "hash_lazy (1GB input)" [
@@ -190,6 +197,11 @@ cmp_hash_lazy msg (BL.fromStrict -> put) pec = testCase msg $ do
let out = B16.encode (SHA256.hash_lazy put)
assertEqual mempty pec out
+cmp_u_hash_lazy :: String -> BS.ByteString -> BS.ByteString -> TestTree
+cmp_u_hash_lazy msg (BL.fromStrict -> put) pec = testCase msg $ do
+ let out = B16.encode (SHA256.u_hash_lazy put)
+ assertEqual mempty pec out
+
cmp_hmac
:: String -> BS.ByteString -> BS.ByteString -> BS.ByteString -> TestTree
cmp_hmac msg key put pec = testCase msg $ do