commit 8a254bb54b76f5b19a5704e17798d6bc143f15c2
parent a8cc368e231be532ae3053ec9586e5e63d68d792
Author: Jared Tobin <jared@jtobin.io>
Date: Wed, 7 Jan 2026 21:33:14 +0400
lib: wide-scale refactoring
Diffstat:
9 files changed, 1353 insertions(+), 290 deletions(-)
diff --git a/bench/Main.hs b/bench/Main.hs
@@ -4,6 +4,7 @@ module Main where
import Criterion.Main
import qualified Crypto.Hash.SHA256 as SHA256
+import qualified Crypto.Hash.SHA256D as D
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as BL
import qualified Data.Digest.Pure.SHA as SHA
@@ -18,11 +19,13 @@ suite = env setup $ \ ~(bs, bl) ->
bgroup "ppad-sha256" [
bgroup "SHA256 (32B input)" [
bench "hash" $ whnf SHA256.hash bs
+ , bench "hashd" $ whnf D.hash bs
, bench "hash_lazy" $ whnf SHA256.hash_lazy bl
, bench "SHA.sha256" $ whnf SHA.sha256 bl
]
, bgroup "HMAC-SHA256 (32B input)" [
bench "hmac" $ whnf (SHA256.hmac "key") bs
+ , bench "hmacd" $ whnf (D.hmac "key") bs
, bench "hmac_lazy" $ whnf (SHA256.hmac_lazy "key") bl
, bench "SHA.hmacSha256" $ whnf (SHA.hmacSha256 "key") bl
]
diff --git a/bench/Weight.hs b/bench/Weight.hs
@@ -5,6 +5,7 @@
module Main where
import qualified Crypto.Hash.SHA256 as SHA256
+import qualified Crypto.Hash.SHA256D as D
import qualified Data.ByteString as BS
import Weigh
@@ -19,10 +20,16 @@ hash =
let !bs0 = BS.replicate 32 0
!bs1 = BS.replicate 64 0
!bs2 = BS.replicate 128 0
+ !bs3 = BS.replicate 12288 0
in wgroup "hash" $ do
func' "hash (32B input)" SHA256.hash bs0
func' "hash (64B input)" SHA256.hash bs1
func' "hash (128B input)" SHA256.hash bs2
+ func' "hash (12288B input)" SHA256.hash bs3
+ func' "hashd (64B input)" D.hash bs1
+ func' "hashd (128B input)" D.hash bs2
+ func' "hashd (12288B input)" D.hash bs3
+
hmac :: Weigh ()
hmac =
diff --git a/cbits/sha256_arm.c b/cbits/sha256_arm.c
@@ -0,0 +1,187 @@
+#include <stdint.h>
+#include <string.h>
+
+#if defined(__aarch64__) && defined(__ARM_FEATURE_SHA2)
+
+#include <arm_neon.h>
+
+static const uint32_t K[64] = {
+ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
+ 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+ 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+ 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
+ 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
+ 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+ 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+ 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+ 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
+ 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
+ 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+ 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+ 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+};
+
+/*
+ * Process one 64-byte block using ARM SHA256 crypto instructions.
+ *
+ * state: pointer to 8 uint32_t words (a,b,c,d,e,f,g,h)
+ * block: pointer to 64 bytes of message data
+ *
+ * The state is updated in place.
+ */
+void sha256_block_arm(uint32_t *state, const uint8_t *block) {
+ /* Load current hash state */
+ uint32x4_t abcd = vld1q_u32(&state[0]);
+ uint32x4_t efgh = vld1q_u32(&state[4]);
+
+ /* Save original for final addition */
+ uint32x4_t abcd_orig = abcd;
+ uint32x4_t efgh_orig = efgh;
+
+ /* Load message and convert from big-endian */
+ uint32x4_t m0 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(&block[0])));
+ uint32x4_t m1 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(&block[16])));
+ uint32x4_t m2 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(&block[32])));
+ uint32x4_t m3 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(&block[48])));
+
+ uint32x4_t tmp, tmp2;
+
+ /* Rounds 0-3 */
+ tmp = vaddq_u32(m0, vld1q_u32(&K[0]));
+ tmp2 = abcd;
+ abcd = vsha256hq_u32(abcd, efgh, tmp);
+ efgh = vsha256h2q_u32(efgh, tmp2, tmp);
+ m0 = vsha256su1q_u32(vsha256su0q_u32(m0, m1), m2, m3);
+
+ /* Rounds 4-7 */
+ tmp = vaddq_u32(m1, vld1q_u32(&K[4]));
+ tmp2 = abcd;
+ abcd = vsha256hq_u32(abcd, efgh, tmp);
+ efgh = vsha256h2q_u32(efgh, tmp2, tmp);
+ m1 = vsha256su1q_u32(vsha256su0q_u32(m1, m2), m3, m0);
+
+ /* Rounds 8-11 */
+ tmp = vaddq_u32(m2, vld1q_u32(&K[8]));
+ tmp2 = abcd;
+ abcd = vsha256hq_u32(abcd, efgh, tmp);
+ efgh = vsha256h2q_u32(efgh, tmp2, tmp);
+ m2 = vsha256su1q_u32(vsha256su0q_u32(m2, m3), m0, m1);
+
+ /* Rounds 12-15 */
+ tmp = vaddq_u32(m3, vld1q_u32(&K[12]));
+ tmp2 = abcd;
+ abcd = vsha256hq_u32(abcd, efgh, tmp);
+ efgh = vsha256h2q_u32(efgh, tmp2, tmp);
+ m3 = vsha256su1q_u32(vsha256su0q_u32(m3, m0), m1, m2);
+
+ /* Rounds 16-19 */
+ tmp = vaddq_u32(m0, vld1q_u32(&K[16]));
+ tmp2 = abcd;
+ abcd = vsha256hq_u32(abcd, efgh, tmp);
+ efgh = vsha256h2q_u32(efgh, tmp2, tmp);
+ m0 = vsha256su1q_u32(vsha256su0q_u32(m0, m1), m2, m3);
+
+ /* Rounds 20-23 */
+ tmp = vaddq_u32(m1, vld1q_u32(&K[20]));
+ tmp2 = abcd;
+ abcd = vsha256hq_u32(abcd, efgh, tmp);
+ efgh = vsha256h2q_u32(efgh, tmp2, tmp);
+ m1 = vsha256su1q_u32(vsha256su0q_u32(m1, m2), m3, m0);
+
+ /* Rounds 24-27 */
+ tmp = vaddq_u32(m2, vld1q_u32(&K[24]));
+ tmp2 = abcd;
+ abcd = vsha256hq_u32(abcd, efgh, tmp);
+ efgh = vsha256h2q_u32(efgh, tmp2, tmp);
+ m2 = vsha256su1q_u32(vsha256su0q_u32(m2, m3), m0, m1);
+
+ /* Rounds 28-31 */
+ tmp = vaddq_u32(m3, vld1q_u32(&K[28]));
+ tmp2 = abcd;
+ abcd = vsha256hq_u32(abcd, efgh, tmp);
+ efgh = vsha256h2q_u32(efgh, tmp2, tmp);
+ m3 = vsha256su1q_u32(vsha256su0q_u32(m3, m0), m1, m2);
+
+ /* Rounds 32-35 */
+ tmp = vaddq_u32(m0, vld1q_u32(&K[32]));
+ tmp2 = abcd;
+ abcd = vsha256hq_u32(abcd, efgh, tmp);
+ efgh = vsha256h2q_u32(efgh, tmp2, tmp);
+ m0 = vsha256su1q_u32(vsha256su0q_u32(m0, m1), m2, m3);
+
+ /* Rounds 36-39 */
+ tmp = vaddq_u32(m1, vld1q_u32(&K[36]));
+ tmp2 = abcd;
+ abcd = vsha256hq_u32(abcd, efgh, tmp);
+ efgh = vsha256h2q_u32(efgh, tmp2, tmp);
+ m1 = vsha256su1q_u32(vsha256su0q_u32(m1, m2), m3, m0);
+
+ /* Rounds 40-43 */
+ tmp = vaddq_u32(m2, vld1q_u32(&K[40]));
+ tmp2 = abcd;
+ abcd = vsha256hq_u32(abcd, efgh, tmp);
+ efgh = vsha256h2q_u32(efgh, tmp2, tmp);
+ m2 = vsha256su1q_u32(vsha256su0q_u32(m2, m3), m0, m1);
+
+ /* Rounds 44-47 */
+ tmp = vaddq_u32(m3, vld1q_u32(&K[44]));
+ tmp2 = abcd;
+ abcd = vsha256hq_u32(abcd, efgh, tmp);
+ efgh = vsha256h2q_u32(efgh, tmp2, tmp);
+ m3 = vsha256su1q_u32(vsha256su0q_u32(m3, m0), m1, m2);
+
+ /* Rounds 48-51 */
+ tmp = vaddq_u32(m0, vld1q_u32(&K[48]));
+ tmp2 = abcd;
+ abcd = vsha256hq_u32(abcd, efgh, tmp);
+ efgh = vsha256h2q_u32(efgh, tmp2, tmp);
+
+ /* Rounds 52-55 */
+ tmp = vaddq_u32(m1, vld1q_u32(&K[52]));
+ tmp2 = abcd;
+ abcd = vsha256hq_u32(abcd, efgh, tmp);
+ efgh = vsha256h2q_u32(efgh, tmp2, tmp);
+
+ /* Rounds 56-59 */
+ tmp = vaddq_u32(m2, vld1q_u32(&K[56]));
+ tmp2 = abcd;
+ abcd = vsha256hq_u32(abcd, efgh, tmp);
+ efgh = vsha256h2q_u32(efgh, tmp2, tmp);
+
+ /* Rounds 60-63 */
+ tmp = vaddq_u32(m3, vld1q_u32(&K[60]));
+ tmp2 = abcd;
+ abcd = vsha256hq_u32(abcd, efgh, tmp);
+ efgh = vsha256h2q_u32(efgh, tmp2, tmp);
+
+ /* Add original state back */
+ abcd = vaddq_u32(abcd, abcd_orig);
+ efgh = vaddq_u32(efgh, efgh_orig);
+
+ /* Store result */
+ vst1q_u32(&state[0], abcd);
+ vst1q_u32(&state[4], efgh);
+}
+
+/* Return 1 if ARM SHA2 is available, 0 otherwise */
+int sha256_arm_available(void) {
+ return 1;
+}
+
+#else
+
+/* Stub implementations when ARM SHA2 is not available */
+void sha256_block_arm(uint32_t *state, const uint8_t *block) {
+ (void)state;
+ (void)block;
+ /* Should never be called - use pure Haskell fallback */
+}
+
+int sha256_arm_available(void) {
+ return 0;
+}
+
+#endif
diff --git a/lib/Crypto/Hash/SHA256.hs b/lib/Crypto/Hash/SHA256.hs
@@ -1,6 +1,5 @@
{-# OPTIONS_GHC -funbox-small-strict-fields #-}
{-# LANGUAGE BangPatterns #-}
-{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ViewPatterns #-}
-- |
@@ -25,16 +24,15 @@ module Crypto.Hash.SHA256 (
) where
import qualified Data.Bits as B
-import Data.Bits ((.|.), (.&.))
import qualified Data.ByteString as BS
import qualified Data.ByteString.Builder as BSB
import qualified Data.ByteString.Builder.Extra as BE
import qualified Data.ByteString.Internal as BI
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString.Lazy.Internal as BLI
-import qualified Data.ByteString.Unsafe as BU
-import Data.Word (Word32, Word64)
+import Data.Word (Word64)
import Foreign.ForeignPtr (plusForeignPtr)
+import Crypto.Hash.SHA256.Internal
-- preliminary utils
@@ -43,19 +41,6 @@ fi :: (Integral a, Num b) => a -> b
fi = fromIntegral
{-# INLINE fi #-}
--- parse strict ByteString in BE order to Word32 (verbatim from
--- Data.Binary)
---
--- invariant:
--- the input bytestring is at least 32 bits in length
-unsafe_word32be :: BS.ByteString -> Word32
-unsafe_word32be s =
- (fi (s `BU.unsafeIndex` 0) `B.unsafeShiftL` 24) .|.
- (fi (s `BU.unsafeIndex` 1) `B.unsafeShiftL` 16) .|.
- (fi (s `BU.unsafeIndex` 2) `B.unsafeShiftL` 8) .|.
- (fi (s `BU.unsafeIndex` 3))
-{-# INLINE unsafe_word32be #-}
-
-- utility types for more efficient ByteString management
data SSPair = SSPair
@@ -64,8 +49,6 @@ data SSPair = SSPair
data SLPair = SLPair {-# UNPACK #-} !BS.ByteString !BL.ByteString
-data WSPair = WSPair {-# UNPACK #-} !Word32 {-# UNPACK #-} !BS.ByteString
-
-- unsafe version of splitAt that does no bounds checking
--
-- invariant:
@@ -89,16 +72,6 @@ splitAt64 = splitAt' (64 :: Int) where
let SLPair cs' cs'' = splitAt' (n - l) cs
in SLPair (c <> cs') cs''
--- variant of Data.ByteString.splitAt that behaves like an incremental
--- Word32 parser
---
--- invariant:
--- the input bytestring is at least 32 bits in length
-unsafe_parseWsPair :: BS.ByteString -> WSPair
-unsafe_parseWsPair (BI.BS x l) =
- WSPair (unsafe_word32be (BI.BS x 4)) (BI.BS (plusForeignPtr x 4) (l - 4))
-{-# INLINE unsafe_parseWsPair #-}
-
-- builder realization strategies
to_strict :: BSB.Builder -> BS.ByteString
@@ -212,235 +185,6 @@ pad_lazy (BL.toChunks -> m) = BL.fromChunks (walk 0 m) where
let nacc = bs <> BSB.word8 0x00
in padding l (pred k) nacc
--- functions and constants used
--- https://datatracker.ietf.org/doc/html/rfc6234#section-5.1
-
-ch :: Word32 -> Word32 -> Word32 -> Word32
-ch x y z = (x .&. y) `B.xor` (B.complement x .&. z)
-{-# INLINE ch #-}
-
--- credit to SHA authors for the following optimisation. their text:
---
--- > note:
--- > the original functions is (x & y) ^ (x & z) ^ (y & z)
--- > if you fire off truth tables, this is equivalent to
--- > (x & y) | (x & z) | (y & z)
--- > which you can the use distribution on:
--- > (x & (y | z)) | (y & z)
--- > which saves us one operation.
-maj :: Word32 -> Word32 -> Word32 -> Word32
-maj x y z = (x .&. (y .|. z)) .|. (y .&. z)
-{-# INLINE maj #-}
-
-bsig0 :: Word32 -> Word32
-bsig0 x = B.rotateR x 2 `B.xor` B.rotateR x 13 `B.xor` B.rotateR x 22
-{-# INLINE bsig0 #-}
-
-bsig1 :: Word32 -> Word32
-bsig1 x = B.rotateR x 6 `B.xor` B.rotateR x 11 `B.xor` B.rotateR x 25
-{-# INLINE bsig1 #-}
-
-ssig0 :: Word32 -> Word32
-ssig0 x = B.rotateR x 7 `B.xor` B.rotateR x 18 `B.xor` B.unsafeShiftR x 3
-{-# INLINE ssig0 #-}
-
-ssig1 :: Word32 -> Word32
-ssig1 x = B.rotateR x 17 `B.xor` B.rotateR x 19 `B.xor` B.unsafeShiftR x 10
-{-# INLINE ssig1 #-}
-
-data Schedule = Schedule {
- w00 :: !Word32, w01 :: !Word32, w02 :: !Word32, w03 :: !Word32
- , w04 :: !Word32, w05 :: !Word32, w06 :: !Word32, w07 :: !Word32
- , w08 :: !Word32, w09 :: !Word32, w10 :: !Word32, w11 :: !Word32
- , w12 :: !Word32, w13 :: !Word32, w14 :: !Word32, w15 :: !Word32
- , w16 :: !Word32, w17 :: !Word32, w18 :: !Word32, w19 :: !Word32
- , w20 :: !Word32, w21 :: !Word32, w22 :: !Word32, w23 :: !Word32
- , w24 :: !Word32, w25 :: !Word32, w26 :: !Word32, w27 :: !Word32
- , w28 :: !Word32, w29 :: !Word32, w30 :: !Word32, w31 :: !Word32
- , w32 :: !Word32, w33 :: !Word32, w34 :: !Word32, w35 :: !Word32
- , w36 :: !Word32, w37 :: !Word32, w38 :: !Word32, w39 :: !Word32
- , w40 :: !Word32, w41 :: !Word32, w42 :: !Word32, w43 :: !Word32
- , w44 :: !Word32, w45 :: !Word32, w46 :: !Word32, w47 :: !Word32
- , w48 :: !Word32, w49 :: !Word32, w50 :: !Word32, w51 :: !Word32
- , w52 :: !Word32, w53 :: !Word32, w54 :: !Word32, w55 :: !Word32
- , w56 :: !Word32, w57 :: !Word32, w58 :: !Word32, w59 :: !Word32
- , w60 :: !Word32, w61 :: !Word32, w62 :: !Word32, w63 :: !Word32
- }
-
--- initialization
--- https://datatracker.ietf.org/doc/html/rfc6234#section-6.1
-
-data Registers = Registers {
- h0 :: !Word32, h1 :: !Word32, h2 :: !Word32, h3 :: !Word32
- , h4 :: !Word32, h5 :: !Word32, h6 :: !Word32, h7 :: !Word32
- }
-
--- first 32 bits of the fractional parts of the square roots of the
--- first eight primes
-iv :: Registers
-iv = Registers
- 0x6a09e667 0xbb67ae85 0x3c6ef372 0xa54ff53a
- 0x510e527f 0x9b05688c 0x1f83d9ab 0x5be0cd19
-
--- processing
--- https://datatracker.ietf.org/doc/html/rfc6234#section-6.2
-
-data Block = Block {
- m00 :: !Word32, m01 :: !Word32, m02 :: !Word32, m03 :: !Word32
- , m04 :: !Word32, m05 :: !Word32, m06 :: !Word32, m07 :: !Word32
- , m08 :: !Word32, m09 :: !Word32, m10 :: !Word32, m11 :: !Word32
- , m12 :: !Word32, m13 :: !Word32, m14 :: !Word32, m15 :: !Word32
- }
-
--- parse strict bytestring to block
---
--- invariant:
--- the input bytestring is exactly 512 bits long
-unsafe_parse :: BS.ByteString -> Block
-unsafe_parse bs =
- let !(WSPair m00 t00) = unsafe_parseWsPair bs
- !(WSPair m01 t01) = unsafe_parseWsPair t00
- !(WSPair m02 t02) = unsafe_parseWsPair t01
- !(WSPair m03 t03) = unsafe_parseWsPair t02
- !(WSPair m04 t04) = unsafe_parseWsPair t03
- !(WSPair m05 t05) = unsafe_parseWsPair t04
- !(WSPair m06 t06) = unsafe_parseWsPair t05
- !(WSPair m07 t07) = unsafe_parseWsPair t06
- !(WSPair m08 t08) = unsafe_parseWsPair t07
- !(WSPair m09 t09) = unsafe_parseWsPair t08
- !(WSPair m10 t10) = unsafe_parseWsPair t09
- !(WSPair m11 t11) = unsafe_parseWsPair t10
- !(WSPair m12 t12) = unsafe_parseWsPair t11
- !(WSPair m13 t13) = unsafe_parseWsPair t12
- !(WSPair m14 t14) = unsafe_parseWsPair t13
- !(WSPair m15 t15) = unsafe_parseWsPair t14
- in if BS.null t15
- then Block {..}
- else error "ppad-sha256: internal error (bytes remaining)"
-
--- RFC 6234 6.2 step 1
-prepare_schedule :: Block -> Schedule
-prepare_schedule Block {..} = Schedule {..} where
- w00 = m00; w01 = m01; w02 = m02; w03 = m03
- w04 = m04; w05 = m05; w06 = m06; w07 = m07
- w08 = m08; w09 = m09; w10 = m10; w11 = m11
- w12 = m12; w13 = m13; w14 = m14; w15 = m15
- w16 = ssig1 w14 + w09 + ssig0 w01 + w00
- w17 = ssig1 w15 + w10 + ssig0 w02 + w01
- w18 = ssig1 w16 + w11 + ssig0 w03 + w02
- w19 = ssig1 w17 + w12 + ssig0 w04 + w03
- w20 = ssig1 w18 + w13 + ssig0 w05 + w04
- w21 = ssig1 w19 + w14 + ssig0 w06 + w05
- w22 = ssig1 w20 + w15 + ssig0 w07 + w06
- w23 = ssig1 w21 + w16 + ssig0 w08 + w07
- w24 = ssig1 w22 + w17 + ssig0 w09 + w08
- w25 = ssig1 w23 + w18 + ssig0 w10 + w09
- w26 = ssig1 w24 + w19 + ssig0 w11 + w10
- w27 = ssig1 w25 + w20 + ssig0 w12 + w11
- w28 = ssig1 w26 + w21 + ssig0 w13 + w12
- w29 = ssig1 w27 + w22 + ssig0 w14 + w13
- w30 = ssig1 w28 + w23 + ssig0 w15 + w14
- w31 = ssig1 w29 + w24 + ssig0 w16 + w15
- w32 = ssig1 w30 + w25 + ssig0 w17 + w16
- w33 = ssig1 w31 + w26 + ssig0 w18 + w17
- w34 = ssig1 w32 + w27 + ssig0 w19 + w18
- w35 = ssig1 w33 + w28 + ssig0 w20 + w19
- w36 = ssig1 w34 + w29 + ssig0 w21 + w20
- w37 = ssig1 w35 + w30 + ssig0 w22 + w21
- w38 = ssig1 w36 + w31 + ssig0 w23 + w22
- w39 = ssig1 w37 + w32 + ssig0 w24 + w23
- w40 = ssig1 w38 + w33 + ssig0 w25 + w24
- w41 = ssig1 w39 + w34 + ssig0 w26 + w25
- w42 = ssig1 w40 + w35 + ssig0 w27 + w26
- w43 = ssig1 w41 + w36 + ssig0 w28 + w27
- w44 = ssig1 w42 + w37 + ssig0 w29 + w28
- w45 = ssig1 w43 + w38 + ssig0 w30 + w29
- w46 = ssig1 w44 + w39 + ssig0 w31 + w30
- w47 = ssig1 w45 + w40 + ssig0 w32 + w31
- w48 = ssig1 w46 + w41 + ssig0 w33 + w32
- w49 = ssig1 w47 + w42 + ssig0 w34 + w33
- w50 = ssig1 w48 + w43 + ssig0 w35 + w34
- w51 = ssig1 w49 + w44 + ssig0 w36 + w35
- w52 = ssig1 w50 + w45 + ssig0 w37 + w36
- w53 = ssig1 w51 + w46 + ssig0 w38 + w37
- w54 = ssig1 w52 + w47 + ssig0 w39 + w38
- w55 = ssig1 w53 + w48 + ssig0 w40 + w39
- w56 = ssig1 w54 + w49 + ssig0 w41 + w40
- w57 = ssig1 w55 + w50 + ssig0 w42 + w41
- w58 = ssig1 w56 + w51 + ssig0 w43 + w42
- w59 = ssig1 w57 + w52 + ssig0 w44 + w43
- w60 = ssig1 w58 + w53 + ssig0 w45 + w44
- w61 = ssig1 w59 + w54 + ssig0 w46 + w45
- w62 = ssig1 w60 + w55 + ssig0 w47 + w46
- w63 = ssig1 w61 + w56 + ssig0 w48 + w47
-
--- RFC 6234 6.2 steps 2, 3, 4
-block_hash :: Registers -> Schedule -> Registers
-block_hash r00@Registers {..} Schedule {..} =
- -- constants are the first 32 bits of the fractional parts of the
- -- cube roots of the first sixty-four prime numbers
- let r01 = step r00 0x428a2f98 w00; r02 = step r01 0x71374491 w01
- r03 = step r02 0xb5c0fbcf w02; r04 = step r03 0xe9b5dba5 w03
- r05 = step r04 0x3956c25b w04; r06 = step r05 0x59f111f1 w05
- r07 = step r06 0x923f82a4 w06; r08 = step r07 0xab1c5ed5 w07
- r09 = step r08 0xd807aa98 w08; r10 = step r09 0x12835b01 w09
- r11 = step r10 0x243185be w10; r12 = step r11 0x550c7dc3 w11
- r13 = step r12 0x72be5d74 w12; r14 = step r13 0x80deb1fe w13
- r15 = step r14 0x9bdc06a7 w14; r16 = step r15 0xc19bf174 w15
- r17 = step r16 0xe49b69c1 w16; r18 = step r17 0xefbe4786 w17
- r19 = step r18 0x0fc19dc6 w18; r20 = step r19 0x240ca1cc w19
- r21 = step r20 0x2de92c6f w20; r22 = step r21 0x4a7484aa w21
- r23 = step r22 0x5cb0a9dc w22; r24 = step r23 0x76f988da w23
- r25 = step r24 0x983e5152 w24; r26 = step r25 0xa831c66d w25
- r27 = step r26 0xb00327c8 w26; r28 = step r27 0xbf597fc7 w27
- r29 = step r28 0xc6e00bf3 w28; r30 = step r29 0xd5a79147 w29
- r31 = step r30 0x06ca6351 w30; r32 = step r31 0x14292967 w31
- r33 = step r32 0x27b70a85 w32; r34 = step r33 0x2e1b2138 w33
- r35 = step r34 0x4d2c6dfc w34; r36 = step r35 0x53380d13 w35
- r37 = step r36 0x650a7354 w36; r38 = step r37 0x766a0abb w37
- r39 = step r38 0x81c2c92e w38; r40 = step r39 0x92722c85 w39
- r41 = step r40 0xa2bfe8a1 w40; r42 = step r41 0xa81a664b w41
- r43 = step r42 0xc24b8b70 w42; r44 = step r43 0xc76c51a3 w43
- r45 = step r44 0xd192e819 w44; r46 = step r45 0xd6990624 w45
- r47 = step r46 0xf40e3585 w46; r48 = step r47 0x106aa070 w47
- r49 = step r48 0x19a4c116 w48; r50 = step r49 0x1e376c08 w49
- r51 = step r50 0x2748774c w50; r52 = step r51 0x34b0bcb5 w51
- r53 = step r52 0x391c0cb3 w52; r54 = step r53 0x4ed8aa4a w53
- r55 = step r54 0x5b9cca4f w54; r56 = step r55 0x682e6ff3 w55
- r57 = step r56 0x748f82ee w56; r58 = step r57 0x78a5636f w57
- r59 = step r58 0x84c87814 w58; r60 = step r59 0x8cc70208 w59
- r61 = step r60 0x90befffa w60; r62 = step r61 0xa4506ceb w61
- r63 = step r62 0xbef9a3f7 w62; r64 = step r63 0xc67178f2 w63
- !(Registers a b c d e f g h) = r64
- in Registers
- (a + h0) (b + h1) (c + h2) (d + h3)
- (e + h4) (f + h5) (g + h6) (h + h7)
-
-step :: Registers -> Word32 -> Word32 -> Registers
-step (Registers a b c d e f g h) k w =
- let t1 = h + bsig1 e + ch e f g + k + w
- t2 = bsig0 a + maj a b c
- in Registers (t1 + t2) a b c (d + t1) e f g
-{-# INLINE step #-}
-
--- RFC 6234 6.2 block pipeline
---
--- invariant:
--- the input bytestring is exactly 512 bits in length
-unsafe_hash_alg :: Registers -> BS.ByteString -> Registers
-unsafe_hash_alg rs bs = block_hash rs (prepare_schedule (unsafe_parse bs))
-
--- register concatenation
-cat :: Registers -> BS.ByteString
-cat Registers {..} = to_strict_small $
- BSB.word64BE w64_0 <> BSB.word64BE w64_1
- <> BSB.word64BE w64_2 <> BSB.word64BE w64_3
- where
- !w64_0 = fi h0 `B.unsafeShiftL` 32 .|. fi h1
- !w64_1 = fi h2 `B.unsafeShiftL` 32 .|. fi h3
- !w64_2 = fi h4 `B.unsafeShiftL` 32 .|. fi h5
- !w64_3 = fi h6 `B.unsafeShiftL` 32 .|. fi h7
-
-- | Compute a condensed representation of a strict bytestring via
-- SHA-256.
--
@@ -449,31 +193,10 @@ cat Registers {..} = to_strict_small $
-- >>> hash "strict bytestring input"
-- "<strict 256-bit message digest>"
hash :: BS.ByteString -> BS.ByteString
-hash bs = cat (go iv (pad bs)) where
- -- proof that 'go' always terminates safely:
- --
- -- let b = pad bs
- -- then length(b) = n * 512 bits for some n >= 0 (1)
+hash bs = cat (go (iv ()) (pad bs)) where
go :: Registers -> BS.ByteString -> Registers
go !acc b
- -- if n == 0, then 'go' terminates safely (2)
| BS.null b = acc
- -- if n > 0, then
- --
- -- let (c, r) = unsafe_splitAt 64 b
- -- then length(c) == 512 bits by (1)
- -- length(r) == m * 512 bits for some m >= 0 by (1)
- --
- -- note 'unsafe_hash_alg' terminates safely for bytestring (3)
- -- input of exactly 512 bits in length
- --
- -- length(c) == 512
- -- => 'unsafe_hash_alg' terminates safely by (3)
- -- => 'go' terminates safely (4)
- -- length(r) == m * 512 bits for m >= 0
- -- => next invocation of 'go' terminates safely by (2), (4)
- --
- -- then by induction, 'go' always terminates safely (QED)
| otherwise = case unsafe_splitAt 64 b of
SSPair c r -> go (unsafe_hash_alg acc c) r
@@ -485,8 +208,7 @@ hash bs = cat (go iv (pad bs)) where
-- >>> hash_lazy "lazy bytestring input"
-- "<strict 256-bit message digest>"
hash_lazy :: BL.ByteString -> BS.ByteString
-hash_lazy bl = cat (go iv (pad_lazy bl)) where
- -- proof of safety proceeds analogously
+hash_lazy bl = cat (go (iv ()) (pad_lazy bl)) where
go :: Registers -> BL.ByteString -> Registers
go !acc bs
| BL.null bs = acc
@@ -553,4 +275,3 @@ hmac_lazy mk@(BI.PS _ _ l) text =
!(KeyAndLen k lk)
| l > 64 = KeyAndLen (hash mk) 32
| otherwise = KeyAndLen mk l
-
diff --git a/lib/Crypto/Hash/SHA256/Internal.hs b/lib/Crypto/Hash/SHA256/Internal.hs
@@ -0,0 +1,422 @@
+{-# OPTIONS_GHC -funbox-small-strict-fields #-}
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE PatternSynonyms #-}
+{-# LANGUAGE UnboxedTuples #-}
+{-# LANGUAGE UnliftedNewtypes #-}
+
+-- |
+-- Module: Crypto.Hash.SHA256.Internal
+-- Copyright: (c) 2024 Jared Tobin
+-- License: MIT
+-- Maintainer: Jared Tobin <jared@ppad.tech>
+--
+-- SHA-256 internals.
+
+module Crypto.Hash.SHA256.Internal (
+ Block(..)
+ , pattern B
+ , Registers(..)
+ , pattern R
+
+ , iv
+ , block_hash
+ , cat
+
+ , word32be
+ , parse_block
+ , unsafe_hash_alg
+ ) where
+
+import qualified Data.ByteString as BS
+import qualified Data.ByteString.Internal as BI
+import qualified Data.ByteString.Unsafe as BU
+import Data.Word (Word8)
+import Foreign.Ptr (Ptr, plusPtr)
+import Foreign.Storable (poke)
+import GHC.Exts (Int#)
+import qualified GHC.Exts as Exts
+import qualified GHC.Word (Word8(..))
+
+-- SHA-256 internals (unboxed types for performance)
+-- https://datatracker.ietf.org/doc/html/rfc6234
+
+newtype Block = Block
+ (# Exts.Word32#, Exts.Word32#, Exts.Word32#, Exts.Word32#
+ , Exts.Word32#, Exts.Word32#, Exts.Word32#, Exts.Word32#
+ , Exts.Word32#, Exts.Word32#, Exts.Word32#, Exts.Word32#
+ , Exts.Word32#, Exts.Word32#, Exts.Word32#, Exts.Word32#
+ #)
+
+pattern B
+ :: Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32#
+ -> Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32#
+ -> Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32#
+ -> Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32#
+ -> Block
+pattern B w00 w01 w02 w03 w04 w05 w06 w07 w08 w09 w10 w11 w12 w13 w14 w15 =
+ Block
+ (# w00, w01, w02, w03
+ , w04, w05, w06, w07
+ , w08, w09, w10, w11
+ , w12, w13, w14, w15
+ #)
+{-# COMPLETE B #-}
+
+newtype Registers = Registers
+ (# Exts.Word32#, Exts.Word32#, Exts.Word32#, Exts.Word32#
+ , Exts.Word32#, Exts.Word32#, Exts.Word32#, Exts.Word32#
+ #)
+
+pattern R
+ :: Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32#
+ -> Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32#
+ -> Registers
+pattern R w00 w01 w02 w03 w04 w05 w06 w07 =
+ Registers
+ (# w00, w01, w02, w03
+ , w04, w05, w06, w07
+ #)
+{-# COMPLETE R #-}
+
+-- given a bytestring and offset, parse word32. length not checked.
+word32be :: BS.ByteString -> Int -> Exts.Word32#
+word32be bs m =
+ let !(GHC.Word.W8# ra) = BU.unsafeIndex bs m
+ !(GHC.Word.W8# rb) = BU.unsafeIndex bs (m + 1)
+ !(GHC.Word.W8# rc) = BU.unsafeIndex bs (m + 2)
+ !(GHC.Word.W8# rd) = BU.unsafeIndex bs (m + 3)
+ !a = Exts.wordToWord32# (Exts.word8ToWord# ra)
+ !b = Exts.wordToWord32# (Exts.word8ToWord# rb)
+ !c = Exts.wordToWord32# (Exts.word8ToWord# rc)
+ !d = Exts.wordToWord32# (Exts.word8ToWord# rd)
+ !sa = Exts.uncheckedShiftLWord32# a 24#
+ !sb = Exts.uncheckedShiftLWord32# b 16#
+ !sc = Exts.uncheckedShiftLWord32# c 08#
+ in sa `Exts.orWord32#` sb `Exts.orWord32#` sc `Exts.orWord32#` d
+{-# INLINE word32be #-}
+
+parse_block :: BS.ByteString -> Int -> Block
+parse_block bs m = B
+ (word32be bs m)
+ (word32be bs (m + 04))
+ (word32be bs (m + 08))
+ (word32be bs (m + 12))
+ (word32be bs (m + 16))
+ (word32be bs (m + 20))
+ (word32be bs (m + 24))
+ (word32be bs (m + 28))
+ (word32be bs (m + 32))
+ (word32be bs (m + 36))
+ (word32be bs (m + 40))
+ (word32be bs (m + 44))
+ (word32be bs (m + 48))
+ (word32be bs (m + 52))
+ (word32be bs (m + 56))
+ (word32be bs (m + 60))
+{-# INLINE parse_block #-}
+
+-- rotate right
+rotr# :: Exts.Word32# -> Int# -> Exts.Word32#
+rotr# x n =
+ Exts.uncheckedShiftRLWord32# x n `Exts.orWord32#`
+ Exts.uncheckedShiftLWord32# x (32# Exts.-# n)
+{-# INLINE rotr# #-}
+
+-- logical right shift
+shr# :: Exts.Word32# -> Int# -> Exts.Word32#
+shr# = Exts.uncheckedShiftRLWord32#
+{-# INLINE shr# #-}
+
+-- ch(x, y, z) = (x & y) ^ (~x & z)
+ch# :: Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32#
+ch# x y z =
+ (x `Exts.andWord32#` y) `Exts.xorWord32#`
+ (Exts.notWord32# x `Exts.andWord32#` z)
+{-# INLINE ch# #-}
+
+-- maj(x, y, z) = (x & (y | z)) | (y & z)
+maj# :: Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32#
+maj# x y z =
+ (x `Exts.andWord32#` (y `Exts.orWord32#` z)) `Exts.orWord32#`
+ (y `Exts.andWord32#` z)
+{-# INLINE maj# #-}
+
+-- big sigma 0: rotr2 ^ rotr13 ^ rotr22
+bsig0# :: Exts.Word32# -> Exts.Word32#
+bsig0# x =
+ rotr# x 2# `Exts.xorWord32#` rotr# x 13# `Exts.xorWord32#` rotr# x 22#
+{-# INLINE bsig0# #-}
+
+-- big sigma 1: rotr6 ^ rotr11 ^ rotr25
+bsig1# :: Exts.Word32# -> Exts.Word32#
+bsig1# x =
+ rotr# x 6# `Exts.xorWord32#` rotr# x 11# `Exts.xorWord32#` rotr# x 25#
+{-# INLINE bsig1# #-}
+
+-- small sigma 0: rotr7 ^ rotr18 ^ shr3
+ssig0# :: Exts.Word32# -> Exts.Word32#
+ssig0# x =
+ rotr# x 7# `Exts.xorWord32#` rotr# x 18# `Exts.xorWord32#` shr# x 3#
+{-# INLINE ssig0# #-}
+
+-- small sigma 1: rotr17 ^ rotr19 ^ shr10
+ssig1# :: Exts.Word32# -> Exts.Word32#
+ssig1# x =
+ rotr# x 17# `Exts.xorWord32#` rotr# x 19# `Exts.xorWord32#` shr# x 10#
+{-# INLINE ssig1# #-}
+
+-- round step
+step#
+ :: Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32#
+ -> Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32#
+ -> Exts.Word32# -> Exts.Word32#
+ -> Registers
+step# a b c d e f g h k w =
+ let !t1 = h
+ `Exts.plusWord32#` bsig1# e
+ `Exts.plusWord32#` ch# e f g
+ `Exts.plusWord32#` k
+ `Exts.plusWord32#` w
+ !t2 = bsig0# a `Exts.plusWord32#` maj# a b c
+ in R (t1 `Exts.plusWord32#` t2) a b c (d `Exts.plusWord32#` t1) e f g
+{-# INLINE step# #-}
+
+-- first 32 bits of the fractional parts of the square roots of the
+-- first eight primes
+iv :: () -> Registers
+iv _ = R (Exts.wordToWord32# 0x6a09e667##)
+ (Exts.wordToWord32# 0xbb67ae85##)
+ (Exts.wordToWord32# 0x3c6ef372##)
+ (Exts.wordToWord32# 0xa54ff53a##)
+ (Exts.wordToWord32# 0x510e527f##)
+ (Exts.wordToWord32# 0x9b05688c##)
+ (Exts.wordToWord32# 0x1f83d9ab##)
+ (Exts.wordToWord32# 0x5be0cd19##)
+
+block_hash :: Registers -> Block -> Registers
+block_hash
+ (R h0 h1 h2 h3 h4 h5 h6 h7)
+ (B b00 b01 b02 b03 b04 b05 b06 b07 b08 b09 b10 b11 b12 b13 b14 b15)
+ =
+ let -- message schedule
+ !w00 = b00; !w01 = b01; !w02 = b02; !w03 = b03
+ !w04 = b04; !w05 = b05; !w06 = b06; !w07 = b07
+ !w08 = b08; !w09 = b09; !w10 = b10; !w11 = b11
+ !w12 = b12; !w13 = b13; !w14 = b14; !w15 = b15
+ !w16 = ssig1# w14 `p` w09 `p` ssig0# w01 `p` w00
+ !w17 = ssig1# w15 `p` w10 `p` ssig0# w02 `p` w01
+ !w18 = ssig1# w16 `p` w11 `p` ssig0# w03 `p` w02
+ !w19 = ssig1# w17 `p` w12 `p` ssig0# w04 `p` w03
+ !w20 = ssig1# w18 `p` w13 `p` ssig0# w05 `p` w04
+ !w21 = ssig1# w19 `p` w14 `p` ssig0# w06 `p` w05
+ !w22 = ssig1# w20 `p` w15 `p` ssig0# w07 `p` w06
+ !w23 = ssig1# w21 `p` w16 `p` ssig0# w08 `p` w07
+ !w24 = ssig1# w22 `p` w17 `p` ssig0# w09 `p` w08
+ !w25 = ssig1# w23 `p` w18 `p` ssig0# w10 `p` w09
+ !w26 = ssig1# w24 `p` w19 `p` ssig0# w11 `p` w10
+ !w27 = ssig1# w25 `p` w20 `p` ssig0# w12 `p` w11
+ !w28 = ssig1# w26 `p` w21 `p` ssig0# w13 `p` w12
+ !w29 = ssig1# w27 `p` w22 `p` ssig0# w14 `p` w13
+ !w30 = ssig1# w28 `p` w23 `p` ssig0# w15 `p` w14
+ !w31 = ssig1# w29 `p` w24 `p` ssig0# w16 `p` w15
+ !w32 = ssig1# w30 `p` w25 `p` ssig0# w17 `p` w16
+ !w33 = ssig1# w31 `p` w26 `p` ssig0# w18 `p` w17
+ !w34 = ssig1# w32 `p` w27 `p` ssig0# w19 `p` w18
+ !w35 = ssig1# w33 `p` w28 `p` ssig0# w20 `p` w19
+ !w36 = ssig1# w34 `p` w29 `p` ssig0# w21 `p` w20
+ !w37 = ssig1# w35 `p` w30 `p` ssig0# w22 `p` w21
+ !w38 = ssig1# w36 `p` w31 `p` ssig0# w23 `p` w22
+ !w39 = ssig1# w37 `p` w32 `p` ssig0# w24 `p` w23
+ !w40 = ssig1# w38 `p` w33 `p` ssig0# w25 `p` w24
+ !w41 = ssig1# w39 `p` w34 `p` ssig0# w26 `p` w25
+ !w42 = ssig1# w40 `p` w35 `p` ssig0# w27 `p` w26
+ !w43 = ssig1# w41 `p` w36 `p` ssig0# w28 `p` w27
+ !w44 = ssig1# w42 `p` w37 `p` ssig0# w29 `p` w28
+ !w45 = ssig1# w43 `p` w38 `p` ssig0# w30 `p` w29
+ !w46 = ssig1# w44 `p` w39 `p` ssig0# w31 `p` w30
+ !w47 = ssig1# w45 `p` w40 `p` ssig0# w32 `p` w31
+ !w48 = ssig1# w46 `p` w41 `p` ssig0# w33 `p` w32
+ !w49 = ssig1# w47 `p` w42 `p` ssig0# w34 `p` w33
+ !w50 = ssig1# w48 `p` w43 `p` ssig0# w35 `p` w34
+ !w51 = ssig1# w49 `p` w44 `p` ssig0# w36 `p` w35
+ !w52 = ssig1# w50 `p` w45 `p` ssig0# w37 `p` w36
+ !w53 = ssig1# w51 `p` w46 `p` ssig0# w38 `p` w37
+ !w54 = ssig1# w52 `p` w47 `p` ssig0# w39 `p` w38
+ !w55 = ssig1# w53 `p` w48 `p` ssig0# w40 `p` w39
+ !w56 = ssig1# w54 `p` w49 `p` ssig0# w41 `p` w40
+ !w57 = ssig1# w55 `p` w50 `p` ssig0# w42 `p` w41
+ !w58 = ssig1# w56 `p` w51 `p` ssig0# w43 `p` w42
+ !w59 = ssig1# w57 `p` w52 `p` ssig0# w44 `p` w43
+ !w60 = ssig1# w58 `p` w53 `p` ssig0# w45 `p` w44
+ !w61 = ssig1# w59 `p` w54 `p` ssig0# w46 `p` w45
+ !w62 = ssig1# w60 `p` w55 `p` ssig0# w47 `p` w46
+ !w63 = ssig1# w61 `p` w56 `p` ssig0# w48 `p` w47
+
+ -- rounds (cube roots of first 64 primes)
+ !(R s00a s00b s00c s00d s00e s00f s00g s00h) =
+ step# h0 h1 h2 h3 h4 h5 h6 h7 (k 0x428a2f98##) w00
+ !(R s01a s01b s01c s01d s01e s01f s01g s01h) =
+ step# s00a s00b s00c s00d s00e s00f s00g s00h (k 0x71374491##) w01
+ !(R s02a s02b s02c s02d s02e s02f s02g s02h) =
+ step# s01a s01b s01c s01d s01e s01f s01g s01h (k 0xb5c0fbcf##) w02
+ !(R s03a s03b s03c s03d s03e s03f s03g s03h) =
+ step# s02a s02b s02c s02d s02e s02f s02g s02h (k 0xe9b5dba5##) w03
+ !(R s04a s04b s04c s04d s04e s04f s04g s04h) =
+ step# s03a s03b s03c s03d s03e s03f s03g s03h (k 0x3956c25b##) w04
+ !(R s05a s05b s05c s05d s05e s05f s05g s05h) =
+ step# s04a s04b s04c s04d s04e s04f s04g s04h (k 0x59f111f1##) w05
+ !(R s06a s06b s06c s06d s06e s06f s06g s06h) =
+ step# s05a s05b s05c s05d s05e s05f s05g s05h (k 0x923f82a4##) w06
+ !(R s07a s07b s07c s07d s07e s07f s07g s07h) =
+ step# s06a s06b s06c s06d s06e s06f s06g s06h (k 0xab1c5ed5##) w07
+ !(R s08a s08b s08c s08d s08e s08f s08g s08h) =
+ step# s07a s07b s07c s07d s07e s07f s07g s07h (k 0xd807aa98##) w08
+ !(R s09a s09b s09c s09d s09e s09f s09g s09h) =
+ step# s08a s08b s08c s08d s08e s08f s08g s08h (k 0x12835b01##) w09
+ !(R s10a s10b s10c s10d s10e s10f s10g s10h) =
+ step# s09a s09b s09c s09d s09e s09f s09g s09h (k 0x243185be##) w10
+ !(R s11a s11b s11c s11d s11e s11f s11g s11h) =
+ step# s10a s10b s10c s10d s10e s10f s10g s10h (k 0x550c7dc3##) w11
+ !(R s12a s12b s12c s12d s12e s12f s12g s12h) =
+ step# s11a s11b s11c s11d s11e s11f s11g s11h (k 0x72be5d74##) w12
+ !(R s13a s13b s13c s13d s13e s13f s13g s13h) =
+ step# s12a s12b s12c s12d s12e s12f s12g s12h (k 0x80deb1fe##) w13
+ !(R s14a s14b s14c s14d s14e s14f s14g s14h) =
+ step# s13a s13b s13c s13d s13e s13f s13g s13h (k 0x9bdc06a7##) w14
+ !(R s15a s15b s15c s15d s15e s15f s15g s15h) =
+ step# s14a s14b s14c s14d s14e s14f s14g s14h (k 0xc19bf174##) w15
+ !(R s16a s16b s16c s16d s16e s16f s16g s16h) =
+ step# s15a s15b s15c s15d s15e s15f s15g s15h (k 0xe49b69c1##) w16
+ !(R s17a s17b s17c s17d s17e s17f s17g s17h) =
+ step# s16a s16b s16c s16d s16e s16f s16g s16h (k 0xefbe4786##) w17
+ !(R s18a s18b s18c s18d s18e s18f s18g s18h) =
+ step# s17a s17b s17c s17d s17e s17f s17g s17h (k 0x0fc19dc6##) w18
+ !(R s19a s19b s19c s19d s19e s19f s19g s19h) =
+ step# s18a s18b s18c s18d s18e s18f s18g s18h (k 0x240ca1cc##) w19
+ !(R s20a s20b s20c s20d s20e s20f s20g s20h) =
+ step# s19a s19b s19c s19d s19e s19f s19g s19h (k 0x2de92c6f##) w20
+ !(R s21a s21b s21c s21d s21e s21f s21g s21h) =
+ step# s20a s20b s20c s20d s20e s20f s20g s20h (k 0x4a7484aa##) w21
+ !(R s22a s22b s22c s22d s22e s22f s22g s22h) =
+ step# s21a s21b s21c s21d s21e s21f s21g s21h (k 0x5cb0a9dc##) w22
+ !(R s23a s23b s23c s23d s23e s23f s23g s23h) =
+ step# s22a s22b s22c s22d s22e s22f s22g s22h (k 0x76f988da##) w23
+ !(R s24a s24b s24c s24d s24e s24f s24g s24h) =
+ step# s23a s23b s23c s23d s23e s23f s23g s23h (k 0x983e5152##) w24
+ !(R s25a s25b s25c s25d s25e s25f s25g s25h) =
+ step# s24a s24b s24c s24d s24e s24f s24g s24h (k 0xa831c66d##) w25
+ !(R s26a s26b s26c s26d s26e s26f s26g s26h) =
+ step# s25a s25b s25c s25d s25e s25f s25g s25h (k 0xb00327c8##) w26
+ !(R s27a s27b s27c s27d s27e s27f s27g s27h) =
+ step# s26a s26b s26c s26d s26e s26f s26g s26h (k 0xbf597fc7##) w27
+ !(R s28a s28b s28c s28d s28e s28f s28g s28h) =
+ step# s27a s27b s27c s27d s27e s27f s27g s27h (k 0xc6e00bf3##) w28
+ !(R s29a s29b s29c s29d s29e s29f s29g s29h) =
+ step# s28a s28b s28c s28d s28e s28f s28g s28h (k 0xd5a79147##) w29
+ !(R s30a s30b s30c s30d s30e s30f s30g s30h) =
+ step# s29a s29b s29c s29d s29e s29f s29g s29h (k 0x06ca6351##) w30
+ !(R s31a s31b s31c s31d s31e s31f s31g s31h) =
+ step# s30a s30b s30c s30d s30e s30f s30g s30h (k 0x14292967##) w31
+ !(R s32a s32b s32c s32d s32e s32f s32g s32h) =
+ step# s31a s31b s31c s31d s31e s31f s31g s31h (k 0x27b70a85##) w32
+ !(R s33a s33b s33c s33d s33e s33f s33g s33h) =
+ step# s32a s32b s32c s32d s32e s32f s32g s32h (k 0x2e1b2138##) w33
+ !(R s34a s34b s34c s34d s34e s34f s34g s34h) =
+ step# s33a s33b s33c s33d s33e s33f s33g s33h (k 0x4d2c6dfc##) w34
+ !(R s35a s35b s35c s35d s35e s35f s35g s35h) =
+ step# s34a s34b s34c s34d s34e s34f s34g s34h (k 0x53380d13##) w35
+ !(R s36a s36b s36c s36d s36e s36f s36g s36h) =
+ step# s35a s35b s35c s35d s35e s35f s35g s35h (k 0x650a7354##) w36
+ !(R s37a s37b s37c s37d s37e s37f s37g s37h) =
+ step# s36a s36b s36c s36d s36e s36f s36g s36h (k 0x766a0abb##) w37
+ !(R s38a s38b s38c s38d s38e s38f s38g s38h) =
+ step# s37a s37b s37c s37d s37e s37f s37g s37h (k 0x81c2c92e##) w38
+ !(R s39a s39b s39c s39d s39e s39f s39g s39h) =
+ step# s38a s38b s38c s38d s38e s38f s38g s38h (k 0x92722c85##) w39
+ !(R s40a s40b s40c s40d s40e s40f s40g s40h) =
+ step# s39a s39b s39c s39d s39e s39f s39g s39h (k 0xa2bfe8a1##) w40
+ !(R s41a s41b s41c s41d s41e s41f s41g s41h) =
+ step# s40a s40b s40c s40d s40e s40f s40g s40h (k 0xa81a664b##) w41
+ !(R s42a s42b s42c s42d s42e s42f s42g s42h) =
+ step# s41a s41b s41c s41d s41e s41f s41g s41h (k 0xc24b8b70##) w42
+ !(R s43a s43b s43c s43d s43e s43f s43g s43h) =
+ step# s42a s42b s42c s42d s42e s42f s42g s42h (k 0xc76c51a3##) w43
+ !(R s44a s44b s44c s44d s44e s44f s44g s44h) =
+ step# s43a s43b s43c s43d s43e s43f s43g s43h (k 0xd192e819##) w44
+ !(R s45a s45b s45c s45d s45e s45f s45g s45h) =
+ step# s44a s44b s44c s44d s44e s44f s44g s44h (k 0xd6990624##) w45
+ !(R s46a s46b s46c s46d s46e s46f s46g s46h) =
+ step# s45a s45b s45c s45d s45e s45f s45g s45h (k 0xf40e3585##) w46
+ !(R s47a s47b s47c s47d s47e s47f s47g s47h) =
+ step# s46a s46b s46c s46d s46e s46f s46g s46h (k 0x106aa070##) w47
+ !(R s48a s48b s48c s48d s48e s48f s48g s48h) =
+ step# s47a s47b s47c s47d s47e s47f s47g s47h (k 0x19a4c116##) w48
+ !(R s49a s49b s49c s49d s49e s49f s49g s49h) =
+ step# s48a s48b s48c s48d s48e s48f s48g s48h (k 0x1e376c08##) w49
+ !(R s50a s50b s50c s50d s50e s50f s50g s50h) =
+ step# s49a s49b s49c s49d s49e s49f s49g s49h (k 0x2748774c##) w50
+ !(R s51a s51b s51c s51d s51e s51f s51g s51h) =
+ step# s50a s50b s50c s50d s50e s50f s50g s50h (k 0x34b0bcb5##) w51
+ !(R s52a s52b s52c s52d s52e s52f s52g s52h) =
+ step# s51a s51b s51c s51d s51e s51f s51g s51h (k 0x391c0cb3##) w52
+ !(R s53a s53b s53c s53d s53e s53f s53g s53h) =
+ step# s52a s52b s52c s52d s52e s52f s52g s52h (k 0x4ed8aa4a##) w53
+ !(R s54a s54b s54c s54d s54e s54f s54g s54h) =
+ step# s53a s53b s53c s53d s53e s53f s53g s53h (k 0x5b9cca4f##) w54
+ !(R s55a s55b s55c s55d s55e s55f s55g s55h) =
+ step# s54a s54b s54c s54d s54e s54f s54g s54h (k 0x682e6ff3##) w55
+ !(R s56a s56b s56c s56d s56e s56f s56g s56h) =
+ step# s55a s55b s55c s55d s55e s55f s55g s55h (k 0x748f82ee##) w56
+ !(R s57a s57b s57c s57d s57e s57f s57g s57h) =
+ step# s56a s56b s56c s56d s56e s56f s56g s56h (k 0x78a5636f##) w57
+ !(R s58a s58b s58c s58d s58e s58f s58g s58h) =
+ step# s57a s57b s57c s57d s57e s57f s57g s57h (k 0x84c87814##) w58
+ !(R s59a s59b s59c s59d s59e s59f s59g s59h) =
+ step# s58a s58b s58c s58d s58e s58f s58g s58h (k 0x8cc70208##) w59
+ !(R s60a s60b s60c s60d s60e s60f s60g s60h) =
+ step# s59a s59b s59c s59d s59e s59f s59g s59h (k 0x90befffa##) w60
+ !(R s61a s61b s61c s61d s61e s61f s61g s61h) =
+ step# s60a s60b s60c s60d s60e s60f s60g s60h (k 0xa4506ceb##) w61
+ !(R s62a s62b s62c s62d s62e s62f s62g s62h) =
+ step# s61a s61b s61c s61d s61e s61f s61g s61h (k 0xbef9a3f7##) w62
+ !(R s63a s63b s63c s63d s63e s63f s63g s63h) =
+ step# s62a s62b s62c s62d s62e s62f s62g s62h (k 0xc67178f2##) w63
+ in R (h0 `p` s63a) (h1 `p` s63b) (h2 `p` s63c) (h3 `p` s63d)
+ (h4 `p` s63e) (h5 `p` s63f) (h6 `p` s63g) (h7 `p` s63h)
+ where
+ p = Exts.plusWord32#
+ {-# INLINE p #-}
+ k :: Exts.Word# -> Exts.Word32#
+ k = Exts.wordToWord32#
+ {-# INLINE k #-}
+
+-- RFC 6234 6.2 block pipeline
+--
+-- invariant:
+-- the input bytestring is exactly 512 bits in length
+unsafe_hash_alg :: Registers -> BS.ByteString -> Registers
+unsafe_hash_alg rs bs = block_hash rs (parse_block bs 0)
+
+-- register concatenation
+cat :: Registers -> BS.ByteString
+cat (R h0 h1 h2 h3 h4 h5 h6 h7) = BI.unsafeCreate 32 $ \ptr -> do
+ poke32be ptr 0 h0
+ poke32be ptr 4 h1
+ poke32be ptr 8 h2
+ poke32be ptr 12 h3
+ poke32be ptr 16 h4
+ poke32be ptr 20 h5
+ poke32be ptr 24 h6
+ poke32be ptr 28 h7
+ where
+ poke32be :: Ptr Word8 -> Int -> Exts.Word32# -> IO ()
+ poke32be p off w = do
+ poke (p `plusPtr` off) (byte w 24#)
+ poke (p `plusPtr` (off + 1)) (byte w 16#)
+ poke (p `plusPtr` (off + 2)) (byte w 8#)
+ poke (p `plusPtr` (off + 3)) (byte w 0#)
+
+ byte :: Exts.Word32# -> Int# -> Word8
+ byte w n = GHC.Word.W8# (Exts.wordToWord8#
+ (Exts.word32ToWord# (Exts.uncheckedShiftRLWord32# w n)))
diff --git a/lib/Crypto/Hash/SHA256/Lazy.hs b/lib/Crypto/Hash/SHA256/Lazy.hs
@@ -0,0 +1,181 @@
+{-# OPTIONS_GHC -funbox-small-strict-fields #-}
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE ViewPatterns #-}
+
+-- |
+-- Module: Crypto.Hash.SHA256.Lazy
+-- Copyright: (c) 2024 Jared Tobin
+-- License: MIT
+-- Maintainer: Jared Tobin <jared@ppad.tech>
+--
+-- Pure SHA-256 and HMAC-SHA256 implementations for lazy ByteStrings,
+-- as specified by RFC's
+-- [6234](https://datatracker.ietf.org/doc/html/rfc6234) and
+-- [2104](https://datatracker.ietf.org/doc/html/rfc2104).
+
+module Crypto.Hash.SHA256.Lazy (
+ -- * SHA-256 message digest functions
+ hash_lazy
+
+ -- * SHA256-based MAC functions
+ , hmac_lazy
+ ) where
+
+import qualified Data.Bits as B
+import qualified Data.ByteString as BS
+import qualified Data.ByteString.Builder as BSB
+import qualified Data.ByteString.Builder.Extra as BE
+import qualified Data.ByteString.Internal as BI
+import qualified Data.ByteString.Lazy as BL
+import qualified Data.ByteString.Lazy.Internal as BLI
+import Data.Word (Word64)
+import Foreign.ForeignPtr (plusForeignPtr)
+import Crypto.Hash.SHA256.Internal
+
+-- preliminary utils
+
+-- keystroke saver
+fi :: (Integral a, Num b) => a -> b
+fi = fromIntegral
+{-# INLINE fi #-}
+
+-- utility types for more efficient ByteString management
+
+data SSPair = SSPair
+ {-# UNPACK #-} !BS.ByteString
+ {-# UNPACK #-} !BS.ByteString
+
+data SLPair = SLPair {-# UNPACK #-} !BS.ByteString !BL.ByteString
+
+-- unsafe version of splitAt that does no bounds checking
+--
+-- invariant:
+-- 0 <= n <= l
+unsafe_splitAt :: Int -> BS.ByteString -> SSPair
+unsafe_splitAt n (BI.BS x l) =
+ SSPair (BI.BS x n) (BI.BS (plusForeignPtr x n) (l - n))
+
+-- variant of Data.ByteString.Lazy.splitAt that returns the initial
+-- component as a strict, unboxed ByteString
+splitAt64 :: BL.ByteString -> SLPair
+splitAt64 = splitAt' (64 :: Int) where
+ splitAt' _ BLI.Empty = SLPair mempty BLI.Empty
+ splitAt' n (BLI.Chunk c@(BI.PS _ _ l) cs) =
+ if n < l
+ then
+ -- n < BS.length c, so unsafe_splitAt is safe
+ let !(SSPair c0 c1) = unsafe_splitAt n c
+ in SLPair c0 (BLI.Chunk c1 cs)
+ else
+ let SLPair cs' cs'' = splitAt' (n - l) cs
+ in SLPair (c <> cs') cs''
+
+-- builder realization strategies
+
+to_strict :: BSB.Builder -> BS.ByteString
+to_strict = BL.toStrict . BSB.toLazyByteString
+
+-- message padding and parsing
+-- https://datatracker.ietf.org/doc/html/rfc6234#section-4.1
+
+-- k such that (l + 1 + k) mod 64 = 56
+sol :: Word64 -> Word64
+sol l =
+ let r = 56 - fi l `rem` 64 - 1 :: Integer -- fi prevents underflow
+ in fi (if r < 0 then r + 64 else r)
+
+-- RFC 6234 4.1 (lazy)
+pad_lazy :: BL.ByteString -> BL.ByteString
+pad_lazy (BL.toChunks -> m) = BL.fromChunks (walk 0 m) where
+ walk !l bs = case bs of
+ (c:cs) -> c : walk (l + fi (BS.length c)) cs
+ [] -> padding l (sol l) (BSB.word8 0x80)
+
+ padding l k bs
+ | k == 0 =
+ pure
+ . to_strict
+ -- more efficient for small builder
+ $ bs <> BSB.word64BE (l * 8)
+ | otherwise =
+ let nacc = bs <> BSB.word8 0x00
+ in padding l (pred k) nacc
+
+-- | Compute a condensed representation of a lazy bytestring via
+-- SHA-256.
+--
+-- The 256-bit output digest is returned as a strict bytestring.
+--
+-- >>> hash_lazy "lazy bytestring input"
+-- "<strict 256-bit message digest>"
+hash_lazy :: BL.ByteString -> BS.ByteString
+hash_lazy bl = cat (go (iv ()) (pad_lazy bl)) where
+ go :: Registers -> BL.ByteString -> Registers
+ go !acc bs
+ | BL.null bs = acc
+ | otherwise = case splitAt64 bs of
+ SLPair c r -> go (unsafe_hash_alg acc c) r
+
+-- HMAC -----------------------------------------------------------------------
+-- https://datatracker.ietf.org/doc/html/rfc2104#section-2
+
+data KeyAndLen = KeyAndLen
+ {-# UNPACK #-} !BS.ByteString
+ {-# UNPACK #-} !Int
+
+-- | Produce a message authentication code for a lazy bytestring, based
+-- on the provided (strict, bytestring) key, via SHA-256.
+--
+-- The 256-bit MAC is returned as a strict bytestring.
+--
+-- Per RFC 2104, the key /should/ be a minimum of 32 bytes long. Keys
+-- exceeding 64 bytes in length will first be hashed (via SHA-256).
+--
+-- >>> hmac_lazy "strict bytestring key" "lazy bytestring input"
+-- "<strict 256-bit MAC>"
+hmac_lazy
+ :: BS.ByteString -- ^ key
+ -> BL.ByteString -- ^ text
+ -> BS.ByteString
+hmac_lazy mk@(BI.PS _ _ l) text =
+ let step1 = k <> BS.replicate (64 - lk) 0x00
+ step2 = BS.map (B.xor 0x36) step1
+ step3 = BL.fromStrict step2 <> text
+ step4 = hash_lazy step3
+ step5 = BS.map (B.xor 0x5C) step1
+ step6 = step5 <> step4
+ in hash step6
+ where
+ hash bs = cat (go (iv ()) (pad bs)) where
+ go :: Registers -> BS.ByteString -> Registers
+ go !acc b
+ | BS.null b = acc
+ | otherwise = case unsafe_splitAt 64 b of
+ SSPair c r -> go (unsafe_hash_alg acc c) r
+
+ pad m@(BI.PS _ _ (fi -> len))
+ | len < 128 = to_strict_small padded
+ | otherwise = to_strict padded
+ where
+ padded = BSB.byteString m
+ <> fill (sol len) (BSB.word8 0x80)
+ <> BSB.word64BE (len * 8)
+
+ to_strict_small = BL.toStrict . BE.toLazyByteStringWith
+ (BE.safeStrategy 128 BE.smallChunkSize) mempty
+
+ fill j !acc
+ | j `rem` 8 == 0 = loop64 j acc
+ | otherwise = loop8 j acc
+
+ loop64 j !acc
+ | j == 0 = acc
+ | otherwise = loop64 (j - 8) (acc <> BSB.word64BE 0x00)
+
+ loop8 j !acc
+ | j == 0 = acc
+ | otherwise = loop8 (pred j) (acc <> BSB.word8 0x00)
+
+ !(KeyAndLen k lk)
+ | l > 64 = KeyAndLen (hash mk) 32
+ | otherwise = KeyAndLen mk l
diff --git a/lib/Crypto/Hash/SHA256D.hs b/lib/Crypto/Hash/SHA256D.hs
@@ -0,0 +1,534 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE CApiFFI #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE PatternSynonyms #-}
+{-# LANGUAGE UnboxedTuples #-}
+{-# LANGUAGE UnliftedNewtypes #-}
+
+module Crypto.Hash.SHA256D where
+
+import qualified Data.Bits as B
+import qualified Data.ByteString as BS
+import qualified Data.ByteString.Internal as BI
+import qualified Data.ByteString.Unsafe as BU
+import Data.Word (Word8, Word32, Word64)
+import Foreign.Marshal.Alloc (allocaBytes)
+import Foreign.Marshal.Utils (copyBytes, fillBytes)
+import Foreign.Ptr (Ptr, plusPtr)
+import Foreign.Storable (poke, peek)
+import GHC.Exts (Int#)
+import qualified GHC.Exts as Exts
+import qualified GHC.Word (Word8(..))
+import System.IO.Unsafe (unsafePerformIO)
+
+fi :: (Integral a, Num b) => a -> b
+fi = fromIntegral
+{-# INLINE fi #-}
+
+foreign import ccall unsafe "sha256_block_arm"
+ c_sha256_block :: Ptr Word32 -> Ptr Word8 -> IO ()
+
+foreign import ccall unsafe "sha256_arm_available"
+ c_sha256_arm_available :: IO Int
+
+newtype Block = Block
+ (# Exts.Word32#, Exts.Word32#, Exts.Word32#, Exts.Word32#
+ , Exts.Word32#, Exts.Word32#, Exts.Word32#, Exts.Word32#
+ , Exts.Word32#, Exts.Word32#, Exts.Word32#, Exts.Word32#
+ , Exts.Word32#, Exts.Word32#, Exts.Word32#, Exts.Word32#
+ #)
+
+pattern B
+ :: Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32#
+ -> Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32#
+ -> Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32#
+ -> Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32#
+ -> Block
+pattern B w00 w01 w02 w03 w04 w05 w06 w07 w08 w09 w10 w11 w12 w13 w14 w15 =
+ Block
+ (# w00, w01, w02, w03
+ , w04, w05, w06, w07
+ , w08, w09, w10, w11
+ , w12, w13, w14, w15
+ #)
+{-# COMPLETE B #-}
+
+newtype Registers = Registers
+ (# Exts.Word32#, Exts.Word32#, Exts.Word32#, Exts.Word32#
+ , Exts.Word32#, Exts.Word32#, Exts.Word32#, Exts.Word32#
+ #)
+
+pattern R
+ :: Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32#
+ -> Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32#
+ -> Registers
+pattern R w00 w01 w02 w03 w04 w05 w06 w07 =
+ Registers
+ (# w00, w01, w02, w03
+ , w04, w05, w06, w07
+ #)
+{-# COMPLETE R #-}
+
+-- given a bytestring and offset, parse word32. length not checked.
+word32be :: BS.ByteString -> Int -> Exts.Word32#
+word32be bs m =
+ let !(GHC.Word.W8# ra) = BU.unsafeIndex bs m
+ !(GHC.Word.W8# rb) = BU.unsafeIndex bs (m + 1)
+ !(GHC.Word.W8# rc) = BU.unsafeIndex bs (m + 2)
+ !(GHC.Word.W8# rd) = BU.unsafeIndex bs (m + 3)
+ !a = Exts.wordToWord32# (Exts.word8ToWord# ra)
+ !b = Exts.wordToWord32# (Exts.word8ToWord# rb)
+ !c = Exts.wordToWord32# (Exts.word8ToWord# rc)
+ !d = Exts.wordToWord32# (Exts.word8ToWord# rd)
+ !sa = Exts.uncheckedShiftLWord32# a 24#
+ !sb = Exts.uncheckedShiftLWord32# b 16#
+ !sc = Exts.uncheckedShiftLWord32# c 08#
+ in sa `Exts.orWord32#` sb `Exts.orWord32#` sc `Exts.orWord32#` d
+{-# INLINE word32be #-}
+
+block :: BS.ByteString -> Int -> Block
+block bs m = B
+ (word32be bs m)
+ (word32be bs (m + 04))
+ (word32be bs (m + 08))
+ (word32be bs (m + 12))
+ (word32be bs (m + 16))
+ (word32be bs (m + 20))
+ (word32be bs (m + 24))
+ (word32be bs (m + 28))
+ (word32be bs (m + 32))
+ (word32be bs (m + 36))
+ (word32be bs (m + 40))
+ (word32be bs (m + 44))
+ (word32be bs (m + 48))
+ (word32be bs (m + 52))
+ (word32be bs (m + 56))
+ (word32be bs (m + 60))
+{-# INLINE block #-}
+
+-- rotate right
+rotr# :: Exts.Word32# -> Int# -> Exts.Word32#
+rotr# x n =
+ Exts.uncheckedShiftRLWord32# x n `Exts.orWord32#`
+ Exts.uncheckedShiftLWord32# x (32# Exts.-# n)
+{-# INLINE rotr# #-}
+
+-- logical right shift
+shr# :: Exts.Word32# -> Int# -> Exts.Word32#
+shr# = Exts.uncheckedShiftRLWord32#
+{-# INLINE shr# #-}
+
+-- ch(x, y, z) = (x & y) ^ (~x & z)
+ch# :: Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32#
+ch# x y z =
+ (x `Exts.andWord32#` y) `Exts.xorWord32#`
+ (Exts.notWord32# x `Exts.andWord32#` z)
+{-# INLINE ch# #-}
+
+-- maj(x, y, z) = (x & (y | z)) | (y & z)
+maj# :: Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32#
+maj# x y z =
+ (x `Exts.andWord32#` (y `Exts.orWord32#` z)) `Exts.orWord32#`
+ (y `Exts.andWord32#` z)
+{-# INLINE maj# #-}
+
+-- big sigma 0: rotr2 ^ rotr13 ^ rotr22
+bsig0# :: Exts.Word32# -> Exts.Word32#
+bsig0# x =
+ rotr# x 2# `Exts.xorWord32#` rotr# x 13# `Exts.xorWord32#` rotr# x 22#
+{-# INLINE bsig0# #-}
+
+-- big sigma 1: rotr6 ^ rotr11 ^ rotr25
+bsig1# :: Exts.Word32# -> Exts.Word32#
+bsig1# x =
+ rotr# x 6# `Exts.xorWord32#` rotr# x 11# `Exts.xorWord32#` rotr# x 25#
+{-# INLINE bsig1# #-}
+
+-- small sigma 0: rotr7 ^ rotr18 ^ shr3
+ssig0# :: Exts.Word32# -> Exts.Word32#
+ssig0# x =
+ rotr# x 7# `Exts.xorWord32#` rotr# x 18# `Exts.xorWord32#` shr# x 3#
+{-# INLINE ssig0# #-}
+
+-- small sigma 1: rotr17 ^ rotr19 ^ shr10
+ssig1# :: Exts.Word32# -> Exts.Word32#
+ssig1# x =
+ rotr# x 17# `Exts.xorWord32#` rotr# x 19# `Exts.xorWord32#` shr# x 10#
+{-# INLINE ssig1# #-}
+
+-- round step
+step#
+ :: Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32#
+ -> Exts.Word32# -> Exts.Word32# -> Exts.Word32# -> Exts.Word32#
+ -> Exts.Word32# -> Exts.Word32#
+ -> Registers
+step# a b c d e f g h k w =
+ let !t1 = h
+ `Exts.plusWord32#` bsig1# e
+ `Exts.plusWord32#` ch# e f g
+ `Exts.plusWord32#` k
+ `Exts.plusWord32#` w
+ !t2 = bsig0# a `Exts.plusWord32#` maj# a b c
+ in R (t1 `Exts.plusWord32#` t2) a b c (d `Exts.plusWord32#` t1) e f g
+{-# INLINE step# #-}
+
+block_hash :: Registers -> Block -> Registers
+block_hash
+ (R h0 h1 h2 h3 h4 h5 h6 h7)
+ (B b00 b01 b02 b03 b04 b05 b06 b07 b08 b09 b10 b11 b12 b13 b14 b15)
+ =
+ let -- message schedule
+ !w00 = b00; !w01 = b01; !w02 = b02; !w03 = b03
+ !w04 = b04; !w05 = b05; !w06 = b06; !w07 = b07
+ !w08 = b08; !w09 = b09; !w10 = b10; !w11 = b11
+ !w12 = b12; !w13 = b13; !w14 = b14; !w15 = b15
+ !w16 = ssig1# w14 `p` w09 `p` ssig0# w01 `p` w00
+ !w17 = ssig1# w15 `p` w10 `p` ssig0# w02 `p` w01
+ !w18 = ssig1# w16 `p` w11 `p` ssig0# w03 `p` w02
+ !w19 = ssig1# w17 `p` w12 `p` ssig0# w04 `p` w03
+ !w20 = ssig1# w18 `p` w13 `p` ssig0# w05 `p` w04
+ !w21 = ssig1# w19 `p` w14 `p` ssig0# w06 `p` w05
+ !w22 = ssig1# w20 `p` w15 `p` ssig0# w07 `p` w06
+ !w23 = ssig1# w21 `p` w16 `p` ssig0# w08 `p` w07
+ !w24 = ssig1# w22 `p` w17 `p` ssig0# w09 `p` w08
+ !w25 = ssig1# w23 `p` w18 `p` ssig0# w10 `p` w09
+ !w26 = ssig1# w24 `p` w19 `p` ssig0# w11 `p` w10
+ !w27 = ssig1# w25 `p` w20 `p` ssig0# w12 `p` w11
+ !w28 = ssig1# w26 `p` w21 `p` ssig0# w13 `p` w12
+ !w29 = ssig1# w27 `p` w22 `p` ssig0# w14 `p` w13
+ !w30 = ssig1# w28 `p` w23 `p` ssig0# w15 `p` w14
+ !w31 = ssig1# w29 `p` w24 `p` ssig0# w16 `p` w15
+ !w32 = ssig1# w30 `p` w25 `p` ssig0# w17 `p` w16
+ !w33 = ssig1# w31 `p` w26 `p` ssig0# w18 `p` w17
+ !w34 = ssig1# w32 `p` w27 `p` ssig0# w19 `p` w18
+ !w35 = ssig1# w33 `p` w28 `p` ssig0# w20 `p` w19
+ !w36 = ssig1# w34 `p` w29 `p` ssig0# w21 `p` w20
+ !w37 = ssig1# w35 `p` w30 `p` ssig0# w22 `p` w21
+ !w38 = ssig1# w36 `p` w31 `p` ssig0# w23 `p` w22
+ !w39 = ssig1# w37 `p` w32 `p` ssig0# w24 `p` w23
+ !w40 = ssig1# w38 `p` w33 `p` ssig0# w25 `p` w24
+ !w41 = ssig1# w39 `p` w34 `p` ssig0# w26 `p` w25
+ !w42 = ssig1# w40 `p` w35 `p` ssig0# w27 `p` w26
+ !w43 = ssig1# w41 `p` w36 `p` ssig0# w28 `p` w27
+ !w44 = ssig1# w42 `p` w37 `p` ssig0# w29 `p` w28
+ !w45 = ssig1# w43 `p` w38 `p` ssig0# w30 `p` w29
+ !w46 = ssig1# w44 `p` w39 `p` ssig0# w31 `p` w30
+ !w47 = ssig1# w45 `p` w40 `p` ssig0# w32 `p` w31
+ !w48 = ssig1# w46 `p` w41 `p` ssig0# w33 `p` w32
+ !w49 = ssig1# w47 `p` w42 `p` ssig0# w34 `p` w33
+ !w50 = ssig1# w48 `p` w43 `p` ssig0# w35 `p` w34
+ !w51 = ssig1# w49 `p` w44 `p` ssig0# w36 `p` w35
+ !w52 = ssig1# w50 `p` w45 `p` ssig0# w37 `p` w36
+ !w53 = ssig1# w51 `p` w46 `p` ssig0# w38 `p` w37
+ !w54 = ssig1# w52 `p` w47 `p` ssig0# w39 `p` w38
+ !w55 = ssig1# w53 `p` w48 `p` ssig0# w40 `p` w39
+ !w56 = ssig1# w54 `p` w49 `p` ssig0# w41 `p` w40
+ !w57 = ssig1# w55 `p` w50 `p` ssig0# w42 `p` w41
+ !w58 = ssig1# w56 `p` w51 `p` ssig0# w43 `p` w42
+ !w59 = ssig1# w57 `p` w52 `p` ssig0# w44 `p` w43
+ !w60 = ssig1# w58 `p` w53 `p` ssig0# w45 `p` w44
+ !w61 = ssig1# w59 `p` w54 `p` ssig0# w46 `p` w45
+ !w62 = ssig1# w60 `p` w55 `p` ssig0# w47 `p` w46
+ !w63 = ssig1# w61 `p` w56 `p` ssig0# w48 `p` w47
+
+ -- rounds (cube roots of first 64 primes)
+ !(R s00a s00b s00c s00d s00e s00f s00g s00h) =
+ step# h0 h1 h2 h3 h4 h5 h6 h7 (k 0x428a2f98##) w00
+ !(R s01a s01b s01c s01d s01e s01f s01g s01h) =
+ step# s00a s00b s00c s00d s00e s00f s00g s00h (k 0x71374491##) w01
+ !(R s02a s02b s02c s02d s02e s02f s02g s02h) =
+ step# s01a s01b s01c s01d s01e s01f s01g s01h (k 0xb5c0fbcf##) w02
+ !(R s03a s03b s03c s03d s03e s03f s03g s03h) =
+ step# s02a s02b s02c s02d s02e s02f s02g s02h (k 0xe9b5dba5##) w03
+ !(R s04a s04b s04c s04d s04e s04f s04g s04h) =
+ step# s03a s03b s03c s03d s03e s03f s03g s03h (k 0x3956c25b##) w04
+ !(R s05a s05b s05c s05d s05e s05f s05g s05h) =
+ step# s04a s04b s04c s04d s04e s04f s04g s04h (k 0x59f111f1##) w05
+ !(R s06a s06b s06c s06d s06e s06f s06g s06h) =
+ step# s05a s05b s05c s05d s05e s05f s05g s05h (k 0x923f82a4##) w06
+ !(R s07a s07b s07c s07d s07e s07f s07g s07h) =
+ step# s06a s06b s06c s06d s06e s06f s06g s06h (k 0xab1c5ed5##) w07
+ !(R s08a s08b s08c s08d s08e s08f s08g s08h) =
+ step# s07a s07b s07c s07d s07e s07f s07g s07h (k 0xd807aa98##) w08
+ !(R s09a s09b s09c s09d s09e s09f s09g s09h) =
+ step# s08a s08b s08c s08d s08e s08f s08g s08h (k 0x12835b01##) w09
+ !(R s10a s10b s10c s10d s10e s10f s10g s10h) =
+ step# s09a s09b s09c s09d s09e s09f s09g s09h (k 0x243185be##) w10
+ !(R s11a s11b s11c s11d s11e s11f s11g s11h) =
+ step# s10a s10b s10c s10d s10e s10f s10g s10h (k 0x550c7dc3##) w11
+ !(R s12a s12b s12c s12d s12e s12f s12g s12h) =
+ step# s11a s11b s11c s11d s11e s11f s11g s11h (k 0x72be5d74##) w12
+ !(R s13a s13b s13c s13d s13e s13f s13g s13h) =
+ step# s12a s12b s12c s12d s12e s12f s12g s12h (k 0x80deb1fe##) w13
+ !(R s14a s14b s14c s14d s14e s14f s14g s14h) =
+ step# s13a s13b s13c s13d s13e s13f s13g s13h (k 0x9bdc06a7##) w14
+ !(R s15a s15b s15c s15d s15e s15f s15g s15h) =
+ step# s14a s14b s14c s14d s14e s14f s14g s14h (k 0xc19bf174##) w15
+ !(R s16a s16b s16c s16d s16e s16f s16g s16h) =
+ step# s15a s15b s15c s15d s15e s15f s15g s15h (k 0xe49b69c1##) w16
+ !(R s17a s17b s17c s17d s17e s17f s17g s17h) =
+ step# s16a s16b s16c s16d s16e s16f s16g s16h (k 0xefbe4786##) w17
+ !(R s18a s18b s18c s18d s18e s18f s18g s18h) =
+ step# s17a s17b s17c s17d s17e s17f s17g s17h (k 0x0fc19dc6##) w18
+ !(R s19a s19b s19c s19d s19e s19f s19g s19h) =
+ step# s18a s18b s18c s18d s18e s18f s18g s18h (k 0x240ca1cc##) w19
+ !(R s20a s20b s20c s20d s20e s20f s20g s20h) =
+ step# s19a s19b s19c s19d s19e s19f s19g s19h (k 0x2de92c6f##) w20
+ !(R s21a s21b s21c s21d s21e s21f s21g s21h) =
+ step# s20a s20b s20c s20d s20e s20f s20g s20h (k 0x4a7484aa##) w21
+ !(R s22a s22b s22c s22d s22e s22f s22g s22h) =
+ step# s21a s21b s21c s21d s21e s21f s21g s21h (k 0x5cb0a9dc##) w22
+ !(R s23a s23b s23c s23d s23e s23f s23g s23h) =
+ step# s22a s22b s22c s22d s22e s22f s22g s22h (k 0x76f988da##) w23
+ !(R s24a s24b s24c s24d s24e s24f s24g s24h) =
+ step# s23a s23b s23c s23d s23e s23f s23g s23h (k 0x983e5152##) w24
+ !(R s25a s25b s25c s25d s25e s25f s25g s25h) =
+ step# s24a s24b s24c s24d s24e s24f s24g s24h (k 0xa831c66d##) w25
+ !(R s26a s26b s26c s26d s26e s26f s26g s26h) =
+ step# s25a s25b s25c s25d s25e s25f s25g s25h (k 0xb00327c8##) w26
+ !(R s27a s27b s27c s27d s27e s27f s27g s27h) =
+ step# s26a s26b s26c s26d s26e s26f s26g s26h (k 0xbf597fc7##) w27
+ !(R s28a s28b s28c s28d s28e s28f s28g s28h) =
+ step# s27a s27b s27c s27d s27e s27f s27g s27h (k 0xc6e00bf3##) w28
+ !(R s29a s29b s29c s29d s29e s29f s29g s29h) =
+ step# s28a s28b s28c s28d s28e s28f s28g s28h (k 0xd5a79147##) w29
+ !(R s30a s30b s30c s30d s30e s30f s30g s30h) =
+ step# s29a s29b s29c s29d s29e s29f s29g s29h (k 0x06ca6351##) w30
+ !(R s31a s31b s31c s31d s31e s31f s31g s31h) =
+ step# s30a s30b s30c s30d s30e s30f s30g s30h (k 0x14292967##) w31
+ !(R s32a s32b s32c s32d s32e s32f s32g s32h) =
+ step# s31a s31b s31c s31d s31e s31f s31g s31h (k 0x27b70a85##) w32
+ !(R s33a s33b s33c s33d s33e s33f s33g s33h) =
+ step# s32a s32b s32c s32d s32e s32f s32g s32h (k 0x2e1b2138##) w33
+ !(R s34a s34b s34c s34d s34e s34f s34g s34h) =
+ step# s33a s33b s33c s33d s33e s33f s33g s33h (k 0x4d2c6dfc##) w34
+ !(R s35a s35b s35c s35d s35e s35f s35g s35h) =
+ step# s34a s34b s34c s34d s34e s34f s34g s34h (k 0x53380d13##) w35
+ !(R s36a s36b s36c s36d s36e s36f s36g s36h) =
+ step# s35a s35b s35c s35d s35e s35f s35g s35h (k 0x650a7354##) w36
+ !(R s37a s37b s37c s37d s37e s37f s37g s37h) =
+ step# s36a s36b s36c s36d s36e s36f s36g s36h (k 0x766a0abb##) w37
+ !(R s38a s38b s38c s38d s38e s38f s38g s38h) =
+ step# s37a s37b s37c s37d s37e s37f s37g s37h (k 0x81c2c92e##) w38
+ !(R s39a s39b s39c s39d s39e s39f s39g s39h) =
+ step# s38a s38b s38c s38d s38e s38f s38g s38h (k 0x92722c85##) w39
+ !(R s40a s40b s40c s40d s40e s40f s40g s40h) =
+ step# s39a s39b s39c s39d s39e s39f s39g s39h (k 0xa2bfe8a1##) w40
+ !(R s41a s41b s41c s41d s41e s41f s41g s41h) =
+ step# s40a s40b s40c s40d s40e s40f s40g s40h (k 0xa81a664b##) w41
+ !(R s42a s42b s42c s42d s42e s42f s42g s42h) =
+ step# s41a s41b s41c s41d s41e s41f s41g s41h (k 0xc24b8b70##) w42
+ !(R s43a s43b s43c s43d s43e s43f s43g s43h) =
+ step# s42a s42b s42c s42d s42e s42f s42g s42h (k 0xc76c51a3##) w43
+ !(R s44a s44b s44c s44d s44e s44f s44g s44h) =
+ step# s43a s43b s43c s43d s43e s43f s43g s43h (k 0xd192e819##) w44
+ !(R s45a s45b s45c s45d s45e s45f s45g s45h) =
+ step# s44a s44b s44c s44d s44e s44f s44g s44h (k 0xd6990624##) w45
+ !(R s46a s46b s46c s46d s46e s46f s46g s46h) =
+ step# s45a s45b s45c s45d s45e s45f s45g s45h (k 0xf40e3585##) w46
+ !(R s47a s47b s47c s47d s47e s47f s47g s47h) =
+ step# s46a s46b s46c s46d s46e s46f s46g s46h (k 0x106aa070##) w47
+ !(R s48a s48b s48c s48d s48e s48f s48g s48h) =
+ step# s47a s47b s47c s47d s47e s47f s47g s47h (k 0x19a4c116##) w48
+ !(R s49a s49b s49c s49d s49e s49f s49g s49h) =
+ step# s48a s48b s48c s48d s48e s48f s48g s48h (k 0x1e376c08##) w49
+ !(R s50a s50b s50c s50d s50e s50f s50g s50h) =
+ step# s49a s49b s49c s49d s49e s49f s49g s49h (k 0x2748774c##) w50
+ !(R s51a s51b s51c s51d s51e s51f s51g s51h) =
+ step# s50a s50b s50c s50d s50e s50f s50g s50h (k 0x34b0bcb5##) w51
+ !(R s52a s52b s52c s52d s52e s52f s52g s52h) =
+ step# s51a s51b s51c s51d s51e s51f s51g s51h (k 0x391c0cb3##) w52
+ !(R s53a s53b s53c s53d s53e s53f s53g s53h) =
+ step# s52a s52b s52c s52d s52e s52f s52g s52h (k 0x4ed8aa4a##) w53
+ !(R s54a s54b s54c s54d s54e s54f s54g s54h) =
+ step# s53a s53b s53c s53d s53e s53f s53g s53h (k 0x5b9cca4f##) w54
+ !(R s55a s55b s55c s55d s55e s55f s55g s55h) =
+ step# s54a s54b s54c s54d s54e s54f s54g s54h (k 0x682e6ff3##) w55
+ !(R s56a s56b s56c s56d s56e s56f s56g s56h) =
+ step# s55a s55b s55c s55d s55e s55f s55g s55h (k 0x748f82ee##) w56
+ !(R s57a s57b s57c s57d s57e s57f s57g s57h) =
+ step# s56a s56b s56c s56d s56e s56f s56g s56h (k 0x78a5636f##) w57
+ !(R s58a s58b s58c s58d s58e s58f s58g s58h) =
+ step# s57a s57b s57c s57d s57e s57f s57g s57h (k 0x84c87814##) w58
+ !(R s59a s59b s59c s59d s59e s59f s59g s59h) =
+ step# s58a s58b s58c s58d s58e s58f s58g s58h (k 0x8cc70208##) w59
+ !(R s60a s60b s60c s60d s60e s60f s60g s60h) =
+ step# s59a s59b s59c s59d s59e s59f s59g s59h (k 0x90befffa##) w60
+ !(R s61a s61b s61c s61d s61e s61f s61g s61h) =
+ step# s60a s60b s60c s60d s60e s60f s60g s60h (k 0xa4506ceb##) w61
+ !(R s62a s62b s62c s62d s62e s62f s62g s62h) =
+ step# s61a s61b s61c s61d s61e s61f s61g s61h (k 0xbef9a3f7##) w62
+ !(R s63a s63b s63c s63d s63e s63f s63g s63h) =
+ step# s62a s62b s62c s62d s62e s62f s62g s62h (k 0xc67178f2##) w63
+ in R (h0 `p` s63a) (h1 `p` s63b) (h2 `p` s63c) (h3 `p` s63d)
+ (h4 `p` s63e) (h5 `p` s63f) (h6 `p` s63g) (h7 `p` s63h)
+ where
+ p = Exts.plusWord32#
+ {-# INLINE p #-}
+ k :: Exts.Word# -> Exts.Word32#
+ k = Exts.wordToWord32#
+ {-# INLINE k #-}
+
+cat :: Registers -> BS.ByteString
+cat (R h0 h1 h2 h3 h4 h5 h6 h7) = BI.unsafeCreate 32 $ \p -> do
+ poke32be p 0 h0
+ poke32be p 4 h1
+ poke32be p 8 h2
+ poke32be p 12 h3
+ poke32be p 16 h4
+ poke32be p 20 h5
+ poke32be p 24 h6
+ poke32be p 28 h7
+ where
+ poke32be :: Ptr Word8 -> Int -> Exts.Word32# -> IO ()
+ poke32be p off w = do
+ poke (p `plusPtr` off) (byte w 24#)
+ poke (p `plusPtr` (off + 1)) (byte w 16#)
+ poke (p `plusPtr` (off + 2)) (byte w 8#)
+ poke (p `plusPtr` (off + 3)) (byte w 0#)
+
+ byte :: Exts.Word32# -> Int# -> Word8
+ byte w n = GHC.Word.W8# (Exts.wordToWord8#
+ (Exts.word32ToWord# (Exts.uncheckedShiftRLWord32# w n)))
+{-# INLINE cat #-}
+
+unsafe_padding :: BS.ByteString -> Word64 -> BS.ByteString
+unsafe_padding (BI.PS fp off r) l
+ | r < 56 = BI.unsafeCreate 64 $ \p -> do
+ BI.unsafeWithForeignPtr fp $ \src ->
+ copyBytes p (src `plusPtr` off) r
+ poke (p `plusPtr` r) (0x80 :: Word8)
+ fillBytes (p `plusPtr` (r + 1)) 0 (55 - r)
+ poke_word64be (p `plusPtr` 56) (l * 8)
+ | otherwise = BI.unsafeCreate 128 $ \p -> do
+ BI.unsafeWithForeignPtr fp $ \src ->
+ copyBytes p (src `plusPtr` off) r
+ poke (p `plusPtr` r) (0x80 :: Word8)
+ fillBytes (p `plusPtr` (r + 1)) 0 (63 - r)
+ fillBytes (p `plusPtr` 64) 0 56
+ poke_word64be (p `plusPtr` 120) (l * 8)
+ where
+ poke_word64be :: Ptr Word8 -> Word64 -> IO ()
+ poke_word64be !p !w = do
+ poke p (fi (w `B.unsafeShiftR` 56) :: Word8)
+ poke (p `plusPtr` 1) (fi (w `B.unsafeShiftR` 48) :: Word8)
+ poke (p `plusPtr` 2) (fi (w `B.unsafeShiftR` 40) :: Word8)
+ poke (p `plusPtr` 3) (fi (w `B.unsafeShiftR` 32) :: Word8)
+ poke (p `plusPtr` 4) (fi (w `B.unsafeShiftR` 24) :: Word8)
+ poke (p `plusPtr` 5) (fi (w `B.unsafeShiftR` 16) :: Word8)
+ poke (p `plusPtr` 6) (fi (w `B.unsafeShiftR` 8) :: Word8)
+ poke (p `plusPtr` 7) (fi w :: Word8)
+
+process :: BS.ByteString -> Registers
+process m@(BI.PS _ _ l) = finalize (go iv 0) where
+ iv = R (Exts.wordToWord32# 0x6a09e667##)
+ (Exts.wordToWord32# 0xbb67ae85##)
+ (Exts.wordToWord32# 0x3c6ef372##)
+ (Exts.wordToWord32# 0xa54ff53a##)
+ (Exts.wordToWord32# 0x510e527f##)
+ (Exts.wordToWord32# 0x9b05688c##)
+ (Exts.wordToWord32# 0x1f83d9ab##)
+ (Exts.wordToWord32# 0x5be0cd19##)
+
+ go !acc !j
+ | j + 64 <= l = go (block_hash acc (block m j)) (j + 64)
+ | otherwise = acc
+
+ finalize !acc
+ | len < 56 = block_hash acc (block padded 0)
+ | otherwise = block_hash
+ (block_hash acc (block padded 0))
+ (block padded 64)
+ where
+ !remaining@(BI.PS _ _ len) = BU.unsafeDrop (l - l `rem` 64) m
+ !padded = unsafe_padding remaining (fi l)
+
+hash :: BS.ByteString -> BS.ByteString
+hash m
+ | sha256_arm_available = hash_arm m
+ | otherwise = cat (process m)
+
+sha256_arm_available :: Bool
+sha256_arm_available = unsafePerformIO c_sha256_arm_available /= 0
+{-# NOINLINE sha256_arm_available #-}
+
+hash_arm :: BS.ByteString -> BS.ByteString
+hash_arm m@(BI.PS _ _ l) = unsafePerformIO $
+ allocaBytes 32 $ \state -> do
+ poke state (0x6a09e667 :: Word32)
+ poke (state `plusPtr` 4) (0xbb67ae85 :: Word32)
+ poke (state `plusPtr` 8) (0x3c6ef372 :: Word32)
+ poke (state `plusPtr` 12) (0xa54ff53a :: Word32)
+ poke (state `plusPtr` 16) (0x510e527f :: Word32)
+ poke (state `plusPtr` 20) (0x9b05688c :: Word32)
+ poke (state `plusPtr` 24) (0x1f83d9ab :: Word32)
+ poke (state `plusPtr` 28) (0x5be0cd19 :: Word32)
+ go state 0
+ finalize state
+ BI.create 32 $ \out -> do
+ h0 <- peek state :: IO Word32
+ h1 <- peek (state `plusPtr` 4) :: IO Word32
+ h2 <- peek (state `plusPtr` 8) :: IO Word32
+ h3 <- peek (state `plusPtr` 12) :: IO Word32
+ h4 <- peek (state `plusPtr` 16) :: IO Word32
+ h5 <- peek (state `plusPtr` 20) :: IO Word32
+ h6 <- peek (state `plusPtr` 24) :: IO Word32
+ h7 <- peek (state `plusPtr` 28) :: IO Word32
+ poke_word32be out 0 h0
+ poke_word32be out 4 h1
+ poke_word32be out 8 h2
+ poke_word32be out 12 h3
+ poke_word32be out 16 h4
+ poke_word32be out 20 h5
+ poke_word32be out 24 h6
+ poke_word32be out 28 h7
+ where
+ go !state !j
+ | j + 64 <= l = do
+ BI.unsafeWithForeignPtr fp $ \src ->
+ c_sha256_block state (src `plusPtr` (off + j))
+ go state (j + 64)
+ | otherwise = pure ()
+ where
+ BI.PS fp off _ = m
+
+ finalize !state = do
+ let !remaining@(BI.PS _ _ len) = BU.unsafeDrop (l - l `rem` 64) m
+ BI.PS pfp poff _ = unsafe_padding remaining (fi l)
+ BI.unsafeWithForeignPtr pfp $ \src -> do
+ c_sha256_block state (src `plusPtr` poff)
+ if len >= 56
+ then c_sha256_block state (src `plusPtr` (poff + 64))
+ else pure ()
+
+ poke_word32be :: Ptr Word8 -> Int -> Word32 -> IO ()
+ poke_word32be !p !off !w = do
+ poke (p `plusPtr` off) (fi (w `B.unsafeShiftR` 24) :: Word8)
+ poke (p `plusPtr` (off + 1)) (fi (w `B.unsafeShiftR` 16) :: Word8)
+ poke (p `plusPtr` (off + 2)) (fi (w `B.unsafeShiftR` 8) :: Word8)
+ poke (p `plusPtr` (off + 3)) (fi w :: Word8)
+
+-- HMAC -----------------------------------------------------------------------
+-- https://datatracker.ietf.org/doc/html/rfc2104#section-2
+
+data KeyAndLen = KeyAndLen
+ {-# UNPACK #-} !BS.ByteString
+ {-# UNPACK #-} !Int
+
+hmac
+ :: BS.ByteString -- ^ key
+ -> BS.ByteString -- ^ text
+ -> BS.ByteString
+hmac mk@(BI.PS _ _ l) text =
+ let step1 = k <> BS.replicate (64 - lk) 0x00
+ step2 = BS.map (B.xor 0x36) step1
+ step3 = step2 <> text
+ step4 = hash step3
+ step5 = BS.map (B.xor 0x5C) step1
+ step6 = step5 <> step4
+ in hash step6
+ where
+ !(KeyAndLen k lk)
+ | l > 64 = KeyAndLen (hash mk) 32
+ | otherwise = KeyAndLen mk l
+
diff --git a/ppad-sha256.cabal b/ppad-sha256.cabal
@@ -32,9 +32,16 @@ library
ghc-options: -fllvm -O2
exposed-modules:
Crypto.Hash.SHA256
+ Crypto.Hash.SHA256.Internal
+ Crypto.Hash.SHA256.Lazy
+ Crypto.Hash.SHA256D
build-depends:
base >= 4.9 && < 5
, bytestring >= 0.9 && < 0.13
+ c-sources:
+ cbits/sha256_arm.c
+ if arch(aarch64)
+ cc-options: -march=armv8-a+crypto
test-suite sha256-tests
type: exitcode-stdio-1.0
diff --git a/test/Main.hs b/test/Main.hs
@@ -5,6 +5,7 @@
module Main where
import qualified Crypto.Hash.SHA256 as SHA256
+import qualified Crypto.Hash.SHA256D as D
import qualified Data.Aeson as A
import qualified Data.ByteString as BS
import qualified Data.ByteString.Builder as BSB
@@ -46,7 +47,7 @@ execute tag_size W.MacTest {..} = testCase t_msg $ do
let key = decodeLenient (TE.encodeUtf8 mt_key)
msg = decodeLenient (TE.encodeUtf8 mt_msg)
pec = decodeLenient (TE.encodeUtf8 mt_tag)
- out = BS.take bytes (SHA256.hmac key msg)
+ out = BS.take bytes (D.hmac key msg)
if mt_result == "invalid"
then assertBool "invalid" (pec /= out)
else assertEqual mempty pec out
@@ -74,7 +75,7 @@ unit_tests = testGroup "unit tests" [
--
-- , testGroup "hash_lazy (1GB input)" [
-- testCase "hv5" $ do
- -- let out = B16.encode (SHA256.hash_lazy hv5_put)
+ -- let out = B16.encode (D.hash_lazy hv5_put)
-- assertEqual mempty hv5_pec out
-- ]
, testGroup "hmac" [
@@ -83,13 +84,13 @@ unit_tests = testGroup "unit tests" [
, cmp_hmac "hmv3" hmv3_key hmv3_put hmv3_pec
, cmp_hmac "hmv4" hmv4_key hmv4_put hmv4_pec
, testCase "hmv5" $ do
- let out = BS.take 32 $ B16.encode (SHA256.hmac hmv5_key hmv5_put)
+ let out = BS.take 32 $ B16.encode (D.hmac hmv5_key hmv5_put)
assertEqual mempty hmv5_pec out
, testCase "hmv6" $ do
- let out = B16.encode (SHA256.hmac hmv6_key hmv6_put)
+ let out = B16.encode (D.hmac hmv6_key hmv6_put)
assertEqual mempty hmv6_pec out
, testCase "hmv7" $ do
- let out = B16.encode (SHA256.hmac hmv7_key hmv7_put)
+ let out = B16.encode (D.hmac hmv7_key hmv7_put)
assertEqual mempty hmv7_pec out
]
, testGroup "hmac_lazy" [
@@ -218,7 +219,7 @@ hmv7_pec = "9b09ffa71b942fcb27635fbcd5b0e944bfdc63644f0713938a7f51535c3a35e2"
cmp_hash :: String -> BS.ByteString -> BS.ByteString -> TestTree
cmp_hash msg put pec = testCase msg $ do
- let out = B16.encode (SHA256.hash put)
+ let out = B16.encode (D.hash put)
assertEqual mempty pec out
cmp_hash_lazy :: String -> BS.ByteString -> BS.ByteString -> TestTree
@@ -229,7 +230,7 @@ cmp_hash_lazy msg (BL.fromStrict -> put) pec = testCase msg $ do
cmp_hmac
:: String -> BS.ByteString -> BS.ByteString -> BS.ByteString -> TestTree
cmp_hmac msg key put pec = testCase msg $ do
- let out = B16.encode (SHA256.hmac key put)
+ let out = B16.encode (D.hmac key put)
assertEqual mempty pec out
cmp_hmac_lazy