bech32

Pure Haskell bech32 and bech32m encodings.
git clone git://git.ppad.tech/bech32.git
Log | Files | Refs | LICENSE

commit 1164ff141edfbac279387d804829ee82405f74d5
parent fae1f9b7ef3a2ddfa4719e218d42949bf8813edb
Author: Jared Tobin <jared@jtobin.io>
Date:   Fri, 13 Dec 2024 03:03:16 -0330

lib: reorg

Diffstat:
Mbench/Main.hs | 23++++++++++++++++++-----
Alib/Data/ByteString/Base32.hs | 212+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mlib/Data/ByteString/Bech32.hs | 200++++++-------------------------------------------------------------------------
Alib/Data/ByteString/Bech32m.hs | 39+++++++++++++++++++++++++++++++++++++++
Mppad-bech32.cabal | 4+++-
5 files changed, 287 insertions(+), 191 deletions(-)

diff --git a/bench/Main.hs b/bench/Main.hs @@ -3,7 +3,8 @@ module Main where import Criterion.Main -import qualified Data.ByteString.Bech32 as B32 +import qualified Data.ByteString.Base32 as Base32 +import qualified Data.ByteString.Bech32 as Bech32 main :: IO () main = defaultMain [ @@ -13,10 +14,22 @@ main = defaultMain [ suite :: Benchmark suite = env setup $ \big -> bgroup "ppad-bech32" [ - bench "base32 120b" $ whnf B32.base32 "jtobin was here" - , bench "base32 240b" $ whnf B32.base32 "jtobin was herejtobin was here" - , bench "base32 1200b" $ whnf B32.base32 big - ] + bgroup "base32" [ + bench "base32 120b" $ whnf Base32.encode + "jtobin was here" + , bench "base32 128b" $ whnf Base32.encode + "jtobin was here!" + , bench "base32 240b" $ whnf Base32.encode + "jtobin was herejtobin was here" + , bench "base32 1200b" $ whnf Base32.encode big + ] + , bgroup "bech32" [ + bench "bech32 120b" $ nf (Bech32.encode "bc") + "jtobin was here" + , bench "bech32 128b" $ nf (Bech32.encode "bc") + "jtobin was here!" + ] + ] where setup = pure . mconcat . take 10 $ repeat "jtobin was here" diff --git a/lib/Data/ByteString/Base32.hs b/lib/Data/ByteString/Base32.hs @@ -0,0 +1,212 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE BinaryLiterals #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} + +module Data.ByteString.Base32 ( + encode + , as_word5 + , as_bech32 + + -- not base32-related, but convenient to put here + , Encoding(..) + , create_checksum + , verify_checksum + , valid_hrp + ) where + +import Data.Bits ((.|.), (.&.)) +import qualified Data.Bits as B +import qualified Data.ByteString as BS +import qualified Data.ByteString.Builder as BSB +import qualified Data.ByteString.Builder.Extra as BE +import qualified Data.ByteString.Unsafe as BU +import qualified Data.Primitive.PrimArray as PA +import Data.Word (Word32) + +_BECH32M_CONST :: Word32 +_BECH32M_CONST = 0x2bc830a3 + +fi :: (Integral a, Num b) => a -> b +fi = fromIntegral +{-# INLINE fi #-} + +word32be :: BS.ByteString -> Word32 +word32be s = + (fi (s `BU.unsafeIndex` 0) `B.shiftL` 24) .|. + (fi (s `BU.unsafeIndex` 1) `B.shiftL` 16) .|. + (fi (s `BU.unsafeIndex` 2) `B.shiftL` 8) .|. + (fi (s `BU.unsafeIndex` 3)) +{-# INLINE word32be #-} + +-- realization for small builders +toStrict :: BSB.Builder -> BS.ByteString +toStrict = BS.toStrict + . BE.toLazyByteStringWith (BE.safeStrategy 128 BE.smallChunkSize) mempty + +bech32_charset :: BS.ByteString +bech32_charset = "qpzry9x8gf2tvdw0s3jn54khce6mua7l" + +-- adapted from emilypi's 'base32' library +arrange :: Word32 -> Word32 -> BSB.Builder +arrange w32 w8 = + let mask = 0b00011111 + bech32_char = fi . BS.index bech32_charset . fi + + w8_0 = bech32_char (mask .&. (w32 `B.shiftR` 27)) + w8_1 = bech32_char (mask .&. (w32 `B.shiftR` 22)) + w8_2 = bech32_char (mask .&. (w32 `B.shiftR` 17)) + w8_3 = bech32_char (mask .&. (w32 `B.shiftR` 12)) + w8_4 = bech32_char (mask .&. (w32 `B.shiftR` 07)) + w8_5 = bech32_char (mask .&. (w32 `B.shiftR` 02)) + w8_6 = bech32_char (mask .&. (w32 `B.shiftL` 03 .|. w8 `B.shiftR` 05)) + w8_7 = bech32_char (mask .&. w8) + + w64 = w8_0 + .|. w8_1 `B.shiftL` 8 + .|. w8_2 `B.shiftL` 16 + .|. w8_3 `B.shiftL` 24 + .|. w8_4 `B.shiftL` 32 + .|. w8_5 `B.shiftL` 40 + .|. w8_6 `B.shiftL` 48 + .|. w8_7 `B.shiftL` 56 + + in BSB.word64LE w64 + +-- adapted from emilypi's 'base32' library +encode :: BS.ByteString -> BS.ByteString +encode dat = toStrict (go dat) where + bech32_char = fi . BS.index bech32_charset . fi + go bs = case BS.splitAt 5 bs of + (chunk, etc) -> case BS.length etc of + 0 | BS.length chunk == 5 -> case BS.unsnoc chunk of + Nothing -> error "impossible, chunk length is 5" + Just (word32be -> w32, fi -> w8) -> arrange w32 w8 + + | BS.length chunk == 1 -> + let a = BU.unsafeIndex chunk 0 + t = bech32_char ((a .&. 0b11111000) `B.shiftR` 3) + u = bech32_char ((a .&. 0b00000111) `B.shiftL` 2) + in BSB.word8 t <> BSB.word8 u + + | BS.length chunk == 2 -> + let a = BU.unsafeIndex chunk 0 + b = BU.unsafeIndex chunk 1 + t = bech32_char ((a .&. 0b11111000) `B.shiftR` 3) + u = bech32_char $ + ((a .&. 0b00000111) `B.shiftL` 2) + .|. ((b .&. 0b11000000) `B.shiftR` 6) + v = bech32_char ((b .&. 0b00111110) `B.shiftR` 1) + w = bech32_char ((b .&. 0b00000001) `B.shiftL` 4) + in BSB.word8 t <> BSB.word8 u <> BSB.word8 v <> BSB.word8 w + + | BS.length chunk == 3 -> + let a = BU.unsafeIndex chunk 0 + b = BU.unsafeIndex chunk 1 + c = BU.unsafeIndex chunk 2 + t = bech32_char ((a .&. 0b11111000) `B.shiftR` 3) + u = bech32_char $ + ((a .&. 0b00000111) `B.shiftL` 2) + .|. ((b .&. 0b11000000) `B.shiftR` 6) + v = bech32_char ((b .&. 0b00111110) `B.shiftR` 1) + w = bech32_char $ + ((b .&. 0b00000001) `B.shiftL` 4) + .|. ((c .&. 0b11110000) `B.shiftR` 4) + x = bech32_char ((c .&. 0b00001111) `B.shiftL` 1) + in BSB.word8 t <> BSB.word8 u <> BSB.word8 v <> BSB.word8 w + <> BSB.word8 x + + | BS.length chunk == 4 -> + let a = BU.unsafeIndex chunk 0 + b = BU.unsafeIndex chunk 1 + c = BU.unsafeIndex chunk 2 + d = BU.unsafeIndex chunk 3 + t = bech32_char ((a .&. 0b11111000) `B.shiftR` 3) + u = bech32_char $ + ((a .&. 0b00000111) `B.shiftL` 2) + .|. ((b .&. 0b11000000) `B.shiftR` 6) + v = bech32_char ((b .&. 0b00111110) `B.shiftR` 1) + w = bech32_char $ + ((b .&. 0b00000001) `B.shiftL` 4) + .|. ((c .&. 0b11110000) `B.shiftR` 4) + x = bech32_char $ + ((c .&. 0b00001111) `B.shiftL` 1) + .|. ((d .&. 0b10000000) `B.shiftR` 7) + y = bech32_char ((d .&. 0b01111100) `B.shiftR` 2) + z = bech32_char ((d .&. 0b00000011) `B.shiftL` 3) + in BSB.word8 t <> BSB.word8 u <> BSB.word8 v <> BSB.word8 w + <> BSB.word8 x <> BSB.word8 y <> BSB.word8 z + + | otherwise -> mempty + + _ -> case BS.unsnoc chunk of + Nothing -> error "impossible, chunk length is 5" + Just (word32be -> w32, fi -> w8) -> arrange w32 w8 <> go etc + +-- naive base32 -> word5 +as_word5 :: BS.ByteString -> BS.ByteString +as_word5 = BS.map f where + f b = case BS.elemIndex (fi b) bech32_charset of + Nothing -> error "ppad-bech32 (as_word5): input not bech32-encoded" + Just w -> fi w + +-- naive word5 -> base32 +as_bech32 :: BS.ByteString -> BS.ByteString +as_bech32 = BS.map (BS.index bech32_charset . fi) + +polymod :: BS.ByteString -> Word32 +polymod = BS.foldl' alg 1 where + generator = PA.primArrayFromListN 5 + [0x3b6a57b2, 0x26508e6d, 0x1ea119fa, 0x3d4233dd, 0x2a1462b3] + + alg !chk v = + let !b = chk `B.shiftR` 25 + c = (chk .&. 0x1ffffff) `B.shiftL` 5 `B.xor` fi v + in loop_gen 0 b c + + loop_gen i b !chk + | i > 4 = chk + | otherwise = + let sor | B.testBit (b `B.shiftR` i) 0 = + PA.indexPrimArray generator i + | otherwise = 0 + in loop_gen (succ i) b (chk `B.xor` sor) + +valid_hrp :: BS.ByteString -> Bool +valid_hrp hrp + | l == 0 || l > 83 = False + | otherwise = BS.all (\b -> (b > 32) && (b < 127)) hrp + where + l = BS.length hrp + +hrp_expand :: BS.ByteString -> BS.ByteString +hrp_expand bs = toStrict + $ BSB.byteString (BS.map (`B.shiftR` 5) bs) + <> BSB.word8 0 + <> BSB.byteString (BS.map (.&. 0b11111) bs) + +data Encoding = + Bech32 + | Bech32m + +create_checksum :: Encoding -> BS.ByteString -> BS.ByteString -> BS.ByteString +create_checksum enc hrp dat = + let pre = hrp_expand hrp <> dat + pay = toStrict $ + BSB.byteString pre + <> BSB.byteString "\NUL\NUL\NUL\NUL\NUL\NUL" + pm = polymod pay `B.xor` case enc of + Bech32 -> 1 + Bech32m -> _BECH32M_CONST + + code i = (fi (pm `B.shiftR` fi i) .&. 0b11111) + + in BS.map code "\EM\DC4\SI\n\ENQ\NUL" -- BS.pack [25, 20, 15, 10, 5, 0] + +verify_checksum :: Encoding -> BS.ByteString -> BS.ByteString -> Bool +verify_checksum enc hrp dat = + let bs = hrp_expand hrp <> dat + in polymod bs == case enc of + Bech32 -> 1 + Bech32m -> _BECH32M_CONST + diff --git a/lib/Data/ByteString/Bech32.hs b/lib/Data/ByteString/Bech32.hs @@ -1,208 +1,38 @@ -{-# LANGUAGE BangPatterns #-} -{-# LANGUAGE BinaryLiterals #-} -{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ViewPatterns #-} -module Data.ByteString.Bech32 where +module Data.ByteString.Bech32 ( + encode + , verify_checksum + ) where import Control.Monad (guard) -import Data.Bits ((.|.), (.&.)) -import qualified Data.Bits as B import qualified Data.ByteString as BS +import qualified Data.ByteString.Base32 as B32 +import Data.ByteString.Base32 (Encoding(..)) import qualified Data.ByteString.Builder as BSB import qualified Data.ByteString.Builder.Extra as BE -import qualified Data.Primitive.PrimArray as PA -import Data.Word (Word8, Word32, Word64) - -fi :: (Integral a, Num b) => a -> b -fi = fromIntegral -{-# INLINE fi #-} - -word32be :: BS.ByteString -> Word32 -word32be s = - (fi (s `BS.index` 0) `B.shiftL` 24) .|. - (fi (s `BS.index` 1) `B.shiftL` 16) .|. - (fi (s `BS.index` 2) `B.shiftL` 8) .|. - (fi (s `BS.index` 3)) -{-# INLINE word32be #-} -- realization for small builders toStrict :: BSB.Builder -> BS.ByteString toStrict = BS.toStrict . BE.toLazyByteStringWith (BE.safeStrategy 128 BE.smallChunkSize) mempty -bech32_charset :: BS.ByteString -bech32_charset = "qpzry9x8gf2tvdw0s3jn54khce6mua7l" - -bech32_char_w64 :: Word32 -> Word64 -bech32_char_w64 = fi . BS.index bech32_charset . fi - -bech32_char :: Word8 -> Word8 -bech32_char = fi . BS.index bech32_charset . fi - --- adapted from emilypi's 'base32' library -w40_to_w64 :: Word32 -> Word32 -> BSB.Builder -w40_to_w64 w32 w8 = - let mask = 0b00011111 - - w8_0 = bech32_char_w64 (mask .&. (w32 `B.shiftR` 27)) - w8_1 = bech32_char_w64 (mask .&. (w32 `B.shiftR` 22)) - w8_2 = bech32_char_w64 (mask .&. (w32 `B.shiftR` 17)) - w8_3 = bech32_char_w64 (mask .&. (w32 `B.shiftR` 12)) - w8_4 = bech32_char_w64 (mask .&. (w32 `B.shiftR` 07)) - w8_5 = bech32_char_w64 (mask .&. (w32 `B.shiftR` 02)) - w8_6 = bech32_char_w64 (mask .&. (w32 `B.shiftL` 03 .|. w8 `B.shiftR` 05)) - w8_7 = bech32_char_w64 (mask .&. w8) - - w64 = w8_0 - .|. w8_1 `B.shiftL` 8 - .|. w8_2 `B.shiftL` 16 - .|. w8_3 `B.shiftL` 24 - .|. w8_4 `B.shiftL` 32 - .|. w8_5 `B.shiftL` 40 - .|. w8_6 `B.shiftL` 48 - .|. w8_7 `B.shiftL` 56 - - in BSB.word64LE w64 - --- adapted from emilypi's 'base32' library -base32 :: BS.ByteString -> BS.ByteString -base32 dat = toStrict (go dat) where - go bs = case BS.splitAt 5 bs of - (chunk, etc) -> case BS.length etc of - 0 | BS.length chunk == 5 -> case BS.unsnoc chunk of - Nothing -> error "impossible, chunk length is 5" - Just (word32be -> w32, fi -> w8) -> w40_to_w64 w32 w8 - - | BS.length chunk == 1 -> - let a = BS.index chunk 0 - t = bech32_char ((a .&. 0b11111000) `B.shiftR` 3) - u = bech32_char ((a .&. 0b00000111) `B.shiftL` 2) - in BSB.word8 t <> BSB.word8 u - - | BS.length chunk == 2 -> - let a = BS.index chunk 0 - b = BS.index chunk 1 - t = bech32_char ((a .&. 0b11111000) `B.shiftR` 3) - u = bech32_char $ - ((a .&. 0b00000111) `B.shiftL` 2) - .|. ((b .&. 0b11000000) `B.shiftR` 6) - v = bech32_char ((b .&. 0b00111110) `B.shiftR` 1) - w = bech32_char ((b .&. 0b00000001) `B.shiftL` 4) - in BSB.word8 t <> BSB.word8 u <> BSB.word8 v <> BSB.word8 w - - | BS.length chunk == 3 -> - let a = BS.index chunk 0 - b = BS.index chunk 1 - c = BS.index chunk 2 - t = bech32_char ((a .&. 0b11111000) `B.shiftR` 3) - u = bech32_char $ - ((a .&. 0b00000111) `B.shiftL` 2) - .|. ((b .&. 0b11000000) `B.shiftR` 6) - v = bech32_char ((b .&. 0b00111110) `B.shiftR` 1) - w = bech32_char $ - ((b .&. 0b00000001) `B.shiftL` 4) - .|. ((c .&. 0b11110000) `B.shiftR` 4) - x = bech32_char ((c .&. 0b00001111) `B.shiftL` 1) - in BSB.word8 t <> BSB.word8 u <> BSB.word8 v <> BSB.word8 w - <> BSB.word8 x - - | BS.length chunk == 4 -> - let a = BS.index chunk 0 - b = BS.index chunk 1 - c = BS.index chunk 2 - d = BS.index chunk 3 - t = bech32_char ((a .&. 0b11111000) `B.shiftR` 3) - u = bech32_char $ - ((a .&. 0b00000111) `B.shiftL` 2) - .|. ((b .&. 0b11000000) `B.shiftR` 6) - v = bech32_char ((b .&. 0b00111110) `B.shiftR` 1) - w = bech32_char $ - ((b .&. 0b00000001) `B.shiftL` 4) - .|. ((c .&. 0b11110000) `B.shiftR` 4) - x = bech32_char $ - ((c .&. 0b00001111) `B.shiftL` 1) - .|. ((d .&. 0b10000000) `B.shiftR` 7) - y = bech32_char ((d .&. 0b01111100) `B.shiftR` 2) - z = bech32_char ((d .&. 0b00000011) `B.shiftL` 3) - in BSB.word8 t <> BSB.word8 u <> BSB.word8 v <> BSB.word8 w - <> BSB.word8 x <> BSB.word8 y <> BSB.word8 z - - | otherwise -> mempty - - _ -> case BS.unsnoc chunk of - Nothing -> error "impossible, chunk length is 5" - Just (word32be -> w32, fi -> w8) -> w40_to_w64 w32 w8 <> go etc - --- naive base32 -> word5 -as_w5s :: BS.ByteString -> BS.ByteString -as_w5s bs = BS.map f bs where - f b = case BS.elemIndex (fi b) bech32_charset of - Nothing -> error "ppad-bech32 (as_w5s): input not bech32-encoded" - Just w -> fi w - --- naive word5 -> bech32 -as_bech32 :: BS.ByteString -> BS.ByteString -as_bech32 bs = BS.map f bs where - f b = BS.index bech32_charset (fi b) - -valid_hrp :: BS.ByteString -> Bool -valid_hrp hrp - | l == 0 || l > 83 = False - | otherwise = BS.all (\b -> (b > 32) && (b < 127)) hrp - where - l = BS.length hrp - -bech32_polymod :: BS.ByteString -> Word32 -bech32_polymod = BS.foldl' alg 1 where - generator = PA.primArrayFromListN 5 - [0x3b6a57b2, 0x26508e6d, 0x1ea119fa, 0x3d4233dd, 0x2a1462b3] - - alg !chk v = - let !b = chk `B.shiftR` 25 - c = (chk .&. 0x1ffffff) `B.shiftL` 5 `B.xor` fi v - in loop_gen 0 b c - - loop_gen i b !chk - | i > 4 = chk - | otherwise = - let sor | B.testBit (b `B.shiftR` i) 0 = PA.indexPrimArray generator i - | otherwise = 0 - in loop_gen (succ i) b (chk `B.xor` sor) - -bech32_hrp_expand :: BS.ByteString -> BS.ByteString -bech32_hrp_expand bs = toStrict - $ BSB.byteString (BS.map (`B.shiftR` 5) bs) - <> BSB.word8 0 - <> BSB.byteString (BS.map (.&. 0b11111) bs) - -bech32_verify_checksum :: BS.ByteString -> BS.ByteString -> Bool -bech32_verify_checksum hrp dat = - let bs = bech32_hrp_expand hrp <> dat - in bech32_polymod bs == 1 - -bech32_create_checksum :: BS.ByteString -> BS.ByteString -> BS.ByteString -bech32_create_checksum hrp dat = - let pre = bech32_hrp_expand hrp <> dat - pay = toStrict $ - BSB.byteString pre - <> BSB.byteString "\NUL\NUL\NUL\NUL\NUL\NUL" - pm = bech32_polymod pay `B.xor` 1 - - code i = (fi (pm `B.shiftR` fi i) .&. 0b11111) +verify_checksum :: BS.ByteString -> BS.ByteString -> Bool +verify_checksum = B32.verify_checksum Bech32 - in BS.map code "\EM\DC4\SI\n\ENQ\NUL" -- BS.pack [25, 20, 15, 10, 5, 0] +create_checksum :: BS.ByteString -> BS.ByteString -> BS.ByteString +create_checksum = B32.create_checksum Bech32 --- base255 -> bech32 +-- base255 -> bech32m encode :: BS.ByteString -> BS.ByteString -> Maybe BS.ByteString -encode hrp (base32 -> dat) = do - guard (valid_hrp hrp) - let check = bech32_create_checksum hrp (as_w5s dat) +encode hrp (B32.encode -> dat) = do + guard (B32.valid_hrp hrp) + let check = create_checksum hrp (B32.as_word5 dat) res = toStrict $ BSB.byteString hrp <> BSB.word8 49 -- 1 <> BSB.byteString dat - <> BSB.byteString (as_bech32 check) + <> BSB.byteString (B32.as_bech32 check) guard (BS.length res < 91) pure res diff --git a/lib/Data/ByteString/Bech32m.hs b/lib/Data/ByteString/Bech32m.hs @@ -0,0 +1,39 @@ +{-# LANGUAGE ViewPatterns #-} + +module Data.ByteString.Bech32m ( + encode + , verify_checksum + ) where + +import Control.Monad (guard) +import qualified Data.ByteString as BS +import qualified Data.ByteString.Base32 as B32 +import Data.ByteString.Base32 (Encoding(..)) +import qualified Data.ByteString.Builder as BSB +import qualified Data.ByteString.Builder.Extra as BE + +-- realization for small builders +toStrict :: BSB.Builder -> BS.ByteString +toStrict = BS.toStrict + . BE.toLazyByteStringWith (BE.safeStrategy 128 BE.smallChunkSize) mempty + +verify_checksum :: BS.ByteString -> BS.ByteString -> Bool +verify_checksum = B32.verify_checksum Bech32m + +-- XX no need for this to be here +create_checksum :: BS.ByteString -> BS.ByteString -> BS.ByteString +create_checksum = B32.create_checksum Bech32m + +-- base255 -> bech32m +encode :: BS.ByteString -> BS.ByteString -> Maybe BS.ByteString +encode hrp (B32.encode -> dat) = do + guard (B32.valid_hrp hrp) + let check = create_checksum hrp (B32.as_word5 dat) + res = toStrict $ + BSB.byteString hrp + <> BSB.word8 49 -- 1 + <> BSB.byteString dat + <> BSB.byteString (B32.as_bech32 check) + guard (BS.length res < 91) + pure res + diff --git a/ppad-bech32.cabal b/ppad-bech32.cabal @@ -24,7 +24,9 @@ library ghc-options: -Wall exposed-modules: - Data.ByteString.Bech32 + Data.ByteString.Base32 + , Data.ByteString.Bech32 + , Data.ByteString.Bech32m build-depends: base >= 4.9 && < 5 , bytestring >= 0.9 && < 0.13