commit 1164ff141edfbac279387d804829ee82405f74d5
parent fae1f9b7ef3a2ddfa4719e218d42949bf8813edb
Author: Jared Tobin <jared@jtobin.io>
Date: Fri, 13 Dec 2024 03:03:16 -0330
lib: reorg
Diffstat:
5 files changed, 287 insertions(+), 191 deletions(-)
diff --git a/bench/Main.hs b/bench/Main.hs
@@ -3,7 +3,8 @@
module Main where
import Criterion.Main
-import qualified Data.ByteString.Bech32 as B32
+import qualified Data.ByteString.Base32 as Base32
+import qualified Data.ByteString.Bech32 as Bech32
main :: IO ()
main = defaultMain [
@@ -13,10 +14,22 @@ main = defaultMain [
suite :: Benchmark
suite = env setup $ \big ->
bgroup "ppad-bech32" [
- bench "base32 120b" $ whnf B32.base32 "jtobin was here"
- , bench "base32 240b" $ whnf B32.base32 "jtobin was herejtobin was here"
- , bench "base32 1200b" $ whnf B32.base32 big
- ]
+ bgroup "base32" [
+ bench "base32 120b" $ whnf Base32.encode
+ "jtobin was here"
+ , bench "base32 128b" $ whnf Base32.encode
+ "jtobin was here!"
+ , bench "base32 240b" $ whnf Base32.encode
+ "jtobin was herejtobin was here"
+ , bench "base32 1200b" $ whnf Base32.encode big
+ ]
+ , bgroup "bech32" [
+ bench "bech32 120b" $ nf (Bech32.encode "bc")
+ "jtobin was here"
+ , bench "bech32 128b" $ nf (Bech32.encode "bc")
+ "jtobin was here!"
+ ]
+ ]
where
setup = pure . mconcat . take 10 $ repeat "jtobin was here"
diff --git a/lib/Data/ByteString/Base32.hs b/lib/Data/ByteString/Base32.hs
@@ -0,0 +1,212 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE BinaryLiterals #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ViewPatterns #-}
+
+module Data.ByteString.Base32 (
+ encode
+ , as_word5
+ , as_bech32
+
+ -- not base32-related, but convenient to put here
+ , Encoding(..)
+ , create_checksum
+ , verify_checksum
+ , valid_hrp
+ ) where
+
+import Data.Bits ((.|.), (.&.))
+import qualified Data.Bits as B
+import qualified Data.ByteString as BS
+import qualified Data.ByteString.Builder as BSB
+import qualified Data.ByteString.Builder.Extra as BE
+import qualified Data.ByteString.Unsafe as BU
+import qualified Data.Primitive.PrimArray as PA
+import Data.Word (Word32)
+
+_BECH32M_CONST :: Word32
+_BECH32M_CONST = 0x2bc830a3
+
+fi :: (Integral a, Num b) => a -> b
+fi = fromIntegral
+{-# INLINE fi #-}
+
+word32be :: BS.ByteString -> Word32
+word32be s =
+ (fi (s `BU.unsafeIndex` 0) `B.shiftL` 24) .|.
+ (fi (s `BU.unsafeIndex` 1) `B.shiftL` 16) .|.
+ (fi (s `BU.unsafeIndex` 2) `B.shiftL` 8) .|.
+ (fi (s `BU.unsafeIndex` 3))
+{-# INLINE word32be #-}
+
+-- realization for small builders
+toStrict :: BSB.Builder -> BS.ByteString
+toStrict = BS.toStrict
+ . BE.toLazyByteStringWith (BE.safeStrategy 128 BE.smallChunkSize) mempty
+
+bech32_charset :: BS.ByteString
+bech32_charset = "qpzry9x8gf2tvdw0s3jn54khce6mua7l"
+
+-- adapted from emilypi's 'base32' library
+arrange :: Word32 -> Word32 -> BSB.Builder
+arrange w32 w8 =
+ let mask = 0b00011111
+ bech32_char = fi . BS.index bech32_charset . fi
+
+ w8_0 = bech32_char (mask .&. (w32 `B.shiftR` 27))
+ w8_1 = bech32_char (mask .&. (w32 `B.shiftR` 22))
+ w8_2 = bech32_char (mask .&. (w32 `B.shiftR` 17))
+ w8_3 = bech32_char (mask .&. (w32 `B.shiftR` 12))
+ w8_4 = bech32_char (mask .&. (w32 `B.shiftR` 07))
+ w8_5 = bech32_char (mask .&. (w32 `B.shiftR` 02))
+ w8_6 = bech32_char (mask .&. (w32 `B.shiftL` 03 .|. w8 `B.shiftR` 05))
+ w8_7 = bech32_char (mask .&. w8)
+
+ w64 = w8_0
+ .|. w8_1 `B.shiftL` 8
+ .|. w8_2 `B.shiftL` 16
+ .|. w8_3 `B.shiftL` 24
+ .|. w8_4 `B.shiftL` 32
+ .|. w8_5 `B.shiftL` 40
+ .|. w8_6 `B.shiftL` 48
+ .|. w8_7 `B.shiftL` 56
+
+ in BSB.word64LE w64
+
+-- adapted from emilypi's 'base32' library
+encode :: BS.ByteString -> BS.ByteString
+encode dat = toStrict (go dat) where
+ bech32_char = fi . BS.index bech32_charset . fi
+ go bs = case BS.splitAt 5 bs of
+ (chunk, etc) -> case BS.length etc of
+ 0 | BS.length chunk == 5 -> case BS.unsnoc chunk of
+ Nothing -> error "impossible, chunk length is 5"
+ Just (word32be -> w32, fi -> w8) -> arrange w32 w8
+
+ | BS.length chunk == 1 ->
+ let a = BU.unsafeIndex chunk 0
+ t = bech32_char ((a .&. 0b11111000) `B.shiftR` 3)
+ u = bech32_char ((a .&. 0b00000111) `B.shiftL` 2)
+ in BSB.word8 t <> BSB.word8 u
+
+ | BS.length chunk == 2 ->
+ let a = BU.unsafeIndex chunk 0
+ b = BU.unsafeIndex chunk 1
+ t = bech32_char ((a .&. 0b11111000) `B.shiftR` 3)
+ u = bech32_char $
+ ((a .&. 0b00000111) `B.shiftL` 2)
+ .|. ((b .&. 0b11000000) `B.shiftR` 6)
+ v = bech32_char ((b .&. 0b00111110) `B.shiftR` 1)
+ w = bech32_char ((b .&. 0b00000001) `B.shiftL` 4)
+ in BSB.word8 t <> BSB.word8 u <> BSB.word8 v <> BSB.word8 w
+
+ | BS.length chunk == 3 ->
+ let a = BU.unsafeIndex chunk 0
+ b = BU.unsafeIndex chunk 1
+ c = BU.unsafeIndex chunk 2
+ t = bech32_char ((a .&. 0b11111000) `B.shiftR` 3)
+ u = bech32_char $
+ ((a .&. 0b00000111) `B.shiftL` 2)
+ .|. ((b .&. 0b11000000) `B.shiftR` 6)
+ v = bech32_char ((b .&. 0b00111110) `B.shiftR` 1)
+ w = bech32_char $
+ ((b .&. 0b00000001) `B.shiftL` 4)
+ .|. ((c .&. 0b11110000) `B.shiftR` 4)
+ x = bech32_char ((c .&. 0b00001111) `B.shiftL` 1)
+ in BSB.word8 t <> BSB.word8 u <> BSB.word8 v <> BSB.word8 w
+ <> BSB.word8 x
+
+ | BS.length chunk == 4 ->
+ let a = BU.unsafeIndex chunk 0
+ b = BU.unsafeIndex chunk 1
+ c = BU.unsafeIndex chunk 2
+ d = BU.unsafeIndex chunk 3
+ t = bech32_char ((a .&. 0b11111000) `B.shiftR` 3)
+ u = bech32_char $
+ ((a .&. 0b00000111) `B.shiftL` 2)
+ .|. ((b .&. 0b11000000) `B.shiftR` 6)
+ v = bech32_char ((b .&. 0b00111110) `B.shiftR` 1)
+ w = bech32_char $
+ ((b .&. 0b00000001) `B.shiftL` 4)
+ .|. ((c .&. 0b11110000) `B.shiftR` 4)
+ x = bech32_char $
+ ((c .&. 0b00001111) `B.shiftL` 1)
+ .|. ((d .&. 0b10000000) `B.shiftR` 7)
+ y = bech32_char ((d .&. 0b01111100) `B.shiftR` 2)
+ z = bech32_char ((d .&. 0b00000011) `B.shiftL` 3)
+ in BSB.word8 t <> BSB.word8 u <> BSB.word8 v <> BSB.word8 w
+ <> BSB.word8 x <> BSB.word8 y <> BSB.word8 z
+
+ | otherwise -> mempty
+
+ _ -> case BS.unsnoc chunk of
+ Nothing -> error "impossible, chunk length is 5"
+ Just (word32be -> w32, fi -> w8) -> arrange w32 w8 <> go etc
+
+-- naive base32 -> word5
+as_word5 :: BS.ByteString -> BS.ByteString
+as_word5 = BS.map f where
+ f b = case BS.elemIndex (fi b) bech32_charset of
+ Nothing -> error "ppad-bech32 (as_word5): input not bech32-encoded"
+ Just w -> fi w
+
+-- naive word5 -> base32
+as_bech32 :: BS.ByteString -> BS.ByteString
+as_bech32 = BS.map (BS.index bech32_charset . fi)
+
+polymod :: BS.ByteString -> Word32
+polymod = BS.foldl' alg 1 where
+ generator = PA.primArrayFromListN 5
+ [0x3b6a57b2, 0x26508e6d, 0x1ea119fa, 0x3d4233dd, 0x2a1462b3]
+
+ alg !chk v =
+ let !b = chk `B.shiftR` 25
+ c = (chk .&. 0x1ffffff) `B.shiftL` 5 `B.xor` fi v
+ in loop_gen 0 b c
+
+ loop_gen i b !chk
+ | i > 4 = chk
+ | otherwise =
+ let sor | B.testBit (b `B.shiftR` i) 0 =
+ PA.indexPrimArray generator i
+ | otherwise = 0
+ in loop_gen (succ i) b (chk `B.xor` sor)
+
+valid_hrp :: BS.ByteString -> Bool
+valid_hrp hrp
+ | l == 0 || l > 83 = False
+ | otherwise = BS.all (\b -> (b > 32) && (b < 127)) hrp
+ where
+ l = BS.length hrp
+
+hrp_expand :: BS.ByteString -> BS.ByteString
+hrp_expand bs = toStrict
+ $ BSB.byteString (BS.map (`B.shiftR` 5) bs)
+ <> BSB.word8 0
+ <> BSB.byteString (BS.map (.&. 0b11111) bs)
+
+data Encoding =
+ Bech32
+ | Bech32m
+
+create_checksum :: Encoding -> BS.ByteString -> BS.ByteString -> BS.ByteString
+create_checksum enc hrp dat =
+ let pre = hrp_expand hrp <> dat
+ pay = toStrict $
+ BSB.byteString pre
+ <> BSB.byteString "\NUL\NUL\NUL\NUL\NUL\NUL"
+ pm = polymod pay `B.xor` case enc of
+ Bech32 -> 1
+ Bech32m -> _BECH32M_CONST
+
+ code i = (fi (pm `B.shiftR` fi i) .&. 0b11111)
+
+ in BS.map code "\EM\DC4\SI\n\ENQ\NUL" -- BS.pack [25, 20, 15, 10, 5, 0]
+
+verify_checksum :: Encoding -> BS.ByteString -> BS.ByteString -> Bool
+verify_checksum enc hrp dat =
+ let bs = hrp_expand hrp <> dat
+ in polymod bs == case enc of
+ Bech32 -> 1
+ Bech32m -> _BECH32M_CONST
+
diff --git a/lib/Data/ByteString/Bech32.hs b/lib/Data/ByteString/Bech32.hs
@@ -1,208 +1,38 @@
-{-# LANGUAGE BangPatterns #-}
-{-# LANGUAGE BinaryLiterals #-}
-{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ViewPatterns #-}
-module Data.ByteString.Bech32 where
+module Data.ByteString.Bech32 (
+ encode
+ , verify_checksum
+ ) where
import Control.Monad (guard)
-import Data.Bits ((.|.), (.&.))
-import qualified Data.Bits as B
import qualified Data.ByteString as BS
+import qualified Data.ByteString.Base32 as B32
+import Data.ByteString.Base32 (Encoding(..))
import qualified Data.ByteString.Builder as BSB
import qualified Data.ByteString.Builder.Extra as BE
-import qualified Data.Primitive.PrimArray as PA
-import Data.Word (Word8, Word32, Word64)
-
-fi :: (Integral a, Num b) => a -> b
-fi = fromIntegral
-{-# INLINE fi #-}
-
-word32be :: BS.ByteString -> Word32
-word32be s =
- (fi (s `BS.index` 0) `B.shiftL` 24) .|.
- (fi (s `BS.index` 1) `B.shiftL` 16) .|.
- (fi (s `BS.index` 2) `B.shiftL` 8) .|.
- (fi (s `BS.index` 3))
-{-# INLINE word32be #-}
-- realization for small builders
toStrict :: BSB.Builder -> BS.ByteString
toStrict = BS.toStrict
. BE.toLazyByteStringWith (BE.safeStrategy 128 BE.smallChunkSize) mempty
-bech32_charset :: BS.ByteString
-bech32_charset = "qpzry9x8gf2tvdw0s3jn54khce6mua7l"
-
-bech32_char_w64 :: Word32 -> Word64
-bech32_char_w64 = fi . BS.index bech32_charset . fi
-
-bech32_char :: Word8 -> Word8
-bech32_char = fi . BS.index bech32_charset . fi
-
--- adapted from emilypi's 'base32' library
-w40_to_w64 :: Word32 -> Word32 -> BSB.Builder
-w40_to_w64 w32 w8 =
- let mask = 0b00011111
-
- w8_0 = bech32_char_w64 (mask .&. (w32 `B.shiftR` 27))
- w8_1 = bech32_char_w64 (mask .&. (w32 `B.shiftR` 22))
- w8_2 = bech32_char_w64 (mask .&. (w32 `B.shiftR` 17))
- w8_3 = bech32_char_w64 (mask .&. (w32 `B.shiftR` 12))
- w8_4 = bech32_char_w64 (mask .&. (w32 `B.shiftR` 07))
- w8_5 = bech32_char_w64 (mask .&. (w32 `B.shiftR` 02))
- w8_6 = bech32_char_w64 (mask .&. (w32 `B.shiftL` 03 .|. w8 `B.shiftR` 05))
- w8_7 = bech32_char_w64 (mask .&. w8)
-
- w64 = w8_0
- .|. w8_1 `B.shiftL` 8
- .|. w8_2 `B.shiftL` 16
- .|. w8_3 `B.shiftL` 24
- .|. w8_4 `B.shiftL` 32
- .|. w8_5 `B.shiftL` 40
- .|. w8_6 `B.shiftL` 48
- .|. w8_7 `B.shiftL` 56
-
- in BSB.word64LE w64
-
--- adapted from emilypi's 'base32' library
-base32 :: BS.ByteString -> BS.ByteString
-base32 dat = toStrict (go dat) where
- go bs = case BS.splitAt 5 bs of
- (chunk, etc) -> case BS.length etc of
- 0 | BS.length chunk == 5 -> case BS.unsnoc chunk of
- Nothing -> error "impossible, chunk length is 5"
- Just (word32be -> w32, fi -> w8) -> w40_to_w64 w32 w8
-
- | BS.length chunk == 1 ->
- let a = BS.index chunk 0
- t = bech32_char ((a .&. 0b11111000) `B.shiftR` 3)
- u = bech32_char ((a .&. 0b00000111) `B.shiftL` 2)
- in BSB.word8 t <> BSB.word8 u
-
- | BS.length chunk == 2 ->
- let a = BS.index chunk 0
- b = BS.index chunk 1
- t = bech32_char ((a .&. 0b11111000) `B.shiftR` 3)
- u = bech32_char $
- ((a .&. 0b00000111) `B.shiftL` 2)
- .|. ((b .&. 0b11000000) `B.shiftR` 6)
- v = bech32_char ((b .&. 0b00111110) `B.shiftR` 1)
- w = bech32_char ((b .&. 0b00000001) `B.shiftL` 4)
- in BSB.word8 t <> BSB.word8 u <> BSB.word8 v <> BSB.word8 w
-
- | BS.length chunk == 3 ->
- let a = BS.index chunk 0
- b = BS.index chunk 1
- c = BS.index chunk 2
- t = bech32_char ((a .&. 0b11111000) `B.shiftR` 3)
- u = bech32_char $
- ((a .&. 0b00000111) `B.shiftL` 2)
- .|. ((b .&. 0b11000000) `B.shiftR` 6)
- v = bech32_char ((b .&. 0b00111110) `B.shiftR` 1)
- w = bech32_char $
- ((b .&. 0b00000001) `B.shiftL` 4)
- .|. ((c .&. 0b11110000) `B.shiftR` 4)
- x = bech32_char ((c .&. 0b00001111) `B.shiftL` 1)
- in BSB.word8 t <> BSB.word8 u <> BSB.word8 v <> BSB.word8 w
- <> BSB.word8 x
-
- | BS.length chunk == 4 ->
- let a = BS.index chunk 0
- b = BS.index chunk 1
- c = BS.index chunk 2
- d = BS.index chunk 3
- t = bech32_char ((a .&. 0b11111000) `B.shiftR` 3)
- u = bech32_char $
- ((a .&. 0b00000111) `B.shiftL` 2)
- .|. ((b .&. 0b11000000) `B.shiftR` 6)
- v = bech32_char ((b .&. 0b00111110) `B.shiftR` 1)
- w = bech32_char $
- ((b .&. 0b00000001) `B.shiftL` 4)
- .|. ((c .&. 0b11110000) `B.shiftR` 4)
- x = bech32_char $
- ((c .&. 0b00001111) `B.shiftL` 1)
- .|. ((d .&. 0b10000000) `B.shiftR` 7)
- y = bech32_char ((d .&. 0b01111100) `B.shiftR` 2)
- z = bech32_char ((d .&. 0b00000011) `B.shiftL` 3)
- in BSB.word8 t <> BSB.word8 u <> BSB.word8 v <> BSB.word8 w
- <> BSB.word8 x <> BSB.word8 y <> BSB.word8 z
-
- | otherwise -> mempty
-
- _ -> case BS.unsnoc chunk of
- Nothing -> error "impossible, chunk length is 5"
- Just (word32be -> w32, fi -> w8) -> w40_to_w64 w32 w8 <> go etc
-
--- naive base32 -> word5
-as_w5s :: BS.ByteString -> BS.ByteString
-as_w5s bs = BS.map f bs where
- f b = case BS.elemIndex (fi b) bech32_charset of
- Nothing -> error "ppad-bech32 (as_w5s): input not bech32-encoded"
- Just w -> fi w
-
--- naive word5 -> bech32
-as_bech32 :: BS.ByteString -> BS.ByteString
-as_bech32 bs = BS.map f bs where
- f b = BS.index bech32_charset (fi b)
-
-valid_hrp :: BS.ByteString -> Bool
-valid_hrp hrp
- | l == 0 || l > 83 = False
- | otherwise = BS.all (\b -> (b > 32) && (b < 127)) hrp
- where
- l = BS.length hrp
-
-bech32_polymod :: BS.ByteString -> Word32
-bech32_polymod = BS.foldl' alg 1 where
- generator = PA.primArrayFromListN 5
- [0x3b6a57b2, 0x26508e6d, 0x1ea119fa, 0x3d4233dd, 0x2a1462b3]
-
- alg !chk v =
- let !b = chk `B.shiftR` 25
- c = (chk .&. 0x1ffffff) `B.shiftL` 5 `B.xor` fi v
- in loop_gen 0 b c
-
- loop_gen i b !chk
- | i > 4 = chk
- | otherwise =
- let sor | B.testBit (b `B.shiftR` i) 0 = PA.indexPrimArray generator i
- | otherwise = 0
- in loop_gen (succ i) b (chk `B.xor` sor)
-
-bech32_hrp_expand :: BS.ByteString -> BS.ByteString
-bech32_hrp_expand bs = toStrict
- $ BSB.byteString (BS.map (`B.shiftR` 5) bs)
- <> BSB.word8 0
- <> BSB.byteString (BS.map (.&. 0b11111) bs)
-
-bech32_verify_checksum :: BS.ByteString -> BS.ByteString -> Bool
-bech32_verify_checksum hrp dat =
- let bs = bech32_hrp_expand hrp <> dat
- in bech32_polymod bs == 1
-
-bech32_create_checksum :: BS.ByteString -> BS.ByteString -> BS.ByteString
-bech32_create_checksum hrp dat =
- let pre = bech32_hrp_expand hrp <> dat
- pay = toStrict $
- BSB.byteString pre
- <> BSB.byteString "\NUL\NUL\NUL\NUL\NUL\NUL"
- pm = bech32_polymod pay `B.xor` 1
-
- code i = (fi (pm `B.shiftR` fi i) .&. 0b11111)
+verify_checksum :: BS.ByteString -> BS.ByteString -> Bool
+verify_checksum = B32.verify_checksum Bech32
- in BS.map code "\EM\DC4\SI\n\ENQ\NUL" -- BS.pack [25, 20, 15, 10, 5, 0]
+create_checksum :: BS.ByteString -> BS.ByteString -> BS.ByteString
+create_checksum = B32.create_checksum Bech32
--- base255 -> bech32
+-- base255 -> bech32m
encode :: BS.ByteString -> BS.ByteString -> Maybe BS.ByteString
-encode hrp (base32 -> dat) = do
- guard (valid_hrp hrp)
- let check = bech32_create_checksum hrp (as_w5s dat)
+encode hrp (B32.encode -> dat) = do
+ guard (B32.valid_hrp hrp)
+ let check = create_checksum hrp (B32.as_word5 dat)
res = toStrict $
BSB.byteString hrp
<> BSB.word8 49 -- 1
<> BSB.byteString dat
- <> BSB.byteString (as_bech32 check)
+ <> BSB.byteString (B32.as_bech32 check)
guard (BS.length res < 91)
pure res
diff --git a/lib/Data/ByteString/Bech32m.hs b/lib/Data/ByteString/Bech32m.hs
@@ -0,0 +1,39 @@
+{-# LANGUAGE ViewPatterns #-}
+
+module Data.ByteString.Bech32m (
+ encode
+ , verify_checksum
+ ) where
+
+import Control.Monad (guard)
+import qualified Data.ByteString as BS
+import qualified Data.ByteString.Base32 as B32
+import Data.ByteString.Base32 (Encoding(..))
+import qualified Data.ByteString.Builder as BSB
+import qualified Data.ByteString.Builder.Extra as BE
+
+-- realization for small builders
+toStrict :: BSB.Builder -> BS.ByteString
+toStrict = BS.toStrict
+ . BE.toLazyByteStringWith (BE.safeStrategy 128 BE.smallChunkSize) mempty
+
+verify_checksum :: BS.ByteString -> BS.ByteString -> Bool
+verify_checksum = B32.verify_checksum Bech32m
+
+-- XX no need for this to be here
+create_checksum :: BS.ByteString -> BS.ByteString -> BS.ByteString
+create_checksum = B32.create_checksum Bech32m
+
+-- base255 -> bech32m
+encode :: BS.ByteString -> BS.ByteString -> Maybe BS.ByteString
+encode hrp (B32.encode -> dat) = do
+ guard (B32.valid_hrp hrp)
+ let check = create_checksum hrp (B32.as_word5 dat)
+ res = toStrict $
+ BSB.byteString hrp
+ <> BSB.word8 49 -- 1
+ <> BSB.byteString dat
+ <> BSB.byteString (B32.as_bech32 check)
+ guard (BS.length res < 91)
+ pure res
+
diff --git a/ppad-bech32.cabal b/ppad-bech32.cabal
@@ -24,7 +24,9 @@ library
ghc-options:
-Wall
exposed-modules:
- Data.ByteString.Bech32
+ Data.ByteString.Base32
+ , Data.ByteString.Bech32
+ , Data.ByteString.Bech32m
build-depends:
base >= 4.9 && < 5
, bytestring >= 0.9 && < 0.13