bech32

Pure Haskell bech32, bech32m encoding/decoding (docs.ppad.tech/bech32).
git clone git://git.ppad.tech/bech32.git
Log | Files | Refs | README | LICENSE

commit 06d049b967a75f1d0dc8d96af22cbdbb2325c617
parent 8d7b99217b929ed6c379dc54f94ec59b7fe9b6cf
Author: Jared Tobin <jared@jtobin.io>
Date:   Sat, 16 May 2026 19:33:24 -0230

lib: refactor bech32 internal helpers

Rewrites 'as_word5', 'as_base32', 'hrp_expand', and 'create_checksum'
in 'Data.ByteString.Bech32.Internal' to use direct buffer allocation
via 'BI.unsafeCreate' / 'BI.mallocByteString'.  Drops the
bytestring 'Builder' + 'toStrict' pattern from each.

'as_word5' and 'as_base32' now share the rodata tables exported by
'Data.ByteString.Base32.Internal', replacing the duplicated
'bech32_charset' literal and the 32-arm 'word5' case-lambda.  The
table-based 'as_word5' uses the same OR-folded validity check as
'Base32.decode'.

'create_checksum' folds the previous '"\NUL\NUL\NUL\NUL\NUL\NUL"'
padding (which had embedded NULs and so couldn't reach static rodata)
into a single shared 'zero6' constant, and writes the 6-byte
checksum directly into a 'BI.unsafeCreate' buffer rather than
'BS.map' over a six-shift-amount literal.

'polymod', 'valid_hrp', 'verify', and 'Encoding' are unchanged.

Diffstat:
Mlib/Data/ByteString/Bech32/Internal.hs | 156+++++++++++++++++++++++++++++++++++++++++---------------------------------------
1 file changed, 80 insertions(+), 76 deletions(-)

diff --git a/lib/Data/ByteString/Bech32/Internal.hs b/lib/Data/ByteString/Bech32/Internal.hs @@ -1,8 +1,6 @@ {-# OPTIONS_HADDOCK hide, prune #-} {-# LANGUAGE BangPatterns #-} -{-# LANGUAGE BinaryLiterals #-} {-# LANGUAGE LambdaCase #-} -{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ViewPatterns #-} module Data.ByteString.Bech32.Internal ( @@ -14,80 +12,65 @@ module Data.ByteString.Bech32.Internal ( , valid_hrp ) where -import Data.Bits ((.&.)) +import Data.Bits ((.&.), (.|.)) import qualified Data.Bits as B import qualified Data.ByteString as BS -import qualified Data.ByteString.Builder as BSB -import qualified Data.ByteString.Builder.Extra as BE +import Data.ByteString.Base32.Internal (enc_tab, dec_tab) import qualified Data.ByteString.Internal as BI import qualified Data.ByteString.Unsafe as BU import Data.Word (Word8, Word32) +import Foreign.ForeignPtr (withForeignPtr) +import Foreign.Ptr (Ptr, plusPtr) +import Foreign.Storable (peekElemOff, pokeElemOff) +import System.IO.Unsafe (unsafeDupablePerformIO) fi :: (Integral a, Num b) => a -> b fi = fromIntegral {-# INLINE fi #-} --- realization for small builders -toStrict :: BSB.Builder -> BS.ByteString -toStrict = BS.toStrict - . BE.toLazyByteStringWith (BE.safeStrategy 128 BE.smallChunkSize) mempty -{-# INLINE toStrict #-} - _BECH32M_CONST :: Word32 _BECH32M_CONST = 0x2bc830a3 -bech32_charset :: BS.ByteString -bech32_charset = "qpzry9x8gf2tvdw0s3jn54khce6mua7l" - -word5 :: Word8 -> Maybe Word8 -word5 = \case - 113 -> pure $! 00 -- 'q' - 112 -> pure $! 01 -- 'p' - 122 -> pure $! 02 -- 'z' - 114 -> pure $! 03 -- 'r' - 121 -> pure $! 04 -- 'y' - 57 -> pure $! 05 -- '9' - 120 -> pure $! 06 -- 'x' - 56 -> pure $! 07 -- '8' - 103 -> pure $! 08 -- 'g' - 102 -> pure $! 09 -- 'f' - 50 -> pure $! 10 -- '2' - 116 -> pure $! 11 -- 't' - 118 -> pure $! 12 -- 'v' - 100 -> pure $! 13 -- 'd' - 119 -> pure $! 14 -- 'w' - 48 -> pure $! 15 -- '0' - 115 -> pure $! 16 -- 's' - 51 -> pure $! 17 -- '3' - 106 -> pure $! 18 -- 'j' - 110 -> pure $! 19 -- 'n' - 53 -> pure $! 20 -- '5' - 52 -> pure $! 21 -- '4' - 107 -> pure $! 22 -- 'k' - 104 -> pure $! 23 -- 'h' - 99 -> pure $! 24 -- 'c' - 101 -> pure $! 25 -- 'e' - 54 -> pure $! 26 -- '6' - 109 -> pure $! 27 -- 'm' - 117 -> pure $! 28 -- 'u' - 97 -> pure $! 29 -- 'a' - 55 -> pure $! 30 -- '7' - 108 -> pure $! 31 -- 'l' - _ -> Nothing -{-# INLINE word5 #-} - --- base32 -> word5 +-- | Translate base32 bytestring to its 5-bit-value bytestring. Each +-- input byte is looked up in 'dec_tab'; if any byte is not a valid +-- bech32 char, returns 'Nothing'. as_word5 :: BS.ByteString -> Maybe BS.ByteString -as_word5 = go mempty where - go acc bs = case BS.uncons bs of - Nothing -> pure (toStrict acc) - Just (h, t) -> do - w5 <- word5 (fi h) - go (acc <> BSB.word8 w5) t - --- word5 -> base32 +as_word5 (BI.PS sfp soff l) = case dec_tab of + BI.PS tfp toff _ -> unsafeDupablePerformIO $ do + fp <- BI.mallocByteString l + ok <- withForeignPtr fp $ \dst -> + withForeignPtr sfp $ \sp0 -> + withForeignPtr tfp $ \tp0 -> do + let !sp = sp0 `plusPtr` soff :: Ptr Word8 + !tp = tp0 `plusPtr` toff :: Ptr Word8 + loop !i !acc + | i == l = pure $! acc .&. 0x40 == 0 + | otherwise = do + c <- peekElemOff sp i + n <- peekElemOff tp (fi c) + pokeElemOff dst i (n .&. 0x1f) + loop (i + 1) (acc .|. n) + loop 0 0 + pure $! if ok then Just (BI.PS fp 0 l) else Nothing + +-- | Translate a 5-bit-value bytestring to its bech32 base32 +-- bytestring. as_base32 :: BS.ByteString -> BS.ByteString -as_base32 = BS.map (BU.unsafeIndex bech32_charset . fi) +as_base32 (BI.PS sfp soff l) = case enc_tab of + BI.PS tfp toff _ -> + BI.unsafeCreate l $ \dst -> + withForeignPtr sfp $ \sp0 -> + withForeignPtr tfp $ \tp0 -> do + let !sp = sp0 `plusPtr` soff :: Ptr Word8 + !tp = tp0 `plusPtr` toff :: Ptr Word8 + loop !i + | i == l = pure () + | otherwise = do + v <- peekElemOff sp i + c <- peekElemOff tp (fi v) + pokeElemOff dst i c + loop (i + 1) + loop 0 polymod :: BS.ByteString -> Word32 polymod = BS.foldl' alg 1 where @@ -117,29 +100,51 @@ valid_hrp hrp@(BI.PS _ _ l) | l == 0 || l > 83 = False | otherwise = BS.all (\b -> (b > 32) && (b < 127)) hrp +-- | Build the bech32 HRP expansion: high-5-bits of each HRP byte, +-- then a single 0, then low-5-bits of each HRP byte. hrp_expand :: BS.ByteString -> BS.ByteString -hrp_expand bs = toStrict - $ BSB.byteString (BS.map (`B.shiftR` 5) bs) - <> BSB.word8 0 - <> BSB.byteString (BS.map (.&. 0b11111) bs) +hrp_expand (BI.PS sfp soff l) = + BI.unsafeCreate (2 * l + 1) $ \dst -> + withForeignPtr sfp $ \sp0 -> do + let !sp = sp0 `plusPtr` soff :: Ptr Word8 + loop_hi !i + | i == l = pure () + | otherwise = do + c <- peekElemOff sp i + pokeElemOff dst i (c `B.shiftR` 5) + loop_hi (i + 1) + loop_lo !i + | i == l = pure () + | otherwise = do + c <- peekElemOff sp i + pokeElemOff dst (l + 1 + i) (c .&. 0x1f) + loop_lo (i + 1) + loop_hi 0 + pokeElemOff dst l (0 :: Word8) + loop_lo 0 data Encoding = Bech32 | Bech32m -create_checksum :: Encoding -> BS.ByteString -> BS.ByteString -> BS.ByteString +zero6 :: BS.ByteString +zero6 = BS.replicate 6 0 +{-# NOINLINE zero6 #-} + +create_checksum + :: Encoding -> BS.ByteString -> BS.ByteString -> BS.ByteString create_checksum enc hrp dat = - let pre = hrp_expand hrp <> dat - pay = toStrict $ - BSB.byteString pre - <> BSB.byteString "\NUL\NUL\NUL\NUL\NUL\NUL" - pm = polymod pay `B.xor` case enc of + let !pay = BS.concat [hrp_expand hrp, dat, zero6] + !pm = polymod pay `B.xor` case enc of Bech32 -> 1 Bech32m -> _BECH32M_CONST - - code i = (fi (pm `B.shiftR` fi i) .&. 0b11111) - - in BS.map code "\EM\DC4\SI\n\ENQ\NUL" -- BS.pack [25, 20, 15, 10, 5, 0] + in BI.unsafeCreate 6 $ \dst -> do + pokeElemOff dst 0 (fi (pm `B.shiftR` 25) .&. 0x1f :: Word8) + pokeElemOff dst 1 (fi (pm `B.shiftR` 20) .&. 0x1f :: Word8) + pokeElemOff dst 2 (fi (pm `B.shiftR` 15) .&. 0x1f :: Word8) + pokeElemOff dst 3 (fi (pm `B.shiftR` 10) .&. 0x1f :: Word8) + pokeElemOff dst 4 (fi (pm `B.shiftR` 5) .&. 0x1f :: Word8) + pokeElemOff dst 5 (fi pm .&. 0x1f :: Word8) verify :: Encoding -> BS.ByteString -> Bool verify enc b32 = case BS.elemIndexEnd 0x31 b32 of @@ -154,4 +159,3 @@ verify enc b32 = case BS.elemIndexEnd 0x31 b32 of in polymod bs == case enc of Bech32 -> 1 Bech32m -> _BECH32M_CONST -