bech32

Pure Haskell bech32, bech32m encodings (docs.ppad.tech/bech32).
git clone git://git.ppad.tech/bech32.git
Log | Files | Refs | README | LICENSE

commit c8f5bf01e0c8dc960daec9954314031a2591581e
parent 0a9afb79b09179cbf0ad3f7929ba82a299c56be0
Author: Jared Tobin <jared@jtobin.io>
Date:   Fri,  3 Jan 2025 11:45:39 -0330

lib: base32 decoding

Diffstat:
Mlib/Data/ByteString/Base32.hs | 181+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------
1 file changed, 155 insertions(+), 26 deletions(-)

diff --git a/lib/Data/ByteString/Base32.hs b/lib/Data/ByteString/Base32.hs @@ -1,14 +1,18 @@ {-# OPTIONS_HADDOCK hide, prune #-} {-# LANGUAGE BangPatterns #-} {-# LANGUAGE BinaryLiterals #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE MultiWayIf #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ViewPatterns #-} module Data.ByteString.Base32 ( encode + , decode , as_word5 , as_base32 + -- XX put this in another module -- not actually base32-related, but convenient to put here , Encoding(..) , create_checksum @@ -16,6 +20,7 @@ module Data.ByteString.Base32 ( , valid_hrp ) where +import Control.Monad (guard) import Data.Bits ((.|.), (.&.)) import qualified Data.Bits as B import qualified Data.ByteString as BS @@ -23,8 +28,9 @@ import qualified Data.ByteString.Builder as BSB import qualified Data.ByteString.Builder.Extra as BE import qualified Data.ByteString.Unsafe as BU import qualified Data.Primitive.PrimArray as PA -import Data.Word (Word32) +import Data.Word (Word8, Word32, Word64) +-- XX move to another module _BECH32M_CONST :: Word32 _BECH32M_CONST = 0x2bc830a3 @@ -50,6 +56,37 @@ bech32_charset :: BS.ByteString bech32_charset = "qpzry9x8gf2tvdw0s3jn54khce6mua7l" -- adapted from emilypi's 'base32' library +arrange :: Word32 -> Word32 -> BSB.Builder +arrange w32 w8 = + let mask = 0b00011111 -- low 5-bit mask + bech32_char = fi . BS.index bech32_charset . fi -- word5 -> bech32 + + -- split 40 bits into 8 w5's + w5_0 = mask .&. (w32 `B.shiftR` 27) -- highest 5 bits + w5_1 = mask .&. (w32 `B.shiftR` 22) + w5_2 = mask .&. (w32 `B.shiftR` 17) + w5_3 = mask .&. (w32 `B.shiftR` 12) + w5_4 = mask .&. (w32 `B.shiftR` 07) + w5_5 = mask .&. (w32 `B.shiftR` 02) + -- combine lowest 2 bits of w32 with highest 3 bits of w8 + w5_6 = mask .&. (w32 `B.shiftL` 03 .|. w8 `B.shiftR` 05) + -- lowest 5 bits of w8 + w5_7 = mask .&. w8 + + -- get (w8) bech32 char for each w5, pack all into little-endian w64 + !w64 = bech32_char w5_0 + .|. bech32_char w5_1 `B.shiftL` 8 + .|. bech32_char w5_2 `B.shiftL` 16 + .|. bech32_char w5_3 `B.shiftL` 24 + .|. bech32_char w5_4 `B.shiftL` 32 + .|. bech32_char w5_5 `B.shiftL` 40 + .|. bech32_char w5_6 `B.shiftL` 48 + .|. bech32_char w5_7 `B.shiftL` 56 + + in BSB.word64LE w64 +{-# INLINE arrange #-} + +-- adapted from emilypi's 'base32' library encode :: BS.ByteString -> BS.ByteString encode dat = toStrict (go dat) where bech32_char = fi . BS.index bech32_charset . fi @@ -144,32 +181,124 @@ encode dat = toStrict (go dat) where Nothing -> error "impossible, chunk length is 5" Just (word32be -> w32, fi -> w8) -> arrange w32 w8 <> go etc --- adapted from emilypi's 'base32' library -arrange :: Word32 -> Word32 -> BSB.Builder -arrange w32 w8 = - let mask = 0b00011111 - bech32_char = fi . BS.index bech32_charset . fi - - w8_0 = bech32_char (mask .&. (w32 `B.shiftR` 27)) - w8_1 = bech32_char (mask .&. (w32 `B.shiftR` 22)) - w8_2 = bech32_char (mask .&. (w32 `B.shiftR` 17)) - w8_3 = bech32_char (mask .&. (w32 `B.shiftR` 12)) - w8_4 = bech32_char (mask .&. (w32 `B.shiftR` 07)) - w8_5 = bech32_char (mask .&. (w32 `B.shiftR` 02)) - w8_6 = bech32_char (mask .&. (w32 `B.shiftL` 03 .|. w8 `B.shiftR` 05)) - w8_7 = bech32_char (mask .&. w8) - - !w64 = w8_0 - .|. w8_1 `B.shiftL` 8 - .|. w8_2 `B.shiftL` 16 - .|. w8_3 `B.shiftL` 24 - .|. w8_4 `B.shiftL` 32 - .|. w8_5 `B.shiftL` 40 - .|. w8_6 `B.shiftL` 48 - .|. w8_7 `B.shiftL` 56 +decode + :: BS.ByteString -- ^ base32-encoded bytestring + -> Maybe BS.ByteString -- ^ base256-encoded bytestring +decode = handle . go mempty where + handle = \case + Nothing -> Nothing + Just s -> Just (toStrict s) + + go acc bs + | BS.length bs < 8 = do + fin <- finalize bs + pure (acc <> fin) + | otherwise = case BS.splitAt 8 bs of + (chunk, etc) -> do + res <- decode_chunk chunk + go (acc <> res) etc + +finalize :: BS.ByteString -> Maybe BSB.Builder +finalize bs + | l == 0 = Just mempty + | otherwise = do + guard (l >= 2) + w5_0 <- word5 0 + w5_1 <- word5 1 + let w8_0 = fi w5_0 `B.shiftL` 3 + .|. fi w5_1 `B.shiftR` 2 + + -- https://datatracker.ietf.org/doc/html/rfc4648#section-6 + if | l == 2 -> do -- 2 w5's, need 1 w8 + pure (BSB.word8 w8_0) + + | l == 4 -> do -- 4 w5's, need 2 w8's + w5_2 <- word5 2 + w5_3 <- word5 3 + let w8_1 :: Word8 + w8_1 = fi w5_1 `B.shiftL` 6 + .|. fi w5_2 `B.shiftL` 1 + .|. fi w5_3 `B.shiftR` 4 + + w16 = fi w8_1 + .|. fi w8_0 `B.shiftL` 8 + + pure (BSB.word16LE w16) + + | l == 5 -> do -- 5 w5's, need 3 w8's + w5_2 <- word5 2 + w5_3 <- word5 3 + w5_4 <- word5 4 + let w8_1, w8_2 :: Word8 + w8_1 = fi w5_1 `B.shiftL` 6 + .|. fi w5_2 `B.shiftL` 1 + .|. fi w5_3 `B.shiftR` 4 + w8_2 = fi w5_3 `B.shiftL` 4 + .|. fi w5_4 `B.shiftR` 1 + + w16 = fi w8_1 + .|. fi w8_0 `B.shiftL` 8 + + pure (BSB.word16LE w16 <> BSB.word8 w8_2) + + | l == 7 -> do -- 7 w5's, need 4 w8's + w5_2 <- word5 2 + w5_3 <- word5 3 + w5_4 <- word5 4 + w5_5 <- word5 5 + w5_6 <- word5 6 + let w8_1, w8_2, w8_3 :: Word8 + w8_1 = fi w5_1 `B.shiftL` 6 + .|. fi w5_2 `B.shiftL` 1 + .|. fi w5_3 `B.shiftR` 4 + w8_2 = fi w5_3 `B.shiftL` 4 + .|. fi w5_4 `B.shiftR` 1 + w8_3 = fi w5_4 `B.shiftL` 7 + .|. fi w5_5 `B.shiftL` 2 + .|. fi w5_6 `B.shiftR` 3 + + w32 = fi w8_3 + .|. fi w8_2 `B.shiftL` 8 + .|. fi w8_2 `B.shiftL` 16 + .|. fi w8_1 `B.shiftL` 24 + + pure (BSB.word32LE w32) + + | otherwise -> Nothing - in BSB.word64LE w64 -{-# INLINE arrange #-} + where + l = BS.length bs + word5 i = BS.elemIndex (fi (BU.unsafeIndex bs i)) bech32_charset + +-- length 8 guaranteed +decode_chunk :: BS.ByteString -> Maybe BSB.Builder +decode_chunk bs = do + let word5 i = BS.elemIndex (fi (BU.unsafeIndex bs i)) bech32_charset + + w5_0 <- word5 0 + w5_1 <- word5 1 + w5_2 <- word5 2 + w5_3 <- word5 3 + w5_4 <- word5 4 + w5_5 <- word5 5 + w5_6 <- word5 6 + w5_7 <- word5 7 + + let w40 :: Word64 + w40 = fi w5_0 `B.shiftL` 35 + .|. fi w5_1 `B.shiftL` 30 + .|. fi w5_2 `B.shiftL` 25 + .|. fi w5_3 `B.shiftL` 20 + .|. fi w5_4 `B.shiftL` 15 + .|. fi w5_5 `B.shiftL` 10 + .|. fi w5_6 `B.shiftL` 05 + .|. fi w5_7 + w32 = fi (w40 `B.shiftR` 8) :: Word32 + w8 = fi (0b11111111 .&. w40) :: Word8 + + pure $ BSB.word32BE w32 <> BSB.word8 w8 + +-- XX move all of the below to another module -- naive base32 -> word5 as_word5 :: BS.ByteString -> BS.ByteString