commit c8f5bf01e0c8dc960daec9954314031a2591581e
parent 0a9afb79b09179cbf0ad3f7929ba82a299c56be0
Author: Jared Tobin <jared@jtobin.io>
Date: Fri, 3 Jan 2025 11:45:39 -0330
lib: base32 decoding
Diffstat:
1 file changed, 155 insertions(+), 26 deletions(-)
diff --git a/lib/Data/ByteString/Base32.hs b/lib/Data/ByteString/Base32.hs
@@ -1,14 +1,18 @@
{-# OPTIONS_HADDOCK hide, prune #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE BinaryLiterals #-}
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ViewPatterns #-}
module Data.ByteString.Base32 (
encode
+ , decode
, as_word5
, as_base32
+ -- XX put this in another module
-- not actually base32-related, but convenient to put here
, Encoding(..)
, create_checksum
@@ -16,6 +20,7 @@ module Data.ByteString.Base32 (
, valid_hrp
) where
+import Control.Monad (guard)
import Data.Bits ((.|.), (.&.))
import qualified Data.Bits as B
import qualified Data.ByteString as BS
@@ -23,8 +28,9 @@ import qualified Data.ByteString.Builder as BSB
import qualified Data.ByteString.Builder.Extra as BE
import qualified Data.ByteString.Unsafe as BU
import qualified Data.Primitive.PrimArray as PA
-import Data.Word (Word32)
+import Data.Word (Word8, Word32, Word64)
+-- XX move to another module
_BECH32M_CONST :: Word32
_BECH32M_CONST = 0x2bc830a3
@@ -50,6 +56,37 @@ bech32_charset :: BS.ByteString
bech32_charset = "qpzry9x8gf2tvdw0s3jn54khce6mua7l"
-- adapted from emilypi's 'base32' library
+arrange :: Word32 -> Word32 -> BSB.Builder
+arrange w32 w8 =
+ let mask = 0b00011111 -- low 5-bit mask
+ bech32_char = fi . BS.index bech32_charset . fi -- word5 -> bech32
+
+ -- split 40 bits into 8 w5's
+ w5_0 = mask .&. (w32 `B.shiftR` 27) -- highest 5 bits
+ w5_1 = mask .&. (w32 `B.shiftR` 22)
+ w5_2 = mask .&. (w32 `B.shiftR` 17)
+ w5_3 = mask .&. (w32 `B.shiftR` 12)
+ w5_4 = mask .&. (w32 `B.shiftR` 07)
+ w5_5 = mask .&. (w32 `B.shiftR` 02)
+ -- combine lowest 2 bits of w32 with highest 3 bits of w8
+ w5_6 = mask .&. (w32 `B.shiftL` 03 .|. w8 `B.shiftR` 05)
+ -- lowest 5 bits of w8
+ w5_7 = mask .&. w8
+
+ -- get (w8) bech32 char for each w5, pack all into little-endian w64
+ !w64 = bech32_char w5_0
+ .|. bech32_char w5_1 `B.shiftL` 8
+ .|. bech32_char w5_2 `B.shiftL` 16
+ .|. bech32_char w5_3 `B.shiftL` 24
+ .|. bech32_char w5_4 `B.shiftL` 32
+ .|. bech32_char w5_5 `B.shiftL` 40
+ .|. bech32_char w5_6 `B.shiftL` 48
+ .|. bech32_char w5_7 `B.shiftL` 56
+
+ in BSB.word64LE w64
+{-# INLINE arrange #-}
+
+-- adapted from emilypi's 'base32' library
encode :: BS.ByteString -> BS.ByteString
encode dat = toStrict (go dat) where
bech32_char = fi . BS.index bech32_charset . fi
@@ -144,32 +181,124 @@ encode dat = toStrict (go dat) where
Nothing -> error "impossible, chunk length is 5"
Just (word32be -> w32, fi -> w8) -> arrange w32 w8 <> go etc
--- adapted from emilypi's 'base32' library
-arrange :: Word32 -> Word32 -> BSB.Builder
-arrange w32 w8 =
- let mask = 0b00011111
- bech32_char = fi . BS.index bech32_charset . fi
-
- w8_0 = bech32_char (mask .&. (w32 `B.shiftR` 27))
- w8_1 = bech32_char (mask .&. (w32 `B.shiftR` 22))
- w8_2 = bech32_char (mask .&. (w32 `B.shiftR` 17))
- w8_3 = bech32_char (mask .&. (w32 `B.shiftR` 12))
- w8_4 = bech32_char (mask .&. (w32 `B.shiftR` 07))
- w8_5 = bech32_char (mask .&. (w32 `B.shiftR` 02))
- w8_6 = bech32_char (mask .&. (w32 `B.shiftL` 03 .|. w8 `B.shiftR` 05))
- w8_7 = bech32_char (mask .&. w8)
-
- !w64 = w8_0
- .|. w8_1 `B.shiftL` 8
- .|. w8_2 `B.shiftL` 16
- .|. w8_3 `B.shiftL` 24
- .|. w8_4 `B.shiftL` 32
- .|. w8_5 `B.shiftL` 40
- .|. w8_6 `B.shiftL` 48
- .|. w8_7 `B.shiftL` 56
+decode
+ :: BS.ByteString -- ^ base32-encoded bytestring
+ -> Maybe BS.ByteString -- ^ base256-encoded bytestring
+decode = handle . go mempty where
+ handle = \case
+ Nothing -> Nothing
+ Just s -> Just (toStrict s)
+
+ go acc bs
+ | BS.length bs < 8 = do
+ fin <- finalize bs
+ pure (acc <> fin)
+ | otherwise = case BS.splitAt 8 bs of
+ (chunk, etc) -> do
+ res <- decode_chunk chunk
+ go (acc <> res) etc
+
+finalize :: BS.ByteString -> Maybe BSB.Builder
+finalize bs
+ | l == 0 = Just mempty
+ | otherwise = do
+ guard (l >= 2)
+ w5_0 <- word5 0
+ w5_1 <- word5 1
+ let w8_0 = fi w5_0 `B.shiftL` 3
+ .|. fi w5_1 `B.shiftR` 2
+
+ -- https://datatracker.ietf.org/doc/html/rfc4648#section-6
+ if | l == 2 -> do -- 2 w5's, need 1 w8
+ pure (BSB.word8 w8_0)
+
+ | l == 4 -> do -- 4 w5's, need 2 w8's
+ w5_2 <- word5 2
+ w5_3 <- word5 3
+ let w8_1 :: Word8
+ w8_1 = fi w5_1 `B.shiftL` 6
+ .|. fi w5_2 `B.shiftL` 1
+ .|. fi w5_3 `B.shiftR` 4
+
+ w16 = fi w8_1
+ .|. fi w8_0 `B.shiftL` 8
+
+ pure (BSB.word16LE w16)
+
+ | l == 5 -> do -- 5 w5's, need 3 w8's
+ w5_2 <- word5 2
+ w5_3 <- word5 3
+ w5_4 <- word5 4
+ let w8_1, w8_2 :: Word8
+ w8_1 = fi w5_1 `B.shiftL` 6
+ .|. fi w5_2 `B.shiftL` 1
+ .|. fi w5_3 `B.shiftR` 4
+ w8_2 = fi w5_3 `B.shiftL` 4
+ .|. fi w5_4 `B.shiftR` 1
+
+ w16 = fi w8_1
+ .|. fi w8_0 `B.shiftL` 8
+
+ pure (BSB.word16LE w16 <> BSB.word8 w8_2)
+
+ | l == 7 -> do -- 7 w5's, need 4 w8's
+ w5_2 <- word5 2
+ w5_3 <- word5 3
+ w5_4 <- word5 4
+ w5_5 <- word5 5
+ w5_6 <- word5 6
+ let w8_1, w8_2, w8_3 :: Word8
+ w8_1 = fi w5_1 `B.shiftL` 6
+ .|. fi w5_2 `B.shiftL` 1
+ .|. fi w5_3 `B.shiftR` 4
+ w8_2 = fi w5_3 `B.shiftL` 4
+ .|. fi w5_4 `B.shiftR` 1
+ w8_3 = fi w5_4 `B.shiftL` 7
+ .|. fi w5_5 `B.shiftL` 2
+ .|. fi w5_6 `B.shiftR` 3
+
+ w32 = fi w8_3
+ .|. fi w8_2 `B.shiftL` 8
+ .|. fi w8_2 `B.shiftL` 16
+ .|. fi w8_1 `B.shiftL` 24
+
+ pure (BSB.word32LE w32)
+
+ | otherwise -> Nothing
- in BSB.word64LE w64
-{-# INLINE arrange #-}
+ where
+ l = BS.length bs
+ word5 i = BS.elemIndex (fi (BU.unsafeIndex bs i)) bech32_charset
+
+-- length 8 guaranteed
+decode_chunk :: BS.ByteString -> Maybe BSB.Builder
+decode_chunk bs = do
+ let word5 i = BS.elemIndex (fi (BU.unsafeIndex bs i)) bech32_charset
+
+ w5_0 <- word5 0
+ w5_1 <- word5 1
+ w5_2 <- word5 2
+ w5_3 <- word5 3
+ w5_4 <- word5 4
+ w5_5 <- word5 5
+ w5_6 <- word5 6
+ w5_7 <- word5 7
+
+ let w40 :: Word64
+ w40 = fi w5_0 `B.shiftL` 35
+ .|. fi w5_1 `B.shiftL` 30
+ .|. fi w5_2 `B.shiftL` 25
+ .|. fi w5_3 `B.shiftL` 20
+ .|. fi w5_4 `B.shiftL` 15
+ .|. fi w5_5 `B.shiftL` 10
+ .|. fi w5_6 `B.shiftL` 05
+ .|. fi w5_7
+ w32 = fi (w40 `B.shiftR` 8) :: Word32
+ w8 = fi (0b11111111 .&. w40) :: Word8
+
+ pure $ BSB.word32BE w32 <> BSB.word8 w8
+
+-- XX move all of the below to another module
-- naive base32 -> word5
as_word5 :: BS.ByteString -> BS.ByteString