bech32

Pure Haskell bech32, bech32m encoding/decoding (docs.ppad.tech/bech32).
git clone git://git.ppad.tech/bech32.git
Log | Files | Refs | README | LICENSE

Base32.hs (10196B)


      1 {-# OPTIONS_HADDOCK prune #-}
      2 {-# LANGUAGE BangPatterns #-}
      3 {-# LANGUAGE BinaryLiterals #-}
      4 {-# LANGUAGE LambdaCase #-}
      5 {-# LANGUAGE MultiWayIf #-}
      6 {-# LANGUAGE OverloadedStrings #-}
      7 {-# LANGUAGE ViewPatterns #-}
      8 
      9 -- |
     10 -- Module: Data.ByteString.Base32
     11 -- Copyright: (c) 2024 Jared Tobin
     12 -- License: MIT
     13 -- Maintainer: Jared Tobin <jared@ppad.tech>
     14 --
     15 -- Unpadded base32 encoding & decoding using the bech32 character set.
     16 
     17 -- this module is an adaptation of emilypi's 'base32' library
     18 
     19 module Data.ByteString.Base32 (
     20     -- * base32 encoding and decoding
     21     encode
     22   , decode
     23   ) where
     24 
     25 import Control.Monad (guard)
     26 import Data.Bits ((.|.), (.&.))
     27 import qualified Data.Bits as B
     28 import qualified Data.ByteString as BS
     29 import qualified Data.ByteString.Builder as BSB
     30 import qualified Data.ByteString.Builder.Extra as BE
     31 import qualified Data.ByteString.Internal as BI
     32 import qualified Data.ByteString.Unsafe as BU
     33 import Data.Word (Word8, Word32, Word64)
     34 
     35 fi :: (Integral a, Num b) => a -> b
     36 fi = fromIntegral
     37 {-# INLINE fi #-}
     38 
     39 word32be :: BS.ByteString -> Word32
     40 word32be s =
     41   (fi (s `BU.unsafeIndex` 0) `B.shiftL` 24) .|.
     42   (fi (s `BU.unsafeIndex` 1) `B.shiftL` 16) .|.
     43   (fi (s `BU.unsafeIndex` 2) `B.shiftL`  8) .|.
     44   (fi (s `BU.unsafeIndex` 3))
     45 {-# INLINE word32be #-}
     46 
     47 -- realization for small builders
     48 toStrict :: BSB.Builder -> BS.ByteString
     49 toStrict = BS.toStrict
     50   . BE.toLazyByteStringWith (BE.safeStrategy 128 BE.smallChunkSize) mempty
     51 {-# INLINE toStrict #-}
     52 
     53 bech32_charset :: BS.ByteString
     54 bech32_charset = "qpzry9x8gf2tvdw0s3jn54khce6mua7l"
     55 
     56 word5 :: Word8 -> Maybe Word8
     57 word5 w8 = fmap fi (BS.elemIndex w8 bech32_charset)
     58 
     59 arrange :: Word32 -> Word8 -> BSB.Builder
     60 arrange w32 w8 =
     61   let mask = 0b00011111                                 -- low 5-bit mask
     62       bech32_char = fi . BS.index bech32_charset . fi   -- word5 -> bech32
     63 
     64       -- split 40 bits into 8 w5's
     65       w5_0 = mask .&. (w32 `B.shiftR` 27) -- highest 5 bits
     66       w5_1 = mask .&. (w32 `B.shiftR` 22)
     67       w5_2 = mask .&. (w32 `B.shiftR` 17)
     68       w5_3 = mask .&. (w32 `B.shiftR` 12)
     69       w5_4 = mask .&. (w32 `B.shiftR` 07)
     70       w5_5 = mask .&. (w32 `B.shiftR` 02)
     71       -- combine lowest 2 bits of w32 with highest 3 bits of w8
     72       w5_6 = mask .&. (w32 `B.shiftL` 03 .|. fi w8 `B.shiftR` 05)
     73       -- lowest 5 bits of w8
     74       w5_7 = mask .&. fi w8
     75 
     76       -- get (w8) bech32 char for each w5, pack all into little-endian w64
     77       !w64 = bech32_char w5_0
     78          .|. bech32_char w5_1 `B.shiftL` 8
     79          .|. bech32_char w5_2 `B.shiftL` 16
     80          .|. bech32_char w5_3 `B.shiftL` 24
     81          .|. bech32_char w5_4 `B.shiftL` 32
     82          .|. bech32_char w5_5 `B.shiftL` 40
     83          .|. bech32_char w5_6 `B.shiftL` 48
     84          .|. bech32_char w5_7 `B.shiftL` 56
     85 
     86   in  BSB.word64LE w64
     87 {-# INLINE arrange #-}
     88 
     89 -- | Encode a base256-encoded 'ByteString' as a base32-encoded
     90 --   'ByteString', using the bech32 character set.
     91 --
     92 --   >>> encode "jtobin was here!"
     93 --   "df6x7cnfdcs8wctnyp5x2un9yy"
     94 encode
     95   :: BS.ByteString -- ^ base256-encoded bytestring
     96   -> BS.ByteString -- ^ base32-encoded bytestring
     97 encode dat = toStrict (go dat) where
     98   bech32_char = fi . BS.index bech32_charset . fi
     99 
    100   go bs@(BI.PS _ _ l)
    101     | l >= 5 = case BS.splitAt 5 bs of
    102         (chunk, etc) -> case BS.unsnoc chunk of
    103           Nothing -> error "impossible, chunk length is 5"
    104           Just (word32be -> w32, w8) -> arrange w32 w8 <> go etc
    105     | l == 0 = mempty
    106     | l == 1 =
    107         let a = BU.unsafeIndex bs 0
    108             t = bech32_char ((a .&. 0b11111000) `B.shiftR` 3)
    109             u = bech32_char ((a .&. 0b00000111) `B.shiftL` 2)
    110 
    111             !w16 = fi t
    112                .|. fi u `B.shiftL` 8
    113 
    114         in  BSB.word16LE w16
    115     | l == 2 =
    116         let a = BU.unsafeIndex bs 0
    117             b = BU.unsafeIndex bs 1
    118             t = bech32_char ((a .&. 0b11111000) `B.shiftR` 3)
    119             u = bech32_char $
    120                       ((a .&. 0b00000111) `B.shiftL` 2)
    121                   .|. ((b .&. 0b11000000) `B.shiftR` 6)
    122             v = bech32_char ((b .&. 0b00111110) `B.shiftR` 1)
    123             w = bech32_char ((b .&. 0b00000001) `B.shiftL` 4)
    124 
    125             !w32 = fi t
    126                .|. fi u `B.shiftL` 8
    127                .|. fi v `B.shiftL` 16
    128                .|. fi w `B.shiftL` 24
    129 
    130         in  BSB.word32LE w32
    131     | l == 3 =
    132         let a = BU.unsafeIndex bs 0
    133             b = BU.unsafeIndex bs 1
    134             c = BU.unsafeIndex bs 2
    135             t = bech32_char ((a .&. 0b11111000) `B.shiftR` 3)
    136             u = bech32_char $
    137                       ((a .&. 0b00000111) `B.shiftL` 2)
    138                   .|. ((b .&. 0b11000000) `B.shiftR` 6)
    139             v = bech32_char ((b .&. 0b00111110) `B.shiftR` 1)
    140             w = bech32_char $
    141                       ((b .&. 0b00000001) `B.shiftL` 4)
    142                   .|. ((c .&. 0b11110000) `B.shiftR` 4)
    143             x = bech32_char ((c .&. 0b00001111) `B.shiftL` 1)
    144 
    145             !w32 = fi t
    146                .|. fi u `B.shiftL` 8
    147                .|. fi v `B.shiftL` 16
    148                .|. fi w `B.shiftL` 24
    149 
    150         in  BSB.word32LE w32 <> BSB.word8 x
    151     | l == 4 =
    152         let a = BU.unsafeIndex bs 0
    153             b = BU.unsafeIndex bs 1
    154             c = BU.unsafeIndex bs 2
    155             d = BU.unsafeIndex bs 3
    156             t = bech32_char ((a .&. 0b11111000) `B.shiftR` 3)
    157             u = bech32_char $
    158                       ((a .&. 0b00000111) `B.shiftL` 2)
    159                   .|. ((b .&. 0b11000000) `B.shiftR` 6)
    160             v = bech32_char ((b .&. 0b00111110) `B.shiftR` 1)
    161             w = bech32_char $
    162                       ((b .&. 0b00000001) `B.shiftL` 4)
    163                   .|. ((c .&. 0b11110000) `B.shiftR` 4)
    164             x = bech32_char $
    165                       ((c .&. 0b00001111) `B.shiftL` 1)
    166                   .|. ((d .&. 0b10000000) `B.shiftR` 7)
    167             y = bech32_char ((d .&. 0b01111100) `B.shiftR` 2)
    168             z = bech32_char ((d .&. 0b00000011) `B.shiftL` 3)
    169 
    170             !w32 = fi t
    171                .|. fi u `B.shiftL` 8
    172                .|. fi v `B.shiftL` 16
    173                .|. fi w `B.shiftL` 24
    174 
    175             !w16 = fi x
    176                .|. fi y `B.shiftL` 8
    177 
    178         in  BSB.word32LE w32 <> BSB.word16LE w16 <> BSB.word8 z
    179 
    180     | otherwise =
    181         error "impossible"
    182 
    183 -- | Decode a 'ByteString', encoded as base32 using the bech32 character
    184 --   set, to a base256-encoded 'ByteString'.
    185 --
    186 --   >>> decode "df6x7cnfdcs8wctnyp5x2un9yy"
    187 --   Just "jtobin was here!"
    188 --   >>> decode "dfOx7cnfdcs8wctnyp5x2un9yy" -- s/6/O (non-bech32 character)
    189 --   Nothing
    190 decode
    191   :: BS.ByteString        -- ^ base32-encoded bytestring
    192   -> Maybe BS.ByteString  -- ^ base256-encoded bytestring
    193 decode = fmap toStrict . go mempty where
    194   go acc bs@(BI.PS _ _ l)
    195     | l < 8 = do
    196         fin <- finalize bs
    197         pure (acc <> fin)
    198     | otherwise = case BS.splitAt 8 bs of
    199         (chunk, etc) -> do
    200            res <- decode_chunk chunk
    201            go (acc <> res) etc
    202 
    203 finalize :: BS.ByteString -> Maybe BSB.Builder
    204 finalize bs@(BI.PS _ _ l)
    205   | l == 0 = Just mempty
    206   | otherwise = do
    207       guard (l >= 2)
    208       w5_0 <- word5 (BU.unsafeIndex bs 0)
    209       w5_1 <- word5 (BU.unsafeIndex bs 1)
    210       let w8_0 = w5_0 `B.shiftL` 3
    211              .|. w5_1 `B.shiftR` 2
    212 
    213       -- https://datatracker.ietf.org/doc/html/rfc4648#section-6
    214       if | l == 2 -> do -- 2 w5's, need 1 w8; 2 bits remain
    215              guard (w5_1 `B.shiftL` 6 == 0)
    216              pure (BSB.word8 w8_0)
    217 
    218          | l == 4 -> do -- 4 w5's, need 2 w8's; 4 bits remain
    219              w5_2 <- word5 (BU.unsafeIndex bs 2)
    220              w5_3 <- word5 (BU.unsafeIndex bs 3)
    221              let w8_1 = w5_1 `B.shiftL` 6
    222                     .|. w5_2 `B.shiftL` 1
    223                     .|. w5_3 `B.shiftR` 4
    224 
    225                  !w16 = fi w8_1
    226                     .|. fi w8_0 `B.shiftL` 8
    227 
    228              guard (w5_3 `B.shiftL` 4 == 0)
    229              pure (BSB.word16BE w16)
    230 
    231          | l == 5 -> do -- 5 w5's, need 3 w8's; 1 bit remains
    232              w5_2 <- word5 (BU.unsafeIndex bs 2)
    233              w5_3 <- word5 (BU.unsafeIndex bs 3)
    234              w5_4 <- word5 (BU.unsafeIndex bs 4)
    235              let w8_1 = w5_1 `B.shiftL` 6
    236                     .|. w5_2 `B.shiftL` 1
    237                     .|. w5_3 `B.shiftR` 4
    238                  w8_2 = w5_3 `B.shiftL` 4
    239                     .|. w5_4 `B.shiftR` 1
    240 
    241                  w16  = fi w8_1
    242                     .|. fi w8_0 `B.shiftL` 8
    243 
    244              guard (w5_4 `B.shiftL` 7 == 0)
    245              pure (BSB.word16BE w16 <> BSB.word8 w8_2)
    246 
    247          | l == 7 -> do -- 7 w5's, need 4 w8's; 3 bits remain
    248              w5_2 <- word5 (BU.unsafeIndex bs 2)
    249              w5_3 <- word5 (BU.unsafeIndex bs 3)
    250              w5_4 <- word5 (BU.unsafeIndex bs 4)
    251              w5_5 <- word5 (BU.unsafeIndex bs 5)
    252              w5_6 <- word5 (BU.unsafeIndex bs 6)
    253              let w8_1 = w5_1 `B.shiftL` 6
    254                     .|. w5_2 `B.shiftL` 1
    255                     .|. w5_3 `B.shiftR` 4
    256                  w8_2 = w5_3 `B.shiftL` 4
    257                     .|. w5_4 `B.shiftR` 1
    258                  w8_3 = w5_4 `B.shiftL` 7
    259                     .|. w5_5 `B.shiftL` 2
    260                     .|. w5_6 `B.shiftR` 3
    261 
    262                  w32  = fi w8_3
    263                     .|. fi w8_2 `B.shiftL` 8
    264                     .|. fi w8_1 `B.shiftL` 16
    265                     .|. fi w8_0 `B.shiftL` 24
    266 
    267              guard (w5_6 `B.shiftL` 5 == 0)
    268              pure (BSB.word32BE w32)
    269 
    270          | otherwise -> Nothing
    271 
    272 -- assumes length 8 input
    273 decode_chunk :: BS.ByteString -> Maybe BSB.Builder
    274 decode_chunk bs = do
    275   w5_0 <- word5 (BU.unsafeIndex bs 0)
    276   w5_1 <- word5 (BU.unsafeIndex bs 1)
    277   w5_2 <- word5 (BU.unsafeIndex bs 2)
    278   w5_3 <- word5 (BU.unsafeIndex bs 3)
    279   w5_4 <- word5 (BU.unsafeIndex bs 4)
    280   w5_5 <- word5 (BU.unsafeIndex bs 5)
    281   w5_6 <- word5 (BU.unsafeIndex bs 6)
    282   w5_7 <- word5 (BU.unsafeIndex bs 7)
    283 
    284   let w40 :: Word64
    285       !w40 = fi w5_0 `B.shiftL` 35
    286          .|. fi w5_1 `B.shiftL` 30
    287          .|. fi w5_2 `B.shiftL` 25
    288          .|. fi w5_3 `B.shiftL` 20
    289          .|. fi w5_4 `B.shiftL` 15
    290          .|. fi w5_5 `B.shiftL` 10
    291          .|. fi w5_6 `B.shiftL` 05
    292          .|. fi w5_7
    293       !w32 = fi (w40 `B.shiftR` 8)   :: Word32
    294       !w8  = fi (0b11111111 .&. w40) :: Word8
    295 
    296   pure $ BSB.word32BE w32 <> BSB.word8 w8
    297