bech32

Pure Haskell bech32, bech32m encoding/decoding (docs.ppad.tech/bech32).
git clone git://git.ppad.tech/bech32.git
Log | Files | Refs | README | LICENSE

Base32.hs (10826B)


      1 {-# OPTIONS_HADDOCK prune #-}
      2 {-# LANGUAGE BangPatterns #-}
      3 {-# LANGUAGE BinaryLiterals #-}
      4 {-# LANGUAGE LambdaCase #-}
      5 {-# LANGUAGE MultiWayIf #-}
      6 {-# LANGUAGE OverloadedStrings #-}
      7 {-# LANGUAGE ViewPatterns #-}
      8 
      9 -- |
     10 -- Module: Data.ByteString.Base32
     11 -- Copyright: (c) 2024 Jared Tobin
     12 -- License: MIT
     13 -- Maintainer: Jared Tobin <jared@ppad.tech>
     14 --
     15 -- Unpadded base32 encoding & decoding using the bech32 character set.
     16 
     17 -- this module is an adaptation of emilypi's 'base32' library
     18 
     19 module Data.ByteString.Base32 (
     20     -- * base32 encoding and decoding
     21     encode
     22   , decode
     23   ) where
     24 
     25 import Control.Monad (guard)
     26 import Data.Bits ((.|.), (.&.))
     27 import qualified Data.Bits as B
     28 import qualified Data.ByteString as BS
     29 import qualified Data.ByteString.Builder as BSB
     30 import qualified Data.ByteString.Builder.Extra as BE
     31 import qualified Data.ByteString.Internal as BI
     32 import qualified Data.ByteString.Unsafe as BU
     33 import Data.Word (Word8, Word32, Word64)
     34 
     35 fi :: (Integral a, Num b) => a -> b
     36 fi = fromIntegral
     37 {-# INLINE fi #-}
     38 
     39 word32be :: BS.ByteString -> Word32
     40 word32be s =
     41   (fi (s `BU.unsafeIndex` 0) `B.shiftL` 24) .|.
     42   (fi (s `BU.unsafeIndex` 1) `B.shiftL` 16) .|.
     43   (fi (s `BU.unsafeIndex` 2) `B.shiftL`  8) .|.
     44   (fi (s `BU.unsafeIndex` 3))
     45 {-# INLINE word32be #-}
     46 
     47 -- realization for small builders
     48 toStrict :: BSB.Builder -> BS.ByteString
     49 toStrict = BS.toStrict
     50   . BE.toLazyByteStringWith (BE.safeStrategy 128 BE.smallChunkSize) mempty
     51 {-# INLINE toStrict #-}
     52 
     53 bech32_charset :: BS.ByteString
     54 bech32_charset = "qpzry9x8gf2tvdw0s3jn54khce6mua7l"
     55 
     56 word5 :: Word8 -> Maybe Word8
     57 word5 = \case
     58   113 -> pure $! 0
     59   112 -> pure $! 1
     60   122 -> pure $! 2
     61   114 -> pure $! 3
     62   121 -> pure $! 4
     63   57  -> pure $! 5
     64   120 -> pure $! 6
     65   56  -> pure $! 7
     66   103 -> pure $! 8
     67   102 -> pure $! 9
     68   50  -> pure $! 10
     69   116 -> pure $! 11
     70   118 -> pure $! 12
     71   100 -> pure $! 13
     72   119 -> pure $! 14
     73   48  -> pure $! 15
     74   115 -> pure $! 16
     75   51  -> pure $! 17
     76   106 -> pure $! 18
     77   110 -> pure $! 19
     78   53  -> pure $! 20
     79   52  -> pure $! 21
     80   107 -> pure $! 22
     81   104 -> pure $! 23
     82   99  -> pure $! 24
     83   101 -> pure $! 25
     84   54  -> pure $! 26
     85   109 -> pure $! 27
     86   117 -> pure $! 28
     87   97  -> pure $! 29
     88   55  -> pure $! 30
     89   108 -> pure $! 31
     90   _   -> Nothing
     91 {-# INLINE word5 #-}
     92 
     93 arrange :: Word32 -> Word8 -> BSB.Builder
     94 arrange w32 w8 =
     95   let mask = 0b00011111                                 -- low 5-bit mask
     96       bech32_char = fi . BS.index bech32_charset . fi   -- word5 -> bech32
     97 
     98       -- split 40 bits into 8 w5's
     99       w5_0 = mask .&. (w32 `B.shiftR` 27) -- highest 5 bits
    100       w5_1 = mask .&. (w32 `B.shiftR` 22)
    101       w5_2 = mask .&. (w32 `B.shiftR` 17)
    102       w5_3 = mask .&. (w32 `B.shiftR` 12)
    103       w5_4 = mask .&. (w32 `B.shiftR` 07)
    104       w5_5 = mask .&. (w32 `B.shiftR` 02)
    105       -- combine lowest 2 bits of w32 with highest 3 bits of w8
    106       w5_6 = mask .&. (w32 `B.shiftL` 03 .|. fi w8 `B.shiftR` 05)
    107       -- lowest 5 bits of w8
    108       w5_7 = mask .&. fi w8
    109 
    110       -- get (w8) bech32 char for each w5, pack all into little-endian w64
    111       !w64 = bech32_char w5_0
    112          .|. bech32_char w5_1 `B.shiftL` 8
    113          .|. bech32_char w5_2 `B.shiftL` 16
    114          .|. bech32_char w5_3 `B.shiftL` 24
    115          .|. bech32_char w5_4 `B.shiftL` 32
    116          .|. bech32_char w5_5 `B.shiftL` 40
    117          .|. bech32_char w5_6 `B.shiftL` 48
    118          .|. bech32_char w5_7 `B.shiftL` 56
    119 
    120   in  BSB.word64LE w64
    121 {-# INLINE arrange #-}
    122 
    123 -- | Encode a base256-encoded 'ByteString' as a base32-encoded
    124 --   'ByteString', using the bech32 character set.
    125 --
    126 --   >>> encode "jtobin was here!"
    127 --   "df6x7cnfdcs8wctnyp5x2un9yy"
    128 encode
    129   :: BS.ByteString -- ^ base256-encoded bytestring
    130   -> BS.ByteString -- ^ base32-encoded bytestring
    131 encode dat = toStrict (go dat) where
    132   bech32_char = fi . BS.index bech32_charset . fi
    133 
    134   go bs@(BI.PS _ _ l)
    135     | l >= 5 = case BS.splitAt 5 bs of
    136         (chunk, etc) -> case BS.unsnoc chunk of
    137           Nothing -> error "impossible, chunk length is 5"
    138           Just (word32be -> w32, w8) -> arrange w32 w8 <> go etc
    139     | l == 0 = mempty
    140     | l == 1 =
    141         let a = BU.unsafeIndex bs 0
    142             t = bech32_char ((a .&. 0b11111000) `B.shiftR` 3)
    143             u = bech32_char ((a .&. 0b00000111) `B.shiftL` 2)
    144 
    145             !w16 = fi t
    146                .|. fi u `B.shiftL` 8
    147 
    148         in  BSB.word16LE w16
    149     | l == 2 =
    150         let a = BU.unsafeIndex bs 0
    151             b = BU.unsafeIndex bs 1
    152             t = bech32_char ((a .&. 0b11111000) `B.shiftR` 3)
    153             u = bech32_char $
    154                       ((a .&. 0b00000111) `B.shiftL` 2)
    155                   .|. ((b .&. 0b11000000) `B.shiftR` 6)
    156             v = bech32_char ((b .&. 0b00111110) `B.shiftR` 1)
    157             w = bech32_char ((b .&. 0b00000001) `B.shiftL` 4)
    158 
    159             !w32 = fi t
    160                .|. fi u `B.shiftL` 8
    161                .|. fi v `B.shiftL` 16
    162                .|. fi w `B.shiftL` 24
    163 
    164         in  BSB.word32LE w32
    165     | l == 3 =
    166         let a = BU.unsafeIndex bs 0
    167             b = BU.unsafeIndex bs 1
    168             c = BU.unsafeIndex bs 2
    169             t = bech32_char ((a .&. 0b11111000) `B.shiftR` 3)
    170             u = bech32_char $
    171                       ((a .&. 0b00000111) `B.shiftL` 2)
    172                   .|. ((b .&. 0b11000000) `B.shiftR` 6)
    173             v = bech32_char ((b .&. 0b00111110) `B.shiftR` 1)
    174             w = bech32_char $
    175                       ((b .&. 0b00000001) `B.shiftL` 4)
    176                   .|. ((c .&. 0b11110000) `B.shiftR` 4)
    177             x = bech32_char ((c .&. 0b00001111) `B.shiftL` 1)
    178 
    179             !w32 = fi t
    180                .|. fi u `B.shiftL` 8
    181                .|. fi v `B.shiftL` 16
    182                .|. fi w `B.shiftL` 24
    183 
    184         in  BSB.word32LE w32 <> BSB.word8 x
    185     | l == 4 =
    186         let a = BU.unsafeIndex bs 0
    187             b = BU.unsafeIndex bs 1
    188             c = BU.unsafeIndex bs 2
    189             d = BU.unsafeIndex bs 3
    190             t = bech32_char ((a .&. 0b11111000) `B.shiftR` 3)
    191             u = bech32_char $
    192                       ((a .&. 0b00000111) `B.shiftL` 2)
    193                   .|. ((b .&. 0b11000000) `B.shiftR` 6)
    194             v = bech32_char ((b .&. 0b00111110) `B.shiftR` 1)
    195             w = bech32_char $
    196                       ((b .&. 0b00000001) `B.shiftL` 4)
    197                   .|. ((c .&. 0b11110000) `B.shiftR` 4)
    198             x = bech32_char $
    199                       ((c .&. 0b00001111) `B.shiftL` 1)
    200                   .|. ((d .&. 0b10000000) `B.shiftR` 7)
    201             y = bech32_char ((d .&. 0b01111100) `B.shiftR` 2)
    202             z = bech32_char ((d .&. 0b00000011) `B.shiftL` 3)
    203 
    204             !w32 = fi t
    205                .|. fi u `B.shiftL` 8
    206                .|. fi v `B.shiftL` 16
    207                .|. fi w `B.shiftL` 24
    208 
    209             !w16 = fi x
    210                .|. fi y `B.shiftL` 8
    211 
    212         in  BSB.word32LE w32 <> BSB.word16LE w16 <> BSB.word8 z
    213 
    214     | otherwise =
    215         error "impossible"
    216 
    217 -- | Decode a 'ByteString', encoded as base32 using the bech32 character
    218 --   set, to a base256-encoded 'ByteString'.
    219 --
    220 --   >>> decode "df6x7cnfdcs8wctnyp5x2un9yy"
    221 --   Just "jtobin was here!"
    222 --   >>> decode "dfOx7cnfdcs8wctnyp5x2un9yy" -- s/6/O (non-bech32 character)
    223 --   Nothing
    224 decode
    225   :: BS.ByteString        -- ^ base32-encoded bytestring
    226   -> Maybe BS.ByteString  -- ^ base256-encoded bytestring
    227 decode = fmap toStrict . go mempty where
    228   go acc bs@(BI.PS _ _ l)
    229     | l < 8 = do
    230         fin <- finalize bs
    231         pure (acc <> fin)
    232     | otherwise = case BS.splitAt 8 bs of
    233         (chunk, etc) -> do
    234            res <- decode_chunk chunk
    235            go (acc <> res) etc
    236 
    237 finalize :: BS.ByteString -> Maybe BSB.Builder
    238 finalize bs@(BI.PS _ _ l)
    239   | l == 0 = Just mempty
    240   | otherwise = do
    241       guard (l >= 2)
    242       w5_0 <- word5 (BU.unsafeIndex bs 0)
    243       w5_1 <- word5 (BU.unsafeIndex bs 1)
    244       let w8_0 = w5_0 `B.shiftL` 3
    245              .|. w5_1 `B.shiftR` 2
    246 
    247       -- https://datatracker.ietf.org/doc/html/rfc4648#section-6
    248       if | l == 2 -> do -- 2 w5's, need 1 w8; 2 bits remain
    249              guard (w5_1 `B.shiftL` 6 == 0)
    250              pure (BSB.word8 w8_0)
    251 
    252          | l == 4 -> do -- 4 w5's, need 2 w8's; 4 bits remain
    253              w5_2 <- word5 (BU.unsafeIndex bs 2)
    254              w5_3 <- word5 (BU.unsafeIndex bs 3)
    255              let w8_1 = w5_1 `B.shiftL` 6
    256                     .|. w5_2 `B.shiftL` 1
    257                     .|. w5_3 `B.shiftR` 4
    258 
    259                  !w16 = fi w8_1
    260                     .|. fi w8_0 `B.shiftL` 8
    261 
    262              guard (w5_3 `B.shiftL` 4 == 0)
    263              pure (BSB.word16BE w16)
    264 
    265          | l == 5 -> do -- 5 w5's, need 3 w8's; 1 bit remains
    266              w5_2 <- word5 (BU.unsafeIndex bs 2)
    267              w5_3 <- word5 (BU.unsafeIndex bs 3)
    268              w5_4 <- word5 (BU.unsafeIndex bs 4)
    269              let w8_1 = w5_1 `B.shiftL` 6
    270                     .|. w5_2 `B.shiftL` 1
    271                     .|. w5_3 `B.shiftR` 4
    272                  w8_2 = w5_3 `B.shiftL` 4
    273                     .|. w5_4 `B.shiftR` 1
    274 
    275                  w16  = fi w8_1
    276                     .|. fi w8_0 `B.shiftL` 8
    277 
    278              guard (w5_4 `B.shiftL` 7 == 0)
    279              pure (BSB.word16BE w16 <> BSB.word8 w8_2)
    280 
    281          | l == 7 -> do -- 7 w5's, need 4 w8's; 3 bits remain
    282              w5_2 <- word5 (BU.unsafeIndex bs 2)
    283              w5_3 <- word5 (BU.unsafeIndex bs 3)
    284              w5_4 <- word5 (BU.unsafeIndex bs 4)
    285              w5_5 <- word5 (BU.unsafeIndex bs 5)
    286              w5_6 <- word5 (BU.unsafeIndex bs 6)
    287              let w8_1 = w5_1 `B.shiftL` 6
    288                     .|. w5_2 `B.shiftL` 1
    289                     .|. w5_3 `B.shiftR` 4
    290                  w8_2 = w5_3 `B.shiftL` 4
    291                     .|. w5_4 `B.shiftR` 1
    292                  w8_3 = w5_4 `B.shiftL` 7
    293                     .|. w5_5 `B.shiftL` 2
    294                     .|. w5_6 `B.shiftR` 3
    295 
    296                  w32  = fi w8_3
    297                     .|. fi w8_2 `B.shiftL` 8
    298                     .|. fi w8_1 `B.shiftL` 16
    299                     .|. fi w8_0 `B.shiftL` 24
    300 
    301              guard (w5_6 `B.shiftL` 5 == 0)
    302              pure (BSB.word32BE w32)
    303 
    304          | otherwise -> Nothing
    305 
    306 -- assumes length 8 input
    307 decode_chunk :: BS.ByteString -> Maybe BSB.Builder
    308 decode_chunk bs = do
    309   w5_0 <- word5 (BU.unsafeIndex bs 0)
    310   w5_1 <- word5 (BU.unsafeIndex bs 1)
    311   w5_2 <- word5 (BU.unsafeIndex bs 2)
    312   w5_3 <- word5 (BU.unsafeIndex bs 3)
    313   w5_4 <- word5 (BU.unsafeIndex bs 4)
    314   w5_5 <- word5 (BU.unsafeIndex bs 5)
    315   w5_6 <- word5 (BU.unsafeIndex bs 6)
    316   w5_7 <- word5 (BU.unsafeIndex bs 7)
    317 
    318   let w40 :: Word64
    319       !w40 = fi w5_0 `B.shiftL` 35
    320          .|. fi w5_1 `B.shiftL` 30
    321          .|. fi w5_2 `B.shiftL` 25
    322          .|. fi w5_3 `B.shiftL` 20
    323          .|. fi w5_4 `B.shiftL` 15
    324          .|. fi w5_5 `B.shiftL` 10
    325          .|. fi w5_6 `B.shiftL` 05
    326          .|. fi w5_7
    327       !w32 = fi (w40 `B.shiftR` 8)   :: Word32
    328       !w8  = fi (0b11111111 .&. w40) :: Word8
    329 
    330   pure $ BSB.word32BE w32 <> BSB.word8 w8
    331