Base32.hs (10826B)
1 {-# OPTIONS_HADDOCK prune #-} 2 {-# LANGUAGE BangPatterns #-} 3 {-# LANGUAGE BinaryLiterals #-} 4 {-# LANGUAGE LambdaCase #-} 5 {-# LANGUAGE MultiWayIf #-} 6 {-# LANGUAGE OverloadedStrings #-} 7 {-# LANGUAGE ViewPatterns #-} 8 9 -- | 10 -- Module: Data.ByteString.Base32 11 -- Copyright: (c) 2024 Jared Tobin 12 -- License: MIT 13 -- Maintainer: Jared Tobin <jared@ppad.tech> 14 -- 15 -- Unpadded base32 encoding & decoding using the bech32 character set. 16 17 -- this module is an adaptation of emilypi's 'base32' library 18 19 module Data.ByteString.Base32 ( 20 -- * base32 encoding and decoding 21 encode 22 , decode 23 ) where 24 25 import Control.Monad (guard) 26 import Data.Bits ((.|.), (.&.)) 27 import qualified Data.Bits as B 28 import qualified Data.ByteString as BS 29 import qualified Data.ByteString.Builder as BSB 30 import qualified Data.ByteString.Builder.Extra as BE 31 import qualified Data.ByteString.Internal as BI 32 import qualified Data.ByteString.Unsafe as BU 33 import Data.Word (Word8, Word32, Word64) 34 35 fi :: (Integral a, Num b) => a -> b 36 fi = fromIntegral 37 {-# INLINE fi #-} 38 39 word32be :: BS.ByteString -> Word32 40 word32be s = 41 (fi (s `BU.unsafeIndex` 0) `B.shiftL` 24) .|. 42 (fi (s `BU.unsafeIndex` 1) `B.shiftL` 16) .|. 43 (fi (s `BU.unsafeIndex` 2) `B.shiftL` 8) .|. 44 (fi (s `BU.unsafeIndex` 3)) 45 {-# INLINE word32be #-} 46 47 -- realization for small builders 48 toStrict :: BSB.Builder -> BS.ByteString 49 toStrict = BS.toStrict 50 . BE.toLazyByteStringWith (BE.safeStrategy 128 BE.smallChunkSize) mempty 51 {-# INLINE toStrict #-} 52 53 bech32_charset :: BS.ByteString 54 bech32_charset = "qpzry9x8gf2tvdw0s3jn54khce6mua7l" 55 56 word5 :: Word8 -> Maybe Word8 57 word5 = \case 58 113 -> pure $! 0 59 112 -> pure $! 1 60 122 -> pure $! 2 61 114 -> pure $! 3 62 121 -> pure $! 4 63 57 -> pure $! 5 64 120 -> pure $! 6 65 56 -> pure $! 7 66 103 -> pure $! 8 67 102 -> pure $! 9 68 50 -> pure $! 10 69 116 -> pure $! 11 70 118 -> pure $! 12 71 100 -> pure $! 13 72 119 -> pure $! 14 73 48 -> pure $! 15 74 115 -> pure $! 16 75 51 -> pure $! 17 76 106 -> pure $! 18 77 110 -> pure $! 19 78 53 -> pure $! 20 79 52 -> pure $! 21 80 107 -> pure $! 22 81 104 -> pure $! 23 82 99 -> pure $! 24 83 101 -> pure $! 25 84 54 -> pure $! 26 85 109 -> pure $! 27 86 117 -> pure $! 28 87 97 -> pure $! 29 88 55 -> pure $! 30 89 108 -> pure $! 31 90 _ -> Nothing 91 {-# INLINE word5 #-} 92 93 arrange :: Word32 -> Word8 -> BSB.Builder 94 arrange w32 w8 = 95 let mask = 0b00011111 -- low 5-bit mask 96 bech32_char = fi . BS.index bech32_charset . fi -- word5 -> bech32 97 98 -- split 40 bits into 8 w5's 99 w5_0 = mask .&. (w32 `B.shiftR` 27) -- highest 5 bits 100 w5_1 = mask .&. (w32 `B.shiftR` 22) 101 w5_2 = mask .&. (w32 `B.shiftR` 17) 102 w5_3 = mask .&. (w32 `B.shiftR` 12) 103 w5_4 = mask .&. (w32 `B.shiftR` 07) 104 w5_5 = mask .&. (w32 `B.shiftR` 02) 105 -- combine lowest 2 bits of w32 with highest 3 bits of w8 106 w5_6 = mask .&. (w32 `B.shiftL` 03 .|. fi w8 `B.shiftR` 05) 107 -- lowest 5 bits of w8 108 w5_7 = mask .&. fi w8 109 110 -- get (w8) bech32 char for each w5, pack all into little-endian w64 111 !w64 = bech32_char w5_0 112 .|. bech32_char w5_1 `B.shiftL` 8 113 .|. bech32_char w5_2 `B.shiftL` 16 114 .|. bech32_char w5_3 `B.shiftL` 24 115 .|. bech32_char w5_4 `B.shiftL` 32 116 .|. bech32_char w5_5 `B.shiftL` 40 117 .|. bech32_char w5_6 `B.shiftL` 48 118 .|. bech32_char w5_7 `B.shiftL` 56 119 120 in BSB.word64LE w64 121 {-# INLINE arrange #-} 122 123 -- | Encode a base256-encoded 'ByteString' as a base32-encoded 124 -- 'ByteString', using the bech32 character set. 125 -- 126 -- >>> encode "jtobin was here!" 127 -- "df6x7cnfdcs8wctnyp5x2un9yy" 128 encode 129 :: BS.ByteString -- ^ base256-encoded bytestring 130 -> BS.ByteString -- ^ base32-encoded bytestring 131 encode dat = toStrict (go dat) where 132 bech32_char = fi . BS.index bech32_charset . fi 133 134 go bs@(BI.PS _ _ l) 135 | l >= 5 = case BS.splitAt 5 bs of 136 (chunk, etc) -> case BS.unsnoc chunk of 137 Nothing -> error "impossible, chunk length is 5" 138 Just (word32be -> w32, w8) -> arrange w32 w8 <> go etc 139 | l == 0 = mempty 140 | l == 1 = 141 let a = BU.unsafeIndex bs 0 142 t = bech32_char ((a .&. 0b11111000) `B.shiftR` 3) 143 u = bech32_char ((a .&. 0b00000111) `B.shiftL` 2) 144 145 !w16 = fi t 146 .|. fi u `B.shiftL` 8 147 148 in BSB.word16LE w16 149 | l == 2 = 150 let a = BU.unsafeIndex bs 0 151 b = BU.unsafeIndex bs 1 152 t = bech32_char ((a .&. 0b11111000) `B.shiftR` 3) 153 u = bech32_char $ 154 ((a .&. 0b00000111) `B.shiftL` 2) 155 .|. ((b .&. 0b11000000) `B.shiftR` 6) 156 v = bech32_char ((b .&. 0b00111110) `B.shiftR` 1) 157 w = bech32_char ((b .&. 0b00000001) `B.shiftL` 4) 158 159 !w32 = fi t 160 .|. fi u `B.shiftL` 8 161 .|. fi v `B.shiftL` 16 162 .|. fi w `B.shiftL` 24 163 164 in BSB.word32LE w32 165 | l == 3 = 166 let a = BU.unsafeIndex bs 0 167 b = BU.unsafeIndex bs 1 168 c = BU.unsafeIndex bs 2 169 t = bech32_char ((a .&. 0b11111000) `B.shiftR` 3) 170 u = bech32_char $ 171 ((a .&. 0b00000111) `B.shiftL` 2) 172 .|. ((b .&. 0b11000000) `B.shiftR` 6) 173 v = bech32_char ((b .&. 0b00111110) `B.shiftR` 1) 174 w = bech32_char $ 175 ((b .&. 0b00000001) `B.shiftL` 4) 176 .|. ((c .&. 0b11110000) `B.shiftR` 4) 177 x = bech32_char ((c .&. 0b00001111) `B.shiftL` 1) 178 179 !w32 = fi t 180 .|. fi u `B.shiftL` 8 181 .|. fi v `B.shiftL` 16 182 .|. fi w `B.shiftL` 24 183 184 in BSB.word32LE w32 <> BSB.word8 x 185 | l == 4 = 186 let a = BU.unsafeIndex bs 0 187 b = BU.unsafeIndex bs 1 188 c = BU.unsafeIndex bs 2 189 d = BU.unsafeIndex bs 3 190 t = bech32_char ((a .&. 0b11111000) `B.shiftR` 3) 191 u = bech32_char $ 192 ((a .&. 0b00000111) `B.shiftL` 2) 193 .|. ((b .&. 0b11000000) `B.shiftR` 6) 194 v = bech32_char ((b .&. 0b00111110) `B.shiftR` 1) 195 w = bech32_char $ 196 ((b .&. 0b00000001) `B.shiftL` 4) 197 .|. ((c .&. 0b11110000) `B.shiftR` 4) 198 x = bech32_char $ 199 ((c .&. 0b00001111) `B.shiftL` 1) 200 .|. ((d .&. 0b10000000) `B.shiftR` 7) 201 y = bech32_char ((d .&. 0b01111100) `B.shiftR` 2) 202 z = bech32_char ((d .&. 0b00000011) `B.shiftL` 3) 203 204 !w32 = fi t 205 .|. fi u `B.shiftL` 8 206 .|. fi v `B.shiftL` 16 207 .|. fi w `B.shiftL` 24 208 209 !w16 = fi x 210 .|. fi y `B.shiftL` 8 211 212 in BSB.word32LE w32 <> BSB.word16LE w16 <> BSB.word8 z 213 214 | otherwise = 215 error "impossible" 216 217 -- | Decode a 'ByteString', encoded as base32 using the bech32 character 218 -- set, to a base256-encoded 'ByteString'. 219 -- 220 -- >>> decode "df6x7cnfdcs8wctnyp5x2un9yy" 221 -- Just "jtobin was here!" 222 -- >>> decode "dfOx7cnfdcs8wctnyp5x2un9yy" -- s/6/O (non-bech32 character) 223 -- Nothing 224 decode 225 :: BS.ByteString -- ^ base32-encoded bytestring 226 -> Maybe BS.ByteString -- ^ base256-encoded bytestring 227 decode = fmap toStrict . go mempty where 228 go acc bs@(BI.PS _ _ l) 229 | l < 8 = do 230 fin <- finalize bs 231 pure (acc <> fin) 232 | otherwise = case BS.splitAt 8 bs of 233 (chunk, etc) -> do 234 res <- decode_chunk chunk 235 go (acc <> res) etc 236 237 finalize :: BS.ByteString -> Maybe BSB.Builder 238 finalize bs@(BI.PS _ _ l) 239 | l == 0 = Just mempty 240 | otherwise = do 241 guard (l >= 2) 242 w5_0 <- word5 (BU.unsafeIndex bs 0) 243 w5_1 <- word5 (BU.unsafeIndex bs 1) 244 let w8_0 = w5_0 `B.shiftL` 3 245 .|. w5_1 `B.shiftR` 2 246 247 -- https://datatracker.ietf.org/doc/html/rfc4648#section-6 248 if | l == 2 -> do -- 2 w5's, need 1 w8; 2 bits remain 249 guard (w5_1 `B.shiftL` 6 == 0) 250 pure (BSB.word8 w8_0) 251 252 | l == 4 -> do -- 4 w5's, need 2 w8's; 4 bits remain 253 w5_2 <- word5 (BU.unsafeIndex bs 2) 254 w5_3 <- word5 (BU.unsafeIndex bs 3) 255 let w8_1 = w5_1 `B.shiftL` 6 256 .|. w5_2 `B.shiftL` 1 257 .|. w5_3 `B.shiftR` 4 258 259 !w16 = fi w8_1 260 .|. fi w8_0 `B.shiftL` 8 261 262 guard (w5_3 `B.shiftL` 4 == 0) 263 pure (BSB.word16BE w16) 264 265 | l == 5 -> do -- 5 w5's, need 3 w8's; 1 bit remains 266 w5_2 <- word5 (BU.unsafeIndex bs 2) 267 w5_3 <- word5 (BU.unsafeIndex bs 3) 268 w5_4 <- word5 (BU.unsafeIndex bs 4) 269 let w8_1 = w5_1 `B.shiftL` 6 270 .|. w5_2 `B.shiftL` 1 271 .|. w5_3 `B.shiftR` 4 272 w8_2 = w5_3 `B.shiftL` 4 273 .|. w5_4 `B.shiftR` 1 274 275 w16 = fi w8_1 276 .|. fi w8_0 `B.shiftL` 8 277 278 guard (w5_4 `B.shiftL` 7 == 0) 279 pure (BSB.word16BE w16 <> BSB.word8 w8_2) 280 281 | l == 7 -> do -- 7 w5's, need 4 w8's; 3 bits remain 282 w5_2 <- word5 (BU.unsafeIndex bs 2) 283 w5_3 <- word5 (BU.unsafeIndex bs 3) 284 w5_4 <- word5 (BU.unsafeIndex bs 4) 285 w5_5 <- word5 (BU.unsafeIndex bs 5) 286 w5_6 <- word5 (BU.unsafeIndex bs 6) 287 let w8_1 = w5_1 `B.shiftL` 6 288 .|. w5_2 `B.shiftL` 1 289 .|. w5_3 `B.shiftR` 4 290 w8_2 = w5_3 `B.shiftL` 4 291 .|. w5_4 `B.shiftR` 1 292 w8_3 = w5_4 `B.shiftL` 7 293 .|. w5_5 `B.shiftL` 2 294 .|. w5_6 `B.shiftR` 3 295 296 w32 = fi w8_3 297 .|. fi w8_2 `B.shiftL` 8 298 .|. fi w8_1 `B.shiftL` 16 299 .|. fi w8_0 `B.shiftL` 24 300 301 guard (w5_6 `B.shiftL` 5 == 0) 302 pure (BSB.word32BE w32) 303 304 | otherwise -> Nothing 305 306 -- assumes length 8 input 307 decode_chunk :: BS.ByteString -> Maybe BSB.Builder 308 decode_chunk bs = do 309 w5_0 <- word5 (BU.unsafeIndex bs 0) 310 w5_1 <- word5 (BU.unsafeIndex bs 1) 311 w5_2 <- word5 (BU.unsafeIndex bs 2) 312 w5_3 <- word5 (BU.unsafeIndex bs 3) 313 w5_4 <- word5 (BU.unsafeIndex bs 4) 314 w5_5 <- word5 (BU.unsafeIndex bs 5) 315 w5_6 <- word5 (BU.unsafeIndex bs 6) 316 w5_7 <- word5 (BU.unsafeIndex bs 7) 317 318 let w40 :: Word64 319 !w40 = fi w5_0 `B.shiftL` 35 320 .|. fi w5_1 `B.shiftL` 30 321 .|. fi w5_2 `B.shiftL` 25 322 .|. fi w5_3 `B.shiftL` 20 323 .|. fi w5_4 `B.shiftL` 15 324 .|. fi w5_5 `B.shiftL` 10 325 .|. fi w5_6 `B.shiftL` 05 326 .|. fi w5_7 327 !w32 = fi (w40 `B.shiftR` 8) :: Word32 328 !w8 = fi (0b11111111 .&. w40) :: Word8 329 330 pure $ BSB.word32BE w32 <> BSB.word8 w8 331