Base32.hs (10196B)
1 {-# OPTIONS_HADDOCK prune #-} 2 {-# LANGUAGE BangPatterns #-} 3 {-# LANGUAGE BinaryLiterals #-} 4 {-# LANGUAGE LambdaCase #-} 5 {-# LANGUAGE MultiWayIf #-} 6 {-# LANGUAGE OverloadedStrings #-} 7 {-# LANGUAGE ViewPatterns #-} 8 9 -- | 10 -- Module: Data.ByteString.Base32 11 -- Copyright: (c) 2024 Jared Tobin 12 -- License: MIT 13 -- Maintainer: Jared Tobin <jared@ppad.tech> 14 -- 15 -- Unpadded base32 encoding & decoding using the bech32 character set. 16 17 -- this module is an adaptation of emilypi's 'base32' library 18 19 module Data.ByteString.Base32 ( 20 -- * base32 encoding and decoding 21 encode 22 , decode 23 ) where 24 25 import Control.Monad (guard) 26 import Data.Bits ((.|.), (.&.)) 27 import qualified Data.Bits as B 28 import qualified Data.ByteString as BS 29 import qualified Data.ByteString.Builder as BSB 30 import qualified Data.ByteString.Builder.Extra as BE 31 import qualified Data.ByteString.Internal as BI 32 import qualified Data.ByteString.Unsafe as BU 33 import Data.Word (Word8, Word32, Word64) 34 35 fi :: (Integral a, Num b) => a -> b 36 fi = fromIntegral 37 {-# INLINE fi #-} 38 39 word32be :: BS.ByteString -> Word32 40 word32be s = 41 (fi (s `BU.unsafeIndex` 0) `B.shiftL` 24) .|. 42 (fi (s `BU.unsafeIndex` 1) `B.shiftL` 16) .|. 43 (fi (s `BU.unsafeIndex` 2) `B.shiftL` 8) .|. 44 (fi (s `BU.unsafeIndex` 3)) 45 {-# INLINE word32be #-} 46 47 -- realization for small builders 48 toStrict :: BSB.Builder -> BS.ByteString 49 toStrict = BS.toStrict 50 . BE.toLazyByteStringWith (BE.safeStrategy 128 BE.smallChunkSize) mempty 51 {-# INLINE toStrict #-} 52 53 bech32_charset :: BS.ByteString 54 bech32_charset = "qpzry9x8gf2tvdw0s3jn54khce6mua7l" 55 56 word5 :: Word8 -> Maybe Word8 57 word5 w8 = fmap fi (BS.elemIndex w8 bech32_charset) 58 59 arrange :: Word32 -> Word8 -> BSB.Builder 60 arrange w32 w8 = 61 let mask = 0b00011111 -- low 5-bit mask 62 bech32_char = fi . BS.index bech32_charset . fi -- word5 -> bech32 63 64 -- split 40 bits into 8 w5's 65 w5_0 = mask .&. (w32 `B.shiftR` 27) -- highest 5 bits 66 w5_1 = mask .&. (w32 `B.shiftR` 22) 67 w5_2 = mask .&. (w32 `B.shiftR` 17) 68 w5_3 = mask .&. (w32 `B.shiftR` 12) 69 w5_4 = mask .&. (w32 `B.shiftR` 07) 70 w5_5 = mask .&. (w32 `B.shiftR` 02) 71 -- combine lowest 2 bits of w32 with highest 3 bits of w8 72 w5_6 = mask .&. (w32 `B.shiftL` 03 .|. fi w8 `B.shiftR` 05) 73 -- lowest 5 bits of w8 74 w5_7 = mask .&. fi w8 75 76 -- get (w8) bech32 char for each w5, pack all into little-endian w64 77 !w64 = bech32_char w5_0 78 .|. bech32_char w5_1 `B.shiftL` 8 79 .|. bech32_char w5_2 `B.shiftL` 16 80 .|. bech32_char w5_3 `B.shiftL` 24 81 .|. bech32_char w5_4 `B.shiftL` 32 82 .|. bech32_char w5_5 `B.shiftL` 40 83 .|. bech32_char w5_6 `B.shiftL` 48 84 .|. bech32_char w5_7 `B.shiftL` 56 85 86 in BSB.word64LE w64 87 {-# INLINE arrange #-} 88 89 -- | Encode a base256-encoded 'ByteString' as a base32-encoded 90 -- 'ByteString', using the bech32 character set. 91 -- 92 -- >>> encode "jtobin was here!" 93 -- "df6x7cnfdcs8wctnyp5x2un9yy" 94 encode 95 :: BS.ByteString -- ^ base256-encoded bytestring 96 -> BS.ByteString -- ^ base32-encoded bytestring 97 encode dat = toStrict (go dat) where 98 bech32_char = fi . BS.index bech32_charset . fi 99 100 go bs@(BI.PS _ _ l) 101 | l >= 5 = case BS.splitAt 5 bs of 102 (chunk, etc) -> case BS.unsnoc chunk of 103 Nothing -> error "impossible, chunk length is 5" 104 Just (word32be -> w32, w8) -> arrange w32 w8 <> go etc 105 | l == 0 = mempty 106 | l == 1 = 107 let a = BU.unsafeIndex bs 0 108 t = bech32_char ((a .&. 0b11111000) `B.shiftR` 3) 109 u = bech32_char ((a .&. 0b00000111) `B.shiftL` 2) 110 111 !w16 = fi t 112 .|. fi u `B.shiftL` 8 113 114 in BSB.word16LE w16 115 | l == 2 = 116 let a = BU.unsafeIndex bs 0 117 b = BU.unsafeIndex bs 1 118 t = bech32_char ((a .&. 0b11111000) `B.shiftR` 3) 119 u = bech32_char $ 120 ((a .&. 0b00000111) `B.shiftL` 2) 121 .|. ((b .&. 0b11000000) `B.shiftR` 6) 122 v = bech32_char ((b .&. 0b00111110) `B.shiftR` 1) 123 w = bech32_char ((b .&. 0b00000001) `B.shiftL` 4) 124 125 !w32 = fi t 126 .|. fi u `B.shiftL` 8 127 .|. fi v `B.shiftL` 16 128 .|. fi w `B.shiftL` 24 129 130 in BSB.word32LE w32 131 | l == 3 = 132 let a = BU.unsafeIndex bs 0 133 b = BU.unsafeIndex bs 1 134 c = BU.unsafeIndex bs 2 135 t = bech32_char ((a .&. 0b11111000) `B.shiftR` 3) 136 u = bech32_char $ 137 ((a .&. 0b00000111) `B.shiftL` 2) 138 .|. ((b .&. 0b11000000) `B.shiftR` 6) 139 v = bech32_char ((b .&. 0b00111110) `B.shiftR` 1) 140 w = bech32_char $ 141 ((b .&. 0b00000001) `B.shiftL` 4) 142 .|. ((c .&. 0b11110000) `B.shiftR` 4) 143 x = bech32_char ((c .&. 0b00001111) `B.shiftL` 1) 144 145 !w32 = fi t 146 .|. fi u `B.shiftL` 8 147 .|. fi v `B.shiftL` 16 148 .|. fi w `B.shiftL` 24 149 150 in BSB.word32LE w32 <> BSB.word8 x 151 | l == 4 = 152 let a = BU.unsafeIndex bs 0 153 b = BU.unsafeIndex bs 1 154 c = BU.unsafeIndex bs 2 155 d = BU.unsafeIndex bs 3 156 t = bech32_char ((a .&. 0b11111000) `B.shiftR` 3) 157 u = bech32_char $ 158 ((a .&. 0b00000111) `B.shiftL` 2) 159 .|. ((b .&. 0b11000000) `B.shiftR` 6) 160 v = bech32_char ((b .&. 0b00111110) `B.shiftR` 1) 161 w = bech32_char $ 162 ((b .&. 0b00000001) `B.shiftL` 4) 163 .|. ((c .&. 0b11110000) `B.shiftR` 4) 164 x = bech32_char $ 165 ((c .&. 0b00001111) `B.shiftL` 1) 166 .|. ((d .&. 0b10000000) `B.shiftR` 7) 167 y = bech32_char ((d .&. 0b01111100) `B.shiftR` 2) 168 z = bech32_char ((d .&. 0b00000011) `B.shiftL` 3) 169 170 !w32 = fi t 171 .|. fi u `B.shiftL` 8 172 .|. fi v `B.shiftL` 16 173 .|. fi w `B.shiftL` 24 174 175 !w16 = fi x 176 .|. fi y `B.shiftL` 8 177 178 in BSB.word32LE w32 <> BSB.word16LE w16 <> BSB.word8 z 179 180 | otherwise = 181 error "impossible" 182 183 -- | Decode a 'ByteString', encoded as base32 using the bech32 character 184 -- set, to a base256-encoded 'ByteString'. 185 -- 186 -- >>> decode "df6x7cnfdcs8wctnyp5x2un9yy" 187 -- Just "jtobin was here!" 188 -- >>> decode "dfOx7cnfdcs8wctnyp5x2un9yy" -- s/6/O (non-bech32 character) 189 -- Nothing 190 decode 191 :: BS.ByteString -- ^ base32-encoded bytestring 192 -> Maybe BS.ByteString -- ^ base256-encoded bytestring 193 decode = fmap toStrict . go mempty where 194 go acc bs@(BI.PS _ _ l) 195 | l < 8 = do 196 fin <- finalize bs 197 pure (acc <> fin) 198 | otherwise = case BS.splitAt 8 bs of 199 (chunk, etc) -> do 200 res <- decode_chunk chunk 201 go (acc <> res) etc 202 203 finalize :: BS.ByteString -> Maybe BSB.Builder 204 finalize bs@(BI.PS _ _ l) 205 | l == 0 = Just mempty 206 | otherwise = do 207 guard (l >= 2) 208 w5_0 <- word5 (BU.unsafeIndex bs 0) 209 w5_1 <- word5 (BU.unsafeIndex bs 1) 210 let w8_0 = w5_0 `B.shiftL` 3 211 .|. w5_1 `B.shiftR` 2 212 213 -- https://datatracker.ietf.org/doc/html/rfc4648#section-6 214 if | l == 2 -> do -- 2 w5's, need 1 w8; 2 bits remain 215 guard (w5_1 `B.shiftL` 6 == 0) 216 pure (BSB.word8 w8_0) 217 218 | l == 4 -> do -- 4 w5's, need 2 w8's; 4 bits remain 219 w5_2 <- word5 (BU.unsafeIndex bs 2) 220 w5_3 <- word5 (BU.unsafeIndex bs 3) 221 let w8_1 = w5_1 `B.shiftL` 6 222 .|. w5_2 `B.shiftL` 1 223 .|. w5_3 `B.shiftR` 4 224 225 !w16 = fi w8_1 226 .|. fi w8_0 `B.shiftL` 8 227 228 guard (w5_3 `B.shiftL` 4 == 0) 229 pure (BSB.word16BE w16) 230 231 | l == 5 -> do -- 5 w5's, need 3 w8's; 1 bit remains 232 w5_2 <- word5 (BU.unsafeIndex bs 2) 233 w5_3 <- word5 (BU.unsafeIndex bs 3) 234 w5_4 <- word5 (BU.unsafeIndex bs 4) 235 let w8_1 = w5_1 `B.shiftL` 6 236 .|. w5_2 `B.shiftL` 1 237 .|. w5_3 `B.shiftR` 4 238 w8_2 = w5_3 `B.shiftL` 4 239 .|. w5_4 `B.shiftR` 1 240 241 w16 = fi w8_1 242 .|. fi w8_0 `B.shiftL` 8 243 244 guard (w5_4 `B.shiftL` 7 == 0) 245 pure (BSB.word16BE w16 <> BSB.word8 w8_2) 246 247 | l == 7 -> do -- 7 w5's, need 4 w8's; 3 bits remain 248 w5_2 <- word5 (BU.unsafeIndex bs 2) 249 w5_3 <- word5 (BU.unsafeIndex bs 3) 250 w5_4 <- word5 (BU.unsafeIndex bs 4) 251 w5_5 <- word5 (BU.unsafeIndex bs 5) 252 w5_6 <- word5 (BU.unsafeIndex bs 6) 253 let w8_1 = w5_1 `B.shiftL` 6 254 .|. w5_2 `B.shiftL` 1 255 .|. w5_3 `B.shiftR` 4 256 w8_2 = w5_3 `B.shiftL` 4 257 .|. w5_4 `B.shiftR` 1 258 w8_3 = w5_4 `B.shiftL` 7 259 .|. w5_5 `B.shiftL` 2 260 .|. w5_6 `B.shiftR` 3 261 262 w32 = fi w8_3 263 .|. fi w8_2 `B.shiftL` 8 264 .|. fi w8_1 `B.shiftL` 16 265 .|. fi w8_0 `B.shiftL` 24 266 267 guard (w5_6 `B.shiftL` 5 == 0) 268 pure (BSB.word32BE w32) 269 270 | otherwise -> Nothing 271 272 -- assumes length 8 input 273 decode_chunk :: BS.ByteString -> Maybe BSB.Builder 274 decode_chunk bs = do 275 w5_0 <- word5 (BU.unsafeIndex bs 0) 276 w5_1 <- word5 (BU.unsafeIndex bs 1) 277 w5_2 <- word5 (BU.unsafeIndex bs 2) 278 w5_3 <- word5 (BU.unsafeIndex bs 3) 279 w5_4 <- word5 (BU.unsafeIndex bs 4) 280 w5_5 <- word5 (BU.unsafeIndex bs 5) 281 w5_6 <- word5 (BU.unsafeIndex bs 6) 282 w5_7 <- word5 (BU.unsafeIndex bs 7) 283 284 let w40 :: Word64 285 !w40 = fi w5_0 `B.shiftL` 35 286 .|. fi w5_1 `B.shiftL` 30 287 .|. fi w5_2 `B.shiftL` 25 288 .|. fi w5_3 `B.shiftL` 20 289 .|. fi w5_4 `B.shiftL` 15 290 .|. fi w5_5 `B.shiftL` 10 291 .|. fi w5_6 `B.shiftL` 05 292 .|. fi w5_7 293 !w32 = fi (w40 `B.shiftR` 8) :: Word32 294 !w8 = fi (0b11111111 .&. w40) :: Word8 295 296 pure $ BSB.word32BE w32 <> BSB.word8 w8 297