Base64.hs (10970B)
1 {-# OPTIONS_HADDOCK prune #-} 2 {-# LANGUAGE BangPatterns #-} 3 {-# LANGUAGE OverloadedStrings #-} 4 5 -- | 6 -- Module: Data.ByteString.Base64 7 -- Copyright: (c) 2026 Jared Tobin 8 -- License: MIT 9 -- Maintainer: Jared Tobin <jared@ppad.tech> 10 -- 11 -- Pure base64 encoding and decoding of strict bytestrings. 12 13 module Data.ByteString.Base64 ( 14 encode 15 , decode 16 ) where 17 18 import qualified Data.Bits as B 19 import Data.Bits ((.&.), (.|.)) 20 import qualified Data.ByteString as BS 21 import qualified Data.ByteString.Base64.Arm as Arm 22 import qualified Data.ByteString.Internal as BI 23 import Data.Word (Word8) 24 import Foreign.ForeignPtr (withForeignPtr) 25 import Foreign.Ptr (Ptr, plusPtr) 26 import Foreign.Storable (peekElemOff, pokeElemOff) 27 import System.IO.Unsafe (unsafeDupablePerformIO) 28 29 fi :: (Num a, Integral b) => b -> a 30 fi = fromIntegral 31 {-# INLINE fi #-} 32 33 -- 64-byte table. Indexed by 6-bit value (0..63), yields the 34 -- corresponding base64 alphabet character. All-ASCII content means 35 -- the bytestring 'IsString' rule rewrites this to 'unsafePackAddress' 36 -- and the bytes live in static rodata. 37 enc_tab :: BS.ByteString 38 enc_tab = 39 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" 40 {-# NOINLINE enc_tab #-} 41 42 -- 256-byte table. Index by an ASCII byte to obtain its 6-bit value; 43 -- valid base64 chars ('A'..'Z', 'a'..'z', '0'..'9', '+', '/') map to 44 -- 0x40..0x7f, every other byte (including '=') maps to 0x80. 45 -- 46 -- The encoding is chosen so the literal is strictly ASCII and contains 47 -- no embedded NUL, which is what the bytestring 'IsString' rule needs 48 -- to rewrite it into 'unsafePackAddress' (cf. 'enc_tab') — the bytes 49 -- end up in static rodata, with no CAF allocation. 50 -- 51 -- The 0x80 sentinel is distinguished by bit 7; no value 0x40..0x7f 52 -- carries that bit, so 'decode' OR-folds every lookup into an 53 -- accumulator and tests 'acc .&. 0x80 == 0' once at the end. The 54 -- low 6 bits of each entry are the 6-bit value, possibly contaminated 55 -- by the 0x40 flag bit; the b0/b1/b2 formulas mask each subexpression 56 -- before combining so the flag never bleeds into the output bytes. 57 dec_tab :: BS.ByteString 58 dec_tab = 59 "\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\ 60 \\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\ 61 \\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x7E\x80\x80\x80\x7F\ 62 \\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x80\x80\x80\x80\x80\x80\ 63 \\x80\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\ 64 \\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x80\x80\x80\x80\x80\ 65 \\x80\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\ 66 \\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x80\x80\x80\x80\x80\ 67 \\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\ 68 \\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\ 69 \\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\ 70 \\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\ 71 \\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\ 72 \\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\ 73 \\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\ 74 \\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80" 75 {-# NOINLINE dec_tab #-} 76 77 -- | Encode a base256 'ByteString' as base64. 78 -- 79 -- Uses ARM NEON extensions when available, otherwise a pure 80 -- Haskell scalar loop. 81 -- 82 -- >>> encode "hello world" 83 -- "aGVsbG8gd29ybGQ=" 84 encode :: BS.ByteString -> BS.ByteString 85 encode bs 86 | Arm.base64_arm_available = Arm.encode bs 87 | otherwise = encode_scalar bs 88 {-# INLINABLE encode #-} 89 90 -- | Decode a base64 'ByteString' to base256. 91 -- 92 -- Uses ARM NEON extensions when available, otherwise a pure 93 -- Haskell scalar loop. Invalid inputs (including incorrectly- 94 -- padded or non-canonical inputs) will produce 'Nothing'. 95 -- 96 -- >>> decode "aGVsbG8gd29ybGQ=" 97 -- Just "hello world" 98 -- >>> decode "aGVsbG8gd29ybGQ" -- missing padding 99 -- Nothing 100 decode :: BS.ByteString -> Maybe BS.ByteString 101 decode bs 102 | Arm.base64_arm_available = Arm.decode bs 103 | otherwise = decode_scalar bs 104 {-# INLINABLE decode #-} 105 106 encode_scalar :: BS.ByteString -> BS.ByteString 107 encode_scalar (BI.PS sfp soff l) = 108 case enc_tab of 109 BI.PS tfp toff _ -> 110 BI.unsafeCreate ((l + 2) `quot` 3 * 4) $ \dst -> 111 withForeignPtr sfp $ \sp0 -> 112 withForeignPtr tfp $ \tp0 -> do 113 let !sp = sp0 `plusPtr` soff :: Ptr Word8 114 !tp = tp0 `plusPtr` toff :: Ptr Word8 115 !nfull = l `quot` 3 116 !rmn = l - nfull * 3 117 loop !i 118 | i == nfull = pure () 119 | otherwise = do 120 let !ii = i * 3 121 !oo = i * 4 122 b0 <- peekElemOff sp ii 123 b1 <- peekElemOff sp (ii + 1) 124 b2 <- peekElemOff sp (ii + 2) 125 c0 <- peekElemOff tp (fi (b0 `B.shiftR` 2)) 126 c1 <- peekElemOff tp (fi 127 (((b0 .&. 0x03) `B.shiftL` 4) 128 .|. (b1 `B.shiftR` 4))) 129 c2 <- peekElemOff tp (fi 130 (((b1 .&. 0x0F) `B.shiftL` 2) 131 .|. (b2 `B.shiftR` 6))) 132 c3 <- peekElemOff tp (fi (b2 .&. 0x3F)) 133 pokeElemOff dst oo (c0 :: Word8) 134 pokeElemOff dst (oo + 1) c1 135 pokeElemOff dst (oo + 2) c2 136 pokeElemOff dst (oo + 3) c3 137 loop (i + 1) 138 loop 0 139 case rmn of 140 0 -> pure () 141 1 -> do 142 let !ii = nfull * 3 143 !oo = nfull * 4 144 b0 <- peekElemOff sp ii 145 c0 <- peekElemOff tp (fi (b0 `B.shiftR` 2)) 146 c1 <- peekElemOff tp (fi ((b0 .&. 0x03) `B.shiftL` 4)) 147 pokeElemOff dst oo (c0 :: Word8) 148 pokeElemOff dst (oo + 1) c1 149 pokeElemOff dst (oo + 2) 0x3D 150 pokeElemOff dst (oo + 3) 0x3D 151 _ -> do 152 let !ii = nfull * 3 153 !oo = nfull * 4 154 b0 <- peekElemOff sp ii 155 b1 <- peekElemOff sp (ii + 1) 156 c0 <- peekElemOff tp (fi (b0 `B.shiftR` 2)) 157 c1 <- peekElemOff tp (fi 158 (((b0 .&. 0x03) `B.shiftL` 4) 159 .|. (b1 `B.shiftR` 4))) 160 c2 <- peekElemOff tp (fi ((b1 .&. 0x0F) `B.shiftL` 2)) 161 pokeElemOff dst oo (c0 :: Word8) 162 pokeElemOff dst (oo + 1) c1 163 pokeElemOff dst (oo + 2) c2 164 pokeElemOff dst (oo + 3) 0x3D 165 166 decode_scalar :: BS.ByteString -> Maybe BS.ByteString 167 decode_scalar (BI.PS sfp soff l) 168 | l == 0 = Just BS.empty 169 | l .&. 0x03 /= 0 = Nothing 170 | otherwise = case dec_tab of 171 BI.PS tfp toff _ -> unsafeDupablePerformIO $ 172 withForeignPtr sfp $ \sp0 -> 173 withForeignPtr tfp $ \tp0 -> do 174 let !sp = sp0 `plusPtr` soff :: Ptr Word8 175 !tp = tp0 `plusPtr` toff :: Ptr Word8 176 c_pre <- peekElemOff sp (l - 2) 177 c_end <- peekElemOff sp (l - 1) 178 let !pad_pre = c_pre == 0x3D 179 !pad_end = c_end == 0x3D 180 if pad_pre && not pad_end 181 then pure Nothing 182 else do 183 let !pad = (if pad_pre then 2 else if pad_end then 1 else 0) 184 :: Int 185 !nfull = l `B.shiftR` 2 186 !nbody = if pad > 0 then nfull - 1 else nfull 187 !outlen = nfull * 3 - pad 188 fp <- BI.mallocByteString outlen 189 ok <- withForeignPtr fp $ \dst -> do 190 let body_loop !acc !i 191 | i == nbody = pure acc 192 | otherwise = do 193 let !ii = i `B.shiftL` 2 194 !oo = i * 3 195 c0 <- peekElemOff sp ii 196 c1 <- peekElemOff sp (ii + 1) 197 c2 <- peekElemOff sp (ii + 2) 198 c3 <- peekElemOff sp (ii + 3) 199 v0 <- peekElemOff tp (fi c0) 200 v1 <- peekElemOff tp (fi c1) 201 v2 <- peekElemOff tp (fi c2) 202 v3 <- peekElemOff tp (fi c3) 203 let !b0 = (v0 `B.shiftL` 2) 204 .|. ((v1 `B.shiftR` 4) .&. 0x03) 205 !b1 = ((v1 .&. 0x0F) `B.shiftL` 4) 206 .|. ((v2 `B.shiftR` 2) .&. 0x0F) 207 !b2 = ((v2 .&. 0x03) `B.shiftL` 6) 208 .|. (v3 .&. 0x3F) 209 pokeElemOff dst oo b0 210 pokeElemOff dst (oo + 1) b1 211 pokeElemOff dst (oo + 2) b2 212 body_loop 213 (acc .|. v0 .|. v1 .|. v2 .|. v3) (i + 1) 214 acc <- body_loop 0 0 215 if acc .&. 0x80 /= 0 216 then pure False 217 else case pad of 218 0 -> pure True 219 1 -> do 220 let !ii = nbody `B.shiftL` 2 221 !oo = nbody * 3 222 c0 <- peekElemOff sp ii 223 c1 <- peekElemOff sp (ii + 1) 224 c2 <- peekElemOff sp (ii + 2) 225 v0 <- peekElemOff tp (fi c0) 226 v1 <- peekElemOff tp (fi c1) 227 v2 <- peekElemOff tp (fi c2) 228 let !tail_acc = v0 .|. v1 .|. v2 229 if tail_acc .&. 0x80 /= 0 || v2 .&. 0x03 /= 0 230 then pure False 231 else do 232 let !b0 = (v0 `B.shiftL` 2) 233 .|. ((v1 `B.shiftR` 4) .&. 0x03) 234 !b1 = ((v1 .&. 0x0F) `B.shiftL` 4) 235 .|. ((v2 `B.shiftR` 2) .&. 0x0F) 236 pokeElemOff dst oo b0 237 pokeElemOff dst (oo + 1) b1 238 pure True 239 _ -> do 240 let !ii = nbody `B.shiftL` 2 241 !oo = nbody * 3 242 c0 <- peekElemOff sp ii 243 c1 <- peekElemOff sp (ii + 1) 244 v0 <- peekElemOff tp (fi c0) 245 v1 <- peekElemOff tp (fi c1) 246 let !tail_acc = v0 .|. v1 247 if tail_acc .&. 0x80 /= 0 || v1 .&. 0x0F /= 0 248 then pure False 249 else do 250 let !b0 = (v0 `B.shiftL` 2) 251 .|. ((v1 `B.shiftR` 4) .&. 0x03) 252 pokeElemOff dst oo b0 253 pure True 254 pure $! if ok then Just (BI.PS fp 0 outlen) else Nothing