base64

Fast Haskell base64 encoding/decoding (docs.ppad.tech/base64).
git clone git://git.ppad.tech/base64.git
Log | Files | Refs | README | LICENSE

Base64.hs (10970B)


      1 {-# OPTIONS_HADDOCK prune #-}
      2 {-# LANGUAGE BangPatterns #-}
      3 {-# LANGUAGE OverloadedStrings #-}
      4 
      5 -- |
      6 -- Module: Data.ByteString.Base64
      7 -- Copyright: (c) 2026 Jared Tobin
      8 -- License: MIT
      9 -- Maintainer: Jared Tobin <jared@ppad.tech>
     10 --
     11 -- Pure base64 encoding and decoding of strict bytestrings.
     12 
     13 module Data.ByteString.Base64 (
     14     encode
     15   , decode
     16   ) where
     17 
     18 import qualified Data.Bits as B
     19 import Data.Bits ((.&.), (.|.))
     20 import qualified Data.ByteString as BS
     21 import qualified Data.ByteString.Base64.Arm as Arm
     22 import qualified Data.ByteString.Internal as BI
     23 import Data.Word (Word8)
     24 import Foreign.ForeignPtr (withForeignPtr)
     25 import Foreign.Ptr (Ptr, plusPtr)
     26 import Foreign.Storable (peekElemOff, pokeElemOff)
     27 import System.IO.Unsafe (unsafeDupablePerformIO)
     28 
     29 fi :: (Num a, Integral b) => b -> a
     30 fi = fromIntegral
     31 {-# INLINE fi #-}
     32 
     33 -- 64-byte table.  Indexed by 6-bit value (0..63), yields the
     34 -- corresponding base64 alphabet character.  All-ASCII content means
     35 -- the bytestring 'IsString' rule rewrites this to 'unsafePackAddress'
     36 -- and the bytes live in static rodata.
     37 enc_tab :: BS.ByteString
     38 enc_tab =
     39   "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
     40 {-# NOINLINE enc_tab #-}
     41 
     42 -- 256-byte table.  Index by an ASCII byte to obtain its 6-bit value;
     43 -- valid base64 chars ('A'..'Z', 'a'..'z', '0'..'9', '+', '/') map to
     44 -- 0x40..0x7f, every other byte (including '=') maps to 0x80.
     45 --
     46 -- The encoding is chosen so the literal is strictly ASCII and contains
     47 -- no embedded NUL, which is what the bytestring 'IsString' rule needs
     48 -- to rewrite it into 'unsafePackAddress' (cf. 'enc_tab') — the bytes
     49 -- end up in static rodata, with no CAF allocation.
     50 --
     51 -- The 0x80 sentinel is distinguished by bit 7; no value 0x40..0x7f
     52 -- carries that bit, so 'decode' OR-folds every lookup into an
     53 -- accumulator and tests 'acc .&. 0x80 == 0' once at the end.  The
     54 -- low 6 bits of each entry are the 6-bit value, possibly contaminated
     55 -- by the 0x40 flag bit; the b0/b1/b2 formulas mask each subexpression
     56 -- before combining so the flag never bleeds into the output bytes.
     57 dec_tab :: BS.ByteString
     58 dec_tab =
     59   "\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\
     60   \\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\
     61   \\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x7E\x80\x80\x80\x7F\
     62   \\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x80\x80\x80\x80\x80\x80\
     63   \\x80\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4A\x4B\x4C\x4D\x4E\
     64   \\x4F\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x80\x80\x80\x80\x80\
     65   \\x80\x5A\x5B\x5C\x5D\x5E\x5F\x60\x61\x62\x63\x64\x65\x66\x67\x68\
     66   \\x69\x6A\x6B\x6C\x6D\x6E\x6F\x70\x71\x72\x73\x80\x80\x80\x80\x80\
     67   \\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\
     68   \\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\
     69   \\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\
     70   \\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\
     71   \\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\
     72   \\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\
     73   \\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\
     74   \\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80"
     75 {-# NOINLINE dec_tab #-}
     76 
     77 -- | Encode a base256 'ByteString' as base64.
     78 --
     79 --   Uses ARM NEON extensions when available, otherwise a pure
     80 --   Haskell scalar loop.
     81 --
     82 --   >>> encode "hello world"
     83 --   "aGVsbG8gd29ybGQ="
     84 encode :: BS.ByteString -> BS.ByteString
     85 encode bs
     86   | Arm.base64_arm_available = Arm.encode bs
     87   | otherwise                = encode_scalar bs
     88 {-# INLINABLE encode #-}
     89 
     90 -- | Decode a base64 'ByteString' to base256.
     91 --
     92 --   Uses ARM NEON extensions when available, otherwise a pure
     93 --   Haskell scalar loop.  Invalid inputs (including incorrectly-
     94 --   padded or non-canonical inputs) will produce 'Nothing'.
     95 --
     96 --   >>> decode "aGVsbG8gd29ybGQ="
     97 --   Just "hello world"
     98 --   >>> decode "aGVsbG8gd29ybGQ" -- missing padding
     99 --   Nothing
    100 decode :: BS.ByteString -> Maybe BS.ByteString
    101 decode bs
    102   | Arm.base64_arm_available = Arm.decode bs
    103   | otherwise                = decode_scalar bs
    104 {-# INLINABLE decode #-}
    105 
    106 encode_scalar :: BS.ByteString -> BS.ByteString
    107 encode_scalar (BI.PS sfp soff l) =
    108   case enc_tab of
    109     BI.PS tfp toff _ ->
    110       BI.unsafeCreate ((l + 2) `quot` 3 * 4) $ \dst ->
    111         withForeignPtr sfp $ \sp0 ->
    112         withForeignPtr tfp $ \tp0 -> do
    113           let !sp = sp0 `plusPtr` soff :: Ptr Word8
    114               !tp = tp0 `plusPtr` toff :: Ptr Word8
    115               !nfull = l `quot` 3
    116               !rmn   = l - nfull * 3
    117               loop !i
    118                 | i == nfull = pure ()
    119                 | otherwise = do
    120                     let !ii = i * 3
    121                         !oo = i * 4
    122                     b0 <- peekElemOff sp ii
    123                     b1 <- peekElemOff sp (ii + 1)
    124                     b2 <- peekElemOff sp (ii + 2)
    125                     c0 <- peekElemOff tp (fi (b0 `B.shiftR` 2))
    126                     c1 <- peekElemOff tp (fi
    127                             (((b0 .&. 0x03) `B.shiftL` 4)
    128                          .|.  (b1 `B.shiftR` 4)))
    129                     c2 <- peekElemOff tp (fi
    130                             (((b1 .&. 0x0F) `B.shiftL` 2)
    131                          .|.  (b2 `B.shiftR` 6)))
    132                     c3 <- peekElemOff tp (fi (b2 .&. 0x3F))
    133                     pokeElemOff dst  oo      (c0 :: Word8)
    134                     pokeElemOff dst (oo + 1) c1
    135                     pokeElemOff dst (oo + 2) c2
    136                     pokeElemOff dst (oo + 3) c3
    137                     loop (i + 1)
    138           loop 0
    139           case rmn of
    140             0 -> pure ()
    141             1 -> do
    142               let !ii = nfull * 3
    143                   !oo = nfull * 4
    144               b0 <- peekElemOff sp ii
    145               c0 <- peekElemOff tp (fi (b0 `B.shiftR` 2))
    146               c1 <- peekElemOff tp (fi ((b0 .&. 0x03) `B.shiftL` 4))
    147               pokeElemOff dst  oo      (c0 :: Word8)
    148               pokeElemOff dst (oo + 1) c1
    149               pokeElemOff dst (oo + 2) 0x3D
    150               pokeElemOff dst (oo + 3) 0x3D
    151             _ -> do
    152               let !ii = nfull * 3
    153                   !oo = nfull * 4
    154               b0 <- peekElemOff sp ii
    155               b1 <- peekElemOff sp (ii + 1)
    156               c0 <- peekElemOff tp (fi (b0 `B.shiftR` 2))
    157               c1 <- peekElemOff tp (fi
    158                       (((b0 .&. 0x03) `B.shiftL` 4)
    159                    .|.  (b1 `B.shiftR` 4)))
    160               c2 <- peekElemOff tp (fi ((b1 .&. 0x0F) `B.shiftL` 2))
    161               pokeElemOff dst  oo      (c0 :: Word8)
    162               pokeElemOff dst (oo + 1) c1
    163               pokeElemOff dst (oo + 2) c2
    164               pokeElemOff dst (oo + 3) 0x3D
    165 
    166 decode_scalar :: BS.ByteString -> Maybe BS.ByteString
    167 decode_scalar (BI.PS sfp soff l)
    168   | l == 0          = Just BS.empty
    169   | l .&. 0x03 /= 0 = Nothing
    170   | otherwise = case dec_tab of
    171       BI.PS tfp toff _ -> unsafeDupablePerformIO $
    172         withForeignPtr sfp $ \sp0 ->
    173         withForeignPtr tfp $ \tp0 -> do
    174           let !sp = sp0 `plusPtr` soff :: Ptr Word8
    175               !tp = tp0 `plusPtr` toff :: Ptr Word8
    176           c_pre <- peekElemOff sp (l - 2)
    177           c_end <- peekElemOff sp (l - 1)
    178           let !pad_pre = c_pre == 0x3D
    179               !pad_end = c_end == 0x3D
    180           if pad_pre && not pad_end
    181             then pure Nothing
    182             else do
    183               let !pad = (if pad_pre then 2 else if pad_end then 1 else 0)
    184                        :: Int
    185                   !nfull  = l `B.shiftR` 2
    186                   !nbody  = if pad > 0 then nfull - 1 else nfull
    187                   !outlen = nfull * 3 - pad
    188               fp <- BI.mallocByteString outlen
    189               ok <- withForeignPtr fp $ \dst -> do
    190                 let body_loop !acc !i
    191                       | i == nbody = pure acc
    192                       | otherwise = do
    193                           let !ii = i `B.shiftL` 2
    194                               !oo = i * 3
    195                           c0 <- peekElemOff sp  ii
    196                           c1 <- peekElemOff sp (ii + 1)
    197                           c2 <- peekElemOff sp (ii + 2)
    198                           c3 <- peekElemOff sp (ii + 3)
    199                           v0 <- peekElemOff tp (fi c0)
    200                           v1 <- peekElemOff tp (fi c1)
    201                           v2 <- peekElemOff tp (fi c2)
    202                           v3 <- peekElemOff tp (fi c3)
    203                           let !b0 = (v0 `B.shiftL` 2)
    204                                 .|. ((v1 `B.shiftR` 4) .&. 0x03)
    205                               !b1 = ((v1 .&. 0x0F) `B.shiftL` 4)
    206                                 .|. ((v2 `B.shiftR` 2) .&. 0x0F)
    207                               !b2 = ((v2 .&. 0x03) `B.shiftL` 6)
    208                                 .|. (v3 .&. 0x3F)
    209                           pokeElemOff dst  oo      b0
    210                           pokeElemOff dst (oo + 1) b1
    211                           pokeElemOff dst (oo + 2) b2
    212                           body_loop
    213                             (acc .|. v0 .|. v1 .|. v2 .|. v3) (i + 1)
    214                 acc <- body_loop 0 0
    215                 if acc .&. 0x80 /= 0
    216                   then pure False
    217                   else case pad of
    218                     0 -> pure True
    219                     1 -> do
    220                       let !ii = nbody `B.shiftL` 2
    221                           !oo = nbody * 3
    222                       c0 <- peekElemOff sp  ii
    223                       c1 <- peekElemOff sp (ii + 1)
    224                       c2 <- peekElemOff sp (ii + 2)
    225                       v0 <- peekElemOff tp (fi c0)
    226                       v1 <- peekElemOff tp (fi c1)
    227                       v2 <- peekElemOff tp (fi c2)
    228                       let !tail_acc = v0 .|. v1 .|. v2
    229                       if tail_acc .&. 0x80 /= 0 || v2 .&. 0x03 /= 0
    230                         then pure False
    231                         else do
    232                           let !b0 = (v0 `B.shiftL` 2)
    233                                 .|. ((v1 `B.shiftR` 4) .&. 0x03)
    234                               !b1 = ((v1 .&. 0x0F) `B.shiftL` 4)
    235                                 .|. ((v2 `B.shiftR` 2) .&. 0x0F)
    236                           pokeElemOff dst  oo      b0
    237                           pokeElemOff dst (oo + 1) b1
    238                           pure True
    239                     _ -> do
    240                       let !ii = nbody `B.shiftL` 2
    241                           !oo = nbody * 3
    242                       c0 <- peekElemOff sp  ii
    243                       c1 <- peekElemOff sp (ii + 1)
    244                       v0 <- peekElemOff tp (fi c0)
    245                       v1 <- peekElemOff tp (fi c1)
    246                       let !tail_acc = v0 .|. v1
    247                       if tail_acc .&. 0x80 /= 0 || v1 .&. 0x0F /= 0
    248                         then pure False
    249                         else do
    250                           let !b0 = (v0 `B.shiftL` 2)
    251                                 .|. ((v1 `B.shiftR` 4) .&. 0x03)
    252                           pokeElemOff dst oo b0
    253                           pure True
    254               pure $! if ok then Just (BI.PS fp 0 outlen) else Nothing