base16

Fast Haskell base16 encoding/decoding (docs.ppad.tech/base16).
git clone git://git.ppad.tech/base16.git
Log | Files | Refs | README | LICENSE

Base16.hs (6866B)


      1 {-# OPTIONS_HADDOCK prune #-}
      2 {-# LANGUAGE BangPatterns #-}
      3 {-# LANGUAGE OverloadedStrings #-}
      4 
      5 -- |
      6 -- Module: Data.ByteString.Base16
      7 -- Copyright: (c) 2025 Jared Tobin
      8 -- License: MIT
      9 -- Maintainer: Jared Tobin <jared@ppad.tech>
     10 --
     11 -- Pure base16 encoding and decoding of strict bytestrings.
     12 
     13 module Data.ByteString.Base16 (
     14     encode
     15   , decode
     16   ) where
     17 
     18 import qualified Data.Bits as B
     19 import Data.Bits ((.&.), (.|.))
     20 import qualified Data.ByteString as BS
     21 import qualified Data.ByteString.Base16.Arm as Arm
     22 import qualified Data.ByteString.Internal as BI
     23 import Data.Word (Word8, Word16)
     24 import Foreign.ForeignPtr (withForeignPtr)
     25 import Foreign.Ptr (Ptr, castPtr, plusPtr)
     26 import Foreign.Storable (peekElemOff, pokeElemOff)
     27 import System.IO.Unsafe (unsafeDupablePerformIO)
     28 
     29 fi :: (Num a, Integral b) => b -> a
     30 fi = fromIntegral
     31 {-# INLINE fi #-}
     32 
     33 -- 512-byte table.  Bytes [2k] and [2k+1] are the two lowercase ASCII
     34 -- hex characters representing the value k.  All-ASCII content means
     35 -- the bytestring 'IsString' rule rewrites this to 'unsafePackAddress'
     36 -- and the bytes live in static rodata.
     37 enc_tab :: BS.ByteString
     38 enc_tab =
     39   "000102030405060708090a0b0c0d0e0f\
     40   \101112131415161718191a1b1c1d1e1f\
     41   \202122232425262728292a2b2c2d2e2f\
     42   \303132333435363738393a3b3c3d3e3f\
     43   \404142434445464748494a4b4c4d4e4f\
     44   \505152535455565758595a5b5c5d5e5f\
     45   \606162636465666768696a6b6c6d6e6f\
     46   \707172737475767778797a7b7c7d7e7f\
     47   \808182838485868788898a8b8c8d8e8f\
     48   \909192939495969798999a9b9c9d9e9f\
     49   \a0a1a2a3a4a5a6a7a8a9aaabacadaeaf\
     50   \b0b1b2b3b4b5b6b7b8b9babbbcbdbebf\
     51   \c0c1c2c3c4c5c6c7c8c9cacbcccdcecf\
     52   \d0d1d2d3d4d5d6d7d8d9dadbdcdddedf\
     53   \e0e1e2e3e4e5e6e7e8e9eaebecedeeef\
     54   \f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff"
     55 {-# NOINLINE enc_tab #-}
     56 
     57 -- 256-byte table.  Index by an ASCII byte to obtain its nibble; valid
     58 -- hex chars ('0'..'9', 'a'..'f', 'A'..'F') map to 0x10..0x1f, every
     59 -- other byte maps to 0x20.
     60 --
     61 -- The encoding is chosen so the literal is strictly ASCII and contains
     62 -- no embedded NUL, which is what the bytestring 'IsString' rule needs
     63 -- to rewrite it into 'unsafePackAddress' (cf. 'enc_tab') — the bytes
     64 -- end up in static rodata, with no CAF allocation.
     65 --
     66 -- The 0x20 sentinel is distinguished by bit 5; no value 0x10..0x1f
     67 -- carries that bit, so 'decode' OR-folds every lookup into an
     68 -- accumulator and tests 'acc .&. 0x20 == 0' once at the end.  The
     69 -- output byte is '(n0 `shiftL` 4) .|. (n1 .&. 0x0f)': in 'Word8' the
     70 -- shift naturally drops bit 4, and the mask isolates the low nibble.
     71 dec_tab :: BS.ByteString
     72 dec_tab =
     73   "\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\
     74   \\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\
     75   \\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\
     76   \\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x20\x20\x20\x20\x20\x20\
     77   \\x20\x1a\x1b\x1c\x1d\x1e\x1f\x20\x20\x20\x20\x20\x20\x20\x20\x20\
     78   \\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\
     79   \\x20\x1a\x1b\x1c\x1d\x1e\x1f\x20\x20\x20\x20\x20\x20\x20\x20\x20\
     80   \\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\
     81   \\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\
     82   \\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\
     83   \\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\
     84   \\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\
     85   \\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\
     86   \\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\
     87   \\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\
     88   \\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20"
     89 {-# NOINLINE dec_tab #-}
     90 
     91 -- | Encode a base256 'ByteString' as base16.
     92 --
     93 --   Uses ARM NEON extensions when available, otherwise a pure
     94 --   Haskell scalar loop.
     95 --
     96 --   >>> encode "hello world"
     97 --   "68656c6c6f20776f726c64"
     98 encode :: BS.ByteString -> BS.ByteString
     99 encode bs
    100   | Arm.base16_arm_available = Arm.encode bs
    101   | otherwise = encode_scalar bs
    102 {-# INLINABLE encode #-}
    103 
    104 -- | Decode a base16 'ByteString' to base256.
    105 --
    106 --   Uses ARM NEON extensions when available, otherwise a pure
    107 --   Haskell scalar loop.  Invalid inputs (including odd-length
    108 --   inputs) will produce 'Nothing'.
    109 --
    110 --   >>> decode "68656c6c6f20776f726c64"
    111 --   Just "hello world"
    112 --   >>> decode "068656c6c6f20776f726c64" -- odd-length
    113 --   Nothing
    114 decode :: BS.ByteString -> Maybe BS.ByteString
    115 decode bs
    116   | Arm.base16_arm_available = Arm.decode bs
    117   | otherwise = decode_scalar bs
    118 {-# INLINABLE decode #-}
    119 
    120 encode_scalar :: BS.ByteString -> BS.ByteString
    121 encode_scalar (BI.PS sfp soff l) =
    122   case enc_tab of
    123     BI.PS tfp toff _ ->
    124       BI.unsafeCreate (l `B.shiftL` 1) $ \dst ->
    125         withForeignPtr sfp $ \sp0 ->
    126         withForeignPtr tfp $ \tp0 -> do
    127           -- read 'enc_tab' and write 'dst' as 'Word16' pairs.  The
    128           -- two-byte block at 'enc_tab[2*b]' and the two-byte block
    129           -- at 'dst[2*i]' share the same byte layout in memory, so
    130           -- this is endianness-safe: we never inspect the numerical
    131           -- value of the 'Word16', we just shuffle 16 bits between
    132           -- two locations.
    133           let !sp = sp0 `plusPtr` soff :: Ptr Word8
    134               !tp = tp0 `plusPtr` toff :: Ptr Word16
    135               !dp = castPtr dst        :: Ptr Word16
    136               loop !i
    137                 | i == l    = pure ()
    138                 | otherwise = do
    139                     b <- peekElemOff sp i
    140                     w <- peekElemOff tp (fi b)
    141                     pokeElemOff dp i (w :: Word16)
    142                     loop (i + 1)
    143           loop 0
    144 
    145 decode_scalar :: BS.ByteString -> Maybe BS.ByteString
    146 decode_scalar (BI.PS sfp soff l)
    147   | B.testBit l 0 = Nothing
    148   | otherwise = case dec_tab of
    149       BI.PS tfp toff _ -> unsafeDupablePerformIO $ do
    150         let !n = l `B.shiftR` 1
    151         fp <- BI.mallocByteString n
    152         ok <- withForeignPtr fp  $ \dst ->
    153               withForeignPtr sfp $ \sp0 ->
    154               withForeignPtr tfp $ \tp0 -> do
    155                 let !sp = sp0 `plusPtr` soff :: Ptr Word8
    156                     !tp = tp0 `plusPtr` toff :: Ptr Word8
    157                     loop !i !acc
    158                       | i == n    =
    159                           pure $! acc .&. 0x20 == 0
    160                       | otherwise = do
    161                           let !o = i `B.shiftL` 1
    162                           c0 <- peekElemOff sp  o
    163                           c1 <- peekElemOff sp (o + 1)
    164                           n0 <- peekElemOff tp (fi c0)
    165                           n1 <- peekElemOff tp (fi c1)
    166                           let !b = (n0 `B.shiftL` 4)
    167                                .|. (n1 .&. 0x0f)
    168                           pokeElemOff dst i b
    169                           loop (i + 1) (acc .|. n0 .|. n1)
    170                 loop 0 0
    171         pure $! if ok then Just (BI.PS fp 0 n) else Nothing