Base16.hs (6866B)
1 {-# OPTIONS_HADDOCK prune #-} 2 {-# LANGUAGE BangPatterns #-} 3 {-# LANGUAGE OverloadedStrings #-} 4 5 -- | 6 -- Module: Data.ByteString.Base16 7 -- Copyright: (c) 2025 Jared Tobin 8 -- License: MIT 9 -- Maintainer: Jared Tobin <jared@ppad.tech> 10 -- 11 -- Pure base16 encoding and decoding of strict bytestrings. 12 13 module Data.ByteString.Base16 ( 14 encode 15 , decode 16 ) where 17 18 import qualified Data.Bits as B 19 import Data.Bits ((.&.), (.|.)) 20 import qualified Data.ByteString as BS 21 import qualified Data.ByteString.Base16.Arm as Arm 22 import qualified Data.ByteString.Internal as BI 23 import Data.Word (Word8, Word16) 24 import Foreign.ForeignPtr (withForeignPtr) 25 import Foreign.Ptr (Ptr, castPtr, plusPtr) 26 import Foreign.Storable (peekElemOff, pokeElemOff) 27 import System.IO.Unsafe (unsafeDupablePerformIO) 28 29 fi :: (Num a, Integral b) => b -> a 30 fi = fromIntegral 31 {-# INLINE fi #-} 32 33 -- 512-byte table. Bytes [2k] and [2k+1] are the two lowercase ASCII 34 -- hex characters representing the value k. All-ASCII content means 35 -- the bytestring 'IsString' rule rewrites this to 'unsafePackAddress' 36 -- and the bytes live in static rodata. 37 enc_tab :: BS.ByteString 38 enc_tab = 39 "000102030405060708090a0b0c0d0e0f\ 40 \101112131415161718191a1b1c1d1e1f\ 41 \202122232425262728292a2b2c2d2e2f\ 42 \303132333435363738393a3b3c3d3e3f\ 43 \404142434445464748494a4b4c4d4e4f\ 44 \505152535455565758595a5b5c5d5e5f\ 45 \606162636465666768696a6b6c6d6e6f\ 46 \707172737475767778797a7b7c7d7e7f\ 47 \808182838485868788898a8b8c8d8e8f\ 48 \909192939495969798999a9b9c9d9e9f\ 49 \a0a1a2a3a4a5a6a7a8a9aaabacadaeaf\ 50 \b0b1b2b3b4b5b6b7b8b9babbbcbdbebf\ 51 \c0c1c2c3c4c5c6c7c8c9cacbcccdcecf\ 52 \d0d1d2d3d4d5d6d7d8d9dadbdcdddedf\ 53 \e0e1e2e3e4e5e6e7e8e9eaebecedeeef\ 54 \f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff" 55 {-# NOINLINE enc_tab #-} 56 57 -- 256-byte table. Index by an ASCII byte to obtain its nibble; valid 58 -- hex chars ('0'..'9', 'a'..'f', 'A'..'F') map to 0x10..0x1f, every 59 -- other byte maps to 0x20. 60 -- 61 -- The encoding is chosen so the literal is strictly ASCII and contains 62 -- no embedded NUL, which is what the bytestring 'IsString' rule needs 63 -- to rewrite it into 'unsafePackAddress' (cf. 'enc_tab') — the bytes 64 -- end up in static rodata, with no CAF allocation. 65 -- 66 -- The 0x20 sentinel is distinguished by bit 5; no value 0x10..0x1f 67 -- carries that bit, so 'decode' OR-folds every lookup into an 68 -- accumulator and tests 'acc .&. 0x20 == 0' once at the end. The 69 -- output byte is '(n0 `shiftL` 4) .|. (n1 .&. 0x0f)': in 'Word8' the 70 -- shift naturally drops bit 4, and the mask isolates the low nibble. 71 dec_tab :: BS.ByteString 72 dec_tab = 73 "\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\ 74 \\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\ 75 \\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\ 76 \\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x20\x20\x20\x20\x20\x20\ 77 \\x20\x1a\x1b\x1c\x1d\x1e\x1f\x20\x20\x20\x20\x20\x20\x20\x20\x20\ 78 \\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\ 79 \\x20\x1a\x1b\x1c\x1d\x1e\x1f\x20\x20\x20\x20\x20\x20\x20\x20\x20\ 80 \\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\ 81 \\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\ 82 \\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\ 83 \\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\ 84 \\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\ 85 \\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\ 86 \\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\ 87 \\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\ 88 \\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20" 89 {-# NOINLINE dec_tab #-} 90 91 -- | Encode a base256 'ByteString' as base16. 92 -- 93 -- Uses ARM NEON extensions when available, otherwise a pure 94 -- Haskell scalar loop. 95 -- 96 -- >>> encode "hello world" 97 -- "68656c6c6f20776f726c64" 98 encode :: BS.ByteString -> BS.ByteString 99 encode bs 100 | Arm.base16_arm_available = Arm.encode bs 101 | otherwise = encode_scalar bs 102 {-# INLINABLE encode #-} 103 104 -- | Decode a base16 'ByteString' to base256. 105 -- 106 -- Uses ARM NEON extensions when available, otherwise a pure 107 -- Haskell scalar loop. Invalid inputs (including odd-length 108 -- inputs) will produce 'Nothing'. 109 -- 110 -- >>> decode "68656c6c6f20776f726c64" 111 -- Just "hello world" 112 -- >>> decode "068656c6c6f20776f726c64" -- odd-length 113 -- Nothing 114 decode :: BS.ByteString -> Maybe BS.ByteString 115 decode bs 116 | Arm.base16_arm_available = Arm.decode bs 117 | otherwise = decode_scalar bs 118 {-# INLINABLE decode #-} 119 120 encode_scalar :: BS.ByteString -> BS.ByteString 121 encode_scalar (BI.PS sfp soff l) = 122 case enc_tab of 123 BI.PS tfp toff _ -> 124 BI.unsafeCreate (l `B.shiftL` 1) $ \dst -> 125 withForeignPtr sfp $ \sp0 -> 126 withForeignPtr tfp $ \tp0 -> do 127 -- read 'enc_tab' and write 'dst' as 'Word16' pairs. The 128 -- two-byte block at 'enc_tab[2*b]' and the two-byte block 129 -- at 'dst[2*i]' share the same byte layout in memory, so 130 -- this is endianness-safe: we never inspect the numerical 131 -- value of the 'Word16', we just shuffle 16 bits between 132 -- two locations. 133 let !sp = sp0 `plusPtr` soff :: Ptr Word8 134 !tp = tp0 `plusPtr` toff :: Ptr Word16 135 !dp = castPtr dst :: Ptr Word16 136 loop !i 137 | i == l = pure () 138 | otherwise = do 139 b <- peekElemOff sp i 140 w <- peekElemOff tp (fi b) 141 pokeElemOff dp i (w :: Word16) 142 loop (i + 1) 143 loop 0 144 145 decode_scalar :: BS.ByteString -> Maybe BS.ByteString 146 decode_scalar (BI.PS sfp soff l) 147 | B.testBit l 0 = Nothing 148 | otherwise = case dec_tab of 149 BI.PS tfp toff _ -> unsafeDupablePerformIO $ do 150 let !n = l `B.shiftR` 1 151 fp <- BI.mallocByteString n 152 ok <- withForeignPtr fp $ \dst -> 153 withForeignPtr sfp $ \sp0 -> 154 withForeignPtr tfp $ \tp0 -> do 155 let !sp = sp0 `plusPtr` soff :: Ptr Word8 156 !tp = tp0 `plusPtr` toff :: Ptr Word8 157 loop !i !acc 158 | i == n = 159 pure $! acc .&. 0x20 == 0 160 | otherwise = do 161 let !o = i `B.shiftL` 1 162 c0 <- peekElemOff sp o 163 c1 <- peekElemOff sp (o + 1) 164 n0 <- peekElemOff tp (fi c0) 165 n1 <- peekElemOff tp (fi c1) 166 let !b = (n0 `B.shiftL` 4) 167 .|. (n1 .&. 0x0f) 168 pokeElemOff dst i b 169 loop (i + 1) (acc .|. n0 .|. n1) 170 loop 0 0 171 pure $! if ok then Just (BI.PS fp 0 n) else Nothing