base16

Pure Haskell base16 encoding/decoding (docs.ppad.tech/base16).
git clone git://git.ppad.tech/base16.git
Log | Files | Refs | README | LICENSE

commit 172805a1500ccf81594d82cd189dfc81a562d43a
parent 58dfb7922401a60d5de76825fcd5f6ecbcd7afe0
Author: Jared Tobin <jared@jtobin.io>
Date:   Sat, 16 May 2026 11:27:12 -0230

lib: drop bytestring builder, use unsafeCreate + lookup tables

encode and decode now allocate the output buffer once with the exact
final size and fill it via direct Ptr writes, instead of accumulating
a Builder and collapsing it with toLazyByteString/toStrict.

- 512-byte enc_tab CAF, all-ASCII so the bytestring IsString rule
  rewrites it to unsafePackAddress and the bytes live in rodata
- 256-byte dec_tab CAF mapping byte -> nibble, with 0x80 as the
  invalid sentinel; OR-accumulator gives branchless validity, checked
  once after the loop
- encode body is unsafeCreate (2*l) + a per-byte Ptr loop
- decode body is mallocByteString (l/2) + unsafeDupablePerformIO

Resulting Core is a joinrec with Int#/Word8#/State# args calling
readWord8OffAddr# and writeWord8OffAddr# directly; no allocation in
the hot path.

On 1 KiB inputs (aarch64, GHC 9.10.3, LLVM 19, -f+llvm):
- encode: 3032 ns -> 296 ns   (10.2x)
- decode:  596 ns -> 394 ns   ( 1.5x)
- encode alloc: 49,696 B ->  3,872 B
- decode alloc: 17,960 B ->  3,992 B

Allocation now matches the C-backed base16-bytestring on both paths.
Existing tasty suite (5000 QC cases x 3 properties + uppercase HUnit)
passes unchanged.

Diffstat:
Mlib/Data/ByteString/Base16.hs | 353+++++++++++++++++++++----------------------------------------------------------
1 file changed, 92 insertions(+), 261 deletions(-)

diff --git a/lib/Data/ByteString/Base16.hs b/lib/Data/ByteString/Base16.hs @@ -1,7 +1,5 @@ {-# OPTIONS_HADDOCK prune #-} -{-# LANGUAGE ApplicativeDo #-} {-# LANGUAGE BangPatterns #-} -{-# LANGUAGE BinaryLiterals #-} {-# LANGUAGE OverloadedStrings #-} -- | @@ -20,116 +18,84 @@ module Data.ByteString.Base16 ( import qualified Data.Bits as B import Data.Bits ((.&.), (.|.)) import qualified Data.ByteString as BS -import qualified Data.ByteString.Builder as BSB -import qualified Data.ByteString.Builder.Extra as BE import qualified Data.ByteString.Internal as BI -import qualified Data.ByteString.Unsafe as BU -import Data.Word (Word8, Word16) - -to_strict :: BSB.Builder -> BS.ByteString -to_strict = BS.toStrict . BSB.toLazyByteString -{-# INLINE to_strict #-} - -to_strict_small :: BSB.Builder -> BS.ByteString -to_strict_small = BS.toStrict - . BE.toLazyByteStringWith (BE.safeStrategy 128 BE.smallChunkSize) mempty -{-# INLINE to_strict_small #-} +import Data.Word (Word8) +import Foreign.ForeignPtr (withForeignPtr) +import Foreign.Ptr (Ptr, plusPtr) +import Foreign.Storable (peekElemOff, pokeElemOff) +import System.IO.Unsafe (unsafeDupablePerformIO) fi :: (Num a, Integral b) => b -> a fi = fromIntegral {-# INLINE fi #-} -hex_charset :: BS.ByteString -hex_charset = "0123456789abcdef" - -expand_w8 :: Word8 -> Word16 -expand_w8 b = - let !hi = BU.unsafeIndex hex_charset (fi b `B.shiftR` 4) - !lo = BU.unsafeIndex hex_charset (fi b .&. 0b00001111) - in fi hi `B.shiftL` 8 - .|. fi lo -{-# INLINE expand_w8 #-} +-- 512-byte table. Bytes [2k] and [2k+1] are the two lowercase ASCII +-- hex characters representing the value k. All-ASCII content means +-- the bytestring 'IsString' rule rewrites this to 'unsafePackAddress' +-- and the bytes live in static rodata. +enc_tab :: BS.ByteString +enc_tab = + "000102030405060708090a0b0c0d0e0f\ + \101112131415161718191a1b1c1d1e1f\ + \202122232425262728292a2b2c2d2e2f\ + \303132333435363738393a3b3c3d3e3f\ + \404142434445464748494a4b4c4d4e4f\ + \505152535455565758595a5b5c5d5e5f\ + \606162636465666768696a6b6c6d6e6f\ + \707172737475767778797a7b7c7d7e7f\ + \808182838485868788898a8b8c8d8e8f\ + \909192939495969798999a9b9c9d9e9f\ + \a0a1a2a3a4a5a6a7a8a9aaabacadaeaf\ + \b0b1b2b3b4b5b6b7b8b9babbbcbdbebf\ + \c0c1c2c3c4c5c6c7c8c9cacbcccdcecf\ + \d0d1d2d3d4d5d6d7d8d9dadbdcdddedf\ + \e0e1e2e3e4e5e6e7e8e9eaebecedeeef\ + \f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff" +{-# NOINLINE enc_tab #-} + +-- 256-byte table. Index by an ASCII byte to obtain its nibble value +-- (0..15); invalid bytes map to 0x80. Built once at first use, with +-- no intermediate list (which would balloon CAF allocation). +dec_tab :: BS.ByteString +dec_tab = BI.unsafeCreate 256 $ \p -> + let go !i + | i == 256 = pure () + | otherwise = pokeElemOff p i (nib i) >> go (i + 1) + in go 0 + where + nib :: Int -> Word8 + nib c + | c >= 0x30 && c <= 0x39 = fi (c - 0x30) -- '0'..'9' + | c >= 0x41 && c <= 0x46 = fi (c - 0x37) -- 'A'..'F' + | c >= 0x61 && c <= 0x66 = fi (c - 0x57) -- 'a'..'f' + | otherwise = 0x80 +{-# NOINLINE dec_tab #-} -- | Encode a base256 'ByteString' as base16. -- -- >>> encode "hello world" -- "68656c6c6f20776f726c64" encode :: BS.ByteString -> BS.ByteString -encode bs@(BI.PS _ _ l) - | l < 64 = to_strict_small loop - | otherwise = to_strict loop - where - -- writing as few words as possible requires performing some length - -- checks up front - loop - | l `rem` 4 == 0 = - go64 bs - | (l - 3) `rem` 4 == 0 = case BS.splitAt (l - 3) bs of - (chunk, etc) -> - go64 chunk - <> go32 (BU.unsafeTake 2 etc) - <> go16 (BU.unsafeDrop 2 etc) - | (l - 2) `rem` 4 == 0 = case BS.splitAt (l - 2) bs of - (chunk, etc) -> - go64 chunk - <> go32 etc - | (l - 1) `rem` 4 == 0 = case BS.splitAt (l - 1) bs of - (chunk, etc) -> - go64 chunk - <> go16 etc - - | l `rem` 2 == 0 = - go32 bs - | (l - 1) `rem` 2 == 0 = case BS.splitAt (l - 1) bs of - (chunk, etc) -> - go32 chunk - <> go16 etc - - | otherwise = - go16 bs - - go64 b = case BS.splitAt 4 b of - (chunk, etc) - | BS.null chunk -> mempty - | otherwise -> - let !w16_0 = expand_w8 (BU.unsafeIndex chunk 0) - !w16_1 = expand_w8 (BU.unsafeIndex chunk 1) - !w16_2 = expand_w8 (BU.unsafeIndex chunk 2) - !w16_3 = expand_w8 (BU.unsafeIndex chunk 3) - - !w64 = fi w16_0 `B.shiftL` 48 - .|. fi w16_1 `B.shiftL` 32 - .|. fi w16_2 `B.shiftL` 16 - .|. fi w16_3 - - in BSB.word64BE w64 <> go64 etc - - go32 b = case BS.splitAt 2 b of - (chunk, etc) - | BS.null chunk -> mempty - | otherwise -> - let !w16_0 = expand_w8 (BU.unsafeIndex chunk 0) - !w16_1 = expand_w8 (BU.unsafeIndex chunk 1) - - !w32 = fi w16_0 `B.shiftL` 16 - .|. fi w16_1 - - in BSB.word32BE w32 <> go32 etc - - go16 b = case BS.uncons b of - Nothing -> mempty - Just (h, t) -> - let !w16 = expand_w8 h - in BSB.word16BE w16 <> go16 t - --- word8 hex character to word4 -word4 :: Word8 -> Maybe Word8 -word4 c - | c > 47 && c < 58 = pure $! c - 48 -- 0-9 - | c > 64 && c < 71 = pure $! c - 55 -- A-F - | c > 96 && c < 103 = pure $! c - 87 -- a-f - | otherwise = Nothing -{-# INLINE word4 #-} +encode (BI.PS sfp soff l) = + case enc_tab of + BI.PS tfp toff _ -> + BI.unsafeCreate (l `B.shiftL` 1) $ \dst -> + withForeignPtr sfp $ \sp0 -> + withForeignPtr tfp $ \tp0 -> do + let !sp = sp0 `plusPtr` soff :: Ptr Word8 + !tp = tp0 `plusPtr` toff :: Ptr Word8 + loop !i + | i == l = pure () + | otherwise = do + b <- peekElemOff sp i + let !j = fi b `B.shiftL` 1 + !o = i `B.shiftL` 1 + hi <- peekElemOff tp j + lo <- peekElemOff tp (j + 1) + pokeElemOff dst o hi + pokeElemOff dst (o + 1) lo + loop (i + 1) + loop 0 -- | Decode a base16 'ByteString' to base256. -- @@ -141,164 +107,29 @@ word4 c -- >>> decode "068656c6c6f20776f726c64" -- odd-length -- Nothing decode :: BS.ByteString -> Maybe BS.ByteString -decode bs@(BI.PS _ _ l) - | B.testBit l 0 = Nothing - | l `quot` 2 < 128 = fmap to_strict_small loop - | otherwise = fmap to_strict loop - where - -- same story, but we need more checks - loop - | l `rem` 16 == 0 = - go64 mempty bs - | (l - 2) `rem` 16 == 0 = case BS.splitAt (l - 2) bs of - (chunk, etc) -> do - b0 <- go64 mempty chunk - go8 b0 etc - | (l - 4) `rem` 16 == 0 = case BS.splitAt (l - 4) bs of - (chunk, etc) -> do - b0 <- go64 mempty chunk - go16 b0 etc - | (l - 6) `rem` 16 == 0 = case BS.splitAt (l - 6) bs of - (chunk, etc) -> do - b0 <- go64 mempty chunk - b1 <- go16 b0 (BU.unsafeTake 4 etc) - go8 b1 (BU.unsafeDrop 4 etc) - | (l - 8) `rem` 16 == 0 = case BS.splitAt (l - 8) bs of - (chunk, etc) -> do - b0 <- go64 mempty chunk - go32 b0 etc - | (l - 10) `rem` 16 == 0 = case BS.splitAt (l - 10) bs of - (chunk, etc) -> do - b0 <- go64 mempty chunk - b1 <- go32 b0 (BU.unsafeTake 8 etc) - go8 b1 (BU.unsafeDrop 8 etc) - | (l - 12) `rem` 16 == 0 = case BS.splitAt (l - 12) bs of - (chunk, etc) -> do - b0 <- go64 mempty chunk - b1 <- go32 b0 (BU.unsafeTake 8 etc) - go16 b1 (BU.unsafeDrop 8 etc) - | (l - 14) `rem` 16 == 0 = case BS.splitAt (l - 14) bs of - (chunk, etc) -> do - b0 <- go64 mempty chunk - b1 <- go32 b0 (BU.unsafeTake 8 etc) - b2 <- go16 b1 (BU.unsafeTake 4 (BU.unsafeDrop 8 etc)) - go8 b2 (BU.unsafeDrop 12 etc) - - | l `rem` 8 == 0 = - go32 mempty bs - | (l - 2) `rem` 8 == 0 = case BS.splitAt (l - 2) bs of - (chunk, etc) -> do - b0 <- go32 mempty chunk - go8 b0 etc - | (l - 4) `rem` 8 == 0 = case BS.splitAt (l - 4) bs of - (chunk, etc) -> do - b0 <- go32 mempty chunk - go16 b0 etc - | (l - 6) `rem` 8 == 0 = case BS.splitAt (l - 6) bs of - (chunk, etc) -> do - b0 <- go32 mempty chunk - b1 <- go16 b0 (BU.unsafeTake 4 etc) - go8 b1 (BU.unsafeDrop 4 etc) - - | l `rem` 4 == 0 = - go16 mempty bs - | (l - 2) `rem` 4 == 0 = case BS.splitAt (l - 2) bs of - (chunk, etc) -> do - b0 <- go16 mempty chunk - go8 b0 etc - - | otherwise = - go8 mempty bs - - go64 acc b = case BS.splitAt 16 b of - (chunk, etc) - | BS.null chunk -> pure acc - | otherwise -> do - !w4_00 <- word4 (BU.unsafeIndex chunk 00) - !w4_01 <- word4 (BU.unsafeIndex chunk 01) - !w4_02 <- word4 (BU.unsafeIndex chunk 02) - !w4_03 <- word4 (BU.unsafeIndex chunk 03) - !w4_04 <- word4 (BU.unsafeIndex chunk 04) - !w4_05 <- word4 (BU.unsafeIndex chunk 05) - !w4_06 <- word4 (BU.unsafeIndex chunk 06) - !w4_07 <- word4 (BU.unsafeIndex chunk 07) - !w4_08 <- word4 (BU.unsafeIndex chunk 08) - !w4_09 <- word4 (BU.unsafeIndex chunk 09) - !w4_10 <- word4 (BU.unsafeIndex chunk 10) - !w4_11 <- word4 (BU.unsafeIndex chunk 11) - !w4_12 <- word4 (BU.unsafeIndex chunk 12) - !w4_13 <- word4 (BU.unsafeIndex chunk 13) - !w4_14 <- word4 (BU.unsafeIndex chunk 14) - !w4_15 <- word4 (BU.unsafeIndex chunk 15) - - let !w64 = fi w4_00 `B.shiftL` 60 - .|. fi w4_01 `B.shiftL` 56 - .|. fi w4_02 `B.shiftL` 52 - .|. fi w4_03 `B.shiftL` 48 - .|. fi w4_04 `B.shiftL` 44 - .|. fi w4_05 `B.shiftL` 40 - .|. fi w4_06 `B.shiftL` 36 - .|. fi w4_07 `B.shiftL` 32 - .|. fi w4_08 `B.shiftL` 28 - .|. fi w4_09 `B.shiftL` 24 - .|. fi w4_10 `B.shiftL` 20 - .|. fi w4_11 `B.shiftL` 16 - .|. fi w4_12 `B.shiftL` 12 - .|. fi w4_13 `B.shiftL` 08 - .|. fi w4_14 `B.shiftL` 04 - .|. fi w4_15 - - go64 (acc <> BSB.word64BE w64) etc - - go32 acc b = case BS.splitAt 8 b of - (chunk, etc) - | BS.null chunk -> pure acc - | otherwise -> do - !w4_00 <- word4 (BU.unsafeIndex chunk 00) - !w4_01 <- word4 (BU.unsafeIndex chunk 01) - !w4_02 <- word4 (BU.unsafeIndex chunk 02) - !w4_03 <- word4 (BU.unsafeIndex chunk 03) - !w4_04 <- word4 (BU.unsafeIndex chunk 04) - !w4_05 <- word4 (BU.unsafeIndex chunk 05) - !w4_06 <- word4 (BU.unsafeIndex chunk 06) - !w4_07 <- word4 (BU.unsafeIndex chunk 07) - - let !w32 = fi w4_00 `B.shiftL` 28 - .|. fi w4_01 `B.shiftL` 24 - .|. fi w4_02 `B.shiftL` 20 - .|. fi w4_03 `B.shiftL` 16 - .|. fi w4_04 `B.shiftL` 12 - .|. fi w4_05 `B.shiftL` 08 - .|. fi w4_06 `B.shiftL` 04 - .|. fi w4_07 - - go32 (acc <> BSB.word32BE w32) etc - - go16 acc b = case BS.splitAt 4 b of - (chunk, etc) - | BS.null chunk -> pure acc - | otherwise -> do - !w4_00 <- word4 (BU.unsafeIndex chunk 00) - !w4_01 <- word4 (BU.unsafeIndex chunk 01) - !w4_02 <- word4 (BU.unsafeIndex chunk 02) - !w4_03 <- word4 (BU.unsafeIndex chunk 03) - - let !w16 = fi w4_00 `B.shiftL` 12 - .|. fi w4_01 `B.shiftL` 08 - .|. fi w4_02 `B.shiftL` 04 - .|. fi w4_03 - - go16 (acc <> BSB.word16BE w16) etc - - go8 acc b = case BS.splitAt 2 b of - (chunk, etc) - | BS.null chunk -> pure acc - | otherwise -> do - !w4_00 <- word4 (BU.unsafeIndex chunk 00) - !w4_01 <- word4 (BU.unsafeIndex chunk 01) - - let !w8 = fi w4_00 `B.shiftL` 04 - .|. fi w4_01 - - go8 (acc <> BSB.word8 w8) etc - +decode (BI.PS sfp soff l) + | B.testBit l 0 = Nothing + | otherwise = case dec_tab of + BI.PS tfp toff _ -> unsafeDupablePerformIO $ do + let !n = l `B.shiftR` 1 + fp <- BI.mallocByteString n + ok <- withForeignPtr fp $ \dst -> + withForeignPtr sfp $ \sp0 -> + withForeignPtr tfp $ \tp0 -> do + let !sp = sp0 `plusPtr` soff :: Ptr Word8 + !tp = tp0 `plusPtr` toff :: Ptr Word8 + loop !i !acc + | i == n = + pure $! acc .&. 0x80 == 0 + | otherwise = do + let !o = i `B.shiftL` 1 + c0 <- peekElemOff sp o + c1 <- peekElemOff sp (o + 1) + n0 <- peekElemOff tp (fi c0) + n1 <- peekElemOff tp (fi c1) + let !b = (n0 `B.shiftL` 4) + .|. (n1 .&. 0x0f) + pokeElemOff dst i b + loop (i + 1) (acc .|. n0 .|. n1) + loop 0 0 + pure $! if ok then Just (BI.PS fp 0 n) else Nothing