commit cf4585fe2597879d18541426fd6e3d5bca746942
parent 53523b52d38ee51fce474feb007c62e6c991e18f
Author: Jared Tobin <jared@jtobin.io>
Date: Sat, 16 May 2026 11:42:15 -0230
lib: simplify encode inner loop with Word16 view
The two-byte block at 'enc_tab[2*b]' and the two-byte block at
'dst[2*i]' share the same byte layout in memory, so we can read
'enc_tab' as 'Ptr Word16' and write 'dst' as 'Ptr Word16' — never
inspecting the numerical value, just shuffling 16 bits between the
two locations. Endianness-safe by construction.
Drops the explicit 'j'/'o' offset arithmetic and the 'hi'/'lo'
interleaving from the hot path:
before: 4 byte ops (2 peeks + 2 pokes) + 2 shift-and-bind lines
after: 2 'Word16' ops (1 peek + 1 poke), no extra bindings
No measurable wall-time change (296.7 ns vs 297.6 ns on 1 KiB, well
within noise) — the CPU was already overlapping the byte ops across
iterations. Kept for readability.
Diffstat:
1 file changed, 12 insertions(+), 9 deletions(-)
diff --git a/lib/Data/ByteString/Base16.hs b/lib/Data/ByteString/Base16.hs
@@ -19,9 +19,9 @@ import qualified Data.Bits as B
import Data.Bits ((.&.), (.|.))
import qualified Data.ByteString as BS
import qualified Data.ByteString.Internal as BI
-import Data.Word (Word8)
+import Data.Word (Word8, Word16)
import Foreign.ForeignPtr (withForeignPtr)
-import Foreign.Ptr (Ptr, plusPtr)
+import Foreign.Ptr (Ptr, castPtr, plusPtr)
import Foreign.Storable (peekElemOff, pokeElemOff)
import System.IO.Unsafe (unsafeDupablePerformIO)
@@ -98,18 +98,21 @@ encode (BI.PS sfp soff l) =
BI.unsafeCreate (l `B.shiftL` 1) $ \dst ->
withForeignPtr sfp $ \sp0 ->
withForeignPtr tfp $ \tp0 -> do
+ -- read 'enc_tab' and write 'dst' as 'Word16' pairs. The
+ -- two-byte block at 'enc_tab[2*b]' and the two-byte block
+ -- at 'dst[2*i]' share the same byte layout in memory, so
+ -- this is endianness-safe: we never inspect the numerical
+ -- value of the 'Word16', we just shuffle 16 bits between
+ -- two locations.
let !sp = sp0 `plusPtr` soff :: Ptr Word8
- !tp = tp0 `plusPtr` toff :: Ptr Word8
+ !tp = tp0 `plusPtr` toff :: Ptr Word16
+ !dp = castPtr dst :: Ptr Word16
loop !i
| i == l = pure ()
| otherwise = do
b <- peekElemOff sp i
- let !j = fi b `B.shiftL` 1
- !o = i `B.shiftL` 1
- hi <- peekElemOff tp j
- lo <- peekElemOff tp (j + 1)
- pokeElemOff dst o hi
- pokeElemOff dst (o + 1) lo
+ w <- peekElemOff tp (fi b)
+ pokeElemOff dp i (w :: Word16)
loop (i + 1)
loop 0