base16

Pure Haskell base16 encoding/decoding (docs.ppad.tech/base16).
git clone git://git.ppad.tech/base16.git
Log | Files | Refs | README | LICENSE

Base16.hs (9963B)


      1 {-# OPTIONS_HADDOCK prune #-}
      2 {-# LANGUAGE ApplicativeDo #-}
      3 {-# LANGUAGE BangPatterns #-}
      4 {-# LANGUAGE BinaryLiterals #-}
      5 {-# LANGUAGE OverloadedStrings #-}
      6 
      7 -- |
      8 -- Module: Data.ByteString.Base16
      9 -- Copyright: (c) 2025 Jared Tobin
     10 -- License: MIT
     11 -- Maintainer: Jared Tobin <jared@ppad.tech>
     12 --
     13 -- Pure base16 encoding and decoding of strict bytestrings.
     14 
     15 module Data.ByteString.Base16 (
     16     encode
     17   , decode
     18   ) where
     19 
     20 import qualified Data.Bits as B
     21 import Data.Bits ((.&.), (.|.))
     22 import qualified Data.ByteString as BS
     23 import qualified Data.ByteString.Builder as BSB
     24 import qualified Data.ByteString.Builder.Extra as BE
     25 import qualified Data.ByteString.Internal as BI
     26 import qualified Data.ByteString.Unsafe as BU
     27 import Data.Word (Word8, Word16)
     28 
     29 to_strict :: BSB.Builder -> BS.ByteString
     30 to_strict = BS.toStrict . BSB.toLazyByteString
     31 {-# INLINE to_strict #-}
     32 
     33 to_strict_small :: BSB.Builder -> BS.ByteString
     34 to_strict_small = BS.toStrict
     35   . BE.toLazyByteStringWith (BE.safeStrategy 128 BE.smallChunkSize) mempty
     36 {-# INLINE to_strict_small #-}
     37 
     38 fi :: (Num a, Integral b) => b -> a
     39 fi = fromIntegral
     40 {-# INLINE fi #-}
     41 
     42 hex_charset :: BS.ByteString
     43 hex_charset = "0123456789abcdef"
     44 
     45 expand_w8 :: Word8 -> Word16
     46 expand_w8 b =
     47   let !hi = BU.unsafeIndex hex_charset (fi b `B.shiftR` 4)
     48       !lo = BU.unsafeIndex hex_charset (fi b .&. 0b00001111)
     49   in      fi hi `B.shiftL` 8
     50       .|. fi lo
     51 {-# INLINE expand_w8 #-}
     52 
     53 -- | Encode a base256 'ByteString' as base16.
     54 --
     55 --   >>> encode "hello world"
     56 --   "68656c6c6f20776f726c64"
     57 encode :: BS.ByteString -> BS.ByteString
     58 encode bs@(BI.PS _ _ l)
     59     | l < 64    = to_strict_small loop
     60     | otherwise = to_strict loop
     61   where
     62     -- writing as few words as possible requires performing some length
     63     -- checks up front
     64     loop
     65       | l `rem` 4 == 0 = go64 bs
     66       | (l - 3) `rem` 4 == 0 = case BS.splitAt (l - 3) bs of
     67           (chunk, etc) ->
     68                go64 chunk
     69             <> go32 (BU.unsafeTake 2 etc)
     70             <> go16 (BU.unsafeDrop 2 etc)
     71       | (l - 2) `rem` 4 == 0 = case BS.splitAt (l - 2) bs of
     72           (chunk, etc) -> go64 chunk <> go32 etc
     73       | (l - 1) `rem` 4 == 0 = case BS.splitAt (l - 1) bs of
     74           (chunk, etc) -> go64 chunk <> go16 etc
     75 
     76       | l `rem` 2 == 0 = go32 bs
     77       | (l - 1) `rem` 2 == 0 = case BS.splitAt (l - 1) bs of
     78           (chunk, etc) -> go32 chunk <> go16 etc
     79 
     80       | otherwise = go16 bs
     81 
     82     go64 b = case BS.splitAt 4 b of
     83       (chunk, etc)
     84         | BS.null chunk -> mempty
     85         | otherwise ->
     86             let !w16_0 = expand_w8 (BU.unsafeIndex chunk 0)
     87                 !w16_1 = expand_w8 (BU.unsafeIndex chunk 1)
     88                 !w16_2 = expand_w8 (BU.unsafeIndex chunk 2)
     89                 !w16_3 = expand_w8 (BU.unsafeIndex chunk 3)
     90 
     91                 !w64 = fi w16_0 `B.shiftL` 48
     92                    .|. fi w16_1 `B.shiftL` 32
     93                    .|. fi w16_2 `B.shiftL` 16
     94                    .|. fi w16_3
     95 
     96             in  BSB.word64BE w64 <> go64 etc
     97 
     98     go32 b = case BS.splitAt 2 b of
     99       (chunk, etc)
    100         | BS.null chunk -> mempty
    101         | otherwise ->
    102             let !w16_0 = expand_w8 (BU.unsafeIndex chunk 0)
    103                 !w16_1 = expand_w8 (BU.unsafeIndex chunk 1)
    104 
    105                 !w32 = fi w16_0 `B.shiftL` 16
    106                    .|. fi w16_1
    107 
    108             in  BSB.word32BE w32 <> go32 etc
    109 
    110     go16 b = case BS.uncons b of
    111       Nothing -> mempty
    112       Just (h, t) ->
    113         let !w16 = expand_w8 h
    114         in  BSB.word16BE w16 <> go16 t
    115 
    116 word4 :: Word8 -> Maybe Word8
    117 word4 w8 = fmap fi (BS.elemIndex w8 hex_charset)
    118 {-# INLINE word4 #-}
    119 
    120 -- | Decode a base16 'ByteString' to base256.
    121 --
    122 --   Invalid inputs (including odd-length inputs) will produce
    123 --   'Nothing'.
    124 --
    125 --   >>> decode "68656c6c6f20776f726c64"
    126 --   Just "hello world"
    127 --   >>> decode "068656c6c6f20776f726c64" -- odd-length
    128 --   Nothing
    129 decode :: BS.ByteString -> Maybe BS.ByteString
    130 decode bs@(BI.PS _ _ l)
    131     | B.testBit l 0    = Nothing
    132     | l `quot` 2 < 128 = fmap to_strict_small loop
    133     | otherwise        = fmap to_strict loop
    134   where
    135     -- same story, but we need more checks
    136     loop
    137       | l `rem` 16 == 0       = go64 mempty bs
    138       | (l - 2) `rem` 16 == 0 = case BS.splitAt (l - 2) bs of
    139           (chunk, etc) -> do
    140             b0 <- go64 mempty chunk
    141             go8 b0 etc
    142       | (l - 4) `rem` 16 == 0 = case BS.splitAt (l - 4) bs of
    143           (chunk, etc) -> do
    144             b0 <- go64 mempty chunk
    145             go16 b0 etc
    146       | (l - 6) `rem` 16 == 0 = case BS.splitAt (l - 6) bs of
    147           (chunk, etc) -> do
    148             b0 <- go64 mempty chunk
    149             b1 <- go16 b0 (BU.unsafeTake 4 etc)
    150             go8 b1 (BU.unsafeDrop 4 etc)
    151       | (l - 8) `rem` 16 == 0 = case BS.splitAt (l - 8) bs of
    152           (chunk, etc) -> do
    153             b0 <- go64 mempty chunk
    154             go32 b0 etc
    155       | (l - 10) `rem` 16 == 0 = case BS.splitAt (l - 10) bs of
    156           (chunk, etc) -> do
    157             b0 <- go64 mempty chunk
    158             b1 <- go32 b0 (BU.unsafeTake 8 etc)
    159             go8 b1 (BU.unsafeDrop 8 etc)
    160       | (l - 12) `rem` 16 == 0 = case BS.splitAt (l - 12) bs of
    161           (chunk, etc) -> do
    162             b0 <- go64 mempty chunk
    163             b1 <- go32 b0 (BU.unsafeTake 8 etc)
    164             go16 b1 (BU.unsafeDrop 8 etc)
    165       | (l - 14) `rem` 16 == 0 = case BS.splitAt (l - 14) bs of
    166           (chunk, etc) -> do
    167             b0 <- go64 mempty chunk
    168             b1 <- go32 b0 (BU.unsafeTake 8 etc)
    169             b2 <- go16 b1 (BU.unsafeTake 4 (BU.unsafeDrop 8 etc))
    170             go8 b2 (BU.unsafeDrop 12 etc)
    171 
    172       | l `rem` 8 == 0       = go32 mempty bs
    173       | (l - 2) `rem` 8 == 0 = case BS.splitAt (l - 2) bs of
    174           (chunk, etc) -> do
    175             b0 <- go32 mempty chunk
    176             go8 b0 etc
    177       | (l - 4) `rem` 8 == 0 = case BS.splitAt (l - 4) bs of
    178           (chunk, etc) -> do
    179             b0 <- go32 mempty chunk
    180             go16 b0 etc
    181       | (l - 6) `rem` 8 == 0 = case BS.splitAt (l - 6) bs of
    182           (chunk, etc) -> do
    183             b0 <- go32 mempty chunk
    184             b1 <- go16 b0 (BU.unsafeTake 4 etc)
    185             go8 b1  (BU.unsafeDrop 4 etc)
    186 
    187       | l `rem` 4 == 0       = go16 mempty bs
    188       | (l - 2) `rem` 4 == 0 = case BS.splitAt (l - 2) bs of
    189           (chunk, etc) -> do
    190             b0 <- go16 mempty chunk
    191             go8 b0 etc
    192 
    193       | otherwise = go8 mempty bs
    194 
    195     go64 acc b = case BS.splitAt 16 b of
    196       (chunk, etc)
    197         | BS.null chunk -> pure acc
    198         | otherwise -> do
    199             !w4_00 <- word4 (BU.unsafeIndex chunk 00)
    200             !w4_01 <- word4 (BU.unsafeIndex chunk 01)
    201             !w4_02 <- word4 (BU.unsafeIndex chunk 02)
    202             !w4_03 <- word4 (BU.unsafeIndex chunk 03)
    203             !w4_04 <- word4 (BU.unsafeIndex chunk 04)
    204             !w4_05 <- word4 (BU.unsafeIndex chunk 05)
    205             !w4_06 <- word4 (BU.unsafeIndex chunk 06)
    206             !w4_07 <- word4 (BU.unsafeIndex chunk 07)
    207             !w4_08 <- word4 (BU.unsafeIndex chunk 08)
    208             !w4_09 <- word4 (BU.unsafeIndex chunk 09)
    209             !w4_10 <- word4 (BU.unsafeIndex chunk 10)
    210             !w4_11 <- word4 (BU.unsafeIndex chunk 11)
    211             !w4_12 <- word4 (BU.unsafeIndex chunk 12)
    212             !w4_13 <- word4 (BU.unsafeIndex chunk 13)
    213             !w4_14 <- word4 (BU.unsafeIndex chunk 14)
    214             !w4_15 <- word4 (BU.unsafeIndex chunk 15)
    215 
    216             let !w64 = fi w4_00 `B.shiftL` 60
    217                    .|. fi w4_01 `B.shiftL` 56
    218                    .|. fi w4_02 `B.shiftL` 52
    219                    .|. fi w4_03 `B.shiftL` 48
    220                    .|. fi w4_04 `B.shiftL` 44
    221                    .|. fi w4_05 `B.shiftL` 40
    222                    .|. fi w4_06 `B.shiftL` 36
    223                    .|. fi w4_07 `B.shiftL` 32
    224                    .|. fi w4_08 `B.shiftL` 28
    225                    .|. fi w4_09 `B.shiftL` 24
    226                    .|. fi w4_10 `B.shiftL` 20
    227                    .|. fi w4_11 `B.shiftL` 16
    228                    .|. fi w4_12 `B.shiftL` 12
    229                    .|. fi w4_13 `B.shiftL` 08
    230                    .|. fi w4_14 `B.shiftL` 04
    231                    .|. fi w4_15
    232 
    233             go64 (acc <> BSB.word64BE w64) etc
    234 
    235     go32 acc b = case BS.splitAt 8 b of
    236       (chunk, etc)
    237         | BS.null chunk -> pure acc
    238         | otherwise -> do
    239             !w4_00 <- word4 (BU.unsafeIndex chunk 00)
    240             !w4_01 <- word4 (BU.unsafeIndex chunk 01)
    241             !w4_02 <- word4 (BU.unsafeIndex chunk 02)
    242             !w4_03 <- word4 (BU.unsafeIndex chunk 03)
    243             !w4_04 <- word4 (BU.unsafeIndex chunk 04)
    244             !w4_05 <- word4 (BU.unsafeIndex chunk 05)
    245             !w4_06 <- word4 (BU.unsafeIndex chunk 06)
    246             !w4_07 <- word4 (BU.unsafeIndex chunk 07)
    247 
    248             let !w32 = fi w4_00 `B.shiftL` 28
    249                    .|. fi w4_01 `B.shiftL` 24
    250                    .|. fi w4_02 `B.shiftL` 20
    251                    .|. fi w4_03 `B.shiftL` 16
    252                    .|. fi w4_04 `B.shiftL` 12
    253                    .|. fi w4_05 `B.shiftL` 08
    254                    .|. fi w4_06 `B.shiftL` 04
    255                    .|. fi w4_07
    256 
    257             go32 (acc <> BSB.word32BE w32) etc
    258 
    259     go16 acc b = case BS.splitAt 4 b of
    260       (chunk, etc)
    261         | BS.null chunk -> pure acc
    262         | otherwise -> do
    263             !w4_00 <- word4 (BU.unsafeIndex chunk 00)
    264             !w4_01 <- word4 (BU.unsafeIndex chunk 01)
    265             !w4_02 <- word4 (BU.unsafeIndex chunk 02)
    266             !w4_03 <- word4 (BU.unsafeIndex chunk 03)
    267 
    268             let !w16 = fi w4_00 `B.shiftL` 12
    269                    .|. fi w4_01 `B.shiftL` 08
    270                    .|. fi w4_02 `B.shiftL` 04
    271                    .|. fi w4_03
    272 
    273             go16 (acc <> BSB.word16BE w16) etc
    274 
    275     go8 acc b  = case BS.splitAt 2 b of
    276       (chunk, etc)
    277         | BS.null chunk -> pure acc
    278         | otherwise -> do
    279             !w4_00 <- word4 (BU.unsafeIndex chunk 00)
    280             !w4_01 <- word4 (BU.unsafeIndex chunk 01)
    281 
    282             let !w8 = fi w4_00 `B.shiftL` 04
    283                   .|. fi w4_01
    284 
    285             go8 (acc <> BSB.word8 w8) etc
    286