base16

Pure Haskell base16 encoding/decoding (docs.ppad.tech/base16).
git clone git://git.ppad.tech/base16.git
Log | Files | Refs | README | LICENSE

Base16.hs (10119B)


      1 {-# OPTIONS_HADDOCK prune #-}
      2 {-# LANGUAGE ApplicativeDo #-}
      3 {-# LANGUAGE BangPatterns #-}
      4 {-# LANGUAGE BinaryLiterals #-}
      5 {-# LANGUAGE OverloadedStrings #-}
      6 
      7 -- |
      8 -- Module: Data.ByteString.Base16
      9 -- Copyright: (c) 2025 Jared Tobin
     10 -- License: MIT
     11 -- Maintainer: Jared Tobin <jared@ppad.tech>
     12 --
     13 -- Pure base16 encoding and decoding of strict bytestrings.
     14 
     15 module Data.ByteString.Base16 (
     16     encode
     17   , decode
     18   ) where
     19 
     20 import qualified Data.Bits as B
     21 import Data.Bits ((.&.), (.|.))
     22 import qualified Data.ByteString as BS
     23 import qualified Data.ByteString.Builder as BSB
     24 import qualified Data.ByteString.Builder.Extra as BE
     25 import qualified Data.ByteString.Internal as BI
     26 import qualified Data.ByteString.Unsafe as BU
     27 import Data.Word (Word8, Word16)
     28 
     29 to_strict :: BSB.Builder -> BS.ByteString
     30 to_strict = BS.toStrict . BSB.toLazyByteString
     31 {-# INLINE to_strict #-}
     32 
     33 to_strict_small :: BSB.Builder -> BS.ByteString
     34 to_strict_small = BS.toStrict
     35   . BE.toLazyByteStringWith (BE.safeStrategy 128 BE.smallChunkSize) mempty
     36 {-# INLINE to_strict_small #-}
     37 
     38 fi :: (Num a, Integral b) => b -> a
     39 fi = fromIntegral
     40 {-# INLINE fi #-}
     41 
     42 hex_charset :: BS.ByteString
     43 hex_charset = "0123456789abcdef"
     44 
     45 expand_w8 :: Word8 -> Word16
     46 expand_w8 b =
     47   let !hi = BU.unsafeIndex hex_charset (fi b `B.shiftR` 4)
     48       !lo = BU.unsafeIndex hex_charset (fi b .&. 0b00001111)
     49   in      fi hi `B.shiftL` 8
     50       .|. fi lo
     51 {-# INLINE expand_w8 #-}
     52 
     53 -- | Encode a base256 'ByteString' as base16.
     54 --
     55 --   >>> encode "hello world"
     56 --   "68656c6c6f20776f726c64"
     57 encode :: BS.ByteString -> BS.ByteString
     58 encode bs@(BI.PS _ _ l)
     59     | l < 64    = to_strict_small loop
     60     | otherwise = to_strict loop
     61   where
     62     -- writing as few words as possible requires performing some length
     63     -- checks up front
     64     loop
     65       | l `rem` 4 == 0 =
     66                go64 bs
     67       | (l - 3) `rem` 4 == 0 = case BS.splitAt (l - 3) bs of
     68           (chunk, etc) ->
     69                go64 chunk
     70             <> go32 (BU.unsafeTake 2 etc)
     71             <> go16 (BU.unsafeDrop 2 etc)
     72       | (l - 2) `rem` 4 == 0 = case BS.splitAt (l - 2) bs of
     73           (chunk, etc) ->
     74                go64 chunk
     75             <> go32 etc
     76       | (l - 1) `rem` 4 == 0 = case BS.splitAt (l - 1) bs of
     77           (chunk, etc) ->
     78                go64 chunk
     79             <> go16 etc
     80 
     81       | l `rem` 2 == 0 =
     82                go32 bs
     83       | (l - 1) `rem` 2 == 0 = case BS.splitAt (l - 1) bs of
     84           (chunk, etc) ->
     85                go32 chunk
     86             <> go16 etc
     87 
     88       | otherwise =
     89                go16 bs
     90 
     91     go64 b = case BS.splitAt 4 b of
     92       (chunk, etc)
     93         | BS.null chunk -> mempty
     94         | otherwise ->
     95             let !w16_0 = expand_w8 (BU.unsafeIndex chunk 0)
     96                 !w16_1 = expand_w8 (BU.unsafeIndex chunk 1)
     97                 !w16_2 = expand_w8 (BU.unsafeIndex chunk 2)
     98                 !w16_3 = expand_w8 (BU.unsafeIndex chunk 3)
     99 
    100                 !w64 = fi w16_0 `B.shiftL` 48
    101                    .|. fi w16_1 `B.shiftL` 32
    102                    .|. fi w16_2 `B.shiftL` 16
    103                    .|. fi w16_3
    104 
    105             in  BSB.word64BE w64 <> go64 etc
    106 
    107     go32 b = case BS.splitAt 2 b of
    108       (chunk, etc)
    109         | BS.null chunk -> mempty
    110         | otherwise ->
    111             let !w16_0 = expand_w8 (BU.unsafeIndex chunk 0)
    112                 !w16_1 = expand_w8 (BU.unsafeIndex chunk 1)
    113 
    114                 !w32 = fi w16_0 `B.shiftL` 16
    115                    .|. fi w16_1
    116 
    117             in  BSB.word32BE w32 <> go32 etc
    118 
    119     go16 b = case BS.uncons b of
    120       Nothing -> mempty
    121       Just (h, t) ->
    122         let !w16 = expand_w8 h
    123         in  BSB.word16BE w16 <> go16 t
    124 
    125 word4 :: Word8 -> Maybe Word8
    126 word4 w8 = fmap fi (BS.elemIndex w8 hex_charset)
    127 {-# INLINE word4 #-}
    128 
    129 -- | Decode a base16 'ByteString' to base256.
    130 --
    131 --   Invalid inputs (including odd-length inputs) will produce
    132 --   'Nothing'.
    133 --
    134 --   >>> decode "68656c6c6f20776f726c64"
    135 --   Just "hello world"
    136 --   >>> decode "068656c6c6f20776f726c64" -- odd-length
    137 --   Nothing
    138 decode :: BS.ByteString -> Maybe BS.ByteString
    139 decode bs@(BI.PS _ _ l)
    140     | B.testBit l 0    = Nothing
    141     | l `quot` 2 < 128 = fmap to_strict_small loop
    142     | otherwise        = fmap to_strict loop
    143   where
    144     -- same story, but we need more checks
    145     loop
    146       | l `rem` 16 == 0 =
    147             go64 mempty bs
    148       | (l - 2) `rem` 16 == 0 = case BS.splitAt (l - 2) bs of
    149           (chunk, etc) -> do
    150             b0 <- go64 mempty chunk
    151             go8 b0 etc
    152       | (l - 4) `rem` 16 == 0 = case BS.splitAt (l - 4) bs of
    153           (chunk, etc) -> do
    154             b0 <- go64 mempty chunk
    155             go16 b0 etc
    156       | (l - 6) `rem` 16 == 0 = case BS.splitAt (l - 6) bs of
    157           (chunk, etc) -> do
    158             b0 <- go64 mempty chunk
    159             b1 <- go16 b0 (BU.unsafeTake 4 etc)
    160             go8 b1 (BU.unsafeDrop 4 etc)
    161       | (l - 8) `rem` 16 == 0 = case BS.splitAt (l - 8) bs of
    162           (chunk, etc) -> do
    163             b0 <- go64 mempty chunk
    164             go32 b0 etc
    165       | (l - 10) `rem` 16 == 0 = case BS.splitAt (l - 10) bs of
    166           (chunk, etc) -> do
    167             b0 <- go64 mempty chunk
    168             b1 <- go32 b0 (BU.unsafeTake 8 etc)
    169             go8 b1 (BU.unsafeDrop 8 etc)
    170       | (l - 12) `rem` 16 == 0 = case BS.splitAt (l - 12) bs of
    171           (chunk, etc) -> do
    172             b0 <- go64 mempty chunk
    173             b1 <- go32 b0 (BU.unsafeTake 8 etc)
    174             go16 b1 (BU.unsafeDrop 8 etc)
    175       | (l - 14) `rem` 16 == 0 = case BS.splitAt (l - 14) bs of
    176           (chunk, etc) -> do
    177             b0 <- go64 mempty chunk
    178             b1 <- go32 b0 (BU.unsafeTake 8 etc)
    179             b2 <- go16 b1 (BU.unsafeTake 4 (BU.unsafeDrop 8 etc))
    180             go8 b2 (BU.unsafeDrop 12 etc)
    181 
    182       | l `rem` 8 == 0 =
    183             go32 mempty bs
    184       | (l - 2) `rem` 8 == 0 = case BS.splitAt (l - 2) bs of
    185           (chunk, etc) -> do
    186             b0 <- go32 mempty chunk
    187             go8 b0 etc
    188       | (l - 4) `rem` 8 == 0 = case BS.splitAt (l - 4) bs of
    189           (chunk, etc) -> do
    190             b0 <- go32 mempty chunk
    191             go16 b0 etc
    192       | (l - 6) `rem` 8 == 0 = case BS.splitAt (l - 6) bs of
    193           (chunk, etc) -> do
    194             b0 <- go32 mempty chunk
    195             b1 <- go16 b0 (BU.unsafeTake 4 etc)
    196             go8 b1  (BU.unsafeDrop 4 etc)
    197 
    198       | l `rem` 4 == 0 =
    199             go16 mempty bs
    200       | (l - 2) `rem` 4 == 0 = case BS.splitAt (l - 2) bs of
    201           (chunk, etc) -> do
    202             b0 <- go16 mempty chunk
    203             go8 b0 etc
    204 
    205       | otherwise =
    206             go8 mempty bs
    207 
    208     go64 acc b = case BS.splitAt 16 b of
    209       (chunk, etc)
    210         | BS.null chunk -> pure acc
    211         | otherwise -> do
    212             !w4_00 <- word4 (BU.unsafeIndex chunk 00)
    213             !w4_01 <- word4 (BU.unsafeIndex chunk 01)
    214             !w4_02 <- word4 (BU.unsafeIndex chunk 02)
    215             !w4_03 <- word4 (BU.unsafeIndex chunk 03)
    216             !w4_04 <- word4 (BU.unsafeIndex chunk 04)
    217             !w4_05 <- word4 (BU.unsafeIndex chunk 05)
    218             !w4_06 <- word4 (BU.unsafeIndex chunk 06)
    219             !w4_07 <- word4 (BU.unsafeIndex chunk 07)
    220             !w4_08 <- word4 (BU.unsafeIndex chunk 08)
    221             !w4_09 <- word4 (BU.unsafeIndex chunk 09)
    222             !w4_10 <- word4 (BU.unsafeIndex chunk 10)
    223             !w4_11 <- word4 (BU.unsafeIndex chunk 11)
    224             !w4_12 <- word4 (BU.unsafeIndex chunk 12)
    225             !w4_13 <- word4 (BU.unsafeIndex chunk 13)
    226             !w4_14 <- word4 (BU.unsafeIndex chunk 14)
    227             !w4_15 <- word4 (BU.unsafeIndex chunk 15)
    228 
    229             let !w64 = fi w4_00 `B.shiftL` 60
    230                    .|. fi w4_01 `B.shiftL` 56
    231                    .|. fi w4_02 `B.shiftL` 52
    232                    .|. fi w4_03 `B.shiftL` 48
    233                    .|. fi w4_04 `B.shiftL` 44
    234                    .|. fi w4_05 `B.shiftL` 40
    235                    .|. fi w4_06 `B.shiftL` 36
    236                    .|. fi w4_07 `B.shiftL` 32
    237                    .|. fi w4_08 `B.shiftL` 28
    238                    .|. fi w4_09 `B.shiftL` 24
    239                    .|. fi w4_10 `B.shiftL` 20
    240                    .|. fi w4_11 `B.shiftL` 16
    241                    .|. fi w4_12 `B.shiftL` 12
    242                    .|. fi w4_13 `B.shiftL` 08
    243                    .|. fi w4_14 `B.shiftL` 04
    244                    .|. fi w4_15
    245 
    246             go64 (acc <> BSB.word64BE w64) etc
    247 
    248     go32 acc b = case BS.splitAt 8 b of
    249       (chunk, etc)
    250         | BS.null chunk -> pure acc
    251         | otherwise -> do
    252             !w4_00 <- word4 (BU.unsafeIndex chunk 00)
    253             !w4_01 <- word4 (BU.unsafeIndex chunk 01)
    254             !w4_02 <- word4 (BU.unsafeIndex chunk 02)
    255             !w4_03 <- word4 (BU.unsafeIndex chunk 03)
    256             !w4_04 <- word4 (BU.unsafeIndex chunk 04)
    257             !w4_05 <- word4 (BU.unsafeIndex chunk 05)
    258             !w4_06 <- word4 (BU.unsafeIndex chunk 06)
    259             !w4_07 <- word4 (BU.unsafeIndex chunk 07)
    260 
    261             let !w32 = fi w4_00 `B.shiftL` 28
    262                    .|. fi w4_01 `B.shiftL` 24
    263                    .|. fi w4_02 `B.shiftL` 20
    264                    .|. fi w4_03 `B.shiftL` 16
    265                    .|. fi w4_04 `B.shiftL` 12
    266                    .|. fi w4_05 `B.shiftL` 08
    267                    .|. fi w4_06 `B.shiftL` 04
    268                    .|. fi w4_07
    269 
    270             go32 (acc <> BSB.word32BE w32) etc
    271 
    272     go16 acc b = case BS.splitAt 4 b of
    273       (chunk, etc)
    274         | BS.null chunk -> pure acc
    275         | otherwise -> do
    276             !w4_00 <- word4 (BU.unsafeIndex chunk 00)
    277             !w4_01 <- word4 (BU.unsafeIndex chunk 01)
    278             !w4_02 <- word4 (BU.unsafeIndex chunk 02)
    279             !w4_03 <- word4 (BU.unsafeIndex chunk 03)
    280 
    281             let !w16 = fi w4_00 `B.shiftL` 12
    282                    .|. fi w4_01 `B.shiftL` 08
    283                    .|. fi w4_02 `B.shiftL` 04
    284                    .|. fi w4_03
    285 
    286             go16 (acc <> BSB.word16BE w16) etc
    287 
    288     go8 acc b  = case BS.splitAt 2 b of
    289       (chunk, etc)
    290         | BS.null chunk -> pure acc
    291         | otherwise -> do
    292             !w4_00 <- word4 (BU.unsafeIndex chunk 00)
    293             !w4_01 <- word4 (BU.unsafeIndex chunk 01)
    294 
    295             let !w8 = fi w4_00 `B.shiftL` 04
    296                   .|. fi w4_01
    297 
    298             go8 (acc <> BSB.word8 w8) etc
    299