Base16.hs (10280B)
1 {-# OPTIONS_HADDOCK prune #-} 2 {-# LANGUAGE ApplicativeDo #-} 3 {-# LANGUAGE BangPatterns #-} 4 {-# LANGUAGE BinaryLiterals #-} 5 {-# LANGUAGE OverloadedStrings #-} 6 7 -- | 8 -- Module: Data.ByteString.Base16 9 -- Copyright: (c) 2025 Jared Tobin 10 -- License: MIT 11 -- Maintainer: Jared Tobin <jared@ppad.tech> 12 -- 13 -- Pure base16 encoding and decoding of strict bytestrings. 14 15 module Data.ByteString.Base16 ( 16 encode 17 , decode 18 ) where 19 20 import qualified Data.Bits as B 21 import Data.Bits ((.&.), (.|.)) 22 import qualified Data.ByteString as BS 23 import qualified Data.ByteString.Builder as BSB 24 import qualified Data.ByteString.Builder.Extra as BE 25 import qualified Data.ByteString.Internal as BI 26 import qualified Data.ByteString.Unsafe as BU 27 import Data.Word (Word8, Word16) 28 29 to_strict :: BSB.Builder -> BS.ByteString 30 to_strict = BS.toStrict . BSB.toLazyByteString 31 {-# INLINE to_strict #-} 32 33 to_strict_small :: BSB.Builder -> BS.ByteString 34 to_strict_small = BS.toStrict 35 . BE.toLazyByteStringWith (BE.safeStrategy 128 BE.smallChunkSize) mempty 36 {-# INLINE to_strict_small #-} 37 38 fi :: (Num a, Integral b) => b -> a 39 fi = fromIntegral 40 {-# INLINE fi #-} 41 42 hex_charset :: BS.ByteString 43 hex_charset = "0123456789abcdef" 44 45 expand_w8 :: Word8 -> Word16 46 expand_w8 b = 47 let !hi = BU.unsafeIndex hex_charset (fi b `B.shiftR` 4) 48 !lo = BU.unsafeIndex hex_charset (fi b .&. 0b00001111) 49 in fi hi `B.shiftL` 8 50 .|. fi lo 51 {-# INLINE expand_w8 #-} 52 53 -- | Encode a base256 'ByteString' as base16. 54 -- 55 -- >>> encode "hello world" 56 -- "68656c6c6f20776f726c64" 57 encode :: BS.ByteString -> BS.ByteString 58 encode bs@(BI.PS _ _ l) 59 | l < 64 = to_strict_small loop 60 | otherwise = to_strict loop 61 where 62 -- writing as few words as possible requires performing some length 63 -- checks up front 64 loop 65 | l `rem` 4 == 0 = 66 go64 bs 67 | (l - 3) `rem` 4 == 0 = case BS.splitAt (l - 3) bs of 68 (chunk, etc) -> 69 go64 chunk 70 <> go32 (BU.unsafeTake 2 etc) 71 <> go16 (BU.unsafeDrop 2 etc) 72 | (l - 2) `rem` 4 == 0 = case BS.splitAt (l - 2) bs of 73 (chunk, etc) -> 74 go64 chunk 75 <> go32 etc 76 | (l - 1) `rem` 4 == 0 = case BS.splitAt (l - 1) bs of 77 (chunk, etc) -> 78 go64 chunk 79 <> go16 etc 80 81 | l `rem` 2 == 0 = 82 go32 bs 83 | (l - 1) `rem` 2 == 0 = case BS.splitAt (l - 1) bs of 84 (chunk, etc) -> 85 go32 chunk 86 <> go16 etc 87 88 | otherwise = 89 go16 bs 90 91 go64 b = case BS.splitAt 4 b of 92 (chunk, etc) 93 | BS.null chunk -> mempty 94 | otherwise -> 95 let !w16_0 = expand_w8 (BU.unsafeIndex chunk 0) 96 !w16_1 = expand_w8 (BU.unsafeIndex chunk 1) 97 !w16_2 = expand_w8 (BU.unsafeIndex chunk 2) 98 !w16_3 = expand_w8 (BU.unsafeIndex chunk 3) 99 100 !w64 = fi w16_0 `B.shiftL` 48 101 .|. fi w16_1 `B.shiftL` 32 102 .|. fi w16_2 `B.shiftL` 16 103 .|. fi w16_3 104 105 in BSB.word64BE w64 <> go64 etc 106 107 go32 b = case BS.splitAt 2 b of 108 (chunk, etc) 109 | BS.null chunk -> mempty 110 | otherwise -> 111 let !w16_0 = expand_w8 (BU.unsafeIndex chunk 0) 112 !w16_1 = expand_w8 (BU.unsafeIndex chunk 1) 113 114 !w32 = fi w16_0 `B.shiftL` 16 115 .|. fi w16_1 116 117 in BSB.word32BE w32 <> go32 etc 118 119 go16 b = case BS.uncons b of 120 Nothing -> mempty 121 Just (h, t) -> 122 let !w16 = expand_w8 h 123 in BSB.word16BE w16 <> go16 t 124 125 -- word8 hex character to word4 126 word4 :: Word8 -> Maybe Word8 127 word4 c 128 | c > 47 && c < 58 = pure $! c - 48 -- 0-9 129 | c > 64 && c < 71 = pure $! c - 55 -- A-F 130 | c > 96 && c < 103 = pure $! c - 87 -- a-f 131 | otherwise = Nothing 132 {-# INLINE word4 #-} 133 134 -- | Decode a base16 'ByteString' to base256. 135 -- 136 -- Invalid inputs (including odd-length inputs) will produce 137 -- 'Nothing'. 138 -- 139 -- >>> decode "68656c6c6f20776f726c64" 140 -- Just "hello world" 141 -- >>> decode "068656c6c6f20776f726c64" -- odd-length 142 -- Nothing 143 decode :: BS.ByteString -> Maybe BS.ByteString 144 decode bs@(BI.PS _ _ l) 145 | B.testBit l 0 = Nothing 146 | l `quot` 2 < 128 = fmap to_strict_small loop 147 | otherwise = fmap to_strict loop 148 where 149 -- same story, but we need more checks 150 loop 151 | l `rem` 16 == 0 = 152 go64 mempty bs 153 | (l - 2) `rem` 16 == 0 = case BS.splitAt (l - 2) bs of 154 (chunk, etc) -> do 155 b0 <- go64 mempty chunk 156 go8 b0 etc 157 | (l - 4) `rem` 16 == 0 = case BS.splitAt (l - 4) bs of 158 (chunk, etc) -> do 159 b0 <- go64 mempty chunk 160 go16 b0 etc 161 | (l - 6) `rem` 16 == 0 = case BS.splitAt (l - 6) bs of 162 (chunk, etc) -> do 163 b0 <- go64 mempty chunk 164 b1 <- go16 b0 (BU.unsafeTake 4 etc) 165 go8 b1 (BU.unsafeDrop 4 etc) 166 | (l - 8) `rem` 16 == 0 = case BS.splitAt (l - 8) bs of 167 (chunk, etc) -> do 168 b0 <- go64 mempty chunk 169 go32 b0 etc 170 | (l - 10) `rem` 16 == 0 = case BS.splitAt (l - 10) bs of 171 (chunk, etc) -> do 172 b0 <- go64 mempty chunk 173 b1 <- go32 b0 (BU.unsafeTake 8 etc) 174 go8 b1 (BU.unsafeDrop 8 etc) 175 | (l - 12) `rem` 16 == 0 = case BS.splitAt (l - 12) bs of 176 (chunk, etc) -> do 177 b0 <- go64 mempty chunk 178 b1 <- go32 b0 (BU.unsafeTake 8 etc) 179 go16 b1 (BU.unsafeDrop 8 etc) 180 | (l - 14) `rem` 16 == 0 = case BS.splitAt (l - 14) bs of 181 (chunk, etc) -> do 182 b0 <- go64 mempty chunk 183 b1 <- go32 b0 (BU.unsafeTake 8 etc) 184 b2 <- go16 b1 (BU.unsafeTake 4 (BU.unsafeDrop 8 etc)) 185 go8 b2 (BU.unsafeDrop 12 etc) 186 187 | l `rem` 8 == 0 = 188 go32 mempty bs 189 | (l - 2) `rem` 8 == 0 = case BS.splitAt (l - 2) bs of 190 (chunk, etc) -> do 191 b0 <- go32 mempty chunk 192 go8 b0 etc 193 | (l - 4) `rem` 8 == 0 = case BS.splitAt (l - 4) bs of 194 (chunk, etc) -> do 195 b0 <- go32 mempty chunk 196 go16 b0 etc 197 | (l - 6) `rem` 8 == 0 = case BS.splitAt (l - 6) bs of 198 (chunk, etc) -> do 199 b0 <- go32 mempty chunk 200 b1 <- go16 b0 (BU.unsafeTake 4 etc) 201 go8 b1 (BU.unsafeDrop 4 etc) 202 203 | l `rem` 4 == 0 = 204 go16 mempty bs 205 | (l - 2) `rem` 4 == 0 = case BS.splitAt (l - 2) bs of 206 (chunk, etc) -> do 207 b0 <- go16 mempty chunk 208 go8 b0 etc 209 210 | otherwise = 211 go8 mempty bs 212 213 go64 acc b = case BS.splitAt 16 b of 214 (chunk, etc) 215 | BS.null chunk -> pure acc 216 | otherwise -> do 217 !w4_00 <- word4 (BU.unsafeIndex chunk 00) 218 !w4_01 <- word4 (BU.unsafeIndex chunk 01) 219 !w4_02 <- word4 (BU.unsafeIndex chunk 02) 220 !w4_03 <- word4 (BU.unsafeIndex chunk 03) 221 !w4_04 <- word4 (BU.unsafeIndex chunk 04) 222 !w4_05 <- word4 (BU.unsafeIndex chunk 05) 223 !w4_06 <- word4 (BU.unsafeIndex chunk 06) 224 !w4_07 <- word4 (BU.unsafeIndex chunk 07) 225 !w4_08 <- word4 (BU.unsafeIndex chunk 08) 226 !w4_09 <- word4 (BU.unsafeIndex chunk 09) 227 !w4_10 <- word4 (BU.unsafeIndex chunk 10) 228 !w4_11 <- word4 (BU.unsafeIndex chunk 11) 229 !w4_12 <- word4 (BU.unsafeIndex chunk 12) 230 !w4_13 <- word4 (BU.unsafeIndex chunk 13) 231 !w4_14 <- word4 (BU.unsafeIndex chunk 14) 232 !w4_15 <- word4 (BU.unsafeIndex chunk 15) 233 234 let !w64 = fi w4_00 `B.shiftL` 60 235 .|. fi w4_01 `B.shiftL` 56 236 .|. fi w4_02 `B.shiftL` 52 237 .|. fi w4_03 `B.shiftL` 48 238 .|. fi w4_04 `B.shiftL` 44 239 .|. fi w4_05 `B.shiftL` 40 240 .|. fi w4_06 `B.shiftL` 36 241 .|. fi w4_07 `B.shiftL` 32 242 .|. fi w4_08 `B.shiftL` 28 243 .|. fi w4_09 `B.shiftL` 24 244 .|. fi w4_10 `B.shiftL` 20 245 .|. fi w4_11 `B.shiftL` 16 246 .|. fi w4_12 `B.shiftL` 12 247 .|. fi w4_13 `B.shiftL` 08 248 .|. fi w4_14 `B.shiftL` 04 249 .|. fi w4_15 250 251 go64 (acc <> BSB.word64BE w64) etc 252 253 go32 acc b = case BS.splitAt 8 b of 254 (chunk, etc) 255 | BS.null chunk -> pure acc 256 | otherwise -> do 257 !w4_00 <- word4 (BU.unsafeIndex chunk 00) 258 !w4_01 <- word4 (BU.unsafeIndex chunk 01) 259 !w4_02 <- word4 (BU.unsafeIndex chunk 02) 260 !w4_03 <- word4 (BU.unsafeIndex chunk 03) 261 !w4_04 <- word4 (BU.unsafeIndex chunk 04) 262 !w4_05 <- word4 (BU.unsafeIndex chunk 05) 263 !w4_06 <- word4 (BU.unsafeIndex chunk 06) 264 !w4_07 <- word4 (BU.unsafeIndex chunk 07) 265 266 let !w32 = fi w4_00 `B.shiftL` 28 267 .|. fi w4_01 `B.shiftL` 24 268 .|. fi w4_02 `B.shiftL` 20 269 .|. fi w4_03 `B.shiftL` 16 270 .|. fi w4_04 `B.shiftL` 12 271 .|. fi w4_05 `B.shiftL` 08 272 .|. fi w4_06 `B.shiftL` 04 273 .|. fi w4_07 274 275 go32 (acc <> BSB.word32BE w32) etc 276 277 go16 acc b = case BS.splitAt 4 b of 278 (chunk, etc) 279 | BS.null chunk -> pure acc 280 | otherwise -> do 281 !w4_00 <- word4 (BU.unsafeIndex chunk 00) 282 !w4_01 <- word4 (BU.unsafeIndex chunk 01) 283 !w4_02 <- word4 (BU.unsafeIndex chunk 02) 284 !w4_03 <- word4 (BU.unsafeIndex chunk 03) 285 286 let !w16 = fi w4_00 `B.shiftL` 12 287 .|. fi w4_01 `B.shiftL` 08 288 .|. fi w4_02 `B.shiftL` 04 289 .|. fi w4_03 290 291 go16 (acc <> BSB.word16BE w16) etc 292 293 go8 acc b = case BS.splitAt 2 b of 294 (chunk, etc) 295 | BS.null chunk -> pure acc 296 | otherwise -> do 297 !w4_00 <- word4 (BU.unsafeIndex chunk 00) 298 !w4_01 <- word4 (BU.unsafeIndex chunk 01) 299 300 let !w8 = fi w4_00 `B.shiftL` 04 301 .|. fi w4_01 302 303 go8 (acc <> BSB.word8 w8) etc 304