Base16.hs (10119B)
1 {-# OPTIONS_HADDOCK prune #-} 2 {-# LANGUAGE ApplicativeDo #-} 3 {-# LANGUAGE BangPatterns #-} 4 {-# LANGUAGE BinaryLiterals #-} 5 {-# LANGUAGE OverloadedStrings #-} 6 7 -- | 8 -- Module: Data.ByteString.Base16 9 -- Copyright: (c) 2025 Jared Tobin 10 -- License: MIT 11 -- Maintainer: Jared Tobin <jared@ppad.tech> 12 -- 13 -- Pure base16 encoding and decoding of strict bytestrings. 14 15 module Data.ByteString.Base16 ( 16 encode 17 , decode 18 ) where 19 20 import qualified Data.Bits as B 21 import Data.Bits ((.&.), (.|.)) 22 import qualified Data.ByteString as BS 23 import qualified Data.ByteString.Builder as BSB 24 import qualified Data.ByteString.Builder.Extra as BE 25 import qualified Data.ByteString.Internal as BI 26 import qualified Data.ByteString.Unsafe as BU 27 import Data.Word (Word8, Word16) 28 29 to_strict :: BSB.Builder -> BS.ByteString 30 to_strict = BS.toStrict . BSB.toLazyByteString 31 {-# INLINE to_strict #-} 32 33 to_strict_small :: BSB.Builder -> BS.ByteString 34 to_strict_small = BS.toStrict 35 . BE.toLazyByteStringWith (BE.safeStrategy 128 BE.smallChunkSize) mempty 36 {-# INLINE to_strict_small #-} 37 38 fi :: (Num a, Integral b) => b -> a 39 fi = fromIntegral 40 {-# INLINE fi #-} 41 42 hex_charset :: BS.ByteString 43 hex_charset = "0123456789abcdef" 44 45 expand_w8 :: Word8 -> Word16 46 expand_w8 b = 47 let !hi = BU.unsafeIndex hex_charset (fi b `B.shiftR` 4) 48 !lo = BU.unsafeIndex hex_charset (fi b .&. 0b00001111) 49 in fi hi `B.shiftL` 8 50 .|. fi lo 51 {-# INLINE expand_w8 #-} 52 53 -- | Encode a base256 'ByteString' as base16. 54 -- 55 -- >>> encode "hello world" 56 -- "68656c6c6f20776f726c64" 57 encode :: BS.ByteString -> BS.ByteString 58 encode bs@(BI.PS _ _ l) 59 | l < 64 = to_strict_small loop 60 | otherwise = to_strict loop 61 where 62 -- writing as few words as possible requires performing some length 63 -- checks up front 64 loop 65 | l `rem` 4 == 0 = 66 go64 bs 67 | (l - 3) `rem` 4 == 0 = case BS.splitAt (l - 3) bs of 68 (chunk, etc) -> 69 go64 chunk 70 <> go32 (BU.unsafeTake 2 etc) 71 <> go16 (BU.unsafeDrop 2 etc) 72 | (l - 2) `rem` 4 == 0 = case BS.splitAt (l - 2) bs of 73 (chunk, etc) -> 74 go64 chunk 75 <> go32 etc 76 | (l - 1) `rem` 4 == 0 = case BS.splitAt (l - 1) bs of 77 (chunk, etc) -> 78 go64 chunk 79 <> go16 etc 80 81 | l `rem` 2 == 0 = 82 go32 bs 83 | (l - 1) `rem` 2 == 0 = case BS.splitAt (l - 1) bs of 84 (chunk, etc) -> 85 go32 chunk 86 <> go16 etc 87 88 | otherwise = 89 go16 bs 90 91 go64 b = case BS.splitAt 4 b of 92 (chunk, etc) 93 | BS.null chunk -> mempty 94 | otherwise -> 95 let !w16_0 = expand_w8 (BU.unsafeIndex chunk 0) 96 !w16_1 = expand_w8 (BU.unsafeIndex chunk 1) 97 !w16_2 = expand_w8 (BU.unsafeIndex chunk 2) 98 !w16_3 = expand_w8 (BU.unsafeIndex chunk 3) 99 100 !w64 = fi w16_0 `B.shiftL` 48 101 .|. fi w16_1 `B.shiftL` 32 102 .|. fi w16_2 `B.shiftL` 16 103 .|. fi w16_3 104 105 in BSB.word64BE w64 <> go64 etc 106 107 go32 b = case BS.splitAt 2 b of 108 (chunk, etc) 109 | BS.null chunk -> mempty 110 | otherwise -> 111 let !w16_0 = expand_w8 (BU.unsafeIndex chunk 0) 112 !w16_1 = expand_w8 (BU.unsafeIndex chunk 1) 113 114 !w32 = fi w16_0 `B.shiftL` 16 115 .|. fi w16_1 116 117 in BSB.word32BE w32 <> go32 etc 118 119 go16 b = case BS.uncons b of 120 Nothing -> mempty 121 Just (h, t) -> 122 let !w16 = expand_w8 h 123 in BSB.word16BE w16 <> go16 t 124 125 word4 :: Word8 -> Maybe Word8 126 word4 w8 = fmap fi (BS.elemIndex w8 hex_charset) 127 {-# INLINE word4 #-} 128 129 -- | Decode a base16 'ByteString' to base256. 130 -- 131 -- Invalid inputs (including odd-length inputs) will produce 132 -- 'Nothing'. 133 -- 134 -- >>> decode "68656c6c6f20776f726c64" 135 -- Just "hello world" 136 -- >>> decode "068656c6c6f20776f726c64" -- odd-length 137 -- Nothing 138 decode :: BS.ByteString -> Maybe BS.ByteString 139 decode bs@(BI.PS _ _ l) 140 | B.testBit l 0 = Nothing 141 | l `quot` 2 < 128 = fmap to_strict_small loop 142 | otherwise = fmap to_strict loop 143 where 144 -- same story, but we need more checks 145 loop 146 | l `rem` 16 == 0 = 147 go64 mempty bs 148 | (l - 2) `rem` 16 == 0 = case BS.splitAt (l - 2) bs of 149 (chunk, etc) -> do 150 b0 <- go64 mempty chunk 151 go8 b0 etc 152 | (l - 4) `rem` 16 == 0 = case BS.splitAt (l - 4) bs of 153 (chunk, etc) -> do 154 b0 <- go64 mempty chunk 155 go16 b0 etc 156 | (l - 6) `rem` 16 == 0 = case BS.splitAt (l - 6) bs of 157 (chunk, etc) -> do 158 b0 <- go64 mempty chunk 159 b1 <- go16 b0 (BU.unsafeTake 4 etc) 160 go8 b1 (BU.unsafeDrop 4 etc) 161 | (l - 8) `rem` 16 == 0 = case BS.splitAt (l - 8) bs of 162 (chunk, etc) -> do 163 b0 <- go64 mempty chunk 164 go32 b0 etc 165 | (l - 10) `rem` 16 == 0 = case BS.splitAt (l - 10) bs of 166 (chunk, etc) -> do 167 b0 <- go64 mempty chunk 168 b1 <- go32 b0 (BU.unsafeTake 8 etc) 169 go8 b1 (BU.unsafeDrop 8 etc) 170 | (l - 12) `rem` 16 == 0 = case BS.splitAt (l - 12) bs of 171 (chunk, etc) -> do 172 b0 <- go64 mempty chunk 173 b1 <- go32 b0 (BU.unsafeTake 8 etc) 174 go16 b1 (BU.unsafeDrop 8 etc) 175 | (l - 14) `rem` 16 == 0 = case BS.splitAt (l - 14) bs of 176 (chunk, etc) -> do 177 b0 <- go64 mempty chunk 178 b1 <- go32 b0 (BU.unsafeTake 8 etc) 179 b2 <- go16 b1 (BU.unsafeTake 4 (BU.unsafeDrop 8 etc)) 180 go8 b2 (BU.unsafeDrop 12 etc) 181 182 | l `rem` 8 == 0 = 183 go32 mempty bs 184 | (l - 2) `rem` 8 == 0 = case BS.splitAt (l - 2) bs of 185 (chunk, etc) -> do 186 b0 <- go32 mempty chunk 187 go8 b0 etc 188 | (l - 4) `rem` 8 == 0 = case BS.splitAt (l - 4) bs of 189 (chunk, etc) -> do 190 b0 <- go32 mempty chunk 191 go16 b0 etc 192 | (l - 6) `rem` 8 == 0 = case BS.splitAt (l - 6) bs of 193 (chunk, etc) -> do 194 b0 <- go32 mempty chunk 195 b1 <- go16 b0 (BU.unsafeTake 4 etc) 196 go8 b1 (BU.unsafeDrop 4 etc) 197 198 | l `rem` 4 == 0 = 199 go16 mempty bs 200 | (l - 2) `rem` 4 == 0 = case BS.splitAt (l - 2) bs of 201 (chunk, etc) -> do 202 b0 <- go16 mempty chunk 203 go8 b0 etc 204 205 | otherwise = 206 go8 mempty bs 207 208 go64 acc b = case BS.splitAt 16 b of 209 (chunk, etc) 210 | BS.null chunk -> pure acc 211 | otherwise -> do 212 !w4_00 <- word4 (BU.unsafeIndex chunk 00) 213 !w4_01 <- word4 (BU.unsafeIndex chunk 01) 214 !w4_02 <- word4 (BU.unsafeIndex chunk 02) 215 !w4_03 <- word4 (BU.unsafeIndex chunk 03) 216 !w4_04 <- word4 (BU.unsafeIndex chunk 04) 217 !w4_05 <- word4 (BU.unsafeIndex chunk 05) 218 !w4_06 <- word4 (BU.unsafeIndex chunk 06) 219 !w4_07 <- word4 (BU.unsafeIndex chunk 07) 220 !w4_08 <- word4 (BU.unsafeIndex chunk 08) 221 !w4_09 <- word4 (BU.unsafeIndex chunk 09) 222 !w4_10 <- word4 (BU.unsafeIndex chunk 10) 223 !w4_11 <- word4 (BU.unsafeIndex chunk 11) 224 !w4_12 <- word4 (BU.unsafeIndex chunk 12) 225 !w4_13 <- word4 (BU.unsafeIndex chunk 13) 226 !w4_14 <- word4 (BU.unsafeIndex chunk 14) 227 !w4_15 <- word4 (BU.unsafeIndex chunk 15) 228 229 let !w64 = fi w4_00 `B.shiftL` 60 230 .|. fi w4_01 `B.shiftL` 56 231 .|. fi w4_02 `B.shiftL` 52 232 .|. fi w4_03 `B.shiftL` 48 233 .|. fi w4_04 `B.shiftL` 44 234 .|. fi w4_05 `B.shiftL` 40 235 .|. fi w4_06 `B.shiftL` 36 236 .|. fi w4_07 `B.shiftL` 32 237 .|. fi w4_08 `B.shiftL` 28 238 .|. fi w4_09 `B.shiftL` 24 239 .|. fi w4_10 `B.shiftL` 20 240 .|. fi w4_11 `B.shiftL` 16 241 .|. fi w4_12 `B.shiftL` 12 242 .|. fi w4_13 `B.shiftL` 08 243 .|. fi w4_14 `B.shiftL` 04 244 .|. fi w4_15 245 246 go64 (acc <> BSB.word64BE w64) etc 247 248 go32 acc b = case BS.splitAt 8 b of 249 (chunk, etc) 250 | BS.null chunk -> pure acc 251 | otherwise -> do 252 !w4_00 <- word4 (BU.unsafeIndex chunk 00) 253 !w4_01 <- word4 (BU.unsafeIndex chunk 01) 254 !w4_02 <- word4 (BU.unsafeIndex chunk 02) 255 !w4_03 <- word4 (BU.unsafeIndex chunk 03) 256 !w4_04 <- word4 (BU.unsafeIndex chunk 04) 257 !w4_05 <- word4 (BU.unsafeIndex chunk 05) 258 !w4_06 <- word4 (BU.unsafeIndex chunk 06) 259 !w4_07 <- word4 (BU.unsafeIndex chunk 07) 260 261 let !w32 = fi w4_00 `B.shiftL` 28 262 .|. fi w4_01 `B.shiftL` 24 263 .|. fi w4_02 `B.shiftL` 20 264 .|. fi w4_03 `B.shiftL` 16 265 .|. fi w4_04 `B.shiftL` 12 266 .|. fi w4_05 `B.shiftL` 08 267 .|. fi w4_06 `B.shiftL` 04 268 .|. fi w4_07 269 270 go32 (acc <> BSB.word32BE w32) etc 271 272 go16 acc b = case BS.splitAt 4 b of 273 (chunk, etc) 274 | BS.null chunk -> pure acc 275 | otherwise -> do 276 !w4_00 <- word4 (BU.unsafeIndex chunk 00) 277 !w4_01 <- word4 (BU.unsafeIndex chunk 01) 278 !w4_02 <- word4 (BU.unsafeIndex chunk 02) 279 !w4_03 <- word4 (BU.unsafeIndex chunk 03) 280 281 let !w16 = fi w4_00 `B.shiftL` 12 282 .|. fi w4_01 `B.shiftL` 08 283 .|. fi w4_02 `B.shiftL` 04 284 .|. fi w4_03 285 286 go16 (acc <> BSB.word16BE w16) etc 287 288 go8 acc b = case BS.splitAt 2 b of 289 (chunk, etc) 290 | BS.null chunk -> pure acc 291 | otherwise -> do 292 !w4_00 <- word4 (BU.unsafeIndex chunk 00) 293 !w4_01 <- word4 (BU.unsafeIndex chunk 01) 294 295 let !w8 = fi w4_00 `B.shiftL` 04 296 .|. fi w4_01 297 298 go8 (acc <> BSB.word8 w8) etc 299