Base16.hs (9963B)
1 {-# OPTIONS_HADDOCK prune #-} 2 {-# LANGUAGE ApplicativeDo #-} 3 {-# LANGUAGE BangPatterns #-} 4 {-# LANGUAGE BinaryLiterals #-} 5 {-# LANGUAGE OverloadedStrings #-} 6 7 -- | 8 -- Module: Data.ByteString.Base16 9 -- Copyright: (c) 2025 Jared Tobin 10 -- License: MIT 11 -- Maintainer: Jared Tobin <jared@ppad.tech> 12 -- 13 -- Pure base16 encoding and decoding of strict bytestrings. 14 15 module Data.ByteString.Base16 ( 16 encode 17 , decode 18 ) where 19 20 import qualified Data.Bits as B 21 import Data.Bits ((.&.), (.|.)) 22 import qualified Data.ByteString as BS 23 import qualified Data.ByteString.Builder as BSB 24 import qualified Data.ByteString.Builder.Extra as BE 25 import qualified Data.ByteString.Internal as BI 26 import qualified Data.ByteString.Unsafe as BU 27 import Data.Word (Word8, Word16) 28 29 to_strict :: BSB.Builder -> BS.ByteString 30 to_strict = BS.toStrict . BSB.toLazyByteString 31 {-# INLINE to_strict #-} 32 33 to_strict_small :: BSB.Builder -> BS.ByteString 34 to_strict_small = BS.toStrict 35 . BE.toLazyByteStringWith (BE.safeStrategy 128 BE.smallChunkSize) mempty 36 {-# INLINE to_strict_small #-} 37 38 fi :: (Num a, Integral b) => b -> a 39 fi = fromIntegral 40 {-# INLINE fi #-} 41 42 hex_charset :: BS.ByteString 43 hex_charset = "0123456789abcdef" 44 45 expand_w8 :: Word8 -> Word16 46 expand_w8 b = 47 let !hi = BU.unsafeIndex hex_charset (fi b `B.shiftR` 4) 48 !lo = BU.unsafeIndex hex_charset (fi b .&. 0b00001111) 49 in fi hi `B.shiftL` 8 50 .|. fi lo 51 {-# INLINE expand_w8 #-} 52 53 -- | Encode a base256 'ByteString' as base16. 54 -- 55 -- >>> encode "hello world" 56 -- "68656c6c6f20776f726c64" 57 encode :: BS.ByteString -> BS.ByteString 58 encode bs@(BI.PS _ _ l) 59 | l < 64 = to_strict_small loop 60 | otherwise = to_strict loop 61 where 62 -- writing as few words as possible requires performing some length 63 -- checks up front 64 loop 65 | l `rem` 4 == 0 = go64 bs 66 | (l - 3) `rem` 4 == 0 = case BS.splitAt (l - 3) bs of 67 (chunk, etc) -> 68 go64 chunk 69 <> go32 (BU.unsafeTake 2 etc) 70 <> go16 (BU.unsafeDrop 2 etc) 71 | (l - 2) `rem` 4 == 0 = case BS.splitAt (l - 2) bs of 72 (chunk, etc) -> go64 chunk <> go32 etc 73 | (l - 1) `rem` 4 == 0 = case BS.splitAt (l - 1) bs of 74 (chunk, etc) -> go64 chunk <> go16 etc 75 76 | l `rem` 2 == 0 = go32 bs 77 | (l - 1) `rem` 2 == 0 = case BS.splitAt (l - 1) bs of 78 (chunk, etc) -> go32 chunk <> go16 etc 79 80 | otherwise = go16 bs 81 82 go64 b = case BS.splitAt 4 b of 83 (chunk, etc) 84 | BS.null chunk -> mempty 85 | otherwise -> 86 let !w16_0 = expand_w8 (BU.unsafeIndex chunk 0) 87 !w16_1 = expand_w8 (BU.unsafeIndex chunk 1) 88 !w16_2 = expand_w8 (BU.unsafeIndex chunk 2) 89 !w16_3 = expand_w8 (BU.unsafeIndex chunk 3) 90 91 !w64 = fi w16_0 `B.shiftL` 48 92 .|. fi w16_1 `B.shiftL` 32 93 .|. fi w16_2 `B.shiftL` 16 94 .|. fi w16_3 95 96 in BSB.word64BE w64 <> go64 etc 97 98 go32 b = case BS.splitAt 2 b of 99 (chunk, etc) 100 | BS.null chunk -> mempty 101 | otherwise -> 102 let !w16_0 = expand_w8 (BU.unsafeIndex chunk 0) 103 !w16_1 = expand_w8 (BU.unsafeIndex chunk 1) 104 105 !w32 = fi w16_0 `B.shiftL` 16 106 .|. fi w16_1 107 108 in BSB.word32BE w32 <> go32 etc 109 110 go16 b = case BS.uncons b of 111 Nothing -> mempty 112 Just (h, t) -> 113 let !w16 = expand_w8 h 114 in BSB.word16BE w16 <> go16 t 115 116 word4 :: Word8 -> Maybe Word8 117 word4 w8 = fmap fi (BS.elemIndex w8 hex_charset) 118 {-# INLINE word4 #-} 119 120 -- | Decode a base16 'ByteString' to base256. 121 -- 122 -- Invalid inputs (including odd-length inputs) will produce 123 -- 'Nothing'. 124 -- 125 -- >>> decode "68656c6c6f20776f726c64" 126 -- Just "hello world" 127 -- >>> decode "068656c6c6f20776f726c64" -- odd-length 128 -- Nothing 129 decode :: BS.ByteString -> Maybe BS.ByteString 130 decode bs@(BI.PS _ _ l) 131 | B.testBit l 0 = Nothing 132 | l `quot` 2 < 128 = fmap to_strict_small loop 133 | otherwise = fmap to_strict loop 134 where 135 -- same story, but we need more checks 136 loop 137 | l `rem` 16 == 0 = go64 mempty bs 138 | (l - 2) `rem` 16 == 0 = case BS.splitAt (l - 2) bs of 139 (chunk, etc) -> do 140 b0 <- go64 mempty chunk 141 go8 b0 etc 142 | (l - 4) `rem` 16 == 0 = case BS.splitAt (l - 4) bs of 143 (chunk, etc) -> do 144 b0 <- go64 mempty chunk 145 go16 b0 etc 146 | (l - 6) `rem` 16 == 0 = case BS.splitAt (l - 6) bs of 147 (chunk, etc) -> do 148 b0 <- go64 mempty chunk 149 b1 <- go16 b0 (BU.unsafeTake 4 etc) 150 go8 b1 (BU.unsafeDrop 4 etc) 151 | (l - 8) `rem` 16 == 0 = case BS.splitAt (l - 8) bs of 152 (chunk, etc) -> do 153 b0 <- go64 mempty chunk 154 go32 b0 etc 155 | (l - 10) `rem` 16 == 0 = case BS.splitAt (l - 10) bs of 156 (chunk, etc) -> do 157 b0 <- go64 mempty chunk 158 b1 <- go32 b0 (BU.unsafeTake 8 etc) 159 go8 b1 (BU.unsafeDrop 8 etc) 160 | (l - 12) `rem` 16 == 0 = case BS.splitAt (l - 12) bs of 161 (chunk, etc) -> do 162 b0 <- go64 mempty chunk 163 b1 <- go32 b0 (BU.unsafeTake 8 etc) 164 go16 b1 (BU.unsafeDrop 8 etc) 165 | (l - 14) `rem` 16 == 0 = case BS.splitAt (l - 14) bs of 166 (chunk, etc) -> do 167 b0 <- go64 mempty chunk 168 b1 <- go32 b0 (BU.unsafeTake 8 etc) 169 b2 <- go16 b1 (BU.unsafeTake 4 (BU.unsafeDrop 8 etc)) 170 go8 b2 (BU.unsafeDrop 12 etc) 171 172 | l `rem` 8 == 0 = go32 mempty bs 173 | (l - 2) `rem` 8 == 0 = case BS.splitAt (l - 2) bs of 174 (chunk, etc) -> do 175 b0 <- go32 mempty chunk 176 go8 b0 etc 177 | (l - 4) `rem` 8 == 0 = case BS.splitAt (l - 4) bs of 178 (chunk, etc) -> do 179 b0 <- go32 mempty chunk 180 go16 b0 etc 181 | (l - 6) `rem` 8 == 0 = case BS.splitAt (l - 6) bs of 182 (chunk, etc) -> do 183 b0 <- go32 mempty chunk 184 b1 <- go16 b0 (BU.unsafeTake 4 etc) 185 go8 b1 (BU.unsafeDrop 4 etc) 186 187 | l `rem` 4 == 0 = go16 mempty bs 188 | (l - 2) `rem` 4 == 0 = case BS.splitAt (l - 2) bs of 189 (chunk, etc) -> do 190 b0 <- go16 mempty chunk 191 go8 b0 etc 192 193 | otherwise = go8 mempty bs 194 195 go64 acc b = case BS.splitAt 16 b of 196 (chunk, etc) 197 | BS.null chunk -> pure acc 198 | otherwise -> do 199 !w4_00 <- word4 (BU.unsafeIndex chunk 00) 200 !w4_01 <- word4 (BU.unsafeIndex chunk 01) 201 !w4_02 <- word4 (BU.unsafeIndex chunk 02) 202 !w4_03 <- word4 (BU.unsafeIndex chunk 03) 203 !w4_04 <- word4 (BU.unsafeIndex chunk 04) 204 !w4_05 <- word4 (BU.unsafeIndex chunk 05) 205 !w4_06 <- word4 (BU.unsafeIndex chunk 06) 206 !w4_07 <- word4 (BU.unsafeIndex chunk 07) 207 !w4_08 <- word4 (BU.unsafeIndex chunk 08) 208 !w4_09 <- word4 (BU.unsafeIndex chunk 09) 209 !w4_10 <- word4 (BU.unsafeIndex chunk 10) 210 !w4_11 <- word4 (BU.unsafeIndex chunk 11) 211 !w4_12 <- word4 (BU.unsafeIndex chunk 12) 212 !w4_13 <- word4 (BU.unsafeIndex chunk 13) 213 !w4_14 <- word4 (BU.unsafeIndex chunk 14) 214 !w4_15 <- word4 (BU.unsafeIndex chunk 15) 215 216 let !w64 = fi w4_00 `B.shiftL` 60 217 .|. fi w4_01 `B.shiftL` 56 218 .|. fi w4_02 `B.shiftL` 52 219 .|. fi w4_03 `B.shiftL` 48 220 .|. fi w4_04 `B.shiftL` 44 221 .|. fi w4_05 `B.shiftL` 40 222 .|. fi w4_06 `B.shiftL` 36 223 .|. fi w4_07 `B.shiftL` 32 224 .|. fi w4_08 `B.shiftL` 28 225 .|. fi w4_09 `B.shiftL` 24 226 .|. fi w4_10 `B.shiftL` 20 227 .|. fi w4_11 `B.shiftL` 16 228 .|. fi w4_12 `B.shiftL` 12 229 .|. fi w4_13 `B.shiftL` 08 230 .|. fi w4_14 `B.shiftL` 04 231 .|. fi w4_15 232 233 go64 (acc <> BSB.word64BE w64) etc 234 235 go32 acc b = case BS.splitAt 8 b of 236 (chunk, etc) 237 | BS.null chunk -> pure acc 238 | otherwise -> do 239 !w4_00 <- word4 (BU.unsafeIndex chunk 00) 240 !w4_01 <- word4 (BU.unsafeIndex chunk 01) 241 !w4_02 <- word4 (BU.unsafeIndex chunk 02) 242 !w4_03 <- word4 (BU.unsafeIndex chunk 03) 243 !w4_04 <- word4 (BU.unsafeIndex chunk 04) 244 !w4_05 <- word4 (BU.unsafeIndex chunk 05) 245 !w4_06 <- word4 (BU.unsafeIndex chunk 06) 246 !w4_07 <- word4 (BU.unsafeIndex chunk 07) 247 248 let !w32 = fi w4_00 `B.shiftL` 28 249 .|. fi w4_01 `B.shiftL` 24 250 .|. fi w4_02 `B.shiftL` 20 251 .|. fi w4_03 `B.shiftL` 16 252 .|. fi w4_04 `B.shiftL` 12 253 .|. fi w4_05 `B.shiftL` 08 254 .|. fi w4_06 `B.shiftL` 04 255 .|. fi w4_07 256 257 go32 (acc <> BSB.word32BE w32) etc 258 259 go16 acc b = case BS.splitAt 4 b of 260 (chunk, etc) 261 | BS.null chunk -> pure acc 262 | otherwise -> do 263 !w4_00 <- word4 (BU.unsafeIndex chunk 00) 264 !w4_01 <- word4 (BU.unsafeIndex chunk 01) 265 !w4_02 <- word4 (BU.unsafeIndex chunk 02) 266 !w4_03 <- word4 (BU.unsafeIndex chunk 03) 267 268 let !w16 = fi w4_00 `B.shiftL` 12 269 .|. fi w4_01 `B.shiftL` 08 270 .|. fi w4_02 `B.shiftL` 04 271 .|. fi w4_03 272 273 go16 (acc <> BSB.word16BE w16) etc 274 275 go8 acc b = case BS.splitAt 2 b of 276 (chunk, etc) 277 | BS.null chunk -> pure acc 278 | otherwise -> do 279 !w4_00 <- word4 (BU.unsafeIndex chunk 00) 280 !w4_01 <- word4 (BU.unsafeIndex chunk 01) 281 282 let !w8 = fi w4_00 `B.shiftL` 04 283 .|. fi w4_01 284 285 go8 (acc <> BSB.word8 w8) etc 286