Tx.hs (16203B)
1 {-# OPTIONS_HADDOCK prune #-} 2 {-# LANGUAGE BangPatterns #-} 3 {-# LANGUAGE DeriveGeneric #-} 4 {-# LANGUAGE OverloadedStrings #-} 5 {-# LANGUAGE RecordWildCards #-} 6 7 -- | 8 -- Module: Bitcoin.Prim.Tx 9 -- Copyright: (c) 2025 Jared Tobin 10 -- License: MIT 11 -- Maintainer: Jared Tobin <jared@ppad.tech> 12 -- 13 -- Minimal Bitcoin transaction primitives, including raw transaction 14 -- types, serialisation to/from bytes, and txid computation. 15 16 module Bitcoin.Prim.Tx ( 17 -- * Transaction Types 18 Tx(..) 19 , TxIn(..) 20 , TxOut(..) 21 , OutPoint(..) 22 , Witness(..) 23 , TxId(..) 24 , mkTxId 25 26 -- * Serialisation 27 , to_bytes 28 , from_bytes 29 , to_bytes_legacy 30 , to_base16 31 , from_base16 32 33 -- * TxId 34 , txid 35 36 -- * Internal (for Sighash) 37 , put_word32_le 38 , put_word64_le 39 , put_compact 40 , put_outpoint 41 , put_txout 42 , to_strict 43 ) where 44 45 import Control.DeepSeq (NFData(..)) 46 import qualified Crypto.Hash.SHA256 as SHA256 47 import Data.Bits ((.|.), shiftL) 48 import qualified Data.ByteString as BS 49 import qualified Data.ByteString.Base16 as B16 50 import qualified Data.ByteString.Builder as BSB 51 import qualified Data.ByteString.Lazy as BL 52 import Data.List.NonEmpty (NonEmpty(..)) 53 import qualified Data.List.NonEmpty as NE 54 import Data.Word (Word32, Word64) 55 import GHC.Generics (Generic) 56 57 -- | Transaction ID (32 bytes, little-endian double-SHA256). 58 newtype TxId = TxId BS.ByteString 59 deriving (Eq, Show, Generic) 60 61 instance NFData TxId 62 63 -- | Construct a TxId from a 32-byte ByteString. 64 -- 65 -- Returns 'Nothing' if the input is not exactly 32 bytes. 66 -- 67 -- @ 68 -- mkTxId (BS.replicate 32 0x00) == Just (TxId ...) 69 -- mkTxId (BS.replicate 31 0x00) == Nothing 70 -- @ 71 mkTxId :: BS.ByteString -> Maybe TxId 72 mkTxId bs 73 | BS.length bs == 32 = Just (TxId bs) 74 | otherwise = Nothing 75 76 -- | Transaction outpoint (txid + output index). 77 data OutPoint = OutPoint 78 { op_txid :: {-# UNPACK #-} !TxId 79 , op_vout :: {-# UNPACK #-} !Word32 80 } deriving (Eq, Show, Generic) 81 82 instance NFData OutPoint 83 84 -- | Transaction input. 85 data TxIn = TxIn 86 { txin_prevout :: {-# UNPACK #-} !OutPoint 87 , txin_script_sig :: !BS.ByteString 88 , txin_sequence :: {-# UNPACK #-} !Word32 89 } deriving (Eq, Show, Generic) 90 91 instance NFData TxIn 92 93 -- | Transaction output. 94 data TxOut = TxOut 95 { txout_value :: {-# UNPACK #-} !Word64 -- ^ satoshis 96 , txout_script_pubkey :: !BS.ByteString 97 } deriving (Eq, Show, Generic) 98 99 instance NFData TxOut 100 101 -- | Witness stack for a single input. 102 newtype Witness = Witness [BS.ByteString] 103 deriving (Eq, Show, Generic) 104 105 instance NFData Witness 106 107 -- | Complete transaction. 108 -- 109 -- Bitcoin requires at least one input and one output, enforced here 110 -- via 'NonEmpty' lists. 111 data Tx = Tx 112 { tx_version :: {-# UNPACK #-} !Word32 113 , tx_inputs :: !(NonEmpty TxIn) 114 , tx_outputs :: !(NonEmpty TxOut) 115 , tx_witnesses :: ![Witness] -- ^ empty list for legacy tx 116 , tx_locktime :: {-# UNPACK #-} !Word32 117 } deriving (Eq, Show, Generic) 118 119 instance NFData Tx 120 121 -- serialisation --------------------------------------------------------------- 122 123 -- | Serialise a transaction to bytes. 124 -- 125 -- Uses segwit format if witnesses are present, legacy otherwise. 126 -- 127 -- @ 128 -- -- round-trip 129 -- from_bytes (to_bytes tx) == Just tx 130 -- @ 131 to_bytes :: Tx -> BS.ByteString 132 to_bytes tx@Tx {..} 133 | null tx_witnesses = to_bytes_legacy tx 134 | otherwise = to_strict $ 135 put_word32_le tx_version 136 <> BSB.word8 0x00 -- marker 137 <> BSB.word8 0x01 -- flag 138 <> put_compact (fromIntegral (NE.length tx_inputs)) 139 <> foldMap put_txin tx_inputs 140 <> put_compact (fromIntegral (NE.length tx_outputs)) 141 <> foldMap put_txout tx_outputs 142 <> foldMap put_witness tx_witnesses 143 <> put_word32_le tx_locktime 144 145 -- | Serialise a transaction to legacy format (no witness data). 146 -- 147 -- Used for txid computation. Excludes witness data even if present. 148 -- 149 -- @ 150 -- -- for legacy tx (no witnesses), same as to_bytes 151 -- to_bytes_legacy legacyTx == to_bytes legacyTx 152 -- 153 -- -- for segwit tx, strips witnesses 154 -- BS.length (to_bytes_legacy segwitTx) < BS.length (to_bytes segwitTx) 155 -- @ 156 to_bytes_legacy :: Tx -> BS.ByteString 157 to_bytes_legacy Tx {..} = to_strict $ 158 put_word32_le tx_version 159 <> put_compact (fromIntegral (NE.length tx_inputs)) 160 <> foldMap put_txin tx_inputs 161 <> put_compact (fromIntegral (NE.length tx_outputs)) 162 <> foldMap put_txout tx_outputs 163 <> put_word32_le tx_locktime 164 165 -- | Serialise a transaction to base16 (hex). 166 -- 167 -- @ 168 -- to_base16 tx = B16.encode (to_bytes tx) 169 -- @ 170 to_base16 :: Tx -> BS.ByteString 171 to_base16 tx = B16.encode (to_bytes tx) 172 173 -- | Parse a transaction from base16 (hex). 174 -- 175 -- @ 176 -- -- round-trip 177 -- from_base16 (to_base16 tx) == Just tx 178 -- @ 179 from_base16 :: BS.ByteString -> Maybe Tx 180 from_base16 b16 = do 181 bs <- B16.decode b16 182 from_bytes bs 183 184 -- internal: builders ---------------------------------------------------------- 185 186 -- | Convert a Builder to a strict ByteString. 187 to_strict :: BSB.Builder -> BS.ByteString 188 to_strict = BL.toStrict . BSB.toLazyByteString 189 {-# INLINE to_strict #-} 190 191 -- | Encode a Word32 as little-endian bytes. 192 put_word32_le :: Word32 -> BSB.Builder 193 put_word32_le = BSB.word32LE 194 {-# INLINE put_word32_le #-} 195 196 -- | Encode a Word64 as little-endian bytes. 197 put_word64_le :: Word64 -> BSB.Builder 198 put_word64_le = BSB.word64LE 199 {-# INLINE put_word64_le #-} 200 201 -- | Encode a Word64 as Bitcoin compactSize (varint). 202 -- 203 -- Encoding: 204 -- - 0x00-0xfc: 1 byte (value itself) 205 -- - 0xfd-0xffff: 0xfd ++ 2 bytes LE 206 -- - 0x10000-0xffffffff: 0xfe ++ 4 bytes LE 207 -- - larger: 0xff ++ 8 bytes LE 208 put_compact :: Word64 -> BSB.Builder 209 put_compact !n 210 | n <= 0xfc = BSB.word8 (fromIntegral n) 211 | n <= 0xffff = BSB.word8 0xfd <> BSB.word16LE (fromIntegral n) 212 | n <= 0xffffffff = BSB.word8 0xfe <> BSB.word32LE (fromIntegral n) 213 | otherwise = BSB.word8 0xff <> BSB.word64LE n 214 {-# INLINE put_compact #-} 215 216 -- | Encode an OutPoint (txid + vout). 217 put_outpoint :: OutPoint -> BSB.Builder 218 put_outpoint OutPoint {..} = 219 let !(TxId !txid_bs) = op_txid 220 in BSB.byteString txid_bs <> put_word32_le op_vout 221 {-# INLINE put_outpoint #-} 222 223 -- | Encode a TxIn. 224 put_txin :: TxIn -> BSB.Builder 225 put_txin TxIn {..} = 226 put_outpoint txin_prevout 227 <> put_compact (fromIntegral (BS.length txin_script_sig)) 228 <> BSB.byteString txin_script_sig 229 <> put_word32_le txin_sequence 230 {-# INLINE put_txin #-} 231 232 -- | Encode a TxOut. 233 put_txout :: TxOut -> BSB.Builder 234 put_txout TxOut {..} = 235 put_word64_le txout_value 236 <> put_compact (fromIntegral (BS.length txout_script_pubkey)) 237 <> BSB.byteString txout_script_pubkey 238 {-# INLINE put_txout #-} 239 240 -- | Encode a Witness stack. 241 put_witness :: Witness -> BSB.Builder 242 put_witness (Witness items) = 243 put_compact (fromIntegral (length items)) 244 <> foldMap put_witness_item items 245 where 246 put_witness_item :: BS.ByteString -> BSB.Builder 247 put_witness_item !item = 248 put_compact (fromIntegral (BS.length item)) 249 <> BSB.byteString item 250 {-# INLINE put_witness #-} 251 252 -- decoding -------------------------------------------------------------------- 253 254 -- | Parse a transaction from bytes. 255 -- 256 -- Automatically detects segwit vs legacy format by checking for 257 -- marker byte 0x00 followed by flag 0x01 after the version field. 258 -- 259 -- Returns 'Nothing' on invalid or truncated input. 260 -- 261 -- @ 262 -- -- round-trip 263 -- from_bytes (to_bytes tx) == Just tx 264 -- @ 265 from_bytes :: BS.ByteString -> Maybe Tx 266 from_bytes !bs = do 267 -- need at least 4 bytes for version 268 guard (BS.length bs >= 4) 269 let !version = get_word32_le bs 0 270 !off0 = 4 271 -- check for segwit marker (0x00) and flag (0x01) 272 if BS.length bs > off0 + 1 273 && BS.index bs off0 == 0x00 274 && BS.index bs (off0 + 1) == 0x01 275 then parse_segwit bs version (off0 + 2) 276 else parse_legacy bs version off0 277 278 -- Parse legacy transaction (no witness data) 279 parse_legacy :: BS.ByteString -> Word32 -> Int -> Maybe Tx 280 parse_legacy !bs !version !off0 = do 281 -- input count 282 (input_count, off1) <- get_compact bs off0 283 -- inputs (must have at least one) 284 (inputs_list, off2) <- get_many get_txin bs off1 (fromIntegral input_count) 285 inputs <- NE.nonEmpty inputs_list 286 -- output count 287 (output_count, off3) <- get_compact bs off2 288 -- outputs (must have at least one) 289 (outputs_list, off4) <- get_many get_txout bs off3 (fromIntegral output_count) 290 outputs <- NE.nonEmpty outputs_list 291 -- locktime (4 bytes) 292 guard (BS.length bs >= off4 + 4) 293 let !locktime = get_word32_le bs off4 294 !off5 = off4 + 4 295 -- should have consumed all bytes 296 guard (off5 == BS.length bs) 297 pure $! Tx version inputs outputs [] locktime 298 299 -- Parse segwit transaction (with witness data) 300 parse_segwit :: BS.ByteString -> Word32 -> Int -> Maybe Tx 301 parse_segwit !bs !version !off0 = do 302 -- input count 303 (input_count, off1) <- get_compact bs off0 304 -- inputs (must have at least one) 305 (inputs_list, off2) <- get_many get_txin bs off1 (fromIntegral input_count) 306 inputs <- NE.nonEmpty inputs_list 307 -- output count 308 (output_count, off3) <- get_compact bs off2 309 -- outputs (must have at least one) 310 (outputs_list, off4) <- get_many get_txout bs off3 (fromIntegral output_count) 311 outputs <- NE.nonEmpty outputs_list 312 -- witnesses (one per input) 313 (witnesses, off5) <- get_many get_witness bs off4 (fromIntegral input_count) 314 -- locktime (4 bytes) 315 guard (BS.length bs >= off5 + 4) 316 let !locktime = get_word32_le bs off5 317 !off6 = off5 + 4 318 -- should have consumed all bytes 319 guard (off6 == BS.length bs) 320 pure $! Tx version inputs outputs witnesses locktime 321 322 -- internal helpers ------------------------------------------------------------ 323 324 -- | Guard for Maybe monad. 325 guard :: Bool -> Maybe () 326 guard True = Just () 327 guard False = Nothing 328 {-# INLINE guard #-} 329 330 -- | Decode a 32-bit little-endian word at the given offset. 331 -- Does not bounds-check; caller must ensure sufficient bytes. 332 get_word32_le :: BS.ByteString -> Int -> Word32 333 get_word32_le !bs !off = 334 let !b0 = fromIntegral (BS.index bs off) :: Word32 335 !b1 = fromIntegral (BS.index bs (off + 1)) :: Word32 336 !b2 = fromIntegral (BS.index bs (off + 2)) :: Word32 337 !b3 = fromIntegral (BS.index bs (off + 3)) :: Word32 338 in b0 .|. (b1 `shiftL` 8) .|. (b2 `shiftL` 16) .|. (b3 `shiftL` 24) 339 {-# INLINE get_word32_le #-} 340 341 -- | Decode a 64-bit little-endian word at the given offset. 342 -- Does not bounds-check; caller must ensure sufficient bytes. 343 get_word64_le :: BS.ByteString -> Int -> Word64 344 get_word64_le !bs !off = 345 let !b0 = fromIntegral (BS.index bs off) :: Word64 346 !b1 = fromIntegral (BS.index bs (off + 1)) :: Word64 347 !b2 = fromIntegral (BS.index bs (off + 2)) :: Word64 348 !b3 = fromIntegral (BS.index bs (off + 3)) :: Word64 349 !b4 = fromIntegral (BS.index bs (off + 4)) :: Word64 350 !b5 = fromIntegral (BS.index bs (off + 5)) :: Word64 351 !b6 = fromIntegral (BS.index bs (off + 6)) :: Word64 352 !b7 = fromIntegral (BS.index bs (off + 7)) :: Word64 353 in b0 .|. (b1 `shiftL` 8) .|. (b2 `shiftL` 16) .|. (b3 `shiftL` 24) 354 .|. (b4 `shiftL` 32) .|. (b5 `shiftL` 40) 355 .|. (b6 `shiftL` 48) .|. (b7 `shiftL` 56) 356 {-# INLINE get_word64_le #-} 357 358 -- | Decode a 16-bit little-endian word at the given offset. 359 -- Does not bounds-check; caller must ensure sufficient bytes. 360 get_word16_le :: BS.ByteString -> Int -> Word64 361 get_word16_le !bs !off = 362 let !b0 = fromIntegral (BS.index bs off) :: Word64 363 !b1 = fromIntegral (BS.index bs (off + 1)) :: Word64 364 in b0 .|. (b1 `shiftL` 8) 365 {-# INLINE get_word16_le #-} 366 367 -- | Decode compactSize (Bitcoin's variable-length integer). 368 -- Returns (value, new_offset). 369 -- Enforces minimal encoding: rejects non-minimal representations. 370 get_compact :: BS.ByteString -> Int -> Maybe (Word64, Int) 371 get_compact !bs !off 372 | off >= BS.length bs = Nothing 373 | otherwise = case BS.index bs off of 374 tag | tag <= 0xfc -> 375 -- Single byte: value is the tag itself 376 Just (fromIntegral tag, off + 1) 377 378 0xfd -> 379 -- 2-byte value follows 380 if BS.length bs < off + 3 381 then Nothing 382 else 383 let !val = get_word16_le bs (off + 1) 384 in if val < 0xfd 385 then Nothing -- non-minimal encoding 386 else Just (val, off + 3) 387 388 0xfe -> 389 -- 4-byte value follows 390 if BS.length bs < off + 5 391 then Nothing 392 else 393 let !val = fromIntegral (get_word32_le bs (off + 1)) :: Word64 394 in if val <= 0xffff 395 then Nothing -- non-minimal encoding 396 else Just (val, off + 5) 397 398 _ -> -- 0xff 399 -- 8-byte value follows 400 if BS.length bs < off + 9 401 then Nothing 402 else 403 let !val = get_word64_le bs (off + 1) 404 in if val <= 0xffffffff 405 then Nothing -- non-minimal encoding 406 else Just (val, off + 9) 407 {-# INLINE get_compact #-} 408 409 -- | Decode an outpoint (txid + vout). 410 -- Returns (OutPoint, new_offset). 411 get_outpoint :: BS.ByteString -> Int -> Maybe (OutPoint, Int) 412 get_outpoint !bs !off 413 | BS.length bs < off + 36 = Nothing 414 | otherwise = 415 let !txid_bytes = BS.take 32 (BS.drop off bs) 416 !vout = get_word32_le bs (off + 32) 417 in Just (OutPoint (TxId txid_bytes) vout, off + 36) 418 {-# INLINE get_outpoint #-} 419 420 -- | Decode a transaction input. 421 -- Returns (TxIn, new_offset). 422 get_txin :: BS.ByteString -> Int -> Maybe (TxIn, Int) 423 get_txin !bs !off0 = do 424 -- outpoint: 36 bytes 425 (outpoint, off1) <- get_outpoint bs off0 426 -- scriptSig length + bytes 427 (script_len, off2) <- get_compact bs off1 428 let !slen = fromIntegral script_len 429 guard (BS.length bs >= off2 + slen) 430 let !script_sig = BS.take slen (BS.drop off2 bs) 431 !off3 = off2 + slen 432 -- sequence: 4 bytes 433 guard (BS.length bs >= off3 + 4) 434 let !seqn = get_word32_le bs off3 435 !off4 = off3 + 4 436 pure (TxIn outpoint script_sig seqn, off4) 437 438 -- | Decode a transaction output. 439 -- Returns (TxOut, new_offset). 440 get_txout :: BS.ByteString -> Int -> Maybe (TxOut, Int) 441 get_txout !bs !off0 = do 442 -- value: 8 bytes 443 guard (BS.length bs >= off0 + 8) 444 let !value = get_word64_le bs off0 445 !off1 = off0 + 8 446 -- scriptPubKey length + bytes 447 (script_len, off2) <- get_compact bs off1 448 let !slen = fromIntegral script_len 449 guard (BS.length bs >= off2 + slen) 450 let !script_pk = BS.take slen (BS.drop off2 bs) 451 !off3 = off2 + slen 452 pure (TxOut value script_pk, off3) 453 454 -- | Decode a witness stack for one input. 455 -- Returns (Witness, new_offset). 456 get_witness :: BS.ByteString -> Int -> Maybe (Witness, Int) 457 get_witness !bs !off0 = do 458 -- stack item count 459 (item_count, off1) <- get_compact bs off0 460 -- each item: length + bytes 461 (items, off2) <- get_many get_witness_item bs off1 (fromIntegral item_count) 462 pure (Witness items, off2) 463 464 -- | Decode a single witness stack item (length-prefixed bytes). 465 get_witness_item :: BS.ByteString -> Int -> Maybe (BS.ByteString, Int) 466 get_witness_item !bs !off0 = do 467 (item_len, off1) <- get_compact bs off0 468 let !ilen = fromIntegral item_len 469 guard (BS.length bs >= off1 + ilen) 470 let !item = BS.take ilen (BS.drop off1 bs) 471 pure (item, off1 + ilen) 472 473 -- | Decode multiple items using a decoder function. 474 -- Returns (list of items, new_offset). 475 get_many :: (BS.ByteString -> Int -> Maybe (a, Int)) 476 -> BS.ByteString -> Int -> Int -> Maybe ([a], Int) 477 get_many getter !bs = go [] 478 where 479 go !acc !off !n 480 | n <= 0 = Just (reverse acc, off) 481 | otherwise = do 482 (item, off') <- getter bs off 483 go (item : acc) off' (n - 1) 484 {-# INLINE get_many #-} 485 486 -- txid ------------------------------------------------------------------------ 487 488 -- | Compute the transaction ID (double SHA256 of legacy serialisation). 489 -- 490 -- The txid is computed from the legacy serialisation, so segwit 491 -- transactions have the same txid regardless of witness data. 492 -- 493 -- @ 494 -- -- Satoshi->Hal tx (block 170) 495 -- txid satoshiHalTx == 496 -- TxId "f4184fc596403b9d638783cf57adfe4c75c605f6356fbc91338530e9831e9e16" 497 -- @ 498 txid :: Tx -> TxId 499 txid tx = TxId (SHA256.hash (SHA256.hash (to_bytes_legacy tx)))