tx

Minimal Bitcoin transaction primitives (docs.ppad.tech/tx).
git clone git://git.ppad.tech/tx.git
Log | Files | Refs | README | LICENSE

commit d26828b5c441c3db23a323f18e665a4f6bae02cb
parent baa2031ec129e3fbb3038195cdc241d190585b09
Author: Jared Tobin <jared@jtobin.io>
Date:   Sun, 25 Jan 2026 17:52:25 +0400

Implement transaction parsing (decoding)

Add decoding functions for Bitcoin transactions:
- get_compact: compactSize decoding with minimal encoding enforcement
- get_word32_le, get_word64_le, get_word16_le: little-endian word decoding
- get_outpoint, get_txin, get_txout, get_witness: component decoders
- from_bytes: full tx parsing with segwit/legacy format detection

Segwit detection via marker byte 0x00 followed by flag 0x01 after version.
Uses explicit offset tracking without external parser libraries.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

Diffstat:
Mlib/Bitcoin/Prim/Tx.hs | 228+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 224 insertions(+), 4 deletions(-)

diff --git a/lib/Bitcoin/Prim/Tx.hs b/lib/Bitcoin/Prim/Tx.hs @@ -32,6 +32,7 @@ module Bitcoin.Prim.Tx ( , txid ) where +import Data.Bits ((.|.), shiftL) import qualified Data.ByteString as BS import qualified Data.ByteString.Base16 as B16 import Data.Word (Word32, Word64) @@ -81,10 +82,6 @@ data Tx = Tx to_bytes :: Tx -> BS.ByteString to_bytes = error "Bitcoin.Prim.Tx.to_bytes: not yet implemented" --- | Parse a transaction from bytes. -from_bytes :: BS.ByteString -> Maybe Tx -from_bytes = error "Bitcoin.Prim.Tx.from_bytes: not yet implemented" - -- | Serialise a transaction to legacy format (no witness data). -- -- Used for txid computation. @@ -101,6 +98,229 @@ from_base16 b16 = do bs <- B16.decode b16 from_bytes bs +-- decoding -------------------------------------------------------------------- + +-- | Parse a transaction from bytes. +-- +-- Automatically detects segwit vs legacy format by checking for +-- marker byte 0x00 followed by flag 0x01 after the version field. +from_bytes :: BS.ByteString -> Maybe Tx +from_bytes !bs = do + -- need at least 4 bytes for version + guard (BS.length bs >= 4) + let !version = get_word32_le bs 0 + !off0 = 4 + -- check for segwit marker (0x00) and flag (0x01) + if BS.length bs > off0 + 1 + && BS.index bs off0 == 0x00 + && BS.index bs (off0 + 1) == 0x01 + then parse_segwit bs version (off0 + 2) + else parse_legacy bs version off0 + +-- Parse legacy transaction (no witness data) +parse_legacy :: BS.ByteString -> Word32 -> Int -> Maybe Tx +parse_legacy !bs !version !off0 = do + -- input count + (input_count, off1) <- get_compact bs off0 + -- inputs + (inputs, off2) <- get_many get_txin bs off1 (fromIntegral input_count) + -- output count + (output_count, off3) <- get_compact bs off2 + -- outputs + (outputs, off4) <- get_many get_txout bs off3 (fromIntegral output_count) + -- locktime (4 bytes) + guard (BS.length bs >= off4 + 4) + let !locktime = get_word32_le bs off4 + !off5 = off4 + 4 + -- should have consumed all bytes + guard (off5 == BS.length bs) + pure $! Tx version inputs outputs [] locktime + +-- Parse segwit transaction (with witness data) +parse_segwit :: BS.ByteString -> Word32 -> Int -> Maybe Tx +parse_segwit !bs !version !off0 = do + -- input count + (input_count, off1) <- get_compact bs off0 + -- inputs + (inputs, off2) <- get_many get_txin bs off1 (fromIntegral input_count) + -- output count + (output_count, off3) <- get_compact bs off2 + -- outputs + (outputs, off4) <- get_many get_txout bs off3 (fromIntegral output_count) + -- witnesses (one per input) + (witnesses, off5) <- get_many get_witness bs off4 (fromIntegral input_count) + -- locktime (4 bytes) + guard (BS.length bs >= off5 + 4) + let !locktime = get_word32_le bs off5 + !off6 = off5 + 4 + -- should have consumed all bytes + guard (off6 == BS.length bs) + pure $! Tx version inputs outputs witnesses locktime + +-- internal helpers ------------------------------------------------------------ + +-- | Guard for Maybe monad. +guard :: Bool -> Maybe () +guard True = Just () +guard False = Nothing +{-# INLINE guard #-} + +-- | Decode a 32-bit little-endian word at the given offset. +-- Does not bounds-check; caller must ensure sufficient bytes. +get_word32_le :: BS.ByteString -> Int -> Word32 +get_word32_le !bs !off = + let !b0 = fromIntegral (BS.index bs off) :: Word32 + !b1 = fromIntegral (BS.index bs (off + 1)) :: Word32 + !b2 = fromIntegral (BS.index bs (off + 2)) :: Word32 + !b3 = fromIntegral (BS.index bs (off + 3)) :: Word32 + in b0 .|. (b1 `shiftL` 8) .|. (b2 `shiftL` 16) .|. (b3 `shiftL` 24) +{-# INLINE get_word32_le #-} + +-- | Decode a 64-bit little-endian word at the given offset. +-- Does not bounds-check; caller must ensure sufficient bytes. +get_word64_le :: BS.ByteString -> Int -> Word64 +get_word64_le !bs !off = + let !b0 = fromIntegral (BS.index bs off) :: Word64 + !b1 = fromIntegral (BS.index bs (off + 1)) :: Word64 + !b2 = fromIntegral (BS.index bs (off + 2)) :: Word64 + !b3 = fromIntegral (BS.index bs (off + 3)) :: Word64 + !b4 = fromIntegral (BS.index bs (off + 4)) :: Word64 + !b5 = fromIntegral (BS.index bs (off + 5)) :: Word64 + !b6 = fromIntegral (BS.index bs (off + 6)) :: Word64 + !b7 = fromIntegral (BS.index bs (off + 7)) :: Word64 + in b0 .|. (b1 `shiftL` 8) .|. (b2 `shiftL` 16) .|. (b3 `shiftL` 24) + .|. (b4 `shiftL` 32) .|. (b5 `shiftL` 40) + .|. (b6 `shiftL` 48) .|. (b7 `shiftL` 56) +{-# INLINE get_word64_le #-} + +-- | Decode a 16-bit little-endian word at the given offset. +-- Does not bounds-check; caller must ensure sufficient bytes. +get_word16_le :: BS.ByteString -> Int -> Word64 +get_word16_le !bs !off = + let !b0 = fromIntegral (BS.index bs off) :: Word64 + !b1 = fromIntegral (BS.index bs (off + 1)) :: Word64 + in b0 .|. (b1 `shiftL` 8) +{-# INLINE get_word16_le #-} + +-- | Decode compactSize (Bitcoin's variable-length integer). +-- Returns (value, new_offset). +-- Enforces minimal encoding: rejects non-minimal representations. +get_compact :: BS.ByteString -> Int -> Maybe (Word64, Int) +get_compact !bs !off + | off >= BS.length bs = Nothing + | otherwise = case BS.index bs off of + tag | tag <= 0xfc -> + -- Single byte: value is the tag itself + Just (fromIntegral tag, off + 1) + + 0xfd -> + -- 2-byte value follows + if BS.length bs < off + 3 + then Nothing + else + let !val = get_word16_le bs (off + 1) + in if val < 0xfd + then Nothing -- non-minimal encoding + else Just (val, off + 3) + + 0xfe -> + -- 4-byte value follows + if BS.length bs < off + 5 + then Nothing + else + let !val = fromIntegral (get_word32_le bs (off + 1)) :: Word64 + in if val <= 0xffff + then Nothing -- non-minimal encoding + else Just (val, off + 5) + + _ -> -- 0xff + -- 8-byte value follows + if BS.length bs < off + 9 + then Nothing + else + let !val = get_word64_le bs (off + 1) + in if val <= 0xffffffff + then Nothing -- non-minimal encoding + else Just (val, off + 9) +{-# INLINE get_compact #-} + +-- | Decode an outpoint (txid + vout). +-- Returns (OutPoint, new_offset). +get_outpoint :: BS.ByteString -> Int -> Maybe (OutPoint, Int) +get_outpoint !bs !off + | BS.length bs < off + 36 = Nothing + | otherwise = + let !txid_bytes = BS.take 32 (BS.drop off bs) + !vout = get_word32_le bs (off + 32) + in Just (OutPoint (TxId txid_bytes) vout, off + 36) +{-# INLINE get_outpoint #-} + +-- | Decode a transaction input. +-- Returns (TxIn, new_offset). +get_txin :: BS.ByteString -> Int -> Maybe (TxIn, Int) +get_txin !bs !off0 = do + -- outpoint: 36 bytes + (outpoint, off1) <- get_outpoint bs off0 + -- scriptSig length + bytes + (script_len, off2) <- get_compact bs off1 + let !slen = fromIntegral script_len + guard (BS.length bs >= off2 + slen) + let !script_sig = BS.take slen (BS.drop off2 bs) + !off3 = off2 + slen + -- sequence: 4 bytes + guard (BS.length bs >= off3 + 4) + let !seqn = get_word32_le bs off3 + !off4 = off3 + 4 + pure (TxIn outpoint script_sig seqn, off4) + +-- | Decode a transaction output. +-- Returns (TxOut, new_offset). +get_txout :: BS.ByteString -> Int -> Maybe (TxOut, Int) +get_txout !bs !off0 = do + -- value: 8 bytes + guard (BS.length bs >= off0 + 8) + let !value = get_word64_le bs off0 + !off1 = off0 + 8 + -- scriptPubKey length + bytes + (script_len, off2) <- get_compact bs off1 + let !slen = fromIntegral script_len + guard (BS.length bs >= off2 + slen) + let !script_pk = BS.take slen (BS.drop off2 bs) + !off3 = off2 + slen + pure (TxOut value script_pk, off3) + +-- | Decode a witness stack for one input. +-- Returns (Witness, new_offset). +get_witness :: BS.ByteString -> Int -> Maybe (Witness, Int) +get_witness !bs !off0 = do + -- stack item count + (item_count, off1) <- get_compact bs off0 + -- each item: length + bytes + (items, off2) <- get_many get_witness_item bs off1 (fromIntegral item_count) + pure (Witness items, off2) + +-- | Decode a single witness stack item (length-prefixed bytes). +get_witness_item :: BS.ByteString -> Int -> Maybe (BS.ByteString, Int) +get_witness_item !bs !off0 = do + (item_len, off1) <- get_compact bs off0 + let !ilen = fromIntegral item_len + guard (BS.length bs >= off1 + ilen) + let !item = BS.take ilen (BS.drop off1 bs) + pure (item, off1 + ilen) + +-- | Decode multiple items using a decoder function. +-- Returns (list of items, new_offset). +get_many :: (BS.ByteString -> Int -> Maybe (a, Int)) + -> BS.ByteString -> Int -> Int -> Maybe ([a], Int) +get_many getter !bs = go [] + where + go !acc !off !n + | n <= 0 = Just (reverse acc, off) + | otherwise = do + (item, off') <- getter bs off + go (item : acc) off' (n - 1) +{-# INLINE get_many #-} + -- txid ------------------------------------------------------------------------ -- | Compute the transaction ID (double SHA256 of legacy serialisation).