tx

Minimal Bitcoin transaction primitives (docs.ppad.tech/tx).
git clone git://git.ppad.tech/tx.git
Log | Files | Refs | README | LICENSE

commit e401de2b86f7470eedfbf537a6cbcb57614e1b20
parent 26bc887b09a314b71d6cb0d3831e9f9486ae4552
Author: Jared Tobin <jared@jtobin.io>
Date:   Sun, 25 Jan 2026 17:53:40 +0400

Merge impl/decoding: transaction parsing

Diffstat:
Mlib/Bitcoin/Prim/Tx.hs | 228+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
Aplans/IMPL1.md | 208+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 432 insertions(+), 4 deletions(-)

diff --git a/lib/Bitcoin/Prim/Tx.hs b/lib/Bitcoin/Prim/Tx.hs @@ -33,6 +33,7 @@ module Bitcoin.Prim.Tx ( , txid ) where +import Data.Bits ((.|.), shiftL) import qualified Data.ByteString as BS import qualified Data.ByteString.Base16 as B16 import qualified Data.ByteString.Builder as BSB @@ -95,10 +96,6 @@ to_bytes tx@Tx {..} <> foldMap put_witness tx_witnesses <> put_word32_le tx_locktime --- | Parse a transaction from bytes. -from_bytes :: BS.ByteString -> Maybe Tx -from_bytes = error "Bitcoin.Prim.Tx.from_bytes: not yet implemented" - -- | Serialise a transaction to legacy format (no witness data). -- -- Used for txid computation. @@ -189,6 +186,229 @@ put_witness (Witness items) = <> BSB.byteString item {-# INLINE put_witness #-} +-- decoding -------------------------------------------------------------------- + +-- | Parse a transaction from bytes. +-- +-- Automatically detects segwit vs legacy format by checking for +-- marker byte 0x00 followed by flag 0x01 after the version field. 
from_bytes :: BS.ByteString -> Maybe Tx
from_bytes !bs
    -- the 4-byte version field is mandatory
    | BS.length bs < 4 = Nothing
    | has_marker       = parse_segwit bs version 6
    | otherwise        = parse_legacy bs version 4
  where
    version = get_word32_le bs 0
    -- segwit marker byte (0x00) at offset 4, then flag byte (0x01) at
    -- offset 5; both bytes must be present for the segwit path
    has_marker =
         BS.length bs > 5
      && BS.index bs 4 == 0x00
      && BS.index bs 5 == 0x01

-- Parse the remainder of a legacy transaction (no witness data).
--
-- The offset argument points at the input count. Fails unless the
-- 4-byte locktime is the very last thing in the buffer.
parse_legacy :: BS.ByteString -> Word32 -> Int -> Maybe Tx
parse_legacy !bs !version !start = do
  (n_ins, ins_at)   <- get_compact bs start
  (ins, n_outs_at)  <- get_many get_txin bs ins_at (fromIntegral n_ins)
  (n_outs, outs_at) <- get_compact bs n_outs_at
  (outs, lock_at)   <- get_many get_txout bs outs_at (fromIntegral n_outs)
  -- exactly four bytes (the locktime) must remain: this both ensures
  -- the locktime is present and that there is no trailing data
  guard (BS.length bs == lock_at + 4)
  let !locktime = get_word32_le bs lock_at
  pure $! Tx version ins outs [] locktime

-- Parse the remainder of a segwit transaction (with witness data).
--
-- The offset argument points at the input count, i.e. just past the
-- marker and flag bytes. One witness stack is read per input.
parse_segwit :: BS.ByteString -> Word32 -> Int -> Maybe Tx
parse_segwit !bs !version !start = do
  (n_ins, ins_at)   <- get_compact bs start
  (ins, n_outs_at)  <- get_many get_txin bs ins_at (fromIntegral n_ins)
  (n_outs, outs_at) <- get_compact bs n_outs_at
  (outs, wits_at)   <- get_many get_txout bs outs_at (fromIntegral n_outs)
  (wits, lock_at)   <- get_many get_witness bs wits_at (fromIntegral n_ins)
  -- exactly four bytes (the locktime) must remain: this both ensures
  -- the locktime is present and that there is no trailing data
  guard (BS.length bs == lock_at + 4)
  let !locktime = get_word32_le bs lock_at
  pure $! Tx version ins outs wits locktime

-- internal helpers ------------------------------------------------------------

-- | Guard for Maybe monad.
guard :: Bool -> Maybe ()
guard ok = if ok then Just () else Nothing
{-# INLINE guard #-}

-- | Decode a 32-bit little-endian word at the given offset.
-- Does not bounds-check; caller must ensure sufficient bytes.
get_word32_le :: BS.ByteString -> Int -> Word32
get_word32_le !bs !off =
    byte 0 .|. byte 1 .|. byte 2 .|. byte 3
  where
    -- the i-th byte after 'off', moved into its little-endian position
    byte :: Int -> Word32
    byte i = fromIntegral (BS.index bs (off + i)) `shiftL` (8 * i)
{-# INLINE get_word32_le #-}

-- | Decode a 64-bit little-endian word at the given offset.
-- Does not bounds-check; caller must ensure sufficient bytes.
get_word64_le :: BS.ByteString -> Int -> Word64
get_word64_le !bs !off =
        byte 0 .|. byte 1 .|. byte 2 .|. byte 3
    .|. byte 4 .|. byte 5 .|. byte 6 .|. byte 7
  where
    -- the i-th byte after 'off', moved into its little-endian position
    byte :: Int -> Word64
    byte i = fromIntegral (BS.index bs (off + i)) `shiftL` (8 * i)
{-# INLINE get_word64_le #-}

-- | Decode a 16-bit little-endian word at the given offset.
-- The result is widened to 'Word64'.
-- Does not bounds-check; caller must ensure sufficient bytes.
get_word16_le :: BS.ByteString -> Int -> Word64
get_word16_le !bs !off = lo .|. (hi `shiftL` 8)
  where
    lo, hi :: Word64
    lo = fromIntegral (BS.index bs off)
    hi = fromIntegral (BS.index bs (off + 1))
{-# INLINE get_word16_le #-}

-- | Decode compactSize (Bitcoin's variable-length integer).
-- Returns (value, new_offset).
-- Enforces minimal encoding: rejects non-minimal representations.
get_compact :: BS.ByteString -> Int -> Maybe (Word64, Int)
get_compact !bs !off
  -- a negative offset would make 'BS.index' throw; treat it as a failure
  | off < 0 || off >= BS.length bs = Nothing
  | otherwise = case BS.index bs off of
      0xfd -> sized 2 (get_word16_le bs) 0xfd
      0xfe -> sized 4 (fromIntegral . get_word32_le bs) 0x10000
      0xff -> sized 8 (get_word64_le bs) 0x100000000
      -- 0x00-0xfc: the tag byte is the value itself
      tag  -> Just (fromIntegral tag, off + 1)
  where
    -- Decode a payload of 'width' bytes following the tag byte. Values
    -- below 'lo' would have fit a shorter form, so they are rejected as
    -- non-minimal encodings.
    sized :: Int -> (Int -> Word64) -> Word64 -> Maybe (Word64, Int)
    sized width decode lo
      | BS.length bs - off <= width = Nothing   -- truncated payload
      | val < lo                    = Nothing   -- non-minimal encoding
      | otherwise                   = Just (val, off + 1 + width)
      where
        val = decode (off + 1)
{-# INLINE get_compact #-}

-- | Decode an outpoint (txid + vout).
-- Returns (OutPoint, new_offset).
get_outpoint :: BS.ByteString -> Int -> Maybe (OutPoint, Int)
get_outpoint !bs !off
  | BS.length bs < off + 36 = Nothing
  | otherwise =
      let !txid_bytes = BS.take 32 (BS.drop off bs)
          !vout       = get_word32_le bs (off + 32)
      in  Just (OutPoint (TxId txid_bytes) vout, off + 36)
{-# INLINE get_outpoint #-}

-- | Slice exactly @len@ bytes starting at @off@.
-- Returns (bytes, new_offset), or 'Nothing' if fewer than @len@ bytes
-- remain.
--
-- @len@ is an untrusted length prefix, so it is compared against the
-- remaining byte count /as a 'Word64'/ before being narrowed to 'Int'.
-- Narrowing first would let values >= 2^63 wrap to a negative 'Int',
-- pass a naive @length >= off + len@ check, and move the offset
-- backwards.
get_sized_bytes
  :: BS.ByteString -> Int -> Word64 -> Maybe (BS.ByteString, Int)
get_sized_bytes !bs !off !len
  | off < 0 || off > BS.length bs = Nothing
  | len > remaining               = Nothing
  | otherwise =
      -- safe: len <= remaining <= maxBound :: Int
      let !n     = fromIntegral len :: Int
          !bytes = BS.take n (BS.drop off bs)
      in  Just (bytes, off + n)
  where
    -- non-negative, since off <= BS.length bs on this path
    remaining = fromIntegral (BS.length bs - off) :: Word64
{-# INLINE get_sized_bytes #-}

-- | Decode a transaction input.
-- Returns (TxIn, new_offset).
get_txin :: BS.ByteString -> Int -> Maybe (TxIn, Int)
get_txin !bs !off0 = do
  -- outpoint: 36 bytes
  (outpoint, off1) <- get_outpoint bs off0
  -- scriptSig: compactSize length, then that many bytes
  (script_len, off2) <- get_compact bs off1
  (script_sig, off3) <- get_sized_bytes bs off2 script_len
  -- sequence: 4 bytes
  guard (BS.length bs >= off3 + 4)
  let !seqn = get_word32_le bs off3
  pure (TxIn outpoint script_sig seqn, off3 + 4)

-- | Decode a transaction output.
-- Returns (TxOut, new_offset).
get_txout :: BS.ByteString -> Int -> Maybe (TxOut, Int)
get_txout !bs !off0 = do
  -- value: 8 bytes
  guard (BS.length bs >= off0 + 8)
  let !value = get_word64_le bs off0
  -- scriptPubKey: compactSize length, then that many bytes
  (script_len, off1) <- get_compact bs (off0 + 8)
  (script_pk, off2)  <- get_sized_bytes bs off1 script_len
  pure (TxOut value script_pk, off2)

-- | Decode a witness stack for one input.
-- Returns (Witness, new_offset).
get_witness :: BS.ByteString -> Int -> Maybe (Witness, Int)
get_witness !bs !off0 = do
  -- stack item count
  (item_count, off1) <- get_compact bs off0
  -- Every item consumes at least its one-byte length prefix, so a count
  -- larger than the remaining byte count can never succeed. Rejecting it
  -- here also makes the narrowing to 'Int' below lossless.
  guard (item_count <= fromIntegral (BS.length bs - off1))
  -- each item: length + bytes
  (items, off2) <- get_many get_witness_item bs off1 (fromIntegral item_count)
  pure (Witness items, off2)

-- | Decode a single witness stack item (length-prefixed bytes).
-- Returns (item, new_offset).
get_witness_item :: BS.ByteString -> Int -> Maybe (BS.ByteString, Int)
get_witness_item !bs !off0 = do
  (item_len, off1) <- get_compact bs off0
  get_sized_bytes bs off1 item_len

-- | Decode multiple items using a decoder function.
-- Returns (list of items, new_offset).
--
-- Fails if any single decode fails. A negative count also fails: callers
-- narrow an untrusted 'Word64' count to 'Int', so a negative value means
-- the count wrapped and must not be read as "zero items".
get_many :: (BS.ByteString -> Int -> Maybe (a, Int))
         -> BS.ByteString -> Int -> Int -> Maybe ([a], Int)
get_many getter !bs = go []
  where
    go !acc !off !n
      | n < 0     = Nothing
      | n == 0    = Just (reverse acc, off)
      | otherwise = do
          (item, off') <- getter bs off
          go (item : acc) off' (n - 1)
{-# INLINE get_many #-}

-- txid ------------------------------------------------------------------------

-- | Compute the transaction ID (double SHA256 of legacy serialisation).
diff --git a/plans/IMPL1.md b/plans/IMPL1.md @@ -0,0 +1,208 @@ +# IMPL1 - Core Types, Serialisation, and TxId + +## Goal + +Implement core transaction types, binary serialisation (legacy and segwit +formats), and txid computation. + +## Scope + +- `Bitcoin.Prim.Tx` module: types and serialisation +- CompactSize (varint) encoding/decoding +- Legacy and segwit tx formats +- TxId computation via double SHA256 + +## Types + +Types are already defined in skeleton. Key points: + +- `TxId`: 32-byte ByteString (stored as-is, displayed reversed per convention) +- `OutPoint`: TxId + Word32 vout +- `TxIn`: OutPoint + scriptSig + sequence +- `TxOut`: Word64 value + scriptPubKey +- `Witness`: list of stack items (ByteStrings) +- `Tx`: version + inputs + outputs + witnesses + locktime + +## CompactSize Encoding + +Internal helpers for Bitcoin's variable-length integer format: + +```haskell +-- | Encode a Word64 as compactSize. +put_compact :: Word64 -> BS.ByteString + +-- | Decode compactSize, returning (value, bytes_consumed). 
+get_compact :: BS.ByteString -> Maybe (Word64, Int) +``` + +Encoding rules: +- 0x00-0xfc: 1 byte (value itself) +- 0xfd-0xffff: 0xfd ++ 2 bytes LE +- 0x10000-0xffffffff: 0xfe ++ 4 bytes LE +- larger: 0xff ++ 8 bytes LE + +## Serialisation Implementation + +### Encoding (to_bytes) + +Build output via `Data.ByteString.Builder` or direct unsafe writes: + +``` +to_bytes tx: + if has_witnesses tx: + put_word32_le version + put_byte 0x00 -- marker + put_byte 0x01 -- flag + put_compact (length inputs) + for each input: put_txin + put_compact (length outputs) + for each output: put_txout + for each witness: put_witness + put_word32_le locktime + else: + put_word32_le version + put_compact (length inputs) + for each input: put_txin + put_compact (length outputs) + for each output: put_txout + put_word32_le locktime +``` + +Component encoders: +```haskell +put_txin :: TxIn -> Builder + -- outpoint (32 + 4 bytes) + scriptSig (compact + bytes) + sequence (4) + +put_txout :: TxOut -> Builder + -- value (8 bytes LE) + scriptPubKey (compact + bytes) + +put_witness :: Witness -> Builder + -- compact count + for each item: compact len + bytes +``` + +### Decoding (from_bytes) + +Parse with explicit offset tracking or a simple parser state: + +``` +from_bytes bs: + version <- get_word32_le + peek next byte: + if 0x00 and following byte is 0x01: + skip marker/flag + parse as segwit + else: + parse as legacy + + -- segwit parse: + input_count <- get_compact + inputs <- replicateM input_count get_txin + output_count <- get_compact + outputs <- replicateM output_count get_txout + witnesses <- replicateM input_count get_witness + locktime <- get_word32_le + + -- legacy parse: + input_count <- get_compact + inputs <- replicateM input_count get_txin + output_count <- get_compact + outputs <- replicateM output_count get_txout + locktime <- get_word32_le + witnesses = [] +``` + +Component decoders: +```haskell +get_txin :: Parser TxIn +get_txout :: Parser TxOut +get_witness :: Parser 
Witness +``` + +### Legacy Serialisation + +```haskell +to_bytes_legacy :: Tx -> BS.ByteString + -- Always legacy format (no marker/flag/witnesses) + -- Used for txid computation +``` + +## TxId Computation + +```haskell +txid :: Tx -> TxId +txid tx = TxId (SHA256.hash (SHA256.hash (to_bytes_legacy tx))) +``` + +The result is the raw 32-byte hash. Display convention (reversed hex) is +separate from storage. + +## Internal Helpers + +Little-endian word encoding/decoding: + +```haskell +put_word32_le :: Word32 -> Builder +put_word64_le :: Word64 -> Builder +get_word32_le :: BS.ByteString -> Int -> Maybe Word32 +get_word64_le :: BS.ByteString -> Int -> Maybe Word64 +``` + +Use `Data.Bits` shifts or `Foreign.Storable` with explicit byte order. + +## Work Items + +### Phase 1: Encoding (independent) + +1. Implement `put_compact` (compactSize encoding) +2. Implement `put_word32_le`, `put_word64_le` +3. Implement `put_txin`, `put_txout`, `put_witness` +4. Implement `to_bytes` and `to_bytes_legacy` + +### Phase 2: Decoding (independent of Phase 1) + +1. Implement `get_compact` (compactSize decoding) +2. Implement `get_word32_le`, `get_word64_le` +3. Implement `get_txin`, `get_txout`, `get_witness` +4. Implement `from_bytes` with format detection + +### Phase 3: TxId (depends on Phase 1) + +1. Implement `txid` using ppad-sha256 + +### Phase 4: Base16 wrappers + +1. `to_base16` wraps `to_bytes` with B16.encode +2. `from_base16` decodes hex then calls `from_bytes` + +## Tests + +- Round-trip: `from_bytes (to_bytes tx) == Just tx` +- Known vectors: parse real Bitcoin transactions, verify txid +- Edge cases: empty inputs/outputs, max-size compactSize values +- Legacy vs segwit format detection + +## Test Vectors + +### Simple legacy tx (1 input, 1 output) + +Use a known mainnet transaction, e.g., the pizza transaction or a +simple testnet tx with known txid. 
+ +### Segwit tx (P2WPKH) + +Parse a native segwit transaction, verify witnesses preserved, verify +txid matches (should exclude witnesses). + +### Sources + +- BIP143 test vectors (have full tx hex + expected sighash) +- Bitcoin Core tx_valid.json +- Manually hex-dump transactions from block explorers + +## Notes + +- All integers are little-endian except where noted +- TxId is stored in natural byte order (not display order) +- Witnesses list length must equal inputs list length for segwit +- Empty witness list indicates legacy transaction +- CompactSize must use minimal encoding (enforced on decode)