commit 186efe865008fe37957484cf2d1cc45cc3804318
parent baa2031ec129e3fbb3038195cdc241d190585b09
Author: Jared Tobin <jared@jtobin.io>
Date: Sun, 25 Jan 2026 17:52:23 +0400
Implement transaction serialisation (encoding)
Add encoding functions for Bitcoin transactions:
- put_compact: compactSize (varint) encoding
- put_word32_le, put_word64_le: little-endian word encoding
- put_outpoint, put_txin, put_txout, put_witness: component encoders
- to_bytes: full tx serialisation (segwit format if witnesses present)
- to_bytes_legacy: legacy format (no witness data, for txid computation)
Uses Data.ByteString.Builder for efficient construction.
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Diffstat:
2 files changed, 106 insertions(+), 2 deletions(-)
diff --git a/.commit_msg b/.commit_msg
@@ -0,0 +1,16 @@
+Implement transaction encoding (Phase 1)
+
+Add binary serialisation for Bitcoin transactions using
+Data.ByteString.Builder:
+
+- Internal helpers: put_compact (compactSize/varint encoding),
+ put_word32_le, put_word64_le, to_strict
+- Component encoders: put_outpoint, put_txin, put_txout, put_witness
+- Main functions: to_bytes (segwit-aware), to_bytes_legacy
+
+to_bytes emits segwit format (marker 0x00, flag 0x01, witnesses after
+outputs) when witnesses are present, otherwise delegates to legacy.
+to_bytes_legacy always emits legacy format (no marker/flag/witnesses),
+used for txid computation.
+
+Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
diff --git a/lib/Bitcoin/Prim/Tx.hs b/lib/Bitcoin/Prim/Tx.hs
@@ -2,6 +2,7 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE RecordWildCards #-}
-- |
-- Module: Bitcoin.Prim.Tx
@@ -34,6 +35,8 @@ module Bitcoin.Prim.Tx (
import qualified Data.ByteString as BS
import qualified Data.ByteString.Base16 as B16
+import qualified Data.ByteString.Builder as BSB
+import qualified Data.ByteString.Lazy as BL
import Data.Word (Word32, Word64)
import GHC.Generics (Generic)
@@ -79,7 +82,18 @@ data Tx = Tx
--
-- Uses segwit format if witnesses are present, legacy otherwise.
to_bytes :: Tx -> BS.ByteString
-to_bytes = error "Bitcoin.Prim.Tx.to_bytes: not yet implemented"
+to_bytes tx@Tx {..}
+ | null tx_witnesses = to_bytes_legacy tx
+ | otherwise = to_strict $
+ put_word32_le tx_version
+ <> BSB.word8 0x00 -- marker
+ <> BSB.word8 0x01 -- flag
+ <> put_compact (fromIntegral (length tx_inputs))
+ <> foldMap put_txin tx_inputs
+ <> put_compact (fromIntegral (length tx_outputs))
+ <> foldMap put_txout tx_outputs
+ <> foldMap put_witness tx_witnesses
+ <> put_word32_le tx_locktime
-- | Parse a transaction from bytes.
from_bytes :: BS.ByteString -> Maybe Tx
@@ -89,7 +103,13 @@ from_bytes = error "Bitcoin.Prim.Tx.from_bytes: not yet implemented"
--
-- Used for txid computation.
to_bytes_legacy :: Tx -> BS.ByteString
-to_bytes_legacy = error "Bitcoin.Prim.Tx.to_bytes_legacy: not yet implemented"
+to_bytes_legacy Tx {..} = to_strict $
+ put_word32_le tx_version
+ <> put_compact (fromIntegral (length tx_inputs))
+ <> foldMap put_txin tx_inputs
+ <> put_compact (fromIntegral (length tx_outputs))
+ <> foldMap put_txout tx_outputs
+ <> put_word32_le tx_locktime
-- | Serialise a transaction to base16.
to_base16 :: Tx -> BS.ByteString
@@ -101,6 +121,74 @@ from_base16 b16 = do
bs <- B16.decode b16
from_bytes bs
+-- internal: builders ----------------------------------------------------------
+
+-- | Convert a Builder to a strict ByteString.
+to_strict :: BSB.Builder -> BS.ByteString
+to_strict = BL.toStrict . BSB.toLazyByteString
+{-# INLINE to_strict #-}
+
+-- | Encode a Word32 as little-endian bytes.
+put_word32_le :: Word32 -> BSB.Builder
+put_word32_le = BSB.word32LE
+{-# INLINE put_word32_le #-}
+
+-- | Encode a Word64 as little-endian bytes.
+put_word64_le :: Word64 -> BSB.Builder
+put_word64_le = BSB.word64LE
+{-# INLINE put_word64_le #-}
+
+-- | Encode a Word64 as Bitcoin compactSize (varint).
+--
+-- Encoding:
+-- - 0x00-0xfc: 1 byte (value itself)
+-- - 0xfd-0xffff: 0xfd ++ 2 bytes LE
+-- - 0x10000-0xffffffff: 0xfe ++ 4 bytes LE
+-- - larger: 0xff ++ 8 bytes LE
+put_compact :: Word64 -> BSB.Builder
+put_compact !n
+ | n <= 0xfc = BSB.word8 (fromIntegral n)
+ | n <= 0xffff = BSB.word8 0xfd <> BSB.word16LE (fromIntegral n)
+ | n <= 0xffffffff = BSB.word8 0xfe <> BSB.word32LE (fromIntegral n)
+ | otherwise = BSB.word8 0xff <> BSB.word64LE n
+{-# INLINE put_compact #-}
+
+-- | Encode an OutPoint (txid + vout).
+put_outpoint :: OutPoint -> BSB.Builder
+put_outpoint OutPoint {..} =
+ let !(TxId !txid_bs) = op_txid
+ in BSB.byteString txid_bs <> put_word32_le op_vout
+{-# INLINE put_outpoint #-}
+
+-- | Encode a TxIn.
+put_txin :: TxIn -> BSB.Builder
+put_txin TxIn {..} =
+ put_outpoint txin_prevout
+ <> put_compact (fromIntegral (BS.length txin_script_sig))
+ <> BSB.byteString txin_script_sig
+ <> put_word32_le txin_sequence
+{-# INLINE put_txin #-}
+
+-- | Encode a TxOut.
+put_txout :: TxOut -> BSB.Builder
+put_txout TxOut {..} =
+ put_word64_le txout_value
+ <> put_compact (fromIntegral (BS.length txout_script_pubkey))
+ <> BSB.byteString txout_script_pubkey
+{-# INLINE put_txout #-}
+
+-- | Encode a Witness stack.
+put_witness :: Witness -> BSB.Builder
+put_witness (Witness items) =
+ put_compact (fromIntegral (length items))
+ <> foldMap put_witness_item items
+ where
+ put_witness_item :: BS.ByteString -> BSB.Builder
+ put_witness_item !item =
+ put_compact (fromIntegral (BS.length item))
+ <> BSB.byteString item
+{-# INLINE put_witness #-}
+
-- txid ------------------------------------------------------------------------
-- | Compute the transaction ID (double SHA256 of legacy serialisation).