tx

Minimal transaction primitives (docs.ppad.tech/tx).
git clone git://git.ppad.tech/tx.git
Log | Files | Refs | README | LICENSE

Tx.hs (16203B)


      1 {-# OPTIONS_HADDOCK prune #-}
      2 {-# LANGUAGE BangPatterns #-}
      3 {-# LANGUAGE DeriveGeneric #-}
      4 {-# LANGUAGE OverloadedStrings #-}
      5 {-# LANGUAGE RecordWildCards #-}
      6 
      7 -- |
      8 -- Module: Bitcoin.Prim.Tx
      9 -- Copyright: (c) 2025 Jared Tobin
     10 -- License: MIT
     11 -- Maintainer: Jared Tobin <jared@ppad.tech>
     12 --
     13 -- Minimal Bitcoin transaction primitives, including raw transaction
     14 -- types, serialisation to/from bytes, and txid computation.
     15 
     16 module Bitcoin.Prim.Tx (
     17     -- * Transaction Types
     18     Tx(..)
     19   , TxIn(..)
     20   , TxOut(..)
     21   , OutPoint(..)
     22   , Witness(..)
     23   , TxId(..)
     24   , mkTxId
     25 
     26     -- * Serialisation
     27   , to_bytes
     28   , from_bytes
     29   , to_bytes_legacy
     30   , to_base16
     31   , from_base16
     32 
     33     -- * TxId
     34   , txid
     35 
     36     -- * Internal (for Sighash)
     37   , put_word32_le
     38   , put_word64_le
     39   , put_compact
     40   , put_outpoint
     41   , put_txout
     42   , to_strict
     43   ) where
     44 
     45 import Control.DeepSeq (NFData(..))
     46 import qualified Crypto.Hash.SHA256 as SHA256
     47 import Data.Bits ((.|.), shiftL)
     48 import qualified Data.ByteString as BS
     49 import qualified Data.ByteString.Base16 as B16
     50 import qualified Data.ByteString.Builder as BSB
     51 import qualified Data.ByteString.Lazy as BL
     52 import Data.List.NonEmpty (NonEmpty(..))
     53 import qualified Data.List.NonEmpty as NE
     54 import Data.Word (Word32, Word64)
     55 import GHC.Generics (Generic)
     56 
     57 -- | Transaction ID (32 bytes, little-endian double-SHA256).
     58 newtype TxId = TxId BS.ByteString
     59   deriving (Eq, Show, Generic)
     60 
     61 instance NFData TxId
     62 
     63 -- | Construct a TxId from a 32-byte ByteString.
     64 --
     65 --   Returns 'Nothing' if the input is not exactly 32 bytes.
     66 --
     67 --   @
     68 --   mkTxId (BS.replicate 32 0x00) == Just (TxId ...)
     69 --   mkTxId (BS.replicate 31 0x00) == Nothing
     70 --   @
     71 mkTxId :: BS.ByteString -> Maybe TxId
     72 mkTxId bs
     73   | BS.length bs == 32 = Just (TxId bs)
     74   | otherwise          = Nothing
     75 
     76 -- | Transaction outpoint (txid + output index).
     77 data OutPoint = OutPoint
     78   { op_txid  :: {-# UNPACK #-} !TxId
     79   , op_vout  :: {-# UNPACK #-} !Word32
     80   } deriving (Eq, Show, Generic)
     81 
     82 instance NFData OutPoint
     83 
     84 -- | Transaction input.
     85 data TxIn = TxIn
     86   { txin_prevout    :: {-# UNPACK #-} !OutPoint
     87   , txin_script_sig :: !BS.ByteString
     88   , txin_sequence   :: {-# UNPACK #-} !Word32
     89   } deriving (Eq, Show, Generic)
     90 
     91 instance NFData TxIn
     92 
     93 -- | Transaction output.
     94 data TxOut = TxOut
     95   { txout_value         :: {-# UNPACK #-} !Word64  -- ^ satoshis
     96   , txout_script_pubkey :: !BS.ByteString
     97   } deriving (Eq, Show, Generic)
     98 
     99 instance NFData TxOut
    100 
    101 -- | Witness stack for a single input.
    102 newtype Witness = Witness [BS.ByteString]
    103   deriving (Eq, Show, Generic)
    104 
    105 instance NFData Witness
    106 
    107 -- | Complete transaction.
    108 --
    109 --   Bitcoin requires at least one input and one output, enforced here
    110 --   via 'NonEmpty' lists.
    111 data Tx = Tx
    112   { tx_version   :: {-# UNPACK #-} !Word32
    113   , tx_inputs    :: !(NonEmpty TxIn)
    114   , tx_outputs   :: !(NonEmpty TxOut)
    115   , tx_witnesses :: ![Witness]  -- ^ empty list for legacy tx
    116   , tx_locktime  :: {-# UNPACK #-} !Word32
    117   } deriving (Eq, Show, Generic)
    118 
    119 instance NFData Tx
    120 
    121 -- serialisation ---------------------------------------------------------------
    122 
    123 -- | Serialise a transaction to bytes.
    124 --
    125 --   Uses segwit format if witnesses are present, legacy otherwise.
    126 --
    127 --   @
    128 --   -- round-trip
    129 --   from_bytes (to_bytes tx) == Just tx
    130 --   @
    131 to_bytes :: Tx -> BS.ByteString
    132 to_bytes tx@Tx {..}
    133     | null tx_witnesses = to_bytes_legacy tx
    134     | otherwise         = to_strict $
    135            put_word32_le tx_version
    136         <> BSB.word8 0x00  -- marker
    137         <> BSB.word8 0x01  -- flag
    138         <> put_compact (fromIntegral (NE.length tx_inputs))
    139         <> foldMap put_txin tx_inputs
    140         <> put_compact (fromIntegral (NE.length tx_outputs))
    141         <> foldMap put_txout tx_outputs
    142         <> foldMap put_witness tx_witnesses
    143         <> put_word32_le tx_locktime
    144 
    145 -- | Serialise a transaction to legacy format (no witness data).
    146 --
    147 --   Used for txid computation. Excludes witness data even if present.
    148 --
    149 --   @
    150 --   -- for legacy tx (no witnesses), same as to_bytes
    151 --   to_bytes_legacy legacyTx == to_bytes legacyTx
    152 --
    153 --   -- for segwit tx, strips witnesses
    154 --   BS.length (to_bytes_legacy segwitTx) < BS.length (to_bytes segwitTx)
    155 --   @
    156 to_bytes_legacy :: Tx -> BS.ByteString
    157 to_bytes_legacy Tx {..} = to_strict $
    158        put_word32_le tx_version
    159     <> put_compact (fromIntegral (NE.length tx_inputs))
    160     <> foldMap put_txin tx_inputs
    161     <> put_compact (fromIntegral (NE.length tx_outputs))
    162     <> foldMap put_txout tx_outputs
    163     <> put_word32_le tx_locktime
    164 
    165 -- | Serialise a transaction to base16 (hex).
    166 --
    167 --   @
    168 --   to_base16 tx = B16.encode (to_bytes tx)
    169 --   @
    170 to_base16 :: Tx -> BS.ByteString
    171 to_base16 tx = B16.encode (to_bytes tx)
    172 
    173 -- | Parse a transaction from base16 (hex).
    174 --
    175 --   @
    176 --   -- round-trip
    177 --   from_base16 (to_base16 tx) == Just tx
    178 --   @
    179 from_base16 :: BS.ByteString -> Maybe Tx
    180 from_base16 b16 = do
    181   bs <- B16.decode b16
    182   from_bytes bs
    183 
    184 -- internal: builders ----------------------------------------------------------
    185 
    186 -- | Convert a Builder to a strict ByteString.
    187 to_strict :: BSB.Builder -> BS.ByteString
    188 to_strict = BL.toStrict . BSB.toLazyByteString
    189 {-# INLINE to_strict #-}
    190 
    191 -- | Encode a Word32 as little-endian bytes.
    192 put_word32_le :: Word32 -> BSB.Builder
    193 put_word32_le = BSB.word32LE
    194 {-# INLINE put_word32_le #-}
    195 
    196 -- | Encode a Word64 as little-endian bytes.
    197 put_word64_le :: Word64 -> BSB.Builder
    198 put_word64_le = BSB.word64LE
    199 {-# INLINE put_word64_le #-}
    200 
    201 -- | Encode a Word64 as Bitcoin compactSize (varint).
    202 --
    203 --   Encoding:
    204 --   - 0x00-0xfc: 1 byte (value itself)
    205 --   - 0xfd-0xffff: 0xfd ++ 2 bytes LE
    206 --   - 0x10000-0xffffffff: 0xfe ++ 4 bytes LE
    207 --   - larger: 0xff ++ 8 bytes LE
    208 put_compact :: Word64 -> BSB.Builder
    209 put_compact !n
    210     | n <= 0xfc       = BSB.word8 (fromIntegral n)
    211     | n <= 0xffff     = BSB.word8 0xfd <> BSB.word16LE (fromIntegral n)
    212     | n <= 0xffffffff = BSB.word8 0xfe <> BSB.word32LE (fromIntegral n)
    213     | otherwise       = BSB.word8 0xff <> BSB.word64LE n
    214 {-# INLINE put_compact #-}
    215 
    216 -- | Encode an OutPoint (txid + vout).
    217 put_outpoint :: OutPoint -> BSB.Builder
    218 put_outpoint OutPoint {..} =
    219     let !(TxId !txid_bs) = op_txid
    220     in  BSB.byteString txid_bs <> put_word32_le op_vout
    221 {-# INLINE put_outpoint #-}
    222 
    223 -- | Encode a TxIn.
    224 put_txin :: TxIn -> BSB.Builder
    225 put_txin TxIn {..} =
    226        put_outpoint txin_prevout
    227     <> put_compact (fromIntegral (BS.length txin_script_sig))
    228     <> BSB.byteString txin_script_sig
    229     <> put_word32_le txin_sequence
    230 {-# INLINE put_txin #-}
    231 
    232 -- | Encode a TxOut.
    233 put_txout :: TxOut -> BSB.Builder
    234 put_txout TxOut {..} =
    235        put_word64_le txout_value
    236     <> put_compact (fromIntegral (BS.length txout_script_pubkey))
    237     <> BSB.byteString txout_script_pubkey
    238 {-# INLINE put_txout #-}
    239 
    240 -- | Encode a Witness stack.
    241 put_witness :: Witness -> BSB.Builder
    242 put_witness (Witness items) =
    243        put_compact (fromIntegral (length items))
    244     <> foldMap put_witness_item items
    245   where
    246     put_witness_item :: BS.ByteString -> BSB.Builder
    247     put_witness_item !item =
    248            put_compact (fromIntegral (BS.length item))
    249         <> BSB.byteString item
    250 {-# INLINE put_witness #-}
    251 
    252 -- decoding --------------------------------------------------------------------
    253 
    254 -- | Parse a transaction from bytes.
    255 --
    256 --   Automatically detects segwit vs legacy format by checking for
    257 --   marker byte 0x00 followed by flag 0x01 after the version field.
    258 --
    259 --   Returns 'Nothing' on invalid or truncated input.
    260 --
    261 --   @
    262 --   -- round-trip
    263 --   from_bytes (to_bytes tx) == Just tx
    264 --   @
    265 from_bytes :: BS.ByteString -> Maybe Tx
    266 from_bytes !bs = do
    267   -- need at least 4 bytes for version
    268   guard (BS.length bs >= 4)
    269   let !version = get_word32_le bs 0
    270       !off0 = 4
    271   -- check for segwit marker (0x00) and flag (0x01)
    272   if   BS.length bs > off0 + 1
    273     && BS.index bs off0 == 0x00
    274     && BS.index bs (off0 + 1) == 0x01
    275   then parse_segwit bs version (off0 + 2)
    276   else parse_legacy bs version off0
    277 
    278 -- Parse legacy transaction (no witness data)
    279 parse_legacy :: BS.ByteString -> Word32 -> Int -> Maybe Tx
    280 parse_legacy !bs !version !off0 = do
    281   -- input count
    282   (input_count, off1) <- get_compact bs off0
    283   -- inputs (must have at least one)
    284   (inputs_list, off2) <- get_many get_txin bs off1 (fromIntegral input_count)
    285   inputs <- NE.nonEmpty inputs_list
    286   -- output count
    287   (output_count, off3) <- get_compact bs off2
    288   -- outputs (must have at least one)
    289   (outputs_list, off4) <- get_many get_txout bs off3 (fromIntegral output_count)
    290   outputs <- NE.nonEmpty outputs_list
    291   -- locktime (4 bytes)
    292   guard (BS.length bs >= off4 + 4)
    293   let !locktime = get_word32_le bs off4
    294       !off5 = off4 + 4
    295   -- should have consumed all bytes
    296   guard (off5 == BS.length bs)
    297   pure $! Tx version inputs outputs [] locktime
    298 
    299 -- Parse segwit transaction (with witness data)
    300 parse_segwit :: BS.ByteString -> Word32 -> Int -> Maybe Tx
    301 parse_segwit !bs !version !off0 = do
    302   -- input count
    303   (input_count, off1) <- get_compact bs off0
    304   -- inputs (must have at least one)
    305   (inputs_list, off2) <- get_many get_txin bs off1 (fromIntegral input_count)
    306   inputs <- NE.nonEmpty inputs_list
    307   -- output count
    308   (output_count, off3) <- get_compact bs off2
    309   -- outputs (must have at least one)
    310   (outputs_list, off4) <- get_many get_txout bs off3 (fromIntegral output_count)
    311   outputs <- NE.nonEmpty outputs_list
    312   -- witnesses (one per input)
    313   (witnesses, off5) <- get_many get_witness bs off4 (fromIntegral input_count)
    314   -- locktime (4 bytes)
    315   guard (BS.length bs >= off5 + 4)
    316   let !locktime = get_word32_le bs off5
    317       !off6 = off5 + 4
    318   -- should have consumed all bytes
    319   guard (off6 == BS.length bs)
    320   pure $! Tx version inputs outputs witnesses locktime
    321 
    322 -- internal helpers ------------------------------------------------------------
    323 
    324 -- | Guard for Maybe monad.
    325 guard :: Bool -> Maybe ()
    326 guard True  = Just ()
    327 guard False = Nothing
    328 {-# INLINE guard #-}
    329 
    330 -- | Decode a 32-bit little-endian word at the given offset.
    331 --   Does not bounds-check; caller must ensure sufficient bytes.
    332 get_word32_le :: BS.ByteString -> Int -> Word32
    333 get_word32_le !bs !off =
    334   let !b0 = fromIntegral (BS.index bs off) :: Word32
    335       !b1 = fromIntegral (BS.index bs (off + 1)) :: Word32
    336       !b2 = fromIntegral (BS.index bs (off + 2)) :: Word32
    337       !b3 = fromIntegral (BS.index bs (off + 3)) :: Word32
    338   in  b0 .|. (b1 `shiftL` 8) .|. (b2 `shiftL` 16) .|. (b3 `shiftL` 24)
    339 {-# INLINE get_word32_le #-}
    340 
    341 -- | Decode a 64-bit little-endian word at the given offset.
    342 --   Does not bounds-check; caller must ensure sufficient bytes.
    343 get_word64_le :: BS.ByteString -> Int -> Word64
    344 get_word64_le !bs !off =
    345   let !b0 = fromIntegral (BS.index bs off) :: Word64
    346       !b1 = fromIntegral (BS.index bs (off + 1)) :: Word64
    347       !b2 = fromIntegral (BS.index bs (off + 2)) :: Word64
    348       !b3 = fromIntegral (BS.index bs (off + 3)) :: Word64
    349       !b4 = fromIntegral (BS.index bs (off + 4)) :: Word64
    350       !b5 = fromIntegral (BS.index bs (off + 5)) :: Word64
    351       !b6 = fromIntegral (BS.index bs (off + 6)) :: Word64
    352       !b7 = fromIntegral (BS.index bs (off + 7)) :: Word64
    353   in  b0 .|. (b1 `shiftL` 8) .|. (b2 `shiftL` 16) .|. (b3 `shiftL` 24)
    354           .|. (b4 `shiftL` 32) .|. (b5 `shiftL` 40)
    355           .|. (b6 `shiftL` 48) .|. (b7 `shiftL` 56)
    356 {-# INLINE get_word64_le #-}
    357 
    358 -- | Decode a 16-bit little-endian word at the given offset.
    359 --   Does not bounds-check; caller must ensure sufficient bytes.
    360 get_word16_le :: BS.ByteString -> Int -> Word64
    361 get_word16_le !bs !off =
    362   let !b0 = fromIntegral (BS.index bs off) :: Word64
    363       !b1 = fromIntegral (BS.index bs (off + 1)) :: Word64
    364   in  b0 .|. (b1 `shiftL` 8)
    365 {-# INLINE get_word16_le #-}
    366 
    367 -- | Decode compactSize (Bitcoin's variable-length integer).
    368 --   Returns (value, new_offset).
    369 --   Enforces minimal encoding: rejects non-minimal representations.
    370 get_compact :: BS.ByteString -> Int -> Maybe (Word64, Int)
    371 get_compact !bs !off
    372   | off >= BS.length bs = Nothing
    373   | otherwise = case BS.index bs off of
    374       tag | tag <= 0xfc ->
    375         -- Single byte: value is the tag itself
    376         Just (fromIntegral tag, off + 1)
    377 
    378       0xfd ->
    379         -- 2-byte value follows
    380         if BS.length bs < off + 3
    381         then Nothing
    382         else
    383           let !val = get_word16_le bs (off + 1)
    384           in  if val < 0xfd
    385               then Nothing  -- non-minimal encoding
    386               else Just (val, off + 3)
    387 
    388       0xfe ->
    389         -- 4-byte value follows
    390         if BS.length bs < off + 5
    391         then Nothing
    392         else
    393           let !val = fromIntegral (get_word32_le bs (off + 1)) :: Word64
    394           in  if val <= 0xffff
    395               then Nothing  -- non-minimal encoding
    396               else Just (val, off + 5)
    397 
    398       _ -> -- 0xff
    399         -- 8-byte value follows
    400         if BS.length bs < off + 9
    401         then Nothing
    402         else
    403           let !val = get_word64_le bs (off + 1)
    404           in  if val <= 0xffffffff
    405               then Nothing  -- non-minimal encoding
    406               else Just (val, off + 9)
    407 {-# INLINE get_compact #-}
    408 
    409 -- | Decode an outpoint (txid + vout).
    410 --   Returns (OutPoint, new_offset).
    411 get_outpoint :: BS.ByteString -> Int -> Maybe (OutPoint, Int)
    412 get_outpoint !bs !off
    413   | BS.length bs < off + 36 = Nothing
    414   | otherwise =
    415       let !txid_bytes = BS.take 32 (BS.drop off bs)
    416           !vout = get_word32_le bs (off + 32)
    417       in  Just (OutPoint (TxId txid_bytes) vout, off + 36)
    418 {-# INLINE get_outpoint #-}
    419 
    420 -- | Decode a transaction input.
    421 --   Returns (TxIn, new_offset).
    422 get_txin :: BS.ByteString -> Int -> Maybe (TxIn, Int)
    423 get_txin !bs !off0 = do
    424   -- outpoint: 36 bytes
    425   (outpoint, off1) <- get_outpoint bs off0
    426   -- scriptSig length + bytes
    427   (script_len, off2) <- get_compact bs off1
    428   let !slen = fromIntegral script_len
    429   guard (BS.length bs >= off2 + slen)
    430   let !script_sig = BS.take slen (BS.drop off2 bs)
    431       !off3 = off2 + slen
    432   -- sequence: 4 bytes
    433   guard (BS.length bs >= off3 + 4)
    434   let !seqn = get_word32_le bs off3
    435       !off4 = off3 + 4
    436   pure (TxIn outpoint script_sig seqn, off4)
    437 
    438 -- | Decode a transaction output.
    439 --   Returns (TxOut, new_offset).
    440 get_txout :: BS.ByteString -> Int -> Maybe (TxOut, Int)
    441 get_txout !bs !off0 = do
    442   -- value: 8 bytes
    443   guard (BS.length bs >= off0 + 8)
    444   let !value = get_word64_le bs off0
    445       !off1 = off0 + 8
    446   -- scriptPubKey length + bytes
    447   (script_len, off2) <- get_compact bs off1
    448   let !slen = fromIntegral script_len
    449   guard (BS.length bs >= off2 + slen)
    450   let !script_pk = BS.take slen (BS.drop off2 bs)
    451       !off3 = off2 + slen
    452   pure (TxOut value script_pk, off3)
    453 
    454 -- | Decode a witness stack for one input.
    455 --   Returns (Witness, new_offset).
    456 get_witness :: BS.ByteString -> Int -> Maybe (Witness, Int)
    457 get_witness !bs !off0 = do
    458   -- stack item count
    459   (item_count, off1) <- get_compact bs off0
    460   -- each item: length + bytes
    461   (items, off2) <- get_many get_witness_item bs off1 (fromIntegral item_count)
    462   pure (Witness items, off2)
    463 
    464 -- | Decode a single witness stack item (length-prefixed bytes).
    465 get_witness_item :: BS.ByteString -> Int -> Maybe (BS.ByteString, Int)
    466 get_witness_item !bs !off0 = do
    467   (item_len, off1) <- get_compact bs off0
    468   let !ilen = fromIntegral item_len
    469   guard (BS.length bs >= off1 + ilen)
    470   let !item = BS.take ilen (BS.drop off1 bs)
    471   pure (item, off1 + ilen)
    472 
    473 -- | Decode multiple items using a decoder function.
    474 --   Returns (list of items, new_offset).
    475 get_many :: (BS.ByteString -> Int -> Maybe (a, Int))
    476          -> BS.ByteString -> Int -> Int -> Maybe ([a], Int)
    477 get_many getter !bs = go []
    478   where
    479     go !acc !off !n
    480       | n <= 0    = Just (reverse acc, off)
    481       | otherwise = do
    482           (item, off') <- getter bs off
    483           go (item : acc) off' (n - 1)
    484 {-# INLINE get_many #-}
    485 
    486 -- txid ------------------------------------------------------------------------
    487 
    488 -- | Compute the transaction ID (double SHA256 of legacy serialisation).
    489 --
    490 --   The txid is computed from the legacy serialisation, so segwit
    491 --   transactions have the same txid regardless of witness data.
    492 --
    493 --   @
    494 --   -- Satoshi->Hal tx (block 170)
    495 --   txid satoshiHalTx ==
    496 --     TxId "f4184fc596403b9d638783cf57adfe4c75c605f6356fbc91338530e9831e9e16"
    497 --   @
    498 txid :: Tx -> TxId
    499 txid tx = TxId (SHA256.hash (SHA256.hash (to_bytes_legacy tx)))