commit b8a7cd8dfb9b5cc032fd3ebb6bd1128f6125d51f
parent 6d87c2c5657bdc585bc1032482f1493f7e9ed5a0
Author: Jared Tobin <jared@jtobin.io>
Date: Tue, 10 Feb 2026 13:06:18 +0400
feat: implement inter-block taint propagation
Adds worklist-based forward dataflow analysis over the CFG, eliminating
the intra-block limitation. Taint now flows across basic block
boundaries via fixpoint iteration.
Changes:
- CFG.hs: add blockSuccessors with fallthrough edge computation
- CFG.hs: exclude Bl from successorLabels (calls are interprocedural)
- Taint.hs: add joinTaintState, runDataflow, setTaintLoad
- Taint.hs: add X22 (HpLim) to public roots
- Check.hs: use dataflow IN states instead of fresh initTaintState
- test/Main.hs: add cross-block taint propagation tests
Public roots are now preserved across loads (GHC spills/restores them).
On Curve.s (51k lines): violations reduced from 434 to 146 (~66%).
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Diffstat:
5 files changed, 164 insertions(+), 25 deletions(-)
diff --git a/README.md b/README.md
@@ -27,8 +27,8 @@ $ auditor -i input.s
Sample output:
```
-LBB0_4:42: unknown base register x22
-Lloh5:62: unknown base register x8
+LBB0_4:42: unknown base register x8
+Lloh5:62: unknown base register x9
Lines checked: 202
Memory accesses: 37
@@ -41,12 +41,11 @@ Use `-q` for quiet mode (violations only) or `-j` for JSON output.
This is an early-stage tool with known limitations:
-- **Intra-block analysis only**: Taint doesn't propagate across basic
- block boundaries, leading to false positives at block entry points
- **No inter-procedural analysis**: Function calls reset taint state
+ for caller-saved registers; callee behaviour is not analyzed
- **Conservative**: Unknown taint is treated as potentially secret
-These limitations mean the tool will over-report violations. Manual
+These limitations mean the tool may over-report violations. Manual
review of flagged accesses is required.
## Development
diff --git a/lib/Audit/AArch64/CFG.hs b/lib/Audit/AArch64/CFG.hs
@@ -14,6 +14,7 @@ module Audit.AArch64.CFG (
, CFG(..)
, buildCFG
, blockLabels
+ , blockSuccessors
) where
import Audit.AArch64.Types
@@ -108,12 +109,14 @@ isTerminator instr = case instr of
_ -> False
-- | Extract successor labels from a terminating instruction.
+-- Note: Bl/Blr are NOT included as successors; calls are interprocedural
+-- boundaries. Fallthrough after call is handled by hasFallthrough.
successorLabels :: Maybe Instr -> [Text]
successorLabels Nothing = []
successorLabels (Just instr) = case instr of
B lbl -> [lbl]
- BCond _ lbl -> [lbl] -- Conditional also falls through
- Bl lbl -> [lbl] -- Call returns, so also falls through
+ BCond _ lbl -> [lbl] -- Plus fallthrough
+ Bl _ -> [] -- Call; fallthrough handled separately
Blr _ -> [] -- Indirect call
Br _ -> [] -- Indirect branch
Ret _ -> [] -- Return
@@ -122,3 +125,35 @@ successorLabels (Just instr) = case instr of
Tbz _ _ lbl -> [lbl] -- Plus fallthrough
Tbnz _ _ lbl -> [lbl] -- Plus fallthrough
_ -> []
+
+-- | Get successor block indices for a given block.
+-- Includes both jump targets (resolved via label map) and fallthrough edges.
+blockSuccessors :: CFG -> Int -> [Int]
+blockSuccessors cfg idx
+ | idx < 0 || idx >= length (cfgBlocks cfg) = []
+ | otherwise =
+ let blocks = cfgBlocks cfg
+ bb = blocks !! idx
+ lmap = cfgLabelMap cfg
+ -- Jump targets from labels
+ jumpTargets = [ i | lbl <- bbSuccs bb
+ , Just i <- [Map.lookup lbl lmap] ]
+ -- Fallthrough if this block doesn't end with unconditional transfer
+ fallthrough = if hasFallthrough bb && idx + 1 < length blocks
+ then [idx + 1]
+ else []
+ in jumpTargets ++ fallthrough
+
+-- | Check if a block falls through to the next block.
+hasFallthrough :: BasicBlock -> Bool
+hasFallthrough bb = case lastInstrOf bb of
+ Nothing -> True -- No terminator, falls through
+ Just instr -> case instr of
+ B _ -> False -- Unconditional branch
+ Br _ -> False -- Indirect branch
+ Ret _ -> False -- Return
+ _ -> True -- Conditional branches, calls fall through
+ where
+ lastInstrOf b = case bbLines b of
+ [] -> Nothing
+ ls -> lineInstr (last ls)
diff --git a/lib/Audit/AArch64/Check.hs b/lib/Audit/AArch64/Check.hs
@@ -22,6 +22,7 @@ module Audit.AArch64.Check (
import Audit.AArch64.CFG
import Audit.AArch64.Taint
import Audit.AArch64.Types
+import qualified Data.IntMap.Strict as IM
import Data.Text (Text)
-- | Result of auditing assembly.
@@ -61,14 +62,18 @@ checkBlock sym st0 lns = go mempty st0 lns
st' = analyzeLine l st
in go (acc <> result) st' ls
--- | Check entire CFG.
--- Uses a simple forward pass (no fixpoint iteration for now).
+-- | Check entire CFG with inter-block dataflow.
+-- Runs fixpoint dataflow to propagate taint across blocks.
checkCFG :: Text -> CFG -> AuditResult
-checkCFG sym cfg = mconcat
- [ fst (checkBlock blockSym initTaintState (bbLines bb))
- | bb <- cfgBlocks cfg
- , let blockSym = maybe sym id (bbLabel bb)
- ]
+checkCFG sym cfg =
+ let inStates = runDataflow cfg
+ blocks = cfgBlocks cfg
+ in mconcat
+ [ fst (checkBlock blockSym inState (bbLines bb))
+ | (idx, bb) <- zip [0..] blocks
+ , let blockSym = maybe sym id (bbLabel bb)
+ inState = IM.findWithDefault initTaintState idx inStates
+ ]
-- | Extract memory address from a load/store instruction.
getMemoryAccess :: Instr -> Maybe AddrMode
diff --git a/lib/Audit/AArch64/Taint.hs b/lib/Audit/AArch64/Taint.hs
@@ -21,9 +21,16 @@ module Audit.AArch64.Taint (
, analyzeBlock
, getTaint
, publicRoots
+ , joinTaintState
+ , runDataflow
) where
+import Audit.AArch64.CFG
import Audit.AArch64.Types
+import Data.IntMap.Strict (IntMap)
+import qualified Data.IntMap.Strict as IM
+import Data.IntSet (IntSet)
+import qualified Data.IntSet as IS
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as Map
@@ -46,10 +53,11 @@ type TaintState = Map Reg Taint
publicRoots :: [Reg]
publicRoots =
[ SP, X29 -- Hardware stack/frame pointers
- , X19 -- GHC Base
+ , X19 -- GHC BaseReg (capabilities/TSO pointer)
, X20 -- GHC Sp (STG stack pointer)
, X21 -- GHC Hp (heap pointer)
- , X28 -- GHC SpLim
+ , X22 -- GHC HpLim (heap limit)
+ , X28 -- GHC SpLim (stack limit)
, X18 -- TLS (Darwin platform register)
, XZR, WZR -- Zero registers
]
@@ -129,15 +137,15 @@ transfer instr st = case instr of
Adrp dst _ -> setTaint dst Public st
-- Loads: destination becomes Unknown (memory content is unknown)
- -- This is conservative; could refine if we track memory regions
- Ldr dst _ -> setTaint dst Unknown st
- Ldrb dst _ -> setTaint dst Unknown st
- Ldrh dst _ -> setTaint dst Unknown st
- Ldrsb dst _ -> setTaint dst Unknown st
- Ldrsh dst _ -> setTaint dst Unknown st
- Ldrsw dst _ -> setTaint dst Unknown st
- Ldur dst _ -> setTaint dst Unknown st
- Ldp dst1 dst2 _ -> setTaint dst1 Unknown (setTaint dst2 Unknown st)
+ -- Exception: public roots stay public (GHC spills/restores them)
+ Ldr dst _ -> setTaintLoad dst st
+ Ldrb dst _ -> setTaintLoad dst st
+ Ldrh dst _ -> setTaintLoad dst st
+ Ldrsb dst _ -> setTaintLoad dst st
+ Ldrsh dst _ -> setTaintLoad dst st
+ Ldrsw dst _ -> setTaintLoad dst st
+ Ldur dst _ -> setTaintLoad dst st
+ Ldp dst1 dst2 _ -> setTaintLoad dst1 (setTaintLoad dst2 st)
-- Stores: no destination register change
Str _ _ -> st
@@ -179,6 +187,16 @@ transfer instr st = case instr of
setTaint :: Reg -> Taint -> TaintState -> TaintState
setTaint = Map.insert
+-- | Set taint for a loaded value, preserving public roots.
+-- Public roots (SP, X19-X21, X28, etc.) stay public even when loaded
+-- since GHC spills/restores them from the hardware stack.
+setTaintLoad :: Reg -> TaintState -> TaintState
+setTaintLoad dst st
+ | isPublicRoot dst = setTaint dst Public st
+ | otherwise = setTaint dst Unknown st
+ where
+ isPublicRoot r = r `elem` publicRoots
+
-- | Get taint of an operand.
operandTaint :: Operand -> TaintState -> Taint
operandTaint op st = case op of
@@ -219,3 +237,51 @@ invalidateCallerSaved st = foldr (\r -> Map.insert r Unknown) st callerSaved
, X8, X9, X10, X11, X12, X13, X14, X15
, X16, X17
]
+
+-- | Join two taint states (element-wise join).
+-- For registers in both, take the join. For registers in only one, keep.
+joinTaintState :: TaintState -> TaintState -> TaintState
+joinTaintState = Map.unionWith joinTaint
+
+-- | Run forward dataflow analysis over a CFG.
+-- Returns the IN taint state for each block (indexed by block number).
+runDataflow :: CFG -> IntMap TaintState
+runDataflow cfg
+ | null (cfgBlocks cfg) = IM.empty
+ | otherwise = go initWorklist initIn initOut
+ where
+ blocks = cfgBlocks cfg
+ nBlocks = length blocks
+
+ -- Initial states: all blocks start with public roots (GHC invariant)
+ initIn = IM.fromList [(i, initTaintState) | i <- [0..nBlocks-1]]
+ initOut = IM.empty
+ initWorklist = IS.fromList [0..nBlocks-1]
+
+ go :: IntSet -> IntMap TaintState -> IntMap TaintState -> IntMap TaintState
+ go worklist inStates outStates
+ | IS.null worklist = inStates
+ | otherwise =
+ let (idx, worklist') = IS.deleteFindMin worklist
+ bb = blocks !! idx
+ inState = IM.findWithDefault initTaintState idx inStates
+ outState = analyzeBlock (bbLines bb) inState
+ oldOut = IM.lookup idx outStates
+ changed = oldOut /= Just outState
+ outStates' = IM.insert idx outState outStates
+ succs = blockSuccessors cfg idx
+ (worklist'', inStates') = if changed
+ then propagateToSuccs succs outState worklist' inStates
+ else (worklist', inStates)
+ in go worklist'' inStates' outStates'
+
+ propagateToSuccs :: [Int] -> TaintState -> IntSet -> IntMap TaintState
+ -> (IntSet, IntMap TaintState)
+ propagateToSuccs [] _ wl ins = (wl, ins)
+ propagateToSuccs (s:ss) out wl ins =
+ let oldIn = IM.findWithDefault Map.empty s ins
+ newIn = joinTaintState oldIn out
+ changed = oldIn /= newIn
+ ins' = IM.insert s newIn ins
+ wl' = if changed then IS.insert s wl else wl
+ in propagateToSuccs ss out wl' ins'
diff --git a/test/Main.hs b/test/Main.hs
@@ -177,4 +177,38 @@ auditTests = testGroup "Audit" [
case audit "test" src of
Left e -> assertFailure $ "parse failed: " ++ show e
Right ar -> assertEqual "no violations" 0 (length (arViolations ar))
+
+ , testCase "good: cross-block public taint" $ do
+ -- x8 is made public in block A, used in block B
+ let src = T.unlines
+ [ "blockA:"
+ , " adrp x8, _const@PAGE"
+ , " cbz x0, blockB"
+ , "blockB:"
+ , " ldr x1, [x8]" -- x8 should be known public from block A
+ , " ret"
+ ]
+ case audit "test" src of
+ Left e -> assertFailure $ "parse failed: " ++ show e
+ Right ar -> assertEqual "no violations" 0 (length (arViolations ar))
+
+ , testCase "bad: cross-block unknown taint" $ do
+ -- x8 is loaded (unknown) in block A, used as base in block B
+ let src = T.unlines
+ [ "blockA:"
+ , " ldr x8, [x20]" -- x8 becomes unknown
+ , " cbz x0, blockB"
+ , "blockB:"
+ , " ldr x1, [x8]" -- x8 as base should trigger violation
+ , " ret"
+ ]
+ case audit "test" src of
+ Left e -> assertFailure $ "parse failed: " ++ show e
+ Right ar -> do
+ assertEqual "one violation" 1 (length (arViolations ar))
+ case arViolations ar of
+ [v] -> case vReason v of
+ UnknownBase X8 -> pure ()
+ other -> assertFailure $ "wrong reason: " ++ show other
+ _ -> assertFailure "expected one violation"
]