commit ecca408f0f28a71cfdccfd40c6e0dd32e84a539c
parent 40198feae33690aaa5d5a9a10c5a8afda01cef89
Author: Jared Tobin <jared@jtobin.io>
Date: Tue, 10 Feb 2026 14:01:15 +0400
feat: implement stack slot and GOT tracking (IMPL5)
Extends taint analysis with provenance-aware auto-suppression:
- TaintState now includes stack slot tracking (tsStack :: IntMap Taint)
- Stores to [sp, #imm] save source register taint to stack map
- Loads from [sp, #imm] restore taint from stack map
- GOT/PLT patterns (ldr from BaseSymbol/Literal) mark result as Public
Reduces Curve.s violations from 146 to 3.
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Diffstat:
2 files changed, 159 insertions(+), 28 deletions(-)
diff --git a/lib/Audit/AArch64/Taint.hs b/lib/Audit/AArch64/Taint.hs
@@ -45,8 +45,13 @@ import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
import Data.Text (Text)
--- | Taint state: maps registers to their publicness.
-type TaintState = Map Reg Taint
+-- | Taint state: maps registers to their publicness, plus stack slots.
+data TaintState = TaintState
+ { tsRegs :: !(Map Reg Taint)
+ -- ^ Register taints
+ , tsStack :: !(IntMap Taint)
+ -- ^ Stack slot taints (keyed by SP offset)
+ } deriving (Eq, Show)
-- | GHC 9.10.3 AArch64 public root registers.
--
@@ -73,13 +78,23 @@ publicRoots =
, XZR, WZR -- Zero registers
]
+-- | Empty taint state (no known taints).
+emptyTaintState :: TaintState
+emptyTaintState = TaintState
+ { tsRegs = Map.empty
+ , tsStack = IM.empty
+ }
+
-- | Initial taint state with public roots marked.
initTaintState :: TaintState
-initTaintState = Map.fromList [(r, Public) | r <- publicRoots]
+initTaintState = TaintState
+ { tsRegs = Map.fromList [(r, Public) | r <- publicRoots]
+ , tsStack = IM.empty
+ }
-- | Get the taint of a register.
getTaint :: Reg -> TaintState -> Taint
-getTaint r st = Map.findWithDefault Unknown r st
+getTaint r st = Map.findWithDefault Unknown r (tsRegs st)
-- | Analyze a single line, updating taint state.
analyzeLine :: Line -> TaintState -> TaintState
@@ -147,23 +162,23 @@ transfer instr st = case instr of
Adr dst _ -> setTaint dst Public st
Adrp dst _ -> setTaint dst Public st
- -- Loads: destination becomes Unknown (memory content is unknown)
+ -- Loads: restore from stack slots if [sp, #imm], else Unknown
-- Exception: public roots stay public (GHC spills/restores them)
- Ldr dst _ -> setTaintLoad dst st
- Ldrb dst _ -> setTaintLoad dst st
- Ldrh dst _ -> setTaintLoad dst st
- Ldrsb dst _ -> setTaintLoad dst st
- Ldrsh dst _ -> setTaintLoad dst st
- Ldrsw dst _ -> setTaintLoad dst st
- Ldur dst _ -> setTaintLoad dst st
- Ldp dst1 dst2 _ -> setTaintLoad dst1 (setTaintLoad dst2 st)
-
- -- Stores: no destination register change
- Str _ _ -> st
- Strb _ _ -> st
- Strh _ _ -> st
- Stur _ _ -> st
- Stp _ _ _ -> st
+ Ldr dst addr -> loadFromStack dst addr st
+ Ldrb dst addr -> loadFromStack dst addr st
+ Ldrh dst addr -> loadFromStack dst addr st
+ Ldrsb dst addr -> loadFromStack dst addr st
+ Ldrsh dst addr -> loadFromStack dst addr st
+ Ldrsw dst addr -> loadFromStack dst addr st
+ Ldur dst addr -> loadFromStack dst addr st
+ Ldp dst1 dst2 addr -> loadPairFromStack dst1 dst2 addr st
+
+ -- Stores: track stack slot taints for [sp, #imm] patterns
+ Str src addr -> storeToStack src addr st
+ Strb src addr -> storeToStack src addr st
+ Strh src addr -> storeToStack src addr st
+ Stur src addr -> storeToStack src addr st
+ Stp src1 src2 addr -> storePairToStack src1 src2 addr st
-- Conditionals: conservative join
Cmp _ _ -> st
@@ -196,7 +211,7 @@ transfer instr st = case instr of
-- | Set taint for a register.
setTaint :: Reg -> Taint -> TaintState -> TaintState
-setTaint = Map.insert
+setTaint r t st = st { tsRegs = Map.insert r t (tsRegs st) }
-- | Set taint for a loaded value, preserving public roots.
-- Public roots (SP, X19-X21, X28, etc.) stay public even when loaded
@@ -208,6 +223,66 @@ setTaintLoad dst st
where
isPublicRoot r = r `elem` publicRoots
+-- | Set taint for a loaded value from a known stack slot.
+-- If we have tracked taint at this offset, use it; otherwise Unknown.
+setTaintLoadStack :: Reg -> Int -> TaintState -> TaintState
+setTaintLoadStack dst offset st
+ | isPublicRoot dst = setTaint dst Public st
+ | otherwise =
+ let taint = IM.findWithDefault Unknown offset (tsStack st)
+ in setTaint dst taint st
+ where
+ isPublicRoot r = r `elem` publicRoots
+
+-- | Store taint to a stack slot.
+setStackTaint :: Int -> Taint -> TaintState -> TaintState
+setStackTaint offset t st = st { tsStack = IM.insert offset t (tsStack st) }
+
+-- | Track a store to stack if address is [sp, #imm].
+storeToStack :: Reg -> AddrMode -> TaintState -> TaintState
+storeToStack src addr st = case addr of
+ BaseImm SP imm -> setStackTaint (fromInteger imm) (getTaint src st) st
+ PreIndex SP imm -> setStackTaint (fromInteger imm) (getTaint src st) st
+ _ -> st -- Non-SP stores don't affect stack tracking
+
+-- | Track a store pair to stack if address is [sp, #imm].
+-- Stores src1 at offset and src2 at offset+8.
+storePairToStack :: Reg -> Reg -> AddrMode -> TaintState -> TaintState
+storePairToStack src1 src2 addr st = case addr of
+ BaseImm SP imm ->
+ let off = fromInteger imm
+ in setStackTaint off (getTaint src1 st)
+ (setStackTaint (off + 8) (getTaint src2 st) st)
+ PreIndex SP imm ->
+ let off = fromInteger imm
+ in setStackTaint off (getTaint src1 st)
+ (setStackTaint (off + 8) (getTaint src2 st) st)
+ _ -> st
+
+-- | Load from memory, handling special cases:
+-- - [sp, #imm]: restore tracked stack slot taint
+-- - [r, symbol@GOTPAGEOFF]: GOT entry load, result is Public (address)
+-- - Other: Unknown unless dst is a public root
+loadFromStack :: Reg -> AddrMode -> TaintState -> TaintState
+loadFromStack dst addr st = case addr of
+ BaseImm SP imm -> setTaintLoadStack dst (fromInteger imm) st
+ PostIndex SP imm -> setTaintLoadStack dst (fromInteger imm) st
+ BaseSymbol _ _ -> setTaint dst Public st -- GOT/PLT entry -> address
+ Literal _ -> setTaint dst Public st -- PC-relative literal -> address
+ _ -> setTaintLoad dst st -- Other loads use default behavior
+
+-- | Load pair from stack if address is [sp, #imm].
+-- Loads dst1 from offset and dst2 from offset+8.
+loadPairFromStack :: Reg -> Reg -> AddrMode -> TaintState -> TaintState
+loadPairFromStack dst1 dst2 addr st = case addr of
+ BaseImm SP imm ->
+ let off = fromInteger imm
+ in setTaintLoadStack dst1 off (setTaintLoadStack dst2 (off + 8) st)
+ PostIndex SP imm ->
+ let off = fromInteger imm
+ in setTaintLoadStack dst1 off (setTaintLoadStack dst2 (off + 8) st)
+ _ -> setTaintLoad dst1 (setTaintLoad dst2 st)
+
-- | Get taint of an operand.
operandTaint :: Operand -> TaintState -> Taint
operandTaint op st = case op of
@@ -241,7 +316,8 @@ join3 a b c = joinTaint a (joinTaint b c)
-- | Invalidate caller-saved registers after a call.
-- Per AArch64 ABI, x0-x17 are caller-saved.
invalidateCallerSaved :: TaintState -> TaintState
-invalidateCallerSaved st = foldr (\r -> Map.insert r Unknown) st callerSaved
+invalidateCallerSaved st =
+ st { tsRegs = foldr (\r -> Map.insert r Unknown) (tsRegs st) callerSaved }
where
callerSaved =
[ X0, X1, X2, X3, X4, X5, X6, X7
@@ -251,8 +327,12 @@ invalidateCallerSaved st = foldr (\r -> Map.insert r Unknown) st callerSaved
-- | Join two taint states (element-wise join).
-- For registers in both, take the join. For registers in only one, keep.
+-- Stack slots are also joined element-wise.
joinTaintState :: TaintState -> TaintState -> TaintState
-joinTaintState = Map.unionWith joinTaint
+joinTaintState a b = TaintState
+ { tsRegs = Map.unionWith joinTaint (tsRegs a) (tsRegs b)
+ , tsStack = IM.unionWith joinTaint (tsStack a) (tsStack b)
+ }
-- | Run forward dataflow analysis over a CFG.
-- Returns the IN taint state for each block (indexed by block number).
@@ -290,7 +370,7 @@ runDataflow cfg
-> (IntSet, IntMap TaintState)
propagateToSuccs [] _ wl ins = (wl, ins)
propagateToSuccs (s:ss) out wl ins =
- let oldIn = IM.findWithDefault Map.empty s ins
+ let oldIn = IM.findWithDefault emptyTaintState s ins
newIn = joinTaintState oldIn out
changed = oldIn /= newIn
ins' = IM.insert s newIn ins
@@ -307,8 +387,10 @@ newtype FuncSummary = FuncSummary { summaryState :: TaintState }
-- | Initial conservative summary: all caller-saved are Unknown.
initSummary :: FuncSummary
-initSummary = FuncSummary $ Map.fromList
- [ (r, Unknown) | r <- callerSavedRegs ]
+initSummary = FuncSummary $ TaintState
+ { tsRegs = Map.fromList [ (r, Unknown) | r <- callerSavedRegs ]
+ , tsStack = IM.empty
+ }
-- | Caller-saved registers per AArch64 ABI.
callerSavedRegs :: [Reg]
@@ -327,9 +409,10 @@ joinSummary (FuncSummary a) (FuncSummary b) =
-- Replaces caller-saved register taints with the summary's values.
applySummary :: FuncSummary -> TaintState -> TaintState
applySummary (FuncSummary summ) st =
- foldr applyReg st callerSavedRegs
+ st { tsRegs = foldr applyReg (tsRegs st) callerSavedRegs }
where
- applyReg r s = Map.insert r (Map.findWithDefault Unknown r summ) s
+ summRegs = tsRegs summ
+ applyReg r s = Map.insert r (Map.findWithDefault Unknown r summRegs) s
-- | Run dataflow analysis for a single function (subset of blocks).
-- Returns the OUT state at return points (joined).
diff --git a/test/Main.hs b/test/Main.hs
@@ -267,6 +267,54 @@ auditTests = testGroup "Audit" [
case audit "test" src of
Left e -> assertFailure $ "parse failed: " ++ show e
Right ar -> assertEqual "no violations" 0 (length (arViolations ar))
+
+ , testCase "good: stack spill/reload preserves public" $ do
+ -- Public value stored to stack, then reloaded, should stay public
+ let src = T.unlines
+ [ "foo:"
+ , " stp x20, x21, [sp]" -- Store public roots to stack
+ , " ldr x8, [sp]" -- Reload x20's value into x8
+ , " ldr x9, [sp, #8]" -- Reload x21's value into x9
+ , " ldr x0, [x8]" -- Use x8 as base (should be public)
+ , " ldr x1, [x9]" -- Use x9 as base (should be public)
+ , " ret"
+ ]
+ case audit "test" src of
+ Left e -> assertFailure $ "parse failed: " ++ show e
+ Right ar -> assertEqual "no violations" 0 (length (arViolations ar))
+
+ , testCase "bad: stack spill/reload preserves unknown" $ do
+ -- Unknown value stored to stack, then reloaded, should stay unknown
+ let src = T.unlines
+ [ "foo:"
+ , " ldr x8, [x20]" -- x8 = unknown (loaded from heap)
+ , " str x8, [sp, #16]" -- Store unknown to stack slot
+ , " ldr x9, [sp, #16]" -- Reload the unknown value
+ , " ldr x0, [x9]" -- Use as base - violation!
+ , " ret"
+ ]
+ case audit "test" src of
+ Left e -> assertFailure $ "parse failed: " ++ show e
+ Right ar -> do
+ assertEqual "one violation" 1 (length (arViolations ar))
+ case arViolations ar of
+ [v] -> case vReason v of
+ UnknownBase X9 -> pure ()
+ other -> assertFailure $ "wrong reason: " ++ show other
+ _ -> assertFailure "expected one violation"
+
+ , testCase "good: GOT/PLT pattern" $ do
+ -- GOTPAGEOFF pattern: adrp + ldr from GOT should produce public
+ let src = T.unlines
+ [ "foo:"
+ , " adrp x8, _sym@GOTPAGE"
+ , " ldr x8, [x8, _sym@GOTPAGEOFF]" -- GOT entry -> address
+ , " ldr x0, [x8]" -- Use GOT result as base
+ , " ret"
+ ]
+ case audit "test" src of
+ Left e -> assertFailure $ "parse failed: " ++ show e
+ Right ar -> assertEqual "no violations" 0 (length (arViolations ar))
]
-- Inter-procedural tests