commit 510af37cc5ebd973a2c9346a62a07f37722d0c6f
parent b2f6ae41c37e0ef447e3f74ea3a0aa24163d581d
Author: Jared Tobin <jared@jtobin.io>
Date: Wed, 11 Feb 2026 21:31:56 +0400
feat: track STG stack slots (x20-relative) for taint propagation (IMPL16)
GHC passes arguments via the STG stack (x20), not the hardware stack
(sp). Extend stack slot tracking to recognize [x20, #imm] patterns,
enabling taint propagation through worker code.
Changes:
- Add tsStgStack, tsStgStackProv, tsStgStackKind fields to TaintState
- Update storeToStack/storePairToStack to handle X20-relative addresses
- Update loadFromStack/loadPairFromStack to handle X20-relative addresses
including pre-indexed loads (common in GHC output)
- Clear STG stack maps when X20 is modified (same as SP handling)
- Update joinTaintState to merge STG stack maps
- Add tests for STG stack spill/reload preserving taint and kind
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Diffstat:
4 files changed, 293 insertions(+), 43 deletions(-)
diff --git a/lib/Audit/AArch64/Taint.hs b/lib/Audit/AArch64/Taint.hs
@@ -65,19 +65,26 @@ import Data.Text (Text)
-- | Taint state: maps registers to their publicness, plus stack slots.
-- Also tracks provenance for upgrading Unknown to Public when provable,
-- and register kinds (pointer vs scalar) for safe provenance upgrades.
+-- Tracks both hardware stack (SP) and STG stack (X20) slots separately.
data TaintState = TaintState
- { tsRegs :: !(Map Reg Taint)
+ { tsRegs :: !(Map Reg Taint)
-- ^ Register taints
- , tsStack :: !(IntMap Taint)
+ , tsStack :: !(IntMap Taint)
-- ^ Stack slot taints (keyed by SP offset)
- , tsProv :: !(Map Reg Provenance)
+ , tsProv :: !(Map Reg Provenance)
-- ^ Register provenance
- , tsStackProv :: !(IntMap Provenance)
+ , tsStackProv :: !(IntMap Provenance)
-- ^ Stack slot provenance (keyed by SP offset)
- , tsKind :: !(Map Reg RegKind)
+ , tsKind :: !(Map Reg RegKind)
-- ^ Register kinds (pointer vs scalar)
- , tsStackKind :: !(IntMap RegKind)
+ , tsStackKind :: !(IntMap RegKind)
-- ^ Stack slot kinds (keyed by SP offset)
+ , tsStgStack :: !(IntMap Taint)
+ -- ^ STG stack slot taints (keyed by X20 offset)
+ , tsStgStackProv :: !(IntMap Provenance)
+ -- ^ STG stack slot provenance (keyed by X20 offset)
+ , tsStgStackKind :: !(IntMap RegKind)
+ -- ^ STG stack slot kinds (keyed by X20 offset)
} deriving (Eq, Show)
-- | GHC 9.10.3 AArch64 public root registers.
@@ -108,24 +115,30 @@ publicRoots =
-- | Empty taint state (no known taints).
emptyTaintState :: TaintState
emptyTaintState = TaintState
- { tsRegs = Map.empty
- , tsStack = IM.empty
- , tsProv = Map.empty
- , tsStackProv = IM.empty
- , tsKind = Map.empty
- , tsStackKind = IM.empty
+ { tsRegs = Map.empty
+ , tsStack = IM.empty
+ , tsProv = Map.empty
+ , tsStackProv = IM.empty
+ , tsKind = Map.empty
+ , tsStackKind = IM.empty
+ , tsStgStack = IM.empty
+ , tsStgStackProv = IM.empty
+ , tsStgStackKind = IM.empty
}
-- | Initial taint state with public roots marked.
-- Public roots are marked as Ptr kind (they are pointers).
initTaintState :: TaintState
initTaintState = TaintState
- { tsRegs = Map.fromList [(r, Public) | r <- publicRoots]
- , tsStack = IM.empty
- , tsProv = Map.fromList [(r, ProvPublic) | r <- publicRoots]
- , tsStackProv = IM.empty
- , tsKind = Map.fromList [(r, KindPtr) | r <- publicRoots]
- , tsStackKind = IM.empty
+ { tsRegs = Map.fromList [(r, Public) | r <- publicRoots]
+ , tsStack = IM.empty
+ , tsProv = Map.fromList [(r, ProvPublic) | r <- publicRoots]
+ , tsStackProv = IM.empty
+ , tsKind = Map.fromList [(r, KindPtr) | r <- publicRoots]
+ , tsStackKind = IM.empty
+ , tsStgStack = IM.empty
+ , tsStgStackProv = IM.empty
+ , tsStgStackKind = IM.empty
}
-- | Seed argument registers according to policy.
@@ -445,17 +458,48 @@ clearStackMap :: TaintState -> TaintState
clearStackMap st = st
{ tsStack = IM.empty, tsStackProv = IM.empty, tsStackKind = IM.empty }
--- | Update register taint, provenance, and kind, clearing stack map if SP.
+-- | Store taint, provenance, and kind to an STG stack slot.
+setStgStackTaintProvKind :: Int -> Taint -> Provenance -> RegKind -> TaintState
+ -> TaintState
+setStgStackTaintProvKind offset t p k st = st
+ { tsStgStack = IM.insert offset t (tsStgStack st)
+ , tsStgStackProv = IM.insert offset p (tsStgStackProv st)
+ , tsStgStackKind = IM.insert offset k (tsStgStackKind st)
+ }
+
+-- | Clear all STG stack slot taints, provenance, and kinds (when X20 changes).
+clearStgStackMap :: TaintState -> TaintState
+clearStgStackMap st = st
+ { tsStgStack = IM.empty, tsStgStackProv = IM.empty, tsStgStackKind = IM.empty }
+
+-- | Set taint for a loaded value from a known STG stack slot.
+-- If we have tracked taint/provenance/kind at this offset, use it; else Unknown.
+-- Public roots always get Public/ProvPublic/KindPtr.
+setTaintLoadStgStack :: Reg -> Int -> TaintState -> TaintState
+setTaintLoadStgStack dst offset st
+ | isPublicRoot dst = setTaintProvKind dst Public ProvPublic KindPtr st
+ | otherwise =
+ let taint = IM.findWithDefault Unknown offset (tsStgStack st)
+ prov = IM.findWithDefault ProvUnknown offset (tsStgStackProv st)
+ kind = IM.findWithDefault KindUnknown offset (tsStgStackKind st)
+ in setTaintProvKind dst taint prov kind st
+ where
+ isPublicRoot r = r `elem` publicRoots
+
+-- | Update register taint, provenance, and kind.
+-- Clears hardware stack map if SP, STG stack map if X20.
updateWithSpCheckProvKind :: Reg -> Taint -> Provenance -> RegKind
-> TaintState -> TaintState
updateWithSpCheckProvKind dst t p k st
- | dst == SP = clearStackMap (setTaintProvKind dst t p k st)
- | otherwise = setTaintProvKind dst t p k st
+ | dst == SP = clearStackMap (setTaintProvKind dst t p k st)
+ | dst == X20 = clearStgStackMap (setTaintProvKind dst t p k st)
+ | otherwise = setTaintProvKind dst t p k st
--- | Track a store to stack if address is [sp, #imm].
--- Pre/post-indexed addressing modifies SP, invalidating the stack map.
+-- | Track a store to stack if address is [sp, #imm] or [x20, #imm].
+-- Pre/post-indexed addressing modifies the base, invalidating the stack map.
storeToStack :: Reg -> AddrMode -> TaintState -> TaintState
storeToStack src addr st = case addr of
+ -- Hardware stack (SP)
BaseImm SP imm ->
let off = fromInteger imm
t = getTaint src st
@@ -477,13 +521,36 @@ storeToStack src addr st = case addr of
k = getKind src st
st' = setStackTaintProvKind 0 t p k st
in clearStackMap st'
- _ -> st -- Non-SP stores don't affect stack tracking
+ -- STG stack (X20)
+ BaseImm X20 imm ->
+ let off = fromInteger imm
+ t = getTaint src st
+ p = getProvenance src st
+ k = getKind src st
+ in setStgStackTaintProvKind off t p k st
+ PreIndex X20 imm ->
+ -- Store at x20+imm, then X20 changes
+ let off = fromInteger imm
+ t = getTaint src st
+ p = getProvenance src st
+ k = getKind src st
+ st' = setStgStackTaintProvKind off t p k st
+ in clearStgStackMap st'
+ PostIndex X20 _ ->
+ -- Store at x20, then X20 changes (offset 0)
+ let t = getTaint src st
+ p = getProvenance src st
+ k = getKind src st
+ st' = setStgStackTaintProvKind 0 t p k st
+ in clearStgStackMap st'
+ _ -> st -- Non-stack stores don't affect stack tracking
--- | Track a store pair to stack if address is [sp, #imm].
+-- | Track a store pair to stack if address is [sp, #imm] or [x20, #imm].
-- Stores src1 at offset and src2 at offset+8.
--- Pre/post-indexed addressing modifies SP, invalidating the stack map.
+-- Pre/post-indexed addressing modifies the base, invalidating the stack map.
storePairToStack :: Reg -> Reg -> AddrMode -> TaintState -> TaintState
storePairToStack src1 src2 addr st = case addr of
+ -- Hardware stack (SP)
BaseImm SP imm ->
let off = fromInteger imm
t1 = getTaint src1 st; p1 = getProvenance src1 st; k1 = getKind src1 st
@@ -505,30 +572,64 @@ storePairToStack src1 src2 addr st = case addr of
st' = setStackTaintProvKind 0 t1 p1 k1
(setStackTaintProvKind 8 t2 p2 k2 st)
in clearStackMap st'
+ -- STG stack (X20)
+ BaseImm X20 imm ->
+ let off = fromInteger imm
+ t1 = getTaint src1 st; p1 = getProvenance src1 st; k1 = getKind src1 st
+ t2 = getTaint src2 st; p2 = getProvenance src2 st; k2 = getKind src2 st
+ in setStgStackTaintProvKind off t1 p1 k1
+ (setStgStackTaintProvKind (off + 8) t2 p2 k2 st)
+ PreIndex X20 imm ->
+ -- Store at x20+imm and x20+imm+8, then X20 changes
+ let off = fromInteger imm
+ t1 = getTaint src1 st; p1 = getProvenance src1 st; k1 = getKind src1 st
+ t2 = getTaint src2 st; p2 = getProvenance src2 st; k2 = getKind src2 st
+ st' = setStgStackTaintProvKind off t1 p1 k1
+ (setStgStackTaintProvKind (off + 8) t2 p2 k2 st)
+ in clearStgStackMap st'
+ PostIndex X20 _ ->
+ -- Store at x20 and x20+8, then X20 changes
+ let t1 = getTaint src1 st; p1 = getProvenance src1 st; k1 = getKind src1 st
+ t2 = getTaint src2 st; p2 = getProvenance src2 st; k2 = getKind src2 st
+ st' = setStgStackTaintProvKind 0 t1 p1 k1
+ (setStgStackTaintProvKind 8 t2 p2 k2 st)
+ in clearStgStackMap st'
_ -> st
-- | Load from memory, handling special cases:
--- - [sp, #imm]: restore tracked stack slot taint and provenance
+-- - [sp, #imm]: restore tracked hardware stack slot taint and provenance
+-- - [x20, #imm]: restore tracked STG stack slot taint and provenance
-- - [r, symbol@GOTPAGEOFF]: GOT entry load, result is Public (address)
-- - Other: Unknown unless dst is a public root
--- Post-indexed addressing modifies SP, invalidating the stack map.
+-- Pre/post-indexed addressing modifies the base, invalidating the stack map.
loadFromStack :: Reg -> AddrMode -> TaintState -> TaintState
loadFromStack dst addr st = case addr of
+ -- Hardware stack (SP)
BaseImm SP imm -> setTaintLoadStack dst (fromInteger imm) st
PostIndex SP imm ->
-- Load first, then clear (SP changes after access)
clearStackMap (setTaintLoadStack dst (fromInteger imm) st)
+ -- STG stack (X20)
+ BaseImm X20 imm -> setTaintLoadStgStack dst (fromInteger imm) st
+ PreIndex X20 imm ->
+ -- Load from x20+imm, then x20 changes
+ clearStgStackMap (setTaintLoadStgStack dst (fromInteger imm) st)
+ PostIndex X20 imm ->
+ -- Load first, then clear (X20 changes after access)
+ clearStgStackMap (setTaintLoadStgStack dst (fromInteger imm) st)
+ -- Symbol/literal loads
BaseSymbol _ _ ->
setTaintProv dst Public ProvPublic st -- GOT/PLT entry -> address
Literal _ ->
setTaintProv dst Public ProvPublic st -- PC-relative literal -> address
_ -> setTaintLoad dst st -- Other loads use default behavior
--- | Load pair from stack if address is [sp, #imm].
+-- | Load pair from stack if address is [sp, #imm] or [x20, #imm].
-- Loads dst1 from offset and dst2 from offset+8.
--- Post-indexed addressing modifies SP, invalidating the stack map.
+-- Pre/post-indexed addressing modifies the base, invalidating the stack map.
loadPairFromStack :: Reg -> Reg -> AddrMode -> TaintState -> TaintState
loadPairFromStack dst1 dst2 addr st = case addr of
+ -- Hardware stack (SP)
BaseImm SP imm ->
let off = fromInteger imm
in setTaintLoadStack dst1 off (setTaintLoadStack dst2 (off + 8) st)
@@ -537,6 +638,20 @@ loadPairFromStack dst1 dst2 addr st = case addr of
let off = fromInteger imm
st' = setTaintLoadStack dst1 off (setTaintLoadStack dst2 (off + 8) st)
in clearStackMap st'
+ -- STG stack (X20)
+ BaseImm X20 imm ->
+ let off = fromInteger imm
+ in setTaintLoadStgStack dst1 off (setTaintLoadStgStack dst2 (off + 8) st)
+ PreIndex X20 imm ->
+ -- Load from x20+imm and x20+imm+8, then x20 changes
+ let off = fromInteger imm
+ st' = setTaintLoadStgStack dst1 off (setTaintLoadStgStack dst2 (off+8) st)
+ in clearStgStackMap st'
+ PostIndex X20 imm ->
+ -- Load first, then clear (X20 changes after access)
+ let off = fromInteger imm
+ st' = setTaintLoadStgStack dst1 off (setTaintLoadStgStack dst2 (off+8) st)
+ in clearStgStackMap st'
_ -> setTaintLoad dst1 (setTaintLoad dst2 st)
-- | Get taint of an operand.
@@ -634,15 +749,19 @@ invalidateCallerSaved st = st
-- | Join two taint states (element-wise join).
-- For registers in both, take the join. For registers in only one, keep.
--- Stack slots, provenance, and kinds are also joined element-wise.
+-- Stack slots (SP and STG), provenance, and kinds are also joined element-wise.
joinTaintState :: TaintState -> TaintState -> TaintState
joinTaintState a b = TaintState
- { tsRegs = Map.unionWith joinTaint (tsRegs a) (tsRegs b)
- , tsStack = IM.unionWith joinTaint (tsStack a) (tsStack b)
- , tsProv = Map.unionWith joinProvenance (tsProv a) (tsProv b)
- , tsStackProv = IM.unionWith joinProvenance (tsStackProv a) (tsStackProv b)
- , tsKind = Map.unionWith joinKind (tsKind a) (tsKind b)
- , tsStackKind = IM.unionWith joinKind (tsStackKind a) (tsStackKind b)
+ { tsRegs = Map.unionWith joinTaint (tsRegs a) (tsRegs b)
+ , tsStack = IM.unionWith joinTaint (tsStack a) (tsStack b)
+ , tsProv = Map.unionWith joinProvenance (tsProv a) (tsProv b)
+ , tsStackProv = IM.unionWith joinProvenance (tsStackProv a) (tsStackProv b)
+ , tsKind = Map.unionWith joinKind (tsKind a) (tsKind b)
+ , tsStackKind = IM.unionWith joinKind (tsStackKind a) (tsStackKind b)
+ , tsStgStack = IM.unionWith joinTaint (tsStgStack a) (tsStgStack b)
+ , tsStgStackProv = IM.unionWith joinProvenance (tsStgStackProv a)
+ (tsStgStackProv b)
+ , tsStgStackKind = IM.unionWith joinKind (tsStgStackKind a) (tsStgStackKind b)
}
-- | Run forward dataflow analysis over a CFG.
@@ -698,12 +817,15 @@ newtype FuncSummary = FuncSummary { summaryState :: TaintState }
-- | Initial conservative summary: all caller-saved are Unknown.
initSummary :: FuncSummary
initSummary = FuncSummary $ TaintState
- { tsRegs = Map.fromList [ (r, Unknown) | r <- callerSavedRegs ]
- , tsStack = IM.empty
- , tsProv = Map.fromList [ (r, ProvUnknown) | r <- callerSavedRegs ]
- , tsStackProv = IM.empty
- , tsKind = Map.fromList [ (r, KindUnknown) | r <- callerSavedRegs ]
- , tsStackKind = IM.empty
+ { tsRegs = Map.fromList [ (r, Unknown) | r <- callerSavedRegs ]
+ , tsStack = IM.empty
+ , tsProv = Map.fromList [ (r, ProvUnknown) | r <- callerSavedRegs ]
+ , tsStackProv = IM.empty
+ , tsKind = Map.fromList [ (r, KindUnknown) | r <- callerSavedRegs ]
+ , tsStackKind = IM.empty
+ , tsStgStack = IM.empty
+ , tsStgStackProv = IM.empty
+ , tsStgStackKind = IM.empty
}
-- | Caller-saved registers per AArch64 ABI.
diff --git a/plans/ARCH16.md b/plans/ARCH16.md
@@ -0,0 +1,31 @@
+# ARCH16: Track STG Stack Slots (x20-relative)
+
+## Goal
+
+Extend stack-slot taint/kind tracking to the GHC STG stack pointer
+(`x20`), so argument values passed via the STG stack are propagated.
+
+## Scope
+
+- Treat `[x20, #imm]` like `[sp, #imm]` for load/store tracking.
+- Clear STG stack slot maps when `x20` is modified.
+- Keep SP tracking unchanged.
+
+## Rationale
+
+GHC-generated code passes arguments via STG registers and the STG stack
+(`x20`), not the hardware `sp`. Tracking only `[sp, #imm]` misses most
+value flows in worker code.
+
+## Design
+
+- Maintain a separate `tsStgStack` map (and provenance/kind) keyed by
+ `x20` offsets, or reuse existing maps with a tagged key space.
+- Update `storeToStack`/`loadFromStack` to recognize `BaseImm X20 imm` and
+ pre/post-index variants.
+- Clear the STG stack map when `x20` is updated (similar to SP).
+
+## Risks
+
+- Misclassifying non-stack uses of `x20` (unlikely in GHC ABI).
+- Pre/post-indexed updates must mirror SP handling to avoid stale data.
diff --git a/plans/IMPL16.md b/plans/IMPL16.md
@@ -0,0 +1,33 @@
+# IMPL16: Implement STG Stack Slot Tracking
+
+## Summary
+
+Extend stack-slot taint/provenance/kind tracking to `[x20, #imm]` (STG
+stack) to propagate taints through GHC worker code.
+
+## Steps
+
+1) Extend TaintState
+- Add STG stack maps (e.g., `tsStgStack`, `tsStgStackProv`, `tsStgStackKind`).
+- Keep SP maps as-is.
+
+2) Update stack load/store helpers
+- Recognize `BaseImm X20 imm` in `storeToStack`/`loadFromStack`.
+- Mirror pre/post-index handling for `X20` similar to `SP`.
+
+3) Clear on X20 updates
+- When `x20` is modified, clear STG stack maps (similar to `clearStackMap`).
+
+4) Tests
+- Add tests for spill/reload via `[x20, #imm]` preserving taint and kind.
+
+## Files to Touch
+
+- `lib/Audit/AArch64/Taint.hs`
+- `test/`
+
+## Validation
+
+- `cabal test`
+- Re-run on `etc/Curve.s` with taint config for inv#; expect fewer
+ unknown-base warnings in worker paths.
diff --git a/test/Main.hs b/test/Main.hs
@@ -403,6 +403,70 @@ auditTests = testGroup "Audit" [
other -> assertFailure $ "wrong reason: " ++ show other
_ -> assertFailure "expected one violation"
+ -- STG stack (x20-relative) spill/reload tests
+ , testCase "good: STG stack spill/reload preserves public" $ do
+ -- Public value stored to STG stack (x20), then reloaded, stays public
+ let src = T.unlines
+ [ "foo:"
+ , " adrp x8, _const@PAGE" -- x8 = Public, ProvPublic, KindPtr
+ , " str x8, [x20, #8]" -- Store to STG stack slot
+ , " ldr x9, [x20, #8]" -- Reload from STG stack
+ , " ldr x0, [x9]" -- Use as base - should be public
+ , " ret"
+ ]
+ case audit "test" src of
+ Left e -> assertFailure $ "parse failed: " ++ show e
+ Right ar -> assertEqual "no violations" 0 (length (arViolations ar))
+
+ , testCase "bad: STG stack spill/reload preserves unknown" $ do
+ -- Unknown value stored to STG stack, then reloaded, stays unknown
+ let src = T.unlines
+ [ "foo:"
+ , " ldr x8, [x20]" -- x8 = unknown (loaded from STG stack)
+ , " str x8, [x20, #16]" -- Store unknown to STG stack slot
+ , " ldr x9, [x20, #16]" -- Reload the unknown value
+ , " ldr x0, [x9]" -- Use as base - violation!
+ , " ret"
+ ]
+ case audit "test" src of
+ Left e -> assertFailure $ "parse failed: " ++ show e
+ Right ar -> do
+ assertEqual "one violation" 1 (length (arViolations ar))
+ case arViolations ar of
+ [v] -> case vReason v of
+ UnknownBase X9 -> pure ()
+ other -> assertFailure $ "wrong reason: " ++ show other
+ _ -> assertFailure "expected one violation"
+
+ , testCase "good: STG stack pointer spill preserves kind" $ do
+ -- A pointer spilled to STG stack and reloaded should stay KindPtr
+ let src = T.unlines
+ [ "foo:"
+ , " adrp x8, _const@PAGE" -- x8 = Public, ProvPublic, KindPtr
+ , " str x8, [x20, #24]" -- Spill pointer to STG stack
+ , " ldr x9, [x20, #24]" -- Reload: Unknown, ProvPublic, KindPtr
+ , " ldr x0, [x9]" -- Use as base - allowed via provenance
+ , " ret"
+ ]
+ case audit "test" src of
+ Left e -> assertFailure $ "parse failed: " ++ show e
+ Right ar -> assertEqual "no violations" 0 (length (arViolations ar))
+
+ , testCase "good: STG stack pre-indexed load preserves taint" $ do
+ -- Pre-indexed load from STG stack should restore tracked taint
+ -- ldr x8, [x20, #16]! loads from x20+16, then updates x20
+ let src = T.unlines
+ [ "foo:"
+ , " adrp x8, _const@PAGE" -- x8 = Public, ProvPublic, KindPtr
+ , " str x8, [x20, #16]" -- Store pointer to STG stack slot 16
+ , " ldr x9, [x20, #16]!" -- Pre-indexed load from slot 16
+ , " ldr x0, [x9]" -- Use as base - should be public
+ , " ret"
+ ]
+ case audit "test" src of
+ Left e -> assertFailure $ "parse failed: " ++ show e
+ Right ar -> assertEqual "no violations" 0 (length (arViolations ar))
+
, testCase "good: GOT/PLT pattern" $ do
-- GOTPAGEOFF pattern: adrp + ldr from GOT should produce public
let src = T.unlines