commit 2549b3e1c405920476b40f28a9f21d340a4d257f
parent 510af37cc5ebd973a2c9346a62a07f37722d0c6f
Author: Jared Tobin <jared@jtobin.io>
Date: Wed, 11 Feb 2026 21:58:48 +0400
feat: implement coarse heap taint propagation (IMPL17 Stage 1)
Add a coarse heap bucket to TaintState that tracks taint for non-stack
memory accesses. Secrets stored to heap now propagate to subsequent
heap loads, enabling detection of secret-indexed memory access patterns.
- Add tsHeapTaint, tsHeapProv, tsHeapKind fields to TaintState
- Non-stack stores join source taint into heap bucket
- Non-stack loads read from heap bucket (public roots still protected)
- Heap starts Unknown for conservative analysis
- Add PreIndex SP handling to loadFromStack/loadPairFromStack for symmetry
- Add heap taint propagation tests
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Diffstat:
4 files changed, 191 insertions(+), 12 deletions(-)
diff --git a/lib/Audit/AArch64/Taint.hs b/lib/Audit/AArch64/Taint.hs
@@ -66,6 +66,7 @@ import Data.Text (Text)
-- Also tracks provenance for upgrading Unknown to Public when provable,
-- and register kinds (pointer vs scalar) for safe provenance upgrades.
-- Tracks both hardware stack (SP) and STG stack (X20) slots separately.
+-- A coarse heap bucket captures taint for non-stack memory accesses.
data TaintState = TaintState
{ tsRegs :: !(Map Reg Taint)
-- ^ Register taints
@@ -85,6 +86,12 @@ data TaintState = TaintState
-- ^ STG stack slot provenance (keyed by X20 offset)
, tsStgStackKind :: !(IntMap RegKind)
-- ^ STG stack slot kinds (keyed by X20 offset)
+ , tsHeapTaint :: !Taint
+ -- ^ Coarse heap taint bucket (joined from all non-stack stores)
+ , tsHeapProv :: !Provenance
+ -- ^ Coarse heap provenance bucket
+ , tsHeapKind :: !RegKind
+ -- ^ Coarse heap kind bucket
} deriving (Eq, Show)
-- | GHC 9.10.3 AArch64 public root registers.
@@ -124,10 +131,14 @@ emptyTaintState = TaintState
, tsStgStack = IM.empty
, tsStgStackProv = IM.empty
, tsStgStackKind = IM.empty
+ , tsHeapTaint = Unknown
+ , tsHeapProv = ProvUnknown
+ , tsHeapKind = KindUnknown
}
-- | Initial taint state with public roots marked.
-- Public roots are marked as Ptr kind (they are pointers).
+-- Heap bucket starts Unknown (conservative for untracked memory).
initTaintState :: TaintState
initTaintState = TaintState
{ tsRegs = Map.fromList [(r, Public) | r <- publicRoots]
@@ -139,6 +150,9 @@ initTaintState = TaintState
, tsStgStack = IM.empty
, tsStgStackProv = IM.empty
, tsStgStackKind = IM.empty
+ , tsHeapTaint = Unknown
+ , tsHeapProv = ProvUnknown
+ , tsHeapKind = KindUnknown
}
-- | Seed argument registers according to policy.
@@ -419,14 +433,14 @@ setTaintProvKind r t p k st = st
, tsKind = Map.insert r k (tsKind st)
}
--- | Set taint for a loaded value, preserving public roots.
--- Public roots (SP, X19-X21, X28, etc.) stay public even when loaded
--- since GHC spills/restores them from the hardware stack.
--- Loaded values are KindScalar unless they're public roots (which are Ptr).
-setTaintLoad :: Reg -> TaintState -> TaintState
-setTaintLoad dst st
+-- | Set taint for a loaded value from the heap bucket.
+-- Public roots always get Public/ProvPublic/KindPtr.
+-- Other destinations get the coarse heap taint.
+setTaintLoadHeap :: Reg -> TaintState -> TaintState
+setTaintLoadHeap dst st
| isPublicRoot dst = setTaintProvKind dst Public ProvPublic KindPtr st
- | otherwise = setTaintProvKind dst Unknown ProvUnknown KindScalar st
+ | otherwise = setTaintProvKind dst (tsHeapTaint st) (tsHeapProv st)
+ (tsHeapKind st) st
where
isPublicRoot r = r `elem` publicRoots
@@ -472,6 +486,15 @@ clearStgStackMap :: TaintState -> TaintState
clearStgStackMap st = st
{ tsStgStack = IM.empty, tsStgStackProv = IM.empty, tsStgStackKind = IM.empty }
+-- | Join taint, provenance, and kind into the heap bucket.
+-- Used for non-stack stores to propagate secrets through heap memory.
+joinHeapBucket :: Taint -> Provenance -> RegKind -> TaintState -> TaintState
+joinHeapBucket t p k st = st
+ { tsHeapTaint = joinTaint (tsHeapTaint st) t
+ , tsHeapProv = joinProvenance (tsHeapProv st) p
+ , tsHeapKind = joinKind (tsHeapKind st) k
+ }
+
-- | Set taint for a loaded value from a known STG stack slot.
-- If we have tracked taint/provenance/kind at this offset, use it; else Unknown.
-- Public roots always get Public/ProvPublic/KindPtr.
@@ -543,7 +566,11 @@ storeToStack src addr st = case addr of
k = getKind src st
st' = setStgStackTaintProvKind 0 t p k st
in clearStgStackMap st'
- _ -> st -- Non-stack stores don't affect stack tracking
+ -- Non-stack stores: join source taint into heap bucket
+ _ -> let t = getTaint src st
+ p = getProvenance src st
+ k = getKind src st
+ in joinHeapBucket t p k st
-- | Track a store pair to stack if address is [sp, #imm] or [x20, #imm].
-- Stores src1 at offset and src2 at offset+8.
@@ -594,7 +621,10 @@ storePairToStack src1 src2 addr st = case addr of
st' = setStgStackTaintProvKind 0 t1 p1 k1
(setStgStackTaintProvKind 8 t2 p2 k2 st)
in clearStgStackMap st'
- _ -> st
+ -- Non-stack stores: join both sources into heap bucket
+ _ -> let t1 = getTaint src1 st; p1 = getProvenance src1 st; k1 = getKind src1 st
+ t2 = getTaint src2 st; p2 = getProvenance src2 st; k2 = getKind src2 st
+ in joinHeapBucket t1 p1 k1 (joinHeapBucket t2 p2 k2 st)
-- | Load from memory, handling special cases:
-- - [sp, #imm]: restore tracked hardware stack slot taint and provenance
@@ -606,6 +636,9 @@ loadFromStack :: Reg -> AddrMode -> TaintState -> TaintState
loadFromStack dst addr st = case addr of
-- Hardware stack (SP)
BaseImm SP imm -> setTaintLoadStack dst (fromInteger imm) st
+ PreIndex SP imm ->
+ -- Load from sp+imm, then sp changes
+ clearStackMap (setTaintLoadStack dst (fromInteger imm) st)
PostIndex SP imm ->
-- Load first, then clear (SP changes after access)
clearStackMap (setTaintLoadStack dst (fromInteger imm) st)
@@ -622,7 +655,8 @@ loadFromStack dst addr st = case addr of
setTaintProv dst Public ProvPublic st -- GOT/PLT entry -> address
Literal _ ->
setTaintProv dst Public ProvPublic st -- PC-relative literal -> address
- _ -> setTaintLoad dst st -- Other loads use default behavior
+ -- Other loads: read from heap bucket
+ _ -> setTaintLoadHeap dst st
-- | Load pair from stack if address is [sp, #imm] or [x20, #imm].
-- Loads dst1 from offset and dst2 from offset+8.
@@ -633,6 +667,11 @@ loadPairFromStack dst1 dst2 addr st = case addr of
BaseImm SP imm ->
let off = fromInteger imm
in setTaintLoadStack dst1 off (setTaintLoadStack dst2 (off + 8) st)
+ PreIndex SP imm ->
+ -- Load from sp+imm and sp+imm+8, then sp changes
+ let off = fromInteger imm
+ st' = setTaintLoadStack dst1 off (setTaintLoadStack dst2 (off + 8) st)
+ in clearStackMap st'
PostIndex SP imm ->
-- Load first, then clear (SP changes after access)
let off = fromInteger imm
@@ -652,7 +691,8 @@ loadPairFromStack dst1 dst2 addr st = case addr of
let off = fromInteger imm
st' = setTaintLoadStgStack dst1 off (setTaintLoadStgStack dst2 (off+8) st)
in clearStgStackMap st'
- _ -> setTaintLoad dst1 (setTaintLoad dst2 st)
+ -- Other loads: read from heap bucket
+ _ -> setTaintLoadHeap dst1 (setTaintLoadHeap dst2 st)
-- | Get taint of an operand.
operandTaint :: Operand -> TaintState -> Taint
@@ -749,7 +789,7 @@ invalidateCallerSaved st = st
-- | Join two taint states (element-wise join).
-- For registers in both, take the join. For registers in only one, keep.
--- Stack slots (SP and STG), provenance, and kinds are also joined element-wise.
+-- Stack slots (SP and STG), provenance, kinds, and heap bucket are also joined.
joinTaintState :: TaintState -> TaintState -> TaintState
joinTaintState a b = TaintState
{ tsRegs = Map.unionWith joinTaint (tsRegs a) (tsRegs b)
@@ -762,6 +802,9 @@ joinTaintState a b = TaintState
, tsStgStackProv = IM.unionWith joinProvenance (tsStgStackProv a)
(tsStgStackProv b)
, tsStgStackKind = IM.unionWith joinKind (tsStgStackKind a) (tsStgStackKind b)
+ , tsHeapTaint = joinTaint (tsHeapTaint a) (tsHeapTaint b)
+ , tsHeapProv = joinProvenance (tsHeapProv a) (tsHeapProv b)
+ , tsHeapKind = joinKind (tsHeapKind a) (tsHeapKind b)
}
-- | Run forward dataflow analysis over a CFG.
@@ -826,6 +869,9 @@ initSummary = FuncSummary $ TaintState
, tsStgStack = IM.empty
, tsStgStackProv = IM.empty
, tsStgStackKind = IM.empty
+ , tsHeapTaint = Unknown
+ , tsHeapProv = ProvUnknown
+ , tsHeapKind = KindUnknown
}
-- | Caller-saved registers per AArch64 ABI.
diff --git a/plans/ARCH17.md b/plans/ARCH17.md
@@ -0,0 +1,47 @@
+# ARCH17: Heap/Memory Taint Propagation
+
+## Goal
+
+Propagate taint and kind information through heap/memory loads and
+stores so secret values loaded from heap objects can influence address
+checks (e.g., secret-indexed table lookups).
+
+## Scope
+
+Stage 1 (coarse heap bucket):
+- Track a single heap taint/provenance/kind bucket for non-stack memory.
+- Stores to non-stack memory join into the heap bucket.
+- Loads from non-stack memory read from the heap bucket.
+
+Stage 2 (refined base+offset tracking):
+- Track taint/kind for addresses of the form `[base, #imm]` when `base`
+ is a public pointer (or provably public). Use `(base, imm)` as key.
+- Fallback to coarse heap bucket for other forms.
+
+## Rationale
+
+Current analysis treats non-stack loads as Unknown, preventing secrets
+from flowing into index calculations. A shadow heap model enables secret
+propagation without full alias analysis.
+
+## Design
+
+### Coarse Heap Bucket (Stage 1)
+
+- Add `tsHeapTaint :: Taint`, `tsHeapProv :: Provenance`,
+ `tsHeapKind :: RegKind`.
+- On store to non-stack address: `heap := join heap (src)`.
+- On load from non-stack address: `dst := heap` (unless public root).
+
+### Refined Map (Stage 2)
+
+- Add `tsHeapSlots :: Map (Reg, Int) (Taint, Provenance, RegKind)` for
+ `[base, #imm]` with `base` KindPtr and public provenance.
+- Stores to such addresses update the slot; loads read from it.
+- Other heap accesses join into the coarse bucket.
+
+## Risks
+
+- Coarse bucket may over-taint (false positives).
+- Refined map needs clear rules for when base is considered public.
+- Aliasing is not solved; this is a conservative approximation.
diff --git a/plans/IMPL17.md b/plans/IMPL17.md
@@ -0,0 +1,39 @@
+# IMPL17: Implement Heap Taint Propagation
+
+## Summary
+
+Add heap taint propagation so non-stack loads/stores transfer taint and
+kind information, enabling detection of secret-indexed memory access.
+
+## Steps
+
+Stage 1 (coarse heap bucket):
+
+1) Extend TaintState
+- Add `tsHeapTaint`, `tsHeapProv`, `tsHeapKind`.
+
+2) Update load/store logic
+- In `loadFromStack` fallback path (non-stack), set dst taint/prov/kind
+ from heap bucket.
+- In store helpers, when address is not SP/X20-based, join source into
+ heap bucket.
+
+3) Preserve public roots
+- If dst is a public root, continue to set it to Public/KindPtr.
+
+4) Tests
+- Add a test where a secret store to heap taints a later load.
+- Add a test where secret-loaded value used as index triggers violation.
+
+Stage 2 (refined map) is optional and can follow once Stage 1 is stable.
+
+## Files to Touch
+
+- `lib/Audit/AArch64/Taint.hs`
+- `test/`
+
+## Validation
+
+- `cabal test`
+- Run auditor on `mul_wnaf` with secret seed; expect secret-indexed
+ access to be flagged.
diff --git a/test/Main.hs b/test/Main.hs
@@ -479,6 +479,53 @@ auditTests = testGroup "Audit" [
case audit "test" src of
Left e -> assertFailure $ "parse failed: " ++ show e
Right ar -> assertEqual "no violations" 0 (length (arViolations ar))
+
+ -- Heap taint propagation tests
+ , testCase "bad: heap store propagates secret to later load" $ do
+ -- Secret stored to heap should taint subsequent heap loads
+ -- Using public roots (x21 = Hp) as base addresses to avoid base violations
+ let src = T.unlines
+ [ "_foo:"
+ , " str x0, [x21]" -- Store secret x0 to heap (via Hp)
+ , " ldr x9, [x21, #8]" -- Load from heap -> gets heap taint
+ , " ldr x1, [x20, x9]" -- Use loaded value as index -> violation
+ , " ret"
+ ]
+ cfg = TaintConfig $ Map.singleton "_foo"
+ (ArgPolicy (Set.singleton X0) Set.empty)
+ case auditWithConfig cfg "test" src of
+ Left e -> assertFailure $ "parse failed: " ++ show e
+ Right ar -> do
+ assertEqual "one violation" 1 (length (arViolations ar))
+ case arViolations ar of
+ [v] -> case vReason v of
+ SecretIndex X9 -> pure ()
+ other -> assertFailure $ "wrong reason: " ++ show other
+ _ -> assertFailure "expected one violation"
+
+ , testCase "bad: heap taints join on multiple stores" $ do
+ -- Multiple stores to heap join; one secret contaminates all loads
+ -- Using public roots (x21 = Hp) as base addresses
+ let src = T.unlines
+ [ "_foo:"
+ , " adrp x8, _const@PAGE" -- x8 = public pointer
+ , " str x8, [x21]" -- Store public to heap
+ , " str x0, [x21, #8]" -- Store secret x0 to heap
+ , " ldr x9, [x21, #16]" -- Load from heap -> secret (joined)
+ , " ldr x1, [x20, x9]" -- Use as index -> violation
+ , " ret"
+ ]
+ cfg = TaintConfig $ Map.singleton "_foo"
+ (ArgPolicy (Set.singleton X0) Set.empty)
+ case auditWithConfig cfg "test" src of
+ Left e -> assertFailure $ "parse failed: " ++ show e
+ Right ar -> do
+ assertEqual "one violation" 1 (length (arViolations ar))
+ case arViolations ar of
+ [v] -> case vReason v of
+ SecretIndex X9 -> pure ()
+ other -> assertFailure $ "wrong reason: " ++ show other
+ _ -> assertFailure "expected one violation"
]
-- Inter-procedural tests