auditor

An aarch64 constant-time memory access auditing tool.
git clone git://git.ppad.tech/auditor.git
Log | Files | Refs | README | LICENSE

commit 2549b3e1c405920476b40f28a9f21d340a4d257f
parent 510af37cc5ebd973a2c9346a62a07f37722d0c6f
Author: Jared Tobin <jared@jtobin.io>
Date:   Wed, 11 Feb 2026 21:58:48 +0400

feat: implement coarse heap taint propagation (IMPL17 Stage 1)

Add a coarse heap bucket to TaintState that tracks taint for non-stack
memory accesses. Secrets stored to heap now propagate to subsequent
heap loads, enabling detection of secret-indexed memory access patterns.

- Add tsHeapTaint, tsHeapProv, tsHeapKind fields to TaintState
- Non-stack stores join source taint into heap bucket
- Non-stack loads read from heap bucket (public roots still protected)
- Heap starts Unknown for conservative analysis
- Add PreIndex SP handling to loadFromStack/loadPairFromStack for symmetry
- Add heap taint propagation tests

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

Diffstat:
Mlib/Audit/AArch64/Taint.hs | 70++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------
Aplans/ARCH17.md | 47+++++++++++++++++++++++++++++++++++++++++++++++
Aplans/IMPL17.md | 39+++++++++++++++++++++++++++++++++++++++
Mtest/Main.hs | 47+++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 191 insertions(+), 12 deletions(-)

diff --git a/lib/Audit/AArch64/Taint.hs b/lib/Audit/AArch64/Taint.hs @@ -66,6 +66,7 @@ import Data.Text (Text) -- Also tracks provenance for upgrading Unknown to Public when provable, -- and register kinds (pointer vs scalar) for safe provenance upgrades. -- Tracks both hardware stack (SP) and STG stack (X20) slots separately. +-- A coarse heap bucket captures taint for non-stack memory accesses. data TaintState = TaintState { tsRegs :: !(Map Reg Taint) -- ^ Register taints @@ -85,6 +86,12 @@ data TaintState = TaintState -- ^ STG stack slot provenance (keyed by X20 offset) , tsStgStackKind :: !(IntMap RegKind) -- ^ STG stack slot kinds (keyed by X20 offset) + , tsHeapTaint :: !Taint + -- ^ Coarse heap taint bucket (joined from all non-stack stores) + , tsHeapProv :: !Provenance + -- ^ Coarse heap provenance bucket + , tsHeapKind :: !RegKind + -- ^ Coarse heap kind bucket } deriving (Eq, Show) -- | GHC 9.10.3 AArch64 public root registers. @@ -124,10 +131,14 @@ emptyTaintState = TaintState , tsStgStack = IM.empty , tsStgStackProv = IM.empty , tsStgStackKind = IM.empty + , tsHeapTaint = Unknown + , tsHeapProv = ProvUnknown + , tsHeapKind = KindUnknown } -- | Initial taint state with public roots marked. -- Public roots are marked as Ptr kind (they are pointers). +-- Heap bucket starts Unknown (conservative for untracked memory). initTaintState :: TaintState initTaintState = TaintState { tsRegs = Map.fromList [(r, Public) | r <- publicRoots] @@ -139,6 +150,9 @@ initTaintState = TaintState , tsStgStack = IM.empty , tsStgStackProv = IM.empty , tsStgStackKind = IM.empty + , tsHeapTaint = Unknown + , tsHeapProv = ProvUnknown + , tsHeapKind = KindUnknown } -- | Seed argument registers according to policy. @@ -419,14 +433,14 @@ setTaintProvKind r t p k st = st , tsKind = Map.insert r k (tsKind st) } --- | Set taint for a loaded value, preserving public roots. --- Public roots (SP, X19-X21, X28, etc.) stay public even when loaded --- since GHC spills/restores them from the hardware stack. --- Loaded values are KindScalar unless they're public roots (which are Ptr). -setTaintLoad :: Reg -> TaintState -> TaintState -setTaintLoad dst st +-- | Set taint for a loaded value from the heap bucket. +-- Public roots always get Public/ProvPublic/KindPtr. +-- Other destinations get the coarse heap taint. +setTaintLoadHeap :: Reg -> TaintState -> TaintState +setTaintLoadHeap dst st | isPublicRoot dst = setTaintProvKind dst Public ProvPublic KindPtr st - | otherwise = setTaintProvKind dst Unknown ProvUnknown KindScalar st + | otherwise = setTaintProvKind dst (tsHeapTaint st) (tsHeapProv st) + (tsHeapKind st) st where isPublicRoot r = r `elem` publicRoots @@ -472,6 +486,15 @@ clearStgStackMap :: TaintState -> TaintState clearStgStackMap st = st { tsStgStack = IM.empty, tsStgStackProv = IM.empty, tsStgStackKind = IM.empty } +-- | Join taint, provenance, and kind into the heap bucket. +-- Used for non-stack stores to propagate secrets through heap memory. +joinHeapBucket :: Taint -> Provenance -> RegKind -> TaintState -> TaintState +joinHeapBucket t p k st = st + { tsHeapTaint = joinTaint (tsHeapTaint st) t + , tsHeapProv = joinProvenance (tsHeapProv st) p + , tsHeapKind = joinKind (tsHeapKind st) k + } + -- | Set taint for a loaded value from a known STG stack slot. -- If we have tracked taint/provenance/kind at this offset, use it; else Unknown. -- Public roots always get Public/ProvPublic/KindPtr. @@ -543,7 +566,11 @@ storeToStack src addr st = case addr of k = getKind src st st' = setStgStackTaintProvKind 0 t p k st in clearStgStackMap st' - _ -> st -- Non-stack stores don't affect stack tracking + -- Non-stack stores: join source taint into heap bucket + _ -> let t = getTaint src st + p = getProvenance src st + k = getKind src st + in joinHeapBucket t p k st -- | Track a store pair to stack if address is [sp, #imm] or [x20, #imm]. -- Stores src1 at offset and src2 at offset+8. @@ -594,7 +621,10 @@ storePairToStack src1 src2 addr st = case addr of st' = setStgStackTaintProvKind 0 t1 p1 k1 (setStgStackTaintProvKind 8 t2 p2 k2 st) in clearStgStackMap st' - _ -> st + -- Non-stack stores: join both sources into heap bucket + _ -> let t1 = getTaint src1 st; p1 = getProvenance src1 st; k1 = getKind src1 st + t2 = getTaint src2 st; p2 = getProvenance src2 st; k2 = getKind src2 st + in joinHeapBucket t1 p1 k1 (joinHeapBucket t2 p2 k2 st) -- | Load from memory, handling special cases: -- - [sp, #imm]: restore tracked hardware stack slot taint and provenance @@ -606,6 +636,9 @@ loadFromStack :: Reg -> AddrMode -> TaintState -> TaintState loadFromStack dst addr st = case addr of -- Hardware stack (SP) BaseImm SP imm -> setTaintLoadStack dst (fromInteger imm) st + PreIndex SP imm -> + -- Load from sp+imm, then sp changes + clearStackMap (setTaintLoadStack dst (fromInteger imm) st) PostIndex SP imm -> -- Load first, then clear (SP changes after access) clearStackMap (setTaintLoadStack dst (fromInteger imm) st) @@ -622,7 +655,8 @@ loadFromStack dst addr st = case addr of setTaintProv dst Public ProvPublic st -- GOT/PLT entry -> address Literal _ -> setTaintProv dst Public ProvPublic st -- PC-relative literal -> address - _ -> setTaintLoad dst st -- Other loads use default behavior + -- Other loads: read from heap bucket + _ -> setTaintLoadHeap dst st -- | Load pair from stack if address is [sp, #imm] or [x20, #imm]. -- Loads dst1 from offset and dst2 from offset+8. @@ -633,6 +667,11 @@ loadPairFromStack dst1 dst2 addr st = case addr of BaseImm SP imm -> let off = fromInteger imm in setTaintLoadStack dst1 off (setTaintLoadStack dst2 (off + 8) st) + PreIndex SP imm -> + -- Load from sp+imm and sp+imm+8, then sp changes + let off = fromInteger imm + st' = setTaintLoadStack dst1 off (setTaintLoadStack dst2 (off + 8) st) + in clearStackMap st' PostIndex SP imm -> -- Load first, then clear (SP changes after access) let off = fromInteger imm @@ -652,7 +691,8 @@ loadPairFromStack dst1 dst2 addr st = case addr of let off = fromInteger imm st' = setTaintLoadStgStack dst1 off (setTaintLoadStgStack dst2 (off+8) st) in clearStgStackMap st' - _ -> setTaintLoad dst1 (setTaintLoad dst2 st) + -- Other loads: read from heap bucket + _ -> setTaintLoadHeap dst1 (setTaintLoadHeap dst2 st) -- | Get taint of an operand. operandTaint :: Operand -> TaintState -> Taint @@ -749,7 +789,7 @@ invalidateCallerSaved st = st -- | Join two taint states (element-wise join). -- For registers in both, take the join. For registers in only one, keep. --- Stack slots (SP and STG), provenance, and kinds are also joined element-wise. +-- Stack slots (SP and STG), provenance, kinds, and heap bucket are also joined. joinTaintState :: TaintState -> TaintState -> TaintState joinTaintState a b = TaintState { tsRegs = Map.unionWith joinTaint (tsRegs a) (tsRegs b) @@ -762,6 +802,9 @@ joinTaintState a b = TaintState , tsStgStackProv = IM.unionWith joinProvenance (tsStgStackProv a) (tsStgStackProv b) , tsStgStackKind = IM.unionWith joinKind (tsStgStackKind a) (tsStgStackKind b) + , tsHeapTaint = joinTaint (tsHeapTaint a) (tsHeapTaint b) + , tsHeapProv = joinProvenance (tsHeapProv a) (tsHeapProv b) + , tsHeapKind = joinKind (tsHeapKind a) (tsHeapKind b) } -- | Run forward dataflow analysis over a CFG. @@ -826,6 +869,9 @@ initSummary = FuncSummary $ TaintState , tsStgStack = IM.empty , tsStgStackProv = IM.empty , tsStgStackKind = IM.empty + , tsHeapTaint = Unknown + , tsHeapProv = ProvUnknown + , tsHeapKind = KindUnknown } -- | Caller-saved registers per AArch64 ABI. diff --git a/plans/ARCH17.md b/plans/ARCH17.md @@ -0,0 +1,47 @@ +# ARCH17: Heap/Memory Taint Propagation + +## Goal + +Propagate taint and kind information through heap/memory loads and +stores so secret values loaded from heap objects can influence address +checks (e.g., secret-indexed table lookups). + +## Scope + +Stage 1 (coarse heap bucket): +- Track a single heap taint/provenance/kind bucket for non-stack memory. +- Stores to non-stack memory join into the heap bucket. +- Loads from non-stack memory read from the heap bucket. + +Stage 2 (refined base+offset tracking): +- Track taint/kind for addresses of the form `[base, #imm]` when `base` + is a public pointer (or provably public). Use `(base, imm)` as key. +- Fallback to coarse heap bucket for other forms. + +## Rationale + +Current analysis treats non-stack loads as Unknown, preventing secrets +from flowing into index calculations. A shadow heap model enables secret +propagation without full alias analysis. + +## Design + +### Coarse Heap Bucket (Stage 1) + +- Add `tsHeapTaint :: Taint`, `tsHeapProv :: Provenance`, + `tsHeapKind :: RegKind`. +- On store to non-stack address: `heap := join heap (src)`. +- On load from non-stack address: `dst := heap` (unless public root). + +### Refined Map (Stage 2) + +- Add `tsHeapSlots :: Map (Reg, Int) (Taint, Provenance, RegKind)` for + `[base, #imm]` with `base` KindPtr and public provenance. +- Stores to such addresses update the slot; loads read from it. +- Other heap accesses join into the coarse bucket. + +## Risks + +- Coarse bucket may over-taint (false positives). +- Refined map needs clear rules for when base is considered public. +- Aliasing is not solved; this is a conservative approximation. diff --git a/plans/IMPL17.md b/plans/IMPL17.md @@ -0,0 +1,39 @@ +# IMPL17: Implement Heap Taint Propagation + +## Summary + +Add heap taint propagation so non-stack loads/stores transfer taint and +kind information, enabling detection of secret-indexed memory access. + +## Steps + +Stage 1 (coarse heap bucket): + +1) Extend TaintState +- Add `tsHeapTaint`, `tsHeapProv`, `tsHeapKind`. + +2) Update load/store logic +- In `loadFromStack` fallback path (non-stack), set dst taint/prov/kind + from heap bucket. +- In store helpers, when address is not SP/X20-based, join source into + heap bucket. + +3) Preserve public roots +- If dst is a public root, continue to set it to Public/KindPtr. + +4) Tests +- Add a test where a secret store to heap taints a later load. +- Add a test where secret-loaded value used as index triggers violation. + +Stage 2 (refined map) is optional and can follow once Stage 1 is stable. + +## Files to Touch + +- `lib/Audit/AArch64/Taint.hs` +- `test/` + +## Validation + +- `cabal test` +- Run auditor on `mul_wnaf` with secret seed; expect secret-indexed + access to be flagged. diff --git a/test/Main.hs b/test/Main.hs @@ -479,6 +479,53 @@ auditTests = testGroup "Audit" [ case audit "test" src of Left e -> assertFailure $ "parse failed: " ++ show e Right ar -> assertEqual "no violations" 0 (length (arViolations ar)) + + -- Heap taint propagation tests + , testCase "bad: heap store propagates secret to later load" $ do + -- Secret stored to heap should taint subsequent heap loads + -- Using public roots (x21 = Hp) as base addresses to avoid base violations + let src = T.unlines + [ "_foo:" + , " str x0, [x21]" -- Store secret x0 to heap (via Hp) + , " ldr x9, [x21, #8]" -- Load from heap -> gets heap taint + , " ldr x1, [x20, x9]" -- Use loaded value as index -> violation + , " ret" + ] + cfg = TaintConfig $ Map.singleton "_foo" + (ArgPolicy (Set.singleton X0) Set.empty) + case auditWithConfig cfg "test" src of + Left e -> assertFailure $ "parse failed: " ++ show e + Right ar -> do + assertEqual "one violation" 1 (length (arViolations ar)) + case arViolations ar of + [v] -> case vReason v of + SecretIndex X9 -> pure () + other -> assertFailure $ "wrong reason: " ++ show other + _ -> assertFailure "expected one violation" + + , testCase "bad: heap taints join on multiple stores" $ do + -- Multiple stores to heap join; one secret contaminates all loads + -- Using public roots (x21 = Hp) as base addresses + let src = T.unlines + [ "_foo:" + , " adrp x8, _const@PAGE" -- x8 = public pointer + , " str x8, [x21]" -- Store public to heap + , " str x0, [x21, #8]" -- Store secret x0 to heap + , " ldr x9, [x21, #16]" -- Load from heap -> secret (joined) + , " ldr x1, [x20, x9]" -- Use as index -> violation + , " ret" + ] + cfg = TaintConfig $ Map.singleton "_foo" + (ArgPolicy (Set.singleton X0) Set.empty) + case auditWithConfig cfg "test" src of + Left e -> assertFailure $ "parse failed: " ++ show e + Right ar -> do + assertEqual "one violation" 1 (length (arViolations ar)) + case arViolations ar of + [v] -> case vReason v of + SecretIndex X9 -> pure () + other -> assertFailure $ "wrong reason: " ++ show other + _ -> assertFailure "expected one violation" ] -- Inter-procedural tests