commit e427f099dbf1393289ecc30ae636afb62057d14c
parent bfc4d70a9dd50b478247454e14b4757955316cd5
Author: Jared Tobin <jared@jtobin.io>
Date: Wed, 11 Feb 2026 22:14:09 +0400
feat: add refined heap slot tracking (IMPL17 Stage 2)
Track per-(base, offset) heap slots for [base, #imm] accesses where
base is a public pointer (KindPtr + ProvPublic). This provides more
precise taint tracking than the coarse bucket alone.
- Add tsHeapSlots :: Map (Reg, Int) (Taint, Provenance, RegKind)
- Stores to qualified [base, #imm] update refined slot AND coarse bucket
- Loads check refined slot first, fall back to coarse bucket
- Clear heap slots for a register when it's modified (via setTaintProvKind)
- joinTaintState merges heap slots with element-wise join
- Add tests for refined heap tracking
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Diffstat:
| M | lib/Audit/AArch64/Taint.hs | | | 79 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---- |
| M | test/Main.hs | | | 65 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
2 files changed, 140 insertions(+), 4 deletions(-)
diff --git a/lib/Audit/AArch64/Taint.hs b/lib/Audit/AArch64/Taint.hs
@@ -67,6 +67,7 @@ import Data.Text (Text)
-- and register kinds (pointer vs scalar) for safe provenance upgrades.
-- Tracks both hardware stack (SP) and STG stack (X20) slots separately.
-- A coarse heap bucket captures taint for non-stack memory accesses.
+-- A refined heap slot map tracks [base, #imm] accesses for public pointers.
data TaintState = TaintState
{ tsRegs :: !(Map Reg Taint)
-- ^ Register taints
@@ -92,6 +93,8 @@ data TaintState = TaintState
-- ^ Coarse heap provenance bucket
, tsHeapKind :: !RegKind
-- ^ Coarse heap kind bucket
+ , tsHeapSlots :: !(Map (Reg, Int) (Taint, Provenance, RegKind))
+ -- ^ Refined heap slots keyed by (base register, offset)
} deriving (Eq, Show)
-- | GHC 9.10.3 AArch64 public root registers.
@@ -134,6 +137,7 @@ emptyTaintState = TaintState
, tsHeapTaint = Unknown
, tsHeapProv = ProvUnknown
, tsHeapKind = KindUnknown
+ , tsHeapSlots = Map.empty
}
-- | Initial taint state with public roots marked.
@@ -153,6 +157,7 @@ initTaintState = TaintState
, tsHeapTaint = Unknown
, tsHeapProv = ProvUnknown
, tsHeapKind = KindUnknown
+ , tsHeapSlots = Map.empty
}
-- | Seed argument registers according to policy.
@@ -431,6 +436,8 @@ setTaintProvKind r t p k st = st
{ tsRegs = Map.insert r t (tsRegs st)
, tsProv = Map.insert r p (tsProv st)
, tsKind = Map.insert r k (tsKind st)
+ -- Clear heap slots keyed by this register (they refer to old value)
+ , tsHeapSlots = Map.filterWithKey (\(base, _) _ -> base /= r) (tsHeapSlots st)
}
-- | Set taint for a loaded value from the heap bucket.
@@ -444,6 +451,20 @@ setTaintLoadHeap dst st
where
isPublicRoot r = r `elem` publicRoots
+-- | Set taint for a loaded value from a refined heap slot if available.
+-- If the slot (base, offset) exists in the refined map, use it.
+-- Otherwise fall back to the coarse heap bucket.
+-- Public roots always get Public/ProvPublic/KindPtr.
+setTaintLoadHeapSlot :: Reg -> Reg -> Int -> TaintState -> TaintState
+setTaintLoadHeapSlot dst base off st
+ | isPublicRoot dst = setTaintProvKind dst Public ProvPublic KindPtr st
+ | otherwise = case getHeapSlot base off st of
+ Just (t, p, k) -> setTaintProvKind dst t p k st
+ Nothing -> setTaintProvKind dst (tsHeapTaint st) (tsHeapProv st)
+ (tsHeapKind st) st
+ where
+ isPublicRoot r = r `elem` publicRoots
+
-- | Set taint for a loaded value from a known stack slot.
-- If we have tracked taint/provenance/kind at this offset, use it; else Unknown.
-- Public roots always get Public/ProvPublic/KindPtr.
@@ -495,6 +516,22 @@ joinHeapBucket t p k st = st
, tsHeapKind = joinKind (tsHeapKind st) k
}
+-- | Check if a register qualifies as a public pointer for refined heap tracking.
+-- Must be KindPtr with ProvPublic provenance.
+isPublicPointer :: Reg -> TaintState -> Bool
+isPublicPointer r st =
+ getKind r st == KindPtr && getProvenance r st == ProvPublic
+
+-- | Set a heap slot for a (base, offset) pair.
+setHeapSlot :: Reg -> Int -> Taint -> Provenance -> RegKind -> TaintState
+ -> TaintState
+setHeapSlot base off t p k st = st
+ { tsHeapSlots = Map.insert (base, off) (t, p, k) (tsHeapSlots st) }
+
+-- | Get a heap slot for a (base, offset) pair, if it exists.
+getHeapSlot :: Reg -> Int -> TaintState -> Maybe (Taint, Provenance, RegKind)
+getHeapSlot base off st = Map.lookup (base, off) (tsHeapSlots st)
+
-- | Set taint for a loaded value from a known STG stack slot.
-- If we have tracked taint/provenance/kind at this offset, use it; else Unknown.
-- Public roots always get Public/ProvPublic/KindPtr.
@@ -566,7 +603,16 @@ storeToStack src addr st = case addr of
k = getKind src st
st' = setStgStackTaintProvKind 0 t p k st
in clearStgStackMap st'
- -- Non-stack stores: join source taint into heap bucket
+ -- Refined heap slot: [base, #imm] where base is a public pointer
+ BaseImm base imm
+ | isPublicPointer base st ->
+ let off = fromInteger imm
+ t = getTaint src st
+ p = getProvenance src st
+ k = getKind src st
+ -- Update both refined slot and coarse bucket for soundness
+ in setHeapSlot base off t p k (joinHeapBucket t p k st)
+ -- Other non-stack stores: join source taint into heap bucket only
_ -> let t = getTaint src st
p = getProvenance src st
k = getKind src st
@@ -621,7 +667,17 @@ storePairToStack src1 src2 addr st = case addr of
st' = setStgStackTaintProvKind 0 t1 p1 k1
(setStgStackTaintProvKind 8 t2 p2 k2 st)
in clearStgStackMap st'
- -- Non-stack stores: join both sources into heap bucket
+ -- Refined heap slot: [base, #imm] where base is a public pointer
+ BaseImm base imm
+ | isPublicPointer base st ->
+ let off = fromInteger imm
+ t1 = getTaint src1 st; p1 = getProvenance src1 st; k1 = getKind src1 st
+ t2 = getTaint src2 st; p2 = getProvenance src2 st; k2 = getKind src2 st
+ -- Update refined slots and coarse bucket
+ st1 = setHeapSlot base off t1 p1 k1 st
+ st2 = setHeapSlot base (off + 8) t2 p2 k2 st1
+ in joinHeapBucket t1 p1 k1 (joinHeapBucket t2 p2 k2 st2)
+ -- Other non-stack stores: join both sources into heap bucket only
_ -> let t1 = getTaint src1 st; p1 = getProvenance src1 st; k1 = getKind src1 st
t2 = getTaint src2 st; p2 = getProvenance src2 st; k2 = getKind src2 st
in joinHeapBucket t1 p1 k1 (joinHeapBucket t2 p2 k2 st)
@@ -655,7 +711,11 @@ loadFromStack dst addr st = case addr of
setTaintProv dst Public ProvPublic st -- GOT/PLT entry -> address
Literal _ ->
setTaintProv dst Public ProvPublic st -- PC-relative literal -> address
- -- Other loads: read from heap bucket
+ -- Refined heap slot: [base, #imm] where base is a public pointer
+ BaseImm base imm
+ | isPublicPointer base st ->
+ setTaintLoadHeapSlot dst base (fromInteger imm) st
+ -- Other loads: read from coarse heap bucket
_ -> setTaintLoadHeap dst st
-- | Load pair from stack if address is [sp, #imm] or [x20, #imm].
@@ -691,7 +751,13 @@ loadPairFromStack dst1 dst2 addr st = case addr of
let off = fromInteger imm
st' = setTaintLoadStgStack dst1 off (setTaintLoadStgStack dst2 (off+8) st)
in clearStgStackMap st'
- -- Other loads: read from heap bucket
+ -- Refined heap slot: [base, #imm] where base is a public pointer
+ BaseImm base imm
+ | isPublicPointer base st ->
+ let off = fromInteger imm
+ in setTaintLoadHeapSlot dst1 base off
+ (setTaintLoadHeapSlot dst2 base (off + 8) st)
+ -- Other loads: read from coarse heap bucket
_ -> setTaintLoadHeap dst1 (setTaintLoadHeap dst2 st)
-- | Get taint of an operand.
@@ -805,7 +871,11 @@ joinTaintState a b = TaintState
, tsHeapTaint = joinTaint (tsHeapTaint a) (tsHeapTaint b)
, tsHeapProv = joinProvenance (tsHeapProv a) (tsHeapProv b)
, tsHeapKind = joinKind (tsHeapKind a) (tsHeapKind b)
+ , tsHeapSlots = Map.unionWith joinSlot (tsHeapSlots a) (tsHeapSlots b)
}
+ where
+ joinSlot (t1, p1, k1) (t2, p2, k2) =
+ (joinTaint t1 t2, joinProvenance p1 p2, joinKind k1 k2)
-- | Run forward dataflow analysis over a CFG.
-- Returns the IN taint state for each block (indexed by block number).
@@ -872,6 +942,7 @@ initSummary = FuncSummary $ TaintState
, tsHeapTaint = Unknown
, tsHeapProv = ProvUnknown
, tsHeapKind = KindUnknown
+ , tsHeapSlots = Map.empty
}
-- | Caller-saved registers per AArch64 ABI.
diff --git a/test/Main.hs b/test/Main.hs
@@ -526,6 +526,71 @@ auditTests = testGroup "Audit" [
SecretIndex X9 -> pure ()
other -> assertFailure $ "wrong reason: " ++ show other
_ -> assertFailure "expected one violation"
+
+ -- Refined heap slot tests (Stage 2)
+ , testCase "good: refined heap slot preserves public at different offset" $ do
+ -- Store secret to offset 0, public to offset 8
+ -- Load from offset 8 should get public (refined tracking)
+ let src = T.unlines
+ [ "_foo:"
+ , " adrp x8, _const@PAGE" -- x8 = public pointer
+ , " str x0, [x21]" -- Store secret to offset 0
+ , " str x8, [x21, #8]" -- Store public pointer to offset 8
+ , " ldr x9, [x21, #8]" -- Load from offset 8 -> should be public
+ , " ldr x10, [x9]" -- Use as base - should be safe
+ , " ret"
+ ]
+ cfg = TaintConfig $ Map.singleton "_foo"
+ (ArgPolicy (Set.singleton X0) Set.empty)
+ case auditWithConfig cfg "test" src of
+ Left e -> assertFailure $ "parse failed: " ++ show e
+ Right ar -> assertEqual "no violations" 0 (length (arViolations ar))
+
+ , testCase "bad: refined heap slot preserves secret at its offset" $ do
+ -- Store public to offset 0, secret to offset 8
+ -- Load from offset 8 should get secret (refined tracking)
+ let src = T.unlines
+ [ "_foo:"
+ , " adrp x8, _const@PAGE" -- x8 = public pointer
+ , " str x8, [x21]" -- Store public to offset 0
+ , " str x0, [x21, #8]" -- Store secret to offset 8
+ , " ldr x9, [x21, #8]" -- Load from offset 8 -> should be secret
+ , " ldr x10, [x20, x9]" -- Use as index -> violation
+ , " ret"
+ ]
+ cfg = TaintConfig $ Map.singleton "_foo"
+ (ArgPolicy (Set.singleton X0) Set.empty)
+ case auditWithConfig cfg "test" src of
+ Left e -> assertFailure $ "parse failed: " ++ show e
+ Right ar -> do
+ assertEqual "one violation" 1 (length (arViolations ar))
+ case arViolations ar of
+ [v] -> case vReason v of
+ SecretIndex X9 -> pure ()
+ other -> assertFailure $ "wrong reason: " ++ show other
+ _ -> assertFailure "expected one violation"
+
+ , testCase "bad: refined heap slot falls back for unknown offset" $ do
+ -- Store secret at known offset, load from unknown offset
+ -- Load should get coarse heap taint (secret)
+ let src = T.unlines
+ [ "_foo:"
+ , " str x0, [x21]" -- Store secret to offset 0
+ , " ldr x9, [x21, #16]" -- Load from offset 16 (no slot) -> coarse
+ , " ldr x10, [x20, x9]" -- Use as index -> violation
+ , " ret"
+ ]
+ cfg = TaintConfig $ Map.singleton "_foo"
+ (ArgPolicy (Set.singleton X0) Set.empty)
+ case auditWithConfig cfg "test" src of
+ Left e -> assertFailure $ "parse failed: " ++ show e
+ Right ar -> do
+ assertEqual "one violation" 1 (length (arViolations ar))
+ case arViolations ar of
+ [v] -> case vReason v of
+ SecretIndex X9 -> pure ()
+ other -> assertFailure $ "wrong reason: " ++ show other
+ _ -> assertFailure "expected one violation"
]
-- Inter-procedural tests