commit c26861025b6fec495ce79bec99950370156e2ac1
parent 9fc28aac98c81c8990bcfc7421232c9c018d30ef
Author: Jared Tobin <jared@jtobin.io>
Date: Wed, 11 Feb 2026 12:39:46 +0400
feat: whitelist GHC pointer untagging pattern
Recognize AND with tag-clearing mask (0xfffffffffffffff8 or -8) as GHC
pointer untagging. Preserve ProvPublic provenance for such operations
when the source register already has ProvPublic provenance.
Conservative approach: only preserve provenance when source is already
known-public; don't upgrade ProvUnknown to ProvPublic, as untagging an
unknown pointer doesn't prove its safety.
Also fix parser to handle hexadecimal immediates (case-insensitive).
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Diffstat:
3 files changed, 47 insertions(+), 2 deletions(-)
diff --git a/lib/Audit/AArch64/Parser.hs b/lib/Audit/AArch64/Parser.hs
@@ -357,7 +357,10 @@ pShiftedRegOp = do
pImm :: Parser Integer
pImm = lexeme $ do
void (optional (char '#'))
- signed L.decimal
+ signed pNumber
+ where
+ pNumber = try pHex <|> L.decimal
+ pHex = string' "0x" *> L.hexadecimal -- case-insensitive
signed :: Parser Integer -> Parser Integer
signed p = do
diff --git a/lib/Audit/AArch64/Taint.hs b/lib/Audit/AArch64/Taint.hs
@@ -205,10 +205,16 @@ transfer instr st = case instr of
in setTaintProv dst t p st
-- Logical: result is join of operand taints/provenances
+ -- Special case: pointer untagging preserves ProvPublic provenance
And dst r1 op ->
let t = join2 (getTaint r1 st) (operandTaint op st)
p = provJoin2 (getProvenance r1 st) (operandProv op st)
- in setTaintProv dst t p st
+ srcProv = getProvenance r1 st
+ -- Pointer untagging: preserve ProvPublic if source was ProvPublic
+ p' = if isPointerUntagMask op && srcProv == ProvPublic
+ then ProvPublic
+ else p
+ in setTaintProv dst t p' st
Orr dst r1 op ->
let t = join2 (getTaint r1 st) (operandTaint op st)
p = provJoin2 (getProvenance r1 st) (operandProv op st)
@@ -468,6 +474,16 @@ operandProv op st = case op of
OpLabel _ -> ProvPublic
OpAddr _ -> ProvUnknown -- Address operand provenance is complex
+-- | Check if operand is a GHC pointer-untagging mask.
+-- GHC uses low 3 bits for pointer tagging; this mask clears them.
+-- Recognizing this pattern allows whitelisting heap traversal.
+isPointerUntagMask :: Operand -> Bool
+isPointerUntagMask (OpImm imm) =
+ imm == 0xfffffffffffffff8 || -- 64-bit mask
+ imm == 0xfffffff8 || -- 32-bit mask
+ imm == -8 -- signed representation
+isPointerUntagMask _ = False
+
-- | Get taint of address base register.
addrBaseTaint :: AddrMode -> TaintState -> Taint
addrBaseTaint addr st = case addr of
diff --git a/test/Main.hs b/test/Main.hs
@@ -435,4 +435,30 @@ provenanceTests = testGroup "Provenance" [
case audit "test" src of
Left e -> assertFailure $ "parse failed: " ++ show e
Right ar -> assertEqual "no violations" 0 (length (arViolations ar))
+
+ , testCase "good: pointer untagging preserves public provenance" $ do
+ -- Untagging a register with ProvPublic preserves provenance
+ let src = T.unlines
+ [ "foo:"
+ , " mov x8, x20" -- x8 = Public, ProvPublic
+ , " and x8, x8, #0xfffffffffffffff8" -- untag preserves ProvPublic
+ , " ldr x0, [x8]" -- should be safe
+ , " ret"
+ ]
+ case audit "test" src of
+ Left e -> assertFailure $ "parse failed: " ++ show e
+ Right ar -> assertEqual "no violations" 0 (length (arViolations ar))
+
+ , testCase "bad: pointer untagging doesn't upgrade unknown provenance" $ do
+ -- Untagging a register with ProvUnknown doesn't make it safe
+ let src = T.unlines
+ [ "foo:"
+ , " ldr x8, [x20]" -- x8 = Unknown, ProvUnknown
+ , " and x8, x8, #-8" -- untag doesn't upgrade ProvUnknown
+ , " ldr x0, [x8]" -- violation: x8 is Unknown, ProvUnknown
+ , " ret"
+ ]
+ case audit "test" src of
+ Left e -> assertFailure $ "parse failed: " ++ show e
+ Right ar -> assertEqual "one violation" 1 (length (arViolations ar))
]