commit 89b9a2ab26920214173ff08dea9dfa039c30ba79
parent 3b2328ecee2adeb3cfae24b20681b718632f6da1
Author: Jared Tobin <jared@jtobin.io>
Date: Fri, 13 Feb 2026 08:21:26 +0400
feat: improve GHC runtime detection with BB-bounded scan
- Use basic block bounded backwards scan for closure entry, dict calls,
and RTS symbol detection (only stop at LBB* labels, not Lloh* hints)
- Add PreIndex/PostIndex addressing modes to closure entry detection
- Add tag check variant: and #0x7; cmp #N pattern
- Add arity/closure-type check: ldur [r, #-N]; cmp pattern
- Reduces false positives from 332 to 6 on Curve.s test case
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Diffstat:
1 file changed, 58 insertions(+), 16 deletions(-)
diff --git a/lib/Audit/AArch64/NCT.hs b/lib/Audit/AArch64/NCT.hs
@@ -32,7 +32,7 @@ import Data.IntMap.Strict (IntMap)
import qualified Data.IntMap.Strict as IntMap
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as Map
-import Data.Text (Text, isInfixOf, isSuffixOf)
+import Data.Text (Text, isInfixOf, isPrefixOf, isSuffixOf)
import GHC.Generics (Generic)
-- | Reason for flagging an instruction as non-constant-time.
@@ -176,7 +176,8 @@ buildLineMap lns = IntMap.fromList [(lineNum l, l) | l <- lns]
-- followed eventually by @blr <reg>@
isGhcRuntimeFinding :: LineMap -> NctFinding -> Bool
isGhcRuntimeFinding lineMap f = case nctReason f of
- CondBranch -> isHeapCheck || isNurseryCheck || isTagCheck || isCafCheck
+ CondBranch -> isHeapCheck || isNurseryCheck || isTagCheck
+ || isCafCheck || isArityCheck
IndirectBranch -> isClosureEntry || isDictCall || isRtsCall
_ -> False
where
@@ -201,10 +202,32 @@ isGhcRuntimeFinding lineMap f = case nctReason f of
Ldur _ (BaseImm X19 _) -> True
_ -> False
- -- Check if conditional branch is a tag check (prev: tst <r>, #7)
+ -- Check if conditional branch is a tag check
+ -- Pattern 1: tst <r>, #7 followed by branch
+ -- Pattern 2: and <r>, <r2>, #7; cmp <r>, #N followed by branch
isTagCheck :: Bool
isTagCheck = case prevInstr of
Just (Tst _ (OpImm 7)) -> True
+ Just (Cmp _ (OpImm _)) -> any isTagMask recentInstrs5
+ _ -> False
+
+ isTagMask :: Instr -> Bool
+ isTagMask instr = case instr of
+ And _ _ (OpImm 7) -> True
+ _ -> False
+
+ -- Check if conditional branch is an arity/closure-type check
+ -- Pattern: ldur <r>, [<r2>, #-N] (info table access); cmp <r>, #M
+ -- Info tables are at negative offsets from info pointer
+ isArityCheck :: Bool
+ isArityCheck = case prevInstr of
+ Just (Cmp _ (OpImm _)) -> any isInfoTableLoad recentInstrs5
+ _ -> False
+
+ isInfoTableLoad :: Instr -> Bool
+ isInfoTableLoad instr = case instr of
+ -- Negative offset indicates info table field access
+ Ldur _ (BaseImm _ off) -> off < 0
_ -> False
-- Check if cbz x0 is a CAF check (recent: bl _newCAF or similar)
@@ -219,38 +242,41 @@ isGhcRuntimeFinding lineMap f = case nctReason f of
Bl label -> "_newCAF" `isSuffixOf` label
_ -> False
- -- Check if blr is closure entry (recent: ldr/ldur to same reg)
- -- Look back up to 5 instructions to account for intervening loads
+ -- Check if blr is closure entry (ldr/ldur to same reg in BB)
isClosureEntry :: Bool
isClosureEntry = case nctInstr f of
- Blr reg -> any (isRegLoad reg) recentInstrs5
+ Blr reg -> any (isRegLoad reg) bbInstrs
_ -> False
-- Check if blr is a dictionary/vtable call
-- (ldr from [base, #offset] pattern - calling through object field)
isDictCall :: Bool
isDictCall = case nctInstr f of
- Blr reg -> any (isDictLoad reg) recentInstrs5
+ Blr reg -> any (isDictLoad reg) bbInstrs
_ -> False
isRegLoad :: Reg -> Instr -> Bool
isRegLoad reg instr = case instr of
- Ldr r (BaseImm _ _) -> r == reg
- Ldur r (BaseImm _ _) -> r == reg
+ Ldr r (BaseImm _ _) -> r == reg
+ Ldr r (PreIndex _ _) -> r == reg -- [xN, #imm]!
+ Ldr r (PostIndex _ _) -> r == reg -- [xN], #imm
+ Ldur r (BaseImm _ _) -> r == reg
Ldp r1 r2 (BaseImm _ _) -> r1 == reg || r2 == reg
_ -> False
isDictLoad :: Reg -> Instr -> Bool
isDictLoad reg instr = case instr of
-- ldr reg, [base, #offset] where offset > 0 (field access)
- Ldr r (BaseImm _ off) -> r == reg && off > 0
+ Ldr r (BaseImm _ off) -> r == reg && off > 0
+ Ldr r (PreIndex _ off) -> r == reg && off > 0
+ Ldr r (PostIndex _ off) -> r == reg && off > 0
_ -> False
-- Check if blr is calling a GHC RTS function
- -- Look for adrp <reg>, _stg_* pattern in recent instructions
+ -- Scan backwards within basic block for adrp <reg>, _stg_* pattern
isRtsCall :: Bool
isRtsCall = case nctInstr f of
- Blr reg -> any (isRtsSymbolLoad reg) recentInstrs20
+ Blr reg -> any (isRtsSymbolLoad reg) bbInstrs
_ -> False
isRtsSymbolLoad :: Reg -> Instr -> Bool
@@ -269,10 +295,6 @@ isGhcRuntimeFinding lineMap f = case nctReason f of
recentInstrs5 :: [Instr]
recentInstrs5 = recentInstrs 5
- -- Get up to 20 recent instructions before finding (for RTS calls)
- recentInstrs20 :: [Instr]
- recentInstrs20 = recentInstrs 20
-
recentInstrs :: Int -> [Instr]
recentInstrs n =
[ instr
@@ -281,6 +303,26 @@ isGhcRuntimeFinding lineMap f = case nctReason f of
, Just instr <- [lineInstr ln]
]
+ -- Get all instructions in current basic block before finding
+ -- Scans backwards until hitting a real BB label (LBB*)
+ -- Ignores linker hints (Lloh*, ltmp*) which aren't BB boundaries
+ bbInstrs :: [Instr]
+ bbInstrs = go (nctLine f - 1)
+ where
+ go lineNum
+ | lineNum <= 0 = []
+ | otherwise = case IntMap.lookup lineNum lineMap of
+ Nothing -> []
+ Just ln -> case lineLabel ln of
+ Just lbl | isBBLabel lbl -> [] -- Hit real BB, stop
+ _ -> case lineInstr ln of
+ Just instr -> instr : go (lineNum - 1)
+ Nothing -> go (lineNum - 1) -- Skip non-instruction lines
+
+ -- Real BB labels start with "LBB", linker hints are Lloh*, ltmp*, etc.
+ isBBLabel :: Text -> Bool
+ isBBLabel lbl = "LBB" `isPrefixOf` lbl
+
-- | Filter out GHC runtime patterns from NCT findings.
filterGhcRuntime :: [Line] -> Map Text [NctFinding] -> Map Text [NctFinding]
filterGhcRuntime lns findings = Map.mapMaybe filterFindings findings