commit 0f8af961e891bcde511792114f7cbcb8b7c3aa26
parent e4dfd705e24890b88487199687c4eb7e890e5d0e
Author: Jared Tobin <jared@jtobin.io>
Date: Wed, 11 Feb 2026 14:39:32 +0400
perf: cache call graph and function blocks in inter-proc analysis (IMPL8)
- Add buildFunctionBlocksMap: O(N) single-pass function boundary scan
- Add buildCallerMap: inverted call graph using precomputed blocks map
- Cache both in runInterProc/runInterProcWithConfig to avoid rescanning
- Filter _L+lowercase NCG internal labels (e.g. _Lblock_info)
- Fix worklist propagation bug: use emptyTaintState as default to ensure
all reachable blocks are visited
Performance: inter-proc analysis now completes in ~1s on secp256k1.s
(previously would hang due to O(F²*N) complexity)
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Diffstat:
2 files changed, 77 insertions(+), 15 deletions(-)
diff --git a/lib/Audit/AArch64/CFG.hs b/lib/Audit/AArch64/CFG.hs
@@ -21,6 +21,9 @@ module Audit.AArch64.CFG (
, functionLabels
, callTargets
, buildCallGraph
+ -- * Cached analysis structures
+ , buildFunctionBlocksMap
+ , buildCallerMap
) where
import Audit.AArch64.Types
@@ -165,6 +168,13 @@ hasFallthrough bb = case lastInstrOf bb of
[] -> Nothing
ls -> lineInstr (last ls)
+-- | Check if a label is an NCG-internal label (not a function entry).
+-- These start with _L followed by a lowercase letter (e.g. _Lblock_info).
+isNCGInternal :: Text -> Bool
+isNCGInternal lbl = case T.unpack lbl of
+ '_':'L':c:_ -> c >= 'a' && c <= 'z'
+ _ -> False
+
-- | Check if a label is a function entry (not a local label).
-- Function labels start with _ or don't have LBB/Lloh/Lc prefixes.
isFunctionLabel :: Text -> Bool
@@ -177,6 +187,7 @@ isFunctionLabel lbl
| T.isPrefixOf "Lc" lbl = False -- NCG local label (GHC)
| T.isPrefixOf "Ls" lbl = False -- NCG local label (GHC)
| T.isPrefixOf "Lu" lbl = False -- NCG local label (GHC)
+ | isNCGInternal lbl = False -- NCG internal label (GHC)
| otherwise = True -- Likely a function
-- | Get all function entry labels in the CFG.
@@ -224,3 +235,43 @@ buildCallGraph cfg = Map.fromList
indices = functionBlocks cfg func
callees = concatMap (callTargets . (blocks !!)) indices
]
+
+-- | Build map of function labels to their block index ranges.
+-- Single O(N) pass over blocks, detecting function boundaries.
+buildFunctionBlocksMap :: CFG -> Map Text [Int]
+buildFunctionBlocksMap cfg = finalize $ foldl step (Nothing, Map.empty) indexed
+ where
+ blocks = cfgBlocks cfg
+ nBlocks = length blocks
+ indexed = zip [0..] blocks
+
+ step (mCur, acc) (idx, bb) =
+ case bbLabel bb of
+ Just lbl | isFunctionLabel lbl ->
+ -- New function starts; close previous at idx
+ let acc' = closeCurrent idx mCur acc
+ in (Just (lbl, idx), acc')
+ _ ->
+ -- Continue current function
+ (mCur, acc)
+
+ closeCurrent _ Nothing acc = acc
+ closeCurrent endIdx (Just (lbl, start)) acc =
+ Map.insert lbl [start .. endIdx - 1] acc
+
+ finalize (mCur, acc) = closeCurrent nBlocks mCur acc
+
+-- | Build caller map: maps each function to its callers.
+-- Takes precomputed function blocks map to avoid rescanning.
+buildCallerMap :: CFG -> Map Text [Int] -> Map Text [Text]
+buildCallerMap cfg funcBlocksMap = Map.fromListWith (++)
+ [ (callee, [caller])
+ | (caller, callees) <- Map.toList callGraph
+ , callee <- callees
+ ]
+ where
+ blocks = cfgBlocks cfg
+ callGraph = Map.fromList
+ [ (func, concatMap (callTargets . (blocks !!)) idxs)
+ | (func, idxs) <- Map.toList funcBlocksMap
+ ]
diff --git a/lib/Audit/AArch64/Taint.hs b/lib/Audit/AArch64/Taint.hs
@@ -41,6 +41,10 @@ module Audit.AArch64.Taint (
) where
import Audit.AArch64.CFG
+ ( BasicBlock(..), CFG(..)
+ , blockSuccessors, functionLabels, functionBlocks
+ , buildFunctionBlocksMap, buildCallerMap
+ )
import Audit.AArch64.Types
( Reg(..), Instr(..), Line(..), Operand(..), AddrMode(..)
, Taint(..), joinTaint, Provenance(..), joinProvenance
@@ -710,7 +714,7 @@ runFunctionBlocks cfg (entryIdx:rest) summaries = go initWorklist initIn IM.empt
propagate [] _ wl ins = (wl, ins)
propagate (s:ss) out wl ins =
- let oldIn = IM.findWithDefault initTaintState s ins
+ let oldIn = IM.findWithDefault emptyTaintState s ins
newIn = joinTaintState oldIn out
ins' = IM.insert s newIn ins
wl' = if oldIn /= newIn then IS.insert s wl else wl
@@ -737,38 +741,37 @@ transferWithSummary instr st summaries = case instr of
-- | Run inter-procedural fixpoint analysis.
-- Returns summaries for all functions.
+-- Precomputes caches for function blocks and callers.
runInterProc :: CFG -> Map Text FuncSummary
runInterProc cfg = go initSummaries (Set.fromList funcs)
where
funcs = functionLabels cfg
initSummaries = Map.fromList [(f, initSummary) | f <- funcs]
+ -- Precompute caches once
+ funcBlocksCache = buildFunctionBlocksMap cfg
+ callerMapCache = buildCallerMap cfg funcBlocksCache
+
+ lookupBlocks f = Map.findWithDefault [] f funcBlocksCache
+ lookupCallers f = Map.findWithDefault [] f callerMapCache
+
go summaries worklist
| Set.null worklist = summaries
| otherwise =
let (func, worklist') = Set.deleteFindMin worklist
- blockIdxs = functionBlocks cfg func
+ blockIdxs = lookupBlocks func
outState = runFunctionDataflow cfg blockIdxs summaries
newSumm = FuncSummary outState
oldSumm = Map.findWithDefault initSummary func summaries
changed = newSumm /= oldSumm
summaries' = Map.insert func newSumm summaries
-- If changed, re-analyze callers
- callers = findCallers cfg func
+ callers = lookupCallers func
worklist'' = if changed
then foldr Set.insert worklist' callers
else worklist'
in go summaries' worklist''
--- | Find functions that call the given target.
-findCallers :: CFG -> Text -> [Text]
-findCallers cfg target =
- [ caller | caller <- functionLabels cfg
- , target `elem` callees caller ]
- where
- callGraph = buildCallGraph cfg
- callees f = Map.findWithDefault [] f callGraph
-
-- ---------------------------------------------------------------------
-- Config-aware dataflow analysis
@@ -829,24 +832,32 @@ runDataflowWithConfig tcfg cfg
in propagate ss out wl' ins'
-- | Run inter-procedural analysis with taint config.
+-- Precomputes caches for function blocks and callers.
runInterProcWithConfig :: TaintConfig -> CFG -> Map Text FuncSummary
runInterProcWithConfig tcfg cfg = go initSummaries (Set.fromList funcs)
where
funcs = functionLabels cfg
initSummaries = Map.fromList [(f, initSummary) | f <- funcs]
+ -- Precompute caches once
+ funcBlocksCache = buildFunctionBlocksMap cfg
+ callerMapCache = buildCallerMap cfg funcBlocksCache
+
+ lookupBlocks f = Map.findWithDefault [] f funcBlocksCache
+ lookupCallers f = Map.findWithDefault [] f callerMapCache
+
go summaries worklist
| Set.null worklist = summaries
| otherwise =
let (func, worklist') = Set.deleteFindMin worklist
- blockIdxs = functionBlocks cfg func
+ blockIdxs = lookupBlocks func
outState = runFunctionDataflowWithConfig tcfg cfg func
blockIdxs summaries
newSumm = FuncSummary outState
oldSumm = Map.findWithDefault initSummary func summaries
changed = newSumm /= oldSumm
summaries' = Map.insert func newSumm summaries
- callers = findCallers cfg func
+ callers = lookupCallers func
worklist'' = if changed
then foldr Set.insert worklist' callers
else worklist'
@@ -905,7 +916,7 @@ runFunctionBlocksWithEntry cfg (entryIdx:rest) summaries entryState =
propagate [] _ wl ins = (wl, ins)
propagate (s:ss) out wl ins =
- let oldIn = IM.findWithDefault entryState s ins
+ let oldIn = IM.findWithDefault emptyTaintState s ins
newIn = joinTaintState oldIn out
ins' = IM.insert s newIn ins
wl' = if oldIn /= newIn then IS.insert s wl else wl