GHC.hs (12904B)
1 {-# OPTIONS_HADDOCK prune #-} 2 {-# LANGUAGE OverloadedStrings #-} 3 4 -- | 5 -- Module: Audit.AArch64.Runtime.GHC 6 -- Copyright: (c) 2025 Jared Tobin 7 -- License: MIT 8 -- Maintainer: jared@ppad.tech 9 -- 10 -- GHC runtime configuration for the AArch64 auditor. 11 -- 12 -- Provides 'ghcRuntime' which reproduces all current 13 -- GHC-specific behaviour, and 'genericRuntime' as a 14 -- baseline for C/Rust/Go runtimes. 15 16 module Audit.AArch64.Runtime.GHC ( 17 -- * Runtime configurations 18 ghcRuntime 19 , genericRuntime 20 -- * GHC-specific utilities 21 , ghcIsLocalLabel 22 , isGhcRuntimeFinding 23 , zEncodeSymbol 24 , zEncodePart 25 ) where 26 27 import Audit.AArch64.Runtime 28 (RuntimeConfig(..), SecondaryStack(..)) 29 import Audit.AArch64.Types 30 import qualified Data.IntMap.Strict as IntMap 31 import Data.Text (Text) 32 import qualified Data.Text as T 33 34 -- | GHC runtime configuration. 35 -- 36 -- Reproduces all current GHC/STG-specific behaviour: 37 -- 38 -- * Public roots include GHC's STG registers 39 -- * Secondary stack via X20 (STG Sp) 40 -- * NCG local label filtering 41 -- * Pointer untagging masks 42 -- * GHC runtime NCT pattern filtering 43 -- * Z-encoding for symbol lookup 44 ghcRuntime :: RuntimeConfig 45 ghcRuntime = RuntimeConfig 46 { rtPublicRoots = ghcPublicRoots 47 , rtSecondaryStack = 48 Just (SecondaryStack X20 True) 49 , rtIsLocalLabel = ghcIsLocalLabel 50 , rtUntagMasks = 51 [ 0xfffffffffffffff8 -- 64-bit mask 52 , 0xfffffff8 -- 32-bit mask 53 , -8 -- signed representation 54 ] 55 , rtFilterNct = isGhcRuntimeFinding 56 , rtEncodeSymbol = Just zEncodeSymbol 57 } 58 59 -- | Generic runtime configuration. 60 -- 61 -- Baseline with no secondary stack, no untagging, no NCT 62 -- filtering. Useful for C/Rust/Go as a starting point. 63 genericRuntime :: RuntimeConfig 64 genericRuntime = RuntimeConfig 65 { rtPublicRoots = 66 [ SP, X29 -- Hardware stack/frame pointers 67 , XZR, WZR -- Zero registers 68 ] 69 , rtSecondaryStack = Nothing 70 , rtIsLocalLabel = const False 71 , rtUntagMasks = [] 72 , rtFilterNct = \_ _ -> False 73 , rtEncodeSymbol = Nothing 74 } 75 76 -- | GHC 9.10.3 AArch64 public root registers. 77 ghcPublicRoots :: [Reg] 78 ghcPublicRoots = 79 [ SP, X29 -- Hardware stack/frame pointers 80 , X19 -- GHC BaseReg (capabilities/TSO pointer) 81 , X20 -- GHC Sp (STG stack pointer) 82 , X21 -- GHC Hp (heap pointer) 83 , X22 -- GHC HpLim (heap limit) 84 , X28 -- GHC SpLim (stack limit) 85 , X18 -- TLS (Darwin platform register) 86 , XZR, WZR -- Zero registers 87 ] 88 89 -- | Check if a label is a GHC NCG-specific local label. 90 -- 91 -- NCG local labels start with Lc, Ls, Lu, or _L followed 92 -- by a lowercase letter. 93 ghcIsLocalLabel :: Text -> Bool 94 ghcIsLocalLabel lbl 95 | T.isPrefixOf "Lc" lbl = True 96 | T.isPrefixOf "Ls" lbl = True 97 | T.isPrefixOf "Lu" lbl = True 98 | isNCGInternal lbl = True 99 | otherwise = False 100 101 -- | Check if a label is an NCG-internal label. 102 -- These start with _L followed by a lowercase letter. 103 isNCGInternal :: Text -> Bool 104 isNCGInternal lbl = case T.unpack lbl of 105 '_':'L':c:_ -> c >= 'a' && c <= 'z' 106 _ -> False 107 108 -- | Check if a finding is a GHC runtime pattern. 109 -- 110 -- Recognizes heap checks, tag checks, CAF checks, 111 -- closure entry, dictionary calls, RTS calls, and 112 -- closure table lookups. 113 isGhcRuntimeFinding :: LineMap -> NctFinding -> Bool 114 isGhcRuntimeFinding lineMap f = case nctReason f of 115 CondBranch -> isHeapCheck || isNurseryCheck 116 || isTagCheck || isCafCheck 117 || isArityCheck 118 IndirectBranch -> isClosureEntry || isDictCall 119 || isRtsCall 120 RegIndexAddr -> isClosureTableLookup 121 _ -> False 122 where 123 -- Heap check: cmp <r>, x28 124 isHeapCheck :: Bool 125 isHeapCheck = case prevInstr of 126 Just (Cmp _ (OpReg X28)) -> True 127 _ -> False 128 129 -- Nursery check: cmp + recent ldr from [x19, #offset] 130 isNurseryCheck :: Bool 131 isNurseryCheck = case prevInstr of 132 Just (Cmp _ (OpReg _)) -> 133 any isBaseRegLoad recentInstrs5 134 _ -> False 135 136 isBaseRegLoad :: Instr -> Bool 137 isBaseRegLoad instr = case instr of 138 Ldr _ (BaseImm X19 _) -> True 139 Ldur _ (BaseImm X19 _) -> True 140 _ -> False 141 142 -- Tag check patterns 143 isTagCheck :: Bool 144 isTagCheck = case prevInstr of 145 Just (Tst _ (OpImm 7)) -> True 146 Just (Cmp _ (OpImm _)) -> 147 any isTagMask recentInstrs5 148 Just (Cmp r1 (OpReg r2)) -> 149 any (isTagMaskFor r1) recentInstrs5 150 || any (isTagMaskFor r2) recentInstrs5 151 _ -> case nctInstr f of 152 Cbnz reg _ -> 153 any (isTagMaskFor reg) recentInstrs5 154 Cbz reg _ -> 155 any (isTagMaskFor reg) recentInstrs5 156 _ -> False 157 158 isTagMask :: Instr -> Bool 159 isTagMask instr = case instr of 160 And _ _ (OpImm 7) -> True 161 _ -> False 162 163 isTagMaskFor :: Reg -> Instr -> Bool 164 isTagMaskFor reg instr = case instr of 165 And r _ (OpImm 7) -> r == reg 166 _ -> False 167 168 -- Arity/closure-type check 169 isArityCheck :: Bool 170 isArityCheck = case prevInstr of 171 Just (Cmp _ (OpImm _)) -> 172 any isInfoTableLoad recentInstrs5 173 Just (Cmp r1 (OpReg r2)) -> 174 any (isInfoTableLoadFor r1) recentInstrs5 175 || any (isInfoTableLoadFor r2) recentInstrs5 176 _ -> False 177 178 isInfoTableLoad :: Instr -> Bool 179 isInfoTableLoad instr = case instr of 180 Ldur _ (BaseImm _ off) -> off < 0 181 Ldr _ (BaseImm _ off) -> off < 0 182 _ -> False 183 184 isInfoTableLoadFor :: Reg -> Instr -> Bool 185 isInfoTableLoadFor reg instr = case instr of 186 Ldur r (BaseImm _ off) -> 187 samePhysicalReg r reg && off < 0 188 Ldr r (BaseImm _ off) -> 189 samePhysicalReg r reg && off < 0 190 _ -> False 191 192 -- CAF check: cbz/cbnz x0 after bl _newCAF 193 isCafCheck :: Bool 194 isCafCheck = case nctInstr f of 195 Cbz X0 _ -> any isCafCall recentInstrs5 196 Cbnz X0 _ -> any isCafCall recentInstrs5 197 Cbz reg _ -> any isCafCall recentInstrs5 198 && any (isCafResultMove reg) recentInstrs5 199 Cbnz reg _ -> any isCafCall recentInstrs5 200 && any (isCafResultMove reg) recentInstrs5 201 _ -> False 202 203 isCafCall :: Instr -> Bool 204 isCafCall instr = case instr of 205 Bl label -> "_newCAF" `T.isSuffixOf` label 206 _ -> False 207 208 isCafResultMove :: Reg -> Instr -> Bool 209 isCafResultMove reg instr = case instr of 210 Mov r (OpReg X0) -> r == reg 211 _ -> False 212 213 -- Closure entry: ldr/ldur to reg, then blr/br reg 214 isClosureEntry :: Bool 215 isClosureEntry = case nctInstr f of 216 Blr reg -> any (isRegLoad reg) bbInstrs 217 Br reg -> any (isRegLoad reg) bbInstrs 218 _ -> False 219 220 -- Dictionary/vtable call 221 isDictCall :: Bool 222 isDictCall = case nctInstr f of 223 Blr reg -> any (isDictLoad reg) bbInstrs 224 Br reg -> any (isDictLoad reg) bbInstrs 225 _ -> False 226 227 isRegLoad :: Reg -> Instr -> Bool 228 isRegLoad reg instr = case instr of 229 Ldr r (BaseImm _ _) -> r == reg 230 Ldr r (PreIndex _ _) -> r == reg 231 Ldr r (PostIndex _ _) -> r == reg 232 Ldur r (BaseImm _ _) -> r == reg 233 Ldp r1 r2 (BaseImm _ _) -> 234 r1 == reg || r2 == reg 235 _ -> False 236 237 isDictLoad :: Reg -> Instr -> Bool 238 isDictLoad reg instr = case instr of 239 Ldr r (BaseImm _ off) -> 240 r == reg && off > 0 241 Ldr r (PreIndex _ off) -> 242 r == reg && off > 0 243 Ldr r (PostIndex _ off) -> 244 r == reg && off > 0 245 _ -> False 246 247 -- RTS call: adrp <reg>, _stg_* or STG stack load 248 isRtsCall :: Bool 249 isRtsCall = case nctInstr f of 250 Blr reg -> any (isRtsSymbolLoad reg) bbInstrs 251 || any (isStgStackLoad reg) bbInstrs 252 Br reg -> any (isRtsSymbolLoad reg) bbInstrs 253 || any (isStgStackLoad reg) bbInstrs 254 _ -> False 255 256 isRtsSymbolLoad :: Reg -> Instr -> Bool 257 isRtsSymbolLoad reg instr = case instr of 258 Adrp r label -> 259 r == reg && "_stg_" `T.isInfixOf` label 260 _ -> False 261 262 isStgStackLoad :: Reg -> Instr -> Bool 263 isStgStackLoad reg instr = case instr of 264 Ldr r (BaseImm X20 _) -> r == reg 265 Ldr r (BaseImm X19 _) -> r == reg 266 Ldur r (BaseImm X20 _) -> r == reg 267 Ldur r (BaseImm X19 _) -> r == reg 268 _ -> False 269 270 -- Closure table lookup 271 isClosureTableLookup :: Bool 272 isClosureTableLookup = case getBaseReg (nctInstr f) of 273 Nothing -> False 274 Just reg -> 275 any (isClosureTableLoad reg) bbInstrs 276 277 getBaseReg :: Instr -> Maybe Reg 278 getBaseReg instr = case instr of 279 Ldr _ addr -> baseOfRegIndex addr 280 Ldrb _ addr -> baseOfRegIndex addr 281 Ldrh _ addr -> baseOfRegIndex addr 282 Str _ addr -> baseOfRegIndex addr 283 Strb _ addr -> baseOfRegIndex addr 284 Strh _ addr -> baseOfRegIndex addr 285 _ -> Nothing 286 287 baseOfRegIndex :: AddrMode -> Maybe Reg 288 baseOfRegIndex addr = case addr of 289 BaseReg base _ -> Just base 290 BaseRegShift base _ _ -> Just base 291 BaseRegExtend base _ _ -> Just base 292 _ -> Nothing 293 294 isClosureTableLoad :: Reg -> Instr -> Bool 295 isClosureTableLoad reg instr = case instr of 296 Adrp r label -> 297 samePhysicalReg r reg 298 && "_closure_tbl" `T.isInfixOf` label 299 _ -> False 300 301 -- Helpers for line context 302 prevInstr :: Maybe Instr 303 prevInstr = do 304 prevLine <- IntMap.lookup 305 (nctLine f - 1) lineMap 306 lineInstr prevLine 307 308 recentInstrs5 :: [Instr] 309 recentInstrs5 = recentInstrs 5 310 311 recentInstrs :: Int -> [Instr] 312 recentInstrs n = 313 [ instr 314 | offset <- [1..n] 315 , Just ln <- [IntMap.lookup 316 (nctLine f - offset) lineMap] 317 , Just instr <- [lineInstr ln] 318 ] 319 320 -- All instructions in current basic block 321 bbInstrs :: [Instr] 322 bbInstrs = go (nctLine f - 1) 323 where 324 go lnum 325 | lnum <= 0 = [] 326 | otherwise = 327 case IntMap.lookup lnum lineMap of 328 Nothing -> [] 329 Just ln -> case lineLabel ln of 330 Just lbl | isBBLabel lbl -> [] 331 _ -> case lineInstr ln of 332 Just instr -> 333 instr : go (lnum - 1) 334 Nothing -> go (lnum - 1) 335 336 isBBLabel :: Text -> Bool 337 isBBLabel lbl = "LBB" `T.isPrefixOf` lbl 338 339 -- | Check if two registers refer to the same physical 340 -- register. W and X variants of the same number are the 341 -- same physical register. 342 samePhysicalReg :: Reg -> Reg -> Bool 343 samePhysicalReg r1 r2 = 344 r1 == r2 || physNum r1 == physNum r2 345 where 346 physNum :: Reg -> Maybe Int 347 physNum r = case r of 348 X0 -> Just 0; W0 -> Just 0 349 X1 -> Just 1; W1 -> Just 1 350 X2 -> Just 2; W2 -> Just 2 351 X3 -> Just 3; W3 -> Just 3 352 X4 -> Just 4; W4 -> Just 4 353 X5 -> Just 5; W5 -> Just 5 354 X6 -> Just 6; W6 -> Just 6 355 X7 -> Just 7; W7 -> Just 7 356 X8 -> Just 8; W8 -> Just 8 357 X9 -> Just 9; W9 -> Just 9 358 X10 -> Just 10; W10 -> Just 10 359 X11 -> Just 11; W11 -> Just 11 360 X12 -> Just 12; W12 -> Just 12 361 X13 -> Just 13; W13 -> Just 13 362 X14 -> Just 14; W14 -> Just 14 363 X15 -> Just 15; W15 -> Just 15 364 X16 -> Just 16; W16 -> Just 16 365 X17 -> Just 17; W17 -> Just 17 366 X18 -> Just 18; W18 -> Just 18 367 X19 -> Just 19; W19 -> Just 19 368 X20 -> Just 20; W20 -> Just 20 369 X21 -> Just 21; W21 -> Just 21 370 X22 -> Just 22; W22 -> Just 22 371 X23 -> Just 23; W23 -> Just 23 372 X24 -> Just 24; W24 -> Just 24 373 X25 -> Just 25; W25 -> Just 25 374 X26 -> Just 26; W26 -> Just 26 375 X27 -> Just 27; W27 -> Just 27 376 X28 -> Just 28; W28 -> Just 28 377 X29 -> Just 29; W29 -> Just 29 378 X30 -> Just 30; W30 -> Just 30 379 _ -> Nothing 380 381 -- | Z-encode a human-readable Haskell symbol for GHC 382 -- assembly lookup. 383 -- 384 -- Input format: @\<package\>:\<Module.Path\>:\<identifier\>@ 385 -- 386 -- Output: @_\<z-pkg\>_\<z-mod\>_\<z-id\>_info$def@ 387 zEncodeSymbol :: Text -> Either Text Text 388 zEncodeSymbol input = 389 case T.splitOn ":" input of 390 [pkg, modPath, ident] -> 391 let encoded = T.intercalate "_" 392 [ zEncodePart pkg 393 , zEncodePart modPath 394 , zEncodePart ident 395 ] 396 in Right ("_" <> encoded <> "_info$def") 397 parts -> 398 Left $ "Invalid symbol format: expected " 399 <> "<package>:<Module.Path>:<id>, got " 400 <> T.pack (show (length parts)) 401 <> " parts" 402 403 -- | Z-encode a single component. 404 zEncodePart :: Text -> Text 405 zEncodePart = T.concatMap encodeChar 406 where 407 encodeChar c = case c of 408 '-' -> "zm" 409 '.' -> "zi" 410 '_' -> "zu" 411 'z' -> "zz" 412 'Z' -> "ZZ" 413 '$' -> "zd" 414 '\'' -> "zq" 415 '#' -> "zh" 416 _ -> T.singleton c