auditor

An aarch64 constant-time memory access auditing tool.
git clone git://git.ppad.tech/auditor.git
Log | Files | Refs | README | LICENSE

GHC.hs (12904B)


      1 {-# OPTIONS_HADDOCK prune #-}
      2 {-# LANGUAGE OverloadedStrings #-}
      3 
      4 -- |
      5 -- Module: Audit.AArch64.Runtime.GHC
      6 -- Copyright: (c) 2025 Jared Tobin
      7 -- License: MIT
      8 -- Maintainer: jared@ppad.tech
      9 --
     10 -- GHC runtime configuration for the AArch64 auditor.
     11 --
     12 -- Provides 'ghcRuntime' which reproduces all current
     13 -- GHC-specific behaviour, and 'genericRuntime' as a
     14 -- baseline for C/Rust/Go runtimes.
     15 
     16 module Audit.AArch64.Runtime.GHC (
     17     -- * Runtime configurations
     18     ghcRuntime
     19   , genericRuntime
     20     -- * GHC-specific utilities
     21   , ghcIsLocalLabel
     22   , isGhcRuntimeFinding
     23   , zEncodeSymbol
     24   , zEncodePart
     25   ) where
     26 
     27 import Audit.AArch64.Runtime
     28   (RuntimeConfig(..), SecondaryStack(..))
     29 import Audit.AArch64.Types
     30 import qualified Data.IntMap.Strict as IntMap
     31 import Data.Text (Text)
     32 import qualified Data.Text as T
     33 
     34 -- | GHC runtime configuration.
     35 --
     36 -- Reproduces all current GHC/STG-specific behaviour:
     37 --
     38 -- * Public roots include GHC's STG registers
     39 -- * Secondary stack via X20 (STG Sp)
     40 -- * NCG local label filtering
     41 -- * Pointer untagging masks
     42 -- * GHC runtime NCT pattern filtering
     43 -- * Z-encoding for symbol lookup
     44 ghcRuntime :: RuntimeConfig
     45 ghcRuntime = RuntimeConfig
     46   { rtPublicRoots = ghcPublicRoots
     47   , rtSecondaryStack =
     48       Just (SecondaryStack X20 True)
     49   , rtIsLocalLabel = ghcIsLocalLabel
     50   , rtUntagMasks =
     51       [ 0xfffffffffffffff8  -- 64-bit mask
     52       , 0xfffffff8          -- 32-bit mask
     53       , -8                  -- signed representation
     54       ]
     55   , rtFilterNct = isGhcRuntimeFinding
     56   , rtEncodeSymbol = Just zEncodeSymbol
     57   }
     58 
     59 -- | Generic runtime configuration.
     60 --
     61 -- Baseline with no secondary stack, no untagging, no NCT
     62 -- filtering. Useful for C/Rust/Go as a starting point.
     63 genericRuntime :: RuntimeConfig
     64 genericRuntime = RuntimeConfig
     65   { rtPublicRoots =
     66       [ SP, X29   -- Hardware stack/frame pointers
     67       , XZR, WZR  -- Zero registers
     68       ]
     69   , rtSecondaryStack = Nothing
     70   , rtIsLocalLabel = const False
     71   , rtUntagMasks = []
     72   , rtFilterNct = \_ _ -> False
     73   , rtEncodeSymbol = Nothing
     74   }
     75 
     76 -- | GHC 9.10.3 AArch64 public root registers.
     77 ghcPublicRoots :: [Reg]
     78 ghcPublicRoots =
     79   [ SP, X29   -- Hardware stack/frame pointers
     80   , X19       -- GHC BaseReg (capabilities/TSO pointer)
     81   , X20       -- GHC Sp (STG stack pointer)
     82   , X21       -- GHC Hp (heap pointer)
     83   , X22       -- GHC HpLim (heap limit)
     84   , X28       -- GHC SpLim (stack limit)
     85   , X18       -- TLS (Darwin platform register)
     86   , XZR, WZR  -- Zero registers
     87   ]
     88 
     89 -- | Check if a label is a GHC NCG-specific local label.
     90 --
     91 -- NCG local labels start with Lc, Ls, Lu, or _L followed
     92 -- by a lowercase letter.
     93 ghcIsLocalLabel :: Text -> Bool
     94 ghcIsLocalLabel lbl
     95   | T.isPrefixOf "Lc" lbl   = True
     96   | T.isPrefixOf "Ls" lbl   = True
     97   | T.isPrefixOf "Lu" lbl   = True
     98   | isNCGInternal lbl        = True
     99   | otherwise                = False
    100 
    101 -- | Check if a label is an NCG-internal label.
    102 -- These start with _L followed by a lowercase letter.
    103 isNCGInternal :: Text -> Bool
    104 isNCGInternal lbl = case T.unpack lbl of
    105   '_':'L':c:_ -> c >= 'a' && c <= 'z'
    106   _           -> False
    107 
    108 -- | Check if a finding is a GHC runtime pattern.
    109 --
    110 -- Recognizes heap checks, tag checks, CAF checks,
    111 -- closure entry, dictionary calls, RTS calls, and
    112 -- closure table lookups.
    113 isGhcRuntimeFinding :: LineMap -> NctFinding -> Bool
    114 isGhcRuntimeFinding lineMap f = case nctReason f of
    115   CondBranch     -> isHeapCheck || isNurseryCheck
    116                  || isTagCheck || isCafCheck
    117                  || isArityCheck
    118   IndirectBranch -> isClosureEntry || isDictCall
    119                  || isRtsCall
    120   RegIndexAddr   -> isClosureTableLookup
    121   _              -> False
    122   where
    123     -- Heap check: cmp <r>, x28
    124     isHeapCheck :: Bool
    125     isHeapCheck = case prevInstr of
    126       Just (Cmp _ (OpReg X28)) -> True
    127       _                        -> False
    128 
    129     -- Nursery check: cmp + recent ldr from [x19, #offset]
    130     isNurseryCheck :: Bool
    131     isNurseryCheck = case prevInstr of
    132       Just (Cmp _ (OpReg _)) ->
    133         any isBaseRegLoad recentInstrs5
    134       _ -> False
    135 
    136     isBaseRegLoad :: Instr -> Bool
    137     isBaseRegLoad instr = case instr of
    138       Ldr _ (BaseImm X19 _)  -> True
    139       Ldur _ (BaseImm X19 _) -> True
    140       _                      -> False
    141 
    142     -- Tag check patterns
    143     isTagCheck :: Bool
    144     isTagCheck = case prevInstr of
    145       Just (Tst _ (OpImm 7)) -> True
    146       Just (Cmp _ (OpImm _)) ->
    147         any isTagMask recentInstrs5
    148       Just (Cmp r1 (OpReg r2)) ->
    149         any (isTagMaskFor r1) recentInstrs5
    150         || any (isTagMaskFor r2) recentInstrs5
    151       _ -> case nctInstr f of
    152         Cbnz reg _ ->
    153           any (isTagMaskFor reg) recentInstrs5
    154         Cbz reg _ ->
    155           any (isTagMaskFor reg) recentInstrs5
    156         _ -> False
    157 
    158     isTagMask :: Instr -> Bool
    159     isTagMask instr = case instr of
    160       And _ _ (OpImm 7) -> True
    161       _                 -> False
    162 
    163     isTagMaskFor :: Reg -> Instr -> Bool
    164     isTagMaskFor reg instr = case instr of
    165       And r _ (OpImm 7) -> r == reg
    166       _                 -> False
    167 
    168     -- Arity/closure-type check
    169     isArityCheck :: Bool
    170     isArityCheck = case prevInstr of
    171       Just (Cmp _ (OpImm _)) ->
    172         any isInfoTableLoad recentInstrs5
    173       Just (Cmp r1 (OpReg r2)) ->
    174         any (isInfoTableLoadFor r1) recentInstrs5
    175         || any (isInfoTableLoadFor r2) recentInstrs5
    176       _ -> False
    177 
    178     isInfoTableLoad :: Instr -> Bool
    179     isInfoTableLoad instr = case instr of
    180       Ldur _ (BaseImm _ off) -> off < 0
    181       Ldr _ (BaseImm _ off)  -> off < 0
    182       _                      -> False
    183 
    184     isInfoTableLoadFor :: Reg -> Instr -> Bool
    185     isInfoTableLoadFor reg instr = case instr of
    186       Ldur r (BaseImm _ off) ->
    187         samePhysicalReg r reg && off < 0
    188       Ldr r (BaseImm _ off) ->
    189         samePhysicalReg r reg && off < 0
    190       _ -> False
    191 
    192     -- CAF check: cbz/cbnz x0 after bl _newCAF
    193     isCafCheck :: Bool
    194     isCafCheck = case nctInstr f of
    195       Cbz X0 _  -> any isCafCall recentInstrs5
    196       Cbnz X0 _ -> any isCafCall recentInstrs5
    197       Cbz reg _ -> any isCafCall recentInstrs5
    198         && any (isCafResultMove reg) recentInstrs5
    199       Cbnz reg _ -> any isCafCall recentInstrs5
    200         && any (isCafResultMove reg) recentInstrs5
    201       _ -> False
    202 
    203     isCafCall :: Instr -> Bool
    204     isCafCall instr = case instr of
    205       Bl label -> "_newCAF" `T.isSuffixOf` label
    206       _        -> False
    207 
    208     isCafResultMove :: Reg -> Instr -> Bool
    209     isCafResultMove reg instr = case instr of
    210       Mov r (OpReg X0) -> r == reg
    211       _                -> False
    212 
    213     -- Closure entry: ldr/ldur to reg, then blr/br reg
    214     isClosureEntry :: Bool
    215     isClosureEntry = case nctInstr f of
    216       Blr reg -> any (isRegLoad reg) bbInstrs
    217       Br reg  -> any (isRegLoad reg) bbInstrs
    218       _       -> False
    219 
    220     -- Dictionary/vtable call
    221     isDictCall :: Bool
    222     isDictCall = case nctInstr f of
    223       Blr reg -> any (isDictLoad reg) bbInstrs
    224       Br reg  -> any (isDictLoad reg) bbInstrs
    225       _       -> False
    226 
    227     isRegLoad :: Reg -> Instr -> Bool
    228     isRegLoad reg instr = case instr of
    229       Ldr r (BaseImm _ _)      -> r == reg
    230       Ldr r (PreIndex _ _)     -> r == reg
    231       Ldr r (PostIndex _ _)    -> r == reg
    232       Ldur r (BaseImm _ _)     -> r == reg
    233       Ldp r1 r2 (BaseImm _ _) ->
    234         r1 == reg || r2 == reg
    235       _ -> False
    236 
    237     isDictLoad :: Reg -> Instr -> Bool
    238     isDictLoad reg instr = case instr of
    239       Ldr r (BaseImm _ off)  ->
    240         r == reg && off > 0
    241       Ldr r (PreIndex _ off) ->
    242         r == reg && off > 0
    243       Ldr r (PostIndex _ off) ->
    244         r == reg && off > 0
    245       _ -> False
    246 
    247     -- RTS call: adrp <reg>, _stg_* or STG stack load
    248     isRtsCall :: Bool
    249     isRtsCall = case nctInstr f of
    250       Blr reg -> any (isRtsSymbolLoad reg) bbInstrs
    251         || any (isStgStackLoad reg) bbInstrs
    252       Br reg -> any (isRtsSymbolLoad reg) bbInstrs
    253         || any (isStgStackLoad reg) bbInstrs
    254       _ -> False
    255 
    256     isRtsSymbolLoad :: Reg -> Instr -> Bool
    257     isRtsSymbolLoad reg instr = case instr of
    258       Adrp r label ->
    259         r == reg && "_stg_" `T.isInfixOf` label
    260       _ -> False
    261 
    262     isStgStackLoad :: Reg -> Instr -> Bool
    263     isStgStackLoad reg instr = case instr of
    264       Ldr r (BaseImm X20 _)  -> r == reg
    265       Ldr r (BaseImm X19 _)  -> r == reg
    266       Ldur r (BaseImm X20 _) -> r == reg
    267       Ldur r (BaseImm X19 _) -> r == reg
    268       _                      -> False
    269 
    270     -- Closure table lookup
    271     isClosureTableLookup :: Bool
    272     isClosureTableLookup = case getBaseReg (nctInstr f) of
    273       Nothing  -> False
    274       Just reg ->
    275         any (isClosureTableLoad reg) bbInstrs
    276 
    277     getBaseReg :: Instr -> Maybe Reg
    278     getBaseReg instr = case instr of
    279       Ldr _ addr  -> baseOfRegIndex addr
    280       Ldrb _ addr -> baseOfRegIndex addr
    281       Ldrh _ addr -> baseOfRegIndex addr
    282       Str _ addr  -> baseOfRegIndex addr
    283       Strb _ addr -> baseOfRegIndex addr
    284       Strh _ addr -> baseOfRegIndex addr
    285       _           -> Nothing
    286 
    287     baseOfRegIndex :: AddrMode -> Maybe Reg
    288     baseOfRegIndex addr = case addr of
    289       BaseReg base _        -> Just base
    290       BaseRegShift base _ _ -> Just base
    291       BaseRegExtend base _ _ -> Just base
    292       _                     -> Nothing
    293 
    294     isClosureTableLoad :: Reg -> Instr -> Bool
    295     isClosureTableLoad reg instr = case instr of
    296       Adrp r label ->
    297         samePhysicalReg r reg
    298         && "_closure_tbl" `T.isInfixOf` label
    299       _ -> False
    300 
    301     -- Helpers for line context
    302     prevInstr :: Maybe Instr
    303     prevInstr = do
    304       prevLine <- IntMap.lookup
    305         (nctLine f - 1) lineMap
    306       lineInstr prevLine
    307 
    308     recentInstrs5 :: [Instr]
    309     recentInstrs5 = recentInstrs 5
    310 
    311     recentInstrs :: Int -> [Instr]
    312     recentInstrs n =
    313       [ instr
    314       | offset <- [1..n]
    315       , Just ln <- [IntMap.lookup
    316           (nctLine f - offset) lineMap]
    317       , Just instr <- [lineInstr ln]
    318       ]
    319 
    320     -- All instructions in current basic block
    321     bbInstrs :: [Instr]
    322     bbInstrs = go (nctLine f - 1)
    323       where
    324         go lnum
    325           | lnum <= 0 = []
    326           | otherwise =
    327               case IntMap.lookup lnum lineMap of
    328                 Nothing -> []
    329                 Just ln -> case lineLabel ln of
    330                   Just lbl | isBBLabel lbl -> []
    331                   _ -> case lineInstr ln of
    332                     Just instr ->
    333                       instr : go (lnum - 1)
    334                     Nothing -> go (lnum - 1)
    335 
    336     isBBLabel :: Text -> Bool
    337     isBBLabel lbl = "LBB" `T.isPrefixOf` lbl
    338 
    339 -- | Check if two registers refer to the same physical
    340 -- register. W and X variants of the same number are the
    341 -- same physical register.
    342 samePhysicalReg :: Reg -> Reg -> Bool
    343 samePhysicalReg r1 r2 =
    344   r1 == r2 || physNum r1 == physNum r2
    345   where
    346     physNum :: Reg -> Maybe Int
    347     physNum r = case r of
    348       X0  -> Just 0;  W0  -> Just 0
    349       X1  -> Just 1;  W1  -> Just 1
    350       X2  -> Just 2;  W2  -> Just 2
    351       X3  -> Just 3;  W3  -> Just 3
    352       X4  -> Just 4;  W4  -> Just 4
    353       X5  -> Just 5;  W5  -> Just 5
    354       X6  -> Just 6;  W6  -> Just 6
    355       X7  -> Just 7;  W7  -> Just 7
    356       X8  -> Just 8;  W8  -> Just 8
    357       X9  -> Just 9;  W9  -> Just 9
    358       X10 -> Just 10; W10 -> Just 10
    359       X11 -> Just 11; W11 -> Just 11
    360       X12 -> Just 12; W12 -> Just 12
    361       X13 -> Just 13; W13 -> Just 13
    362       X14 -> Just 14; W14 -> Just 14
    363       X15 -> Just 15; W15 -> Just 15
    364       X16 -> Just 16; W16 -> Just 16
    365       X17 -> Just 17; W17 -> Just 17
    366       X18 -> Just 18; W18 -> Just 18
    367       X19 -> Just 19; W19 -> Just 19
    368       X20 -> Just 20; W20 -> Just 20
    369       X21 -> Just 21; W21 -> Just 21
    370       X22 -> Just 22; W22 -> Just 22
    371       X23 -> Just 23; W23 -> Just 23
    372       X24 -> Just 24; W24 -> Just 24
    373       X25 -> Just 25; W25 -> Just 25
    374       X26 -> Just 26; W26 -> Just 26
    375       X27 -> Just 27; W27 -> Just 27
    376       X28 -> Just 28; W28 -> Just 28
    377       X29 -> Just 29; W29 -> Just 29
    378       X30 -> Just 30; W30 -> Just 30
    379       _   -> Nothing
    380 
    381 -- | Z-encode a human-readable Haskell symbol for GHC
    382 -- assembly lookup.
    383 --
    384 -- Input format: @\<package\>:\<Module.Path\>:\<identifier\>@
    385 --
    386 -- Output: @_\<z-pkg\>_\<z-mod\>_\<z-id\>_info$def@
    387 zEncodeSymbol :: Text -> Either Text Text
    388 zEncodeSymbol input =
    389   case T.splitOn ":" input of
    390     [pkg, modPath, ident] ->
    391       let encoded = T.intercalate "_"
    392             [ zEncodePart pkg
    393             , zEncodePart modPath
    394             , zEncodePart ident
    395             ]
    396       in  Right ("_" <> encoded <> "_info$def")
    397     parts ->
    398       Left $ "Invalid symbol format: expected "
    399         <> "<package>:<Module.Path>:<id>, got "
    400         <> T.pack (show (length parts))
    401         <> " parts"
    402 
    403 -- | Z-encode a single component.
    404 zEncodePart :: Text -> Text
    405 zEncodePart = T.concatMap encodeChar
    406   where
    407     encodeChar c = case c of
    408       '-'  -> "zm"
    409       '.'  -> "zi"
    410       '_'  -> "zu"
    411       'z'  -> "zz"
    412       'Z'  -> "ZZ"
    413       '$'  -> "zd"
    414       '\'' -> "zq"
    415       '#'  -> "zh"
    416       _    -> T.singleton c