auditor

An aarch64 constant-time memory access auditing tool.
git clone git://git.ppad.tech/auditor.git
Log | Files | Refs | README | LICENSE

Parser.hs (19697B)


      1 {-# OPTIONS_HADDOCK prune #-}
      2 {-# LANGUAGE OverloadedStrings #-}
      3 
      4 -- |
      5 -- Module: Audit.AArch64.Parser
      6 -- Copyright: (c) 2025 Jared Tobin
      7 -- License: MIT
      8 -- Maintainer: jared@ppad.tech
      9 --
     10 -- Parser for AArch64 assembly.
     11 
     12 module Audit.AArch64.Parser (
     13     parseAsm
     14   , ParseError
     15   , showParseError
     16   ) where
     17 
     18 import Audit.AArch64.Types
     19 import Control.Monad (void)
     20 import Data.Char (isAlphaNum)
     21 import Data.Text (Text)
     22 import qualified Data.Text as T
     23 import Data.Void (Void)
     24 import Text.Megaparsec hiding (ParseError)
     25 import Text.Megaparsec.Char
     26 import qualified Text.Megaparsec.Char.Lexer as L
     27 
     28 type Parser = Parsec Void Text
     29 type ParseError = ParseErrorBundle Text Void
     30 
     31 -- | Parse assembly source into a list of lines.
     32 parseAsm :: Text -> Either ParseError [Line]
     33 parseAsm = parse (many pLine <* eof) "<asm>"
     34 
     35 -- | Format a parse error succinctly (no giant input dump).
     36 showParseError :: ParseError -> String
     37 showParseError = errorBundlePretty
     38 
     39 -- Space consumer (skips horizontal whitespace and comments, not newlines).
     40 sc :: Parser ()
     41 sc = L.space
     42   hspace1
     43   (L.skipLineComment "//")
     44   (L.skipBlockComment "/*" "*/")
     45 
     46 -- Lexeme wrapper.
     47 lexeme :: Parser a -> Parser a
     48 lexeme = L.lexeme sc
     49 
     50 -- Parse a single line.
     51 pLine :: Parser Line
     52 pLine = do
     53   pos <- getSourcePos
     54   let ln = unPos (sourceLine pos)
     55   hspace  -- Skip leading horizontal whitespace only
     56   choice
     57     [ pDirectiveLine ln
     58     , pLabelOrInstrLine ln
     59     , pEmptyLine ln
     60     ]
     61 
     62 pEmptyLine :: Int -> Parser Line
     63 pEmptyLine ln = do
     64   void (optional pComment)
     65   void eol  -- Must consume actual newline
     66   pure (Line ln Nothing Nothing)
     67 
     68 pComment :: Parser ()
     69 pComment = do
     70   void (choice [string "//", string ";", string "@"])
     71   void (takeWhileP Nothing (/= '\n'))
     72 
     73 pDirectiveLine :: Int -> Parser Line
     74 pDirectiveLine ln = do
     75   void (char '.')
     76   void (takeWhileP Nothing (\c -> c /= '\n'))
     77   void (optional eol)
     78   pure (Line ln Nothing Nothing)
     79 
     80 -- Combined parser for label or instruction lines.
     81 -- Parses the leading identifier once, then branches on ':'.
     82 pLabelOrInstrLine :: Int -> Parser Line
     83 pLabelOrInstrLine ln = do
     84   name <- pIdentifier
     85   mColon <- optional (char ':')
     86   case mColon of
     87     Just _ -> do
     88       -- Label line: name is the label
     89       hspace
     90       mInstr <- optional pInstr
     91       hspace
     92       void (optional pComment)
     93       void (optional eol)
     94       pure (Line ln (Just name) mInstr)
     95     Nothing -> do
     96       -- Instruction line: name must look like a valid mnemonic.
     97       -- Guard: mnemonics start with alphanumeric or '.' (not '_' etc.)
     98       case T.uncons name of
     99         Just (c, _) | isAlphaNum c || c == '.' -> do
    100           sc
    101           instr <- parseByMnemonic (T.toLower name)
    102           hspace
    103           void (optional pComment)
    104           void (optional eol)
    105           pure (Line ln Nothing (Just instr))
    106         _ -> fail "not a valid mnemonic"
    107 
    108 pIdentifier :: Parser Text
    109 pIdentifier = do
    110   c <- satisfy isIdentStart
    111   cs <- takeWhileP Nothing isIdentChar
    112   pure (T.cons c cs)
    113   where
    114     isIdentStart c = c == '_' || c == '.' || c == 'L' ||
    115                      (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
    116     -- GHC NCG uses many special chars in symbols (operators, types, etc.)
    117     -- Note: ':' excluded here (label terminator); use pSymbolIdent for refs
    118     isIdentChar c = isAlphaNum c || c `elem` ("_.$'#()[]+-*/<>=!|&^~" :: String)
    119 
    120 -- | Like pIdentifier but allows ':' for GHC operator symbols (e.g., _:_con_info)
    121 pSymbolIdent :: Parser Text
    122 pSymbolIdent = do
    123   c <- satisfy isIdentStart
    124   cs <- takeWhileP Nothing isIdentChar
    125   pure (T.cons c cs)
    126   where
    127     isIdentStart c = c == '_' || c == '.' || c == 'L' ||
    128                      (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
    129     -- Include ':' for GHC operator symbols like _:_con_info
    130     isIdentChar c = isAlphaNum c || c `elem` ("_.$'#()[]+-*/<>=!|&^~:" :: String)
    131 
    132 pInstr :: Parser Instr
    133 pInstr = do
    134   mnem <- lexeme pMnemonic
    135   parseByMnemonic mnem
    136 
    137 pMnemonic :: Parser Text
    138 pMnemonic = do
    139   cs <- takeWhile1P Nothing isIdentChar
    140   pure (T.toLower cs)
    141   where
    142     isIdentChar c = isAlphaNum c || c == '.'
    143 
    144 parseByMnemonic :: Text -> Parser Instr
    145 parseByMnemonic m = case m of
    146   -- Data movement
    147   "mov"  -> Mov <$> pReg <*> (pComma *> pOperand)
    148   "movz" -> Movz <$> pReg <*> (pComma *> pImm) <*> optional (pComma *> pShift)
    149   "movk" -> Movk <$> pReg <*> (pComma *> pImm) <*> optional (pComma *> pShift)
    150   "movn" -> Movn <$> pReg <*> (pComma *> pImm) <*> optional (pComma *> pShift)
    151 
    152   -- Arithmetic
    153   "add"  -> Add <$> pReg <*> (pComma *> pReg) <*> (pComma *> pOperand)
    154   "sub"  -> Sub <$> pReg <*> (pComma *> pReg) <*> (pComma *> pOperand)
    155   "adds" -> Adds <$> pReg <*> (pComma *> pReg) <*> (pComma *> pOperand)
    156   "subs" -> Subs <$> pReg <*> (pComma *> pReg) <*> (pComma *> pOperand)
    157   "adc"  -> Adc <$> pReg <*> (pComma *> pReg) <*> (pComma *> pReg)
    158   "adcs" -> Adcs <$> pReg <*> (pComma *> pReg) <*> (pComma *> pReg)
    159   "sbc"  -> Sbc <$> pReg <*> (pComma *> pReg) <*> (pComma *> pReg)
    160   "neg"  -> Neg <$> pReg <*> (pComma *> pOperand)
    161   "negs" -> Negs <$> pReg <*> (pComma *> pOperand)
    162   "mul"  -> Mul <$> pReg <*> (pComma *> pReg) <*> (pComma *> pReg)
    163   "mneg" -> Mneg <$> pReg <*> (pComma *> pReg) <*> (pComma *> pReg)
    164   "madd" -> Madd <$> pReg <*> (pComma *> pReg) <*> (pComma *> pReg)
    165                  <*> (pComma *> pReg)
    166   "msub" -> Msub <$> pReg <*> (pComma *> pReg) <*> (pComma *> pReg)
    167                  <*> (pComma *> pReg)
    168   "umulh" -> Umulh <$> pReg <*> (pComma *> pReg) <*> (pComma *> pReg)
    169   "smulh" -> Smulh <$> pReg <*> (pComma *> pReg) <*> (pComma *> pReg)
    170   "udiv" -> Udiv <$> pReg <*> (pComma *> pReg) <*> (pComma *> pReg)
    171   "sdiv" -> Sdiv <$> pReg <*> (pComma *> pReg) <*> (pComma *> pReg)
    172 
    173   -- Logical
    174   "and"  -> And <$> pReg <*> (pComma *> pReg) <*> (pComma *> pOperand)
    175   "orr"  -> Orr <$> pReg <*> (pComma *> pReg) <*> (pComma *> pOperand)
    176   "eor"  -> Eor <$> pReg <*> (pComma *> pReg) <*> (pComma *> pOperand)
    177   "bic"  -> Bic <$> pReg <*> (pComma *> pReg) <*> (pComma *> pOperand)
    178   "mvn"  -> Mvn <$> pReg <*> (pComma *> pOperand)
    179   "tst"  -> Tst <$> pReg <*> (pComma *> pOperand)
    180 
    181   -- Shifts
    182   "lsl"  -> Lsl <$> pReg <*> (pComma *> pReg) <*> (pComma *> pOperand)
    183   "lsr"  -> Lsr <$> pReg <*> (pComma *> pReg) <*> (pComma *> pOperand)
    184   "asr"  -> Asr <$> pReg <*> (pComma *> pReg) <*> (pComma *> pOperand)
    185   "ror"  -> Ror <$> pReg <*> (pComma *> pReg) <*> (pComma *> pOperand)
    186 
    187   -- Bit manipulation
    188   "ubfx" -> Ubfx <$> pReg <*> (pComma *> pReg) <*> (pComma *> pInt)
    189                  <*> (pComma *> pInt)
    190   "sbfx" -> Sbfx <$> pReg <*> (pComma *> pReg) <*> (pComma *> pInt)
    191                  <*> (pComma *> pInt)
    192   "bfi"  -> Bfi <$> pReg <*> (pComma *> pReg) <*> (pComma *> pInt)
    193                 <*> (pComma *> pInt)
    194   "extr" -> Extr <$> pReg <*> (pComma *> pReg) <*> (pComma *> pReg)
    195                  <*> (pComma *> pInt)
    196 
    197   -- Address generation
    198   "adr"  -> Adr <$> pReg <*> (pComma *> pSymbolRef)
    199   "adrp" -> Adrp <$> pReg <*> (pComma *> pSymbolRef)
    200 
    201   -- Load/Store
    202   "ldr"  -> Ldr <$> pReg <*> (pComma *> pAddrMode)
    203   "ldrb" -> Ldrb <$> pReg <*> (pComma *> pAddrMode)
    204   "ldrh" -> Ldrh <$> pReg <*> (pComma *> pAddrMode)
    205   "ldrsb" -> Ldrsb <$> pReg <*> (pComma *> pAddrMode)
    206   "ldrsh" -> Ldrsh <$> pReg <*> (pComma *> pAddrMode)
    207   "ldrsw" -> Ldrsw <$> pReg <*> (pComma *> pAddrMode)
    208   "ldur" -> Ldur <$> pReg <*> (pComma *> pAddrMode)
    209   "str"  -> Str <$> pReg <*> (pComma *> pAddrMode)
    210   "strb" -> Strb <$> pReg <*> (pComma *> pAddrMode)
    211   "strh" -> Strh <$> pReg <*> (pComma *> pAddrMode)
    212   "stur" -> Stur <$> pReg <*> (pComma *> pAddrMode)
    213   "ldp"  -> Ldp <$> pReg <*> (pComma *> pReg) <*> (pComma *> pAddrMode)
    214   "stp"  -> Stp <$> pReg <*> (pComma *> pReg) <*> (pComma *> pAddrMode)
    215 
    216   -- Acquire/release loads and stores
    217   "ldar"  -> Ldar <$> pReg <*> (pComma *> pAddrMode)
    218   "ldarb" -> Ldarb <$> pReg <*> (pComma *> pAddrMode)
    219   "ldarh" -> Ldarh <$> pReg <*> (pComma *> pAddrMode)
    220   "stlr"  -> Stlr <$> pReg <*> (pComma *> pAddrMode)
    221   "stlrb" -> Stlrb <$> pReg <*> (pComma *> pAddrMode)
    222   "stlrh" -> Stlrh <$> pReg <*> (pComma *> pAddrMode)
    223 
    224   -- Exclusive loads and stores
    225   "ldxr"  -> Ldxr <$> pReg <*> (pComma *> pAddrMode)
    226   "ldxrb" -> Ldxrb <$> pReg <*> (pComma *> pAddrMode)
    227   "ldxrh" -> Ldxrh <$> pReg <*> (pComma *> pAddrMode)
    228   "stxr"  -> Stxr <$> pReg <*> (pComma *> pReg) <*> (pComma *> pAddrMode)
    229   "stxrb" -> Stxrb <$> pReg <*> (pComma *> pReg) <*> (pComma *> pAddrMode)
    230   "stxrh" -> Stxrh <$> pReg <*> (pComma *> pReg) <*> (pComma *> pAddrMode)
    231 
    232   -- Acquire-exclusive loads and release-exclusive stores
    233   "ldaxr"  -> Ldaxr <$> pReg <*> (pComma *> pAddrMode)
    234   "ldaxrb" -> Ldaxrb <$> pReg <*> (pComma *> pAddrMode)
    235   "ldaxrh" -> Ldaxrh <$> pReg <*> (pComma *> pAddrMode)
    236   "stlxr"  -> Stlxr <$> pReg <*> (pComma *> pReg) <*> (pComma *> pAddrMode)
    237   "stlxrb" -> Stlxrb <$> pReg <*> (pComma *> pReg) <*> (pComma *> pAddrMode)
    238   "stlxrh" -> Stlxrh <$> pReg <*> (pComma *> pReg) <*> (pComma *> pAddrMode)
    239 
    240   -- Compare and select
    241   "cmp"  -> Cmp <$> pReg <*> (pComma *> pOperand)
    242   "cmn"  -> Cmn <$> pReg <*> (pComma *> pOperand)
    243   "csel" -> Csel <$> pReg <*> (pComma *> pReg) <*> (pComma *> pReg)
    244                  <*> (pComma *> pCondCode)
    245   "csinc" -> Csinc <$> pReg <*> (pComma *> pReg) <*> (pComma *> pReg)
    246                    <*> (pComma *> pCondCode)
    247   "csinv" -> Csinv <$> pReg <*> (pComma *> pReg) <*> (pComma *> pReg)
    248                    <*> (pComma *> pCondCode)
    249   "csneg" -> Csneg <$> pReg <*> (pComma *> pReg) <*> (pComma *> pReg)
    250                    <*> (pComma *> pCondCode)
    251   "cset" -> Cset <$> pReg <*> (pComma *> pCondCode)
    252   "cinc" -> Cinc <$> pReg <*> (pComma *> pReg) <*> (pComma *> pCondCode)
    253 
    254   -- Branches
    255   "b"    -> B <$> pSymbolRef
    256   "b.eq" -> BCond "eq" <$> pSymbolRef
    257   "b.ne" -> BCond "ne" <$> pSymbolRef
    258   "b.lt" -> BCond "lt" <$> pSymbolRef
    259   "b.le" -> BCond "le" <$> pSymbolRef
    260   "b.gt" -> BCond "gt" <$> pSymbolRef
    261   "b.ge" -> BCond "ge" <$> pSymbolRef
    262   "b.hi" -> BCond "hi" <$> pSymbolRef
    263   "b.hs" -> BCond "hs" <$> pSymbolRef
    264   "b.lo" -> BCond "lo" <$> pSymbolRef
    265   "b.ls" -> BCond "ls" <$> pSymbolRef
    266   "b.mi" -> BCond "mi" <$> pSymbolRef
    267   "b.pl" -> BCond "pl" <$> pSymbolRef
    268   "b.vs" -> BCond "vs" <$> pSymbolRef
    269   "b.vc" -> BCond "vc" <$> pSymbolRef
    270   "b.cc" -> BCond "cc" <$> pSymbolRef
    271   "b.cs" -> BCond "cs" <$> pSymbolRef
    272   "bl"   -> Bl <$> pSymbolRef
    273   "blr"  -> Blr <$> pReg
    274   "br"   -> Br <$> pReg
    275   "ret"  -> Ret <$> optional pReg
    276   "cbz"  -> Cbz <$> pReg <*> (pComma *> pSymbolRef)
    277   "cbnz" -> Cbnz <$> pReg <*> (pComma *> pSymbolRef)
    278   "tbz"  -> Tbz <$> pReg <*> (pComma *> pInt) <*> (pComma *> pSymbolRef)
    279   "tbnz" -> Tbnz <$> pReg <*> (pComma *> pInt) <*> (pComma *> pSymbolRef)
    280 
    281   -- System
    282   "nop"  -> pure Nop
    283   "svc"  -> Svc <$> pImm
    284 
    285   -- Fallback: store as Other
    286   _      -> Other m <$> pRemainingOperands
    287 
    288 pRemainingOperands :: Parser [Text]
    289 pRemainingOperands = do
    290   rest <- takeWhileP Nothing (\c -> c /= '\n' && c /= ';' && c /= '/')
    291   let trimmed = T.strip rest
    292   if T.null trimmed
    293     then pure []
    294     else pure (map T.strip (T.splitOn "," trimmed))
    295 
    296 pComma :: Parser ()
    297 pComma = void (lexeme (char ','))
    298 
    299 -- Dispatch on first character to avoid trying all register types.
    300 -- All alternatives use try to ensure pReg doesn't consume on total failure,
    301 -- allowing callers to try pSymbolRef or other alternatives.
    302 pReg :: Parser Reg
    303 pReg = lexeme $ do
    304   c <- lookAhead anySingle
    305   case c of
    306     'x' -> try pXReg <|> try (XZR <$ string' "xzr")
    307     'X' -> try pXReg <|> try (XZR <$ string' "xzr")
    308     'w' -> try pWReg <|> try (WZR <$ string' "wzr")
    309     'W' -> try pWReg <|> try (WZR <$ string' "wzr")
    310     'd' -> try pDReg
    311     'D' -> try pDReg
    312     's' -> try pSReg <|> try (SP <$ string' "sp")
    313     'S' -> try pSReg <|> try (SP <$ string' "sp")
    314     'q' -> try pQReg
    315     'Q' -> try pQReg
    316     'f' -> try (X29 <$ string' "fp")
    317     'F' -> try (X29 <$ string' "fp")
    318     'l' -> try (X30 <$ string' "lr")
    319     'L' -> try (X30 <$ string' "lr")
    320     _   -> fail "not a register"
    321 
    322 pXReg :: Parser Reg
    323 pXReg = do
    324   void (char' 'x')
    325   n <- L.decimal
    326   case n :: Int of
    327     0  -> pure X0;  1  -> pure X1;  2  -> pure X2;  3  -> pure X3
    328     4  -> pure X4;  5  -> pure X5;  6  -> pure X6;  7  -> pure X7
    329     8  -> pure X8;  9  -> pure X9;  10 -> pure X10; 11 -> pure X11
    330     12 -> pure X12; 13 -> pure X13; 14 -> pure X14; 15 -> pure X15
    331     16 -> pure X16; 17 -> pure X17; 18 -> pure X18; 19 -> pure X19
    332     20 -> pure X20; 21 -> pure X21; 22 -> pure X22; 23 -> pure X23
    333     24 -> pure X24; 25 -> pure X25; 26 -> pure X26; 27 -> pure X27
    334     28 -> pure X28; 29 -> pure X29; 30 -> pure X30
    335     _  -> fail "invalid x register number"
    336 
    337 pWReg :: Parser Reg
    338 pWReg = do
    339   void (char' 'w')
    340   n <- L.decimal
    341   case n :: Int of
    342     0  -> pure W0;  1  -> pure W1;  2  -> pure W2;  3  -> pure W3
    343     4  -> pure W4;  5  -> pure W5;  6  -> pure W6;  7  -> pure W7
    344     8  -> pure W8;  9  -> pure W9;  10 -> pure W10; 11 -> pure W11
    345     12 -> pure W12; 13 -> pure W13; 14 -> pure W14; 15 -> pure W15
    346     16 -> pure W16; 17 -> pure W17; 18 -> pure W18; 19 -> pure W19
    347     20 -> pure W20; 21 -> pure W21; 22 -> pure W22; 23 -> pure W23
    348     24 -> pure W24; 25 -> pure W25; 26 -> pure W26; 27 -> pure W27
    349     28 -> pure W28; 29 -> pure W29; 30 -> pure W30
    350     _  -> fail "invalid w register number"
    351 
    352 pDReg :: Parser Reg
    353 pDReg = do
    354   void (char' 'd')
    355   n <- L.decimal
    356   case n :: Int of
    357     0  -> pure D0;  1  -> pure D1;  2  -> pure D2;  3  -> pure D3
    358     4  -> pure D4;  5  -> pure D5;  6  -> pure D6;  7  -> pure D7
    359     8  -> pure D8;  9  -> pure D9;  10 -> pure D10; 11 -> pure D11
    360     12 -> pure D12; 13 -> pure D13; 14 -> pure D14; 15 -> pure D15
    361     16 -> pure D16; 17 -> pure D17; 18 -> pure D18; 19 -> pure D19
    362     20 -> pure D20; 21 -> pure D21; 22 -> pure D22; 23 -> pure D23
    363     24 -> pure D24; 25 -> pure D25; 26 -> pure D26; 27 -> pure D27
    364     28 -> pure D28; 29 -> pure D29; 30 -> pure D30; 31 -> pure D31
    365     _  -> fail "invalid d register number"
    366 
    367 pSReg :: Parser Reg
    368 pSReg = do
    369   void (char' 's')
    370   n <- L.decimal
    371   case n :: Int of
    372     0  -> pure S0;  1  -> pure S1;  2  -> pure S2;  3  -> pure S3
    373     4  -> pure S4;  5  -> pure S5;  6  -> pure S6;  7  -> pure S7
    374     8  -> pure S8;  9  -> pure S9;  10 -> pure S10; 11 -> pure S11
    375     12 -> pure S12; 13 -> pure S13; 14 -> pure S14; 15 -> pure S15
    376     16 -> pure S16; 17 -> pure S17; 18 -> pure S18; 19 -> pure S19
    377     20 -> pure S20; 21 -> pure S21; 22 -> pure S22; 23 -> pure S23
    378     24 -> pure S24; 25 -> pure S25; 26 -> pure S26; 27 -> pure S27
    379     28 -> pure S28; 29 -> pure S29; 30 -> pure S30; 31 -> pure S31
    380     _  -> fail "invalid s register number"
    381 
    382 pQReg :: Parser Reg
    383 pQReg = do
    384   void (char' 'q')
    385   n <- L.decimal
    386   case n :: Int of
    387     0  -> pure Q0;  1  -> pure Q1;  2  -> pure Q2;  3  -> pure Q3
    388     4  -> pure Q4;  5  -> pure Q5;  6  -> pure Q6;  7  -> pure Q7
    389     8  -> pure Q8;  9  -> pure Q9;  10 -> pure Q10; 11 -> pure Q11
    390     12 -> pure Q12; 13 -> pure Q13; 14 -> pure Q14; 15 -> pure Q15
    391     16 -> pure Q16; 17 -> pure Q17; 18 -> pure Q18; 19 -> pure Q19
    392     20 -> pure Q20; 21 -> pure Q21; 22 -> pure Q22; 23 -> pure Q23
    393     24 -> pure Q24; 25 -> pure Q25; 26 -> pure Q26; 27 -> pure Q27
    394     28 -> pure Q28; 29 -> pure Q29; 30 -> pure Q30; 31 -> pure Q31
    395     _  -> fail "invalid q register number"
    396 
    397 pOperand :: Parser Operand
    398 pOperand = choice
    399   [ try (OpAddr <$> pAddrMode)
    400   , try pRegOrShiftedReg
    401   , OpImm <$> pImm
    402   , OpLabel <$> pSymbolRef
    403   ]
    404 
    405 -- Parse register once, then optionally check for shift suffix.
    406 pRegOrShiftedReg :: Parser Operand
    407 pRegOrShiftedReg = do
    408   r <- pReg
    409   mShift <- optional (pComma *> pShift)
    410   pure $ case mShift of
    411     Just sh -> OpShiftedReg r sh
    412     Nothing -> OpReg r
    413 
    414 pImm :: Parser Integer
    415 pImm = lexeme $ do
    416   void (optional (char '#'))
    417   signed pNumber
    418   where
    419     pNumber = try pHex <|> L.decimal
    420     pHex = string' "0x" *> L.hexadecimal  -- case-insensitive
    421 
    422 signed :: Parser Integer -> Parser Integer
    423 signed p = do
    424   neg <- optional (char '-')
    425   n <- p
    426   case neg of
    427     Just _  -> pure (negate n)
    428     Nothing -> pure n
    429 
    430 pInt :: Parser Int
    431 pInt = lexeme $ do
    432   void (optional (char '#'))
    433   fromInteger <$> signed L.decimal
    434 
    435 pShift :: Parser Shift
    436 pShift = lexeme $ choice
    437   [ LSL <$> (string' "lsl" *> sc *> pShiftAmt)
    438   , LSR <$> (string' "lsr" *> sc *> pShiftAmt)
    439   , ASR <$> (string' "asr" *> sc *> pShiftAmt)
    440   ]
    441 
    442 pShiftAmt :: Parser Int
    443 pShiftAmt = do
    444   void (optional (char '#'))
    445   fromInteger <$> L.decimal
    446 
    447 pSymbolRef :: Parser Text
    448 pSymbolRef = lexeme $ do
    449   -- Handle =literal, @PAGE, @PAGEOFF suffixes, etc.
    450   hasPrefix <- option False (True <$ char '=')
    451   name <- pSymbolIdent  -- Use pSymbolIdent to allow ':' in GHC symbols
    452   suffix <- optional pSymbolSuffix
    453   let base = if hasPrefix then T.cons '=' name else name
    454   pure (maybe base (base <>) suffix)
    455 
    456 pSymbolSuffix :: Parser Text
    457 pSymbolSuffix = do
    458   void (char '@')
    459   suf <- takeWhile1P Nothing isAlphaNum
    460   -- Optional addend like +2 or -3
    461   addend <- optional pAddend
    462   let base = T.cons '@' suf
    463   pure (maybe base (base <>) addend)
    464 
    465 pAddend :: Parser Text
    466 pAddend = do
    467   sign <- satisfy (\c -> c == '+' || c == '-')
    468   digits <- takeWhile1P Nothing isDigit
    469   pure (T.cons sign digits)
    470   where
    471     isDigit c = c >= '0' && c <= '9'
    472 
    473 pCondCode :: Parser Text
    474 pCondCode = lexeme $ takeWhile1P Nothing isAlphaNum
    475 
    476 -- Dispatch by first char: '=' for literal, '[' for bracket address.
    477 -- No try needed since char '=' doesn't consume on failure.
    478 pAddrMode :: Parser AddrMode
    479 pAddrMode = choice
    480   [ pLiteralAddr
    481   , pBracketAddr
    482   ]
    483 
    484 pLiteralAddr :: Parser AddrMode
    485 pLiteralAddr = Literal <$> (char '=' *> pIdentifier)
    486 
    487 pBracketAddr :: Parser AddrMode
    488 pBracketAddr = lexeme $ do
    489   void (char '[')
    490   sc
    491   base <- pReg
    492   sc
    493   innerMode <- optional (pComma *> pAddrModeInner)
    494   sc
    495   void (char ']')
    496   -- Check for pre-index ! or post-index offset
    497   preIdx <- option False (True <$ char '!')
    498   postIdx <- optional (try pPostIndex)
    499   case (innerMode, preIdx, postIdx) of
    500     (Nothing, False, Nothing) -> pure (BaseImm base 0)
    501     (Nothing, False, Just imm) -> pure (PostIndex base imm)
    502     (Just (Left (Left imm)), True, Nothing) -> pure (PreIndex base imm)
    503     (Just (Left (Left imm)), False, Nothing) -> pure (BaseImm base imm)
    504     (Just (Left (Right sym)), False, Nothing) -> pure (BaseSymbol base sym)
    505     (Just (Right (r, Nothing)), False, Nothing) -> pure (BaseReg base r)
    506     (Just (Right (r, Just (Left sh))), False, Nothing) -> pure (BaseRegShift base r sh)
    507     (Just (Right (r, Just (Right ext))), False, Nothing) -> pure (BaseRegExtend base r ext)
    508     _ -> fail "invalid addressing mode"
    509 
    510 -- | Inner part of [base, ...] after the comma.
    511 -- Left (Left imm) = immediate offset
    512 -- Left (Right sym) = symbol offset (e.g., @PAGEOFF)
    513 -- Right (reg, mod) = register with optional shift/extend
    514 type AddrInner = Either (Either Integer Text) (Reg, Maybe (Either Shift Extend))
    515 
    516 -- Parse the inner part of [base, ...] after the comma
    517 pAddrModeInner :: Parser AddrInner
    518 pAddrModeInner = choice
    519   [ Left . Left <$> pImm
    520   , try (Right <$> pRegWithModifier)
    521   , Left . Right <$> pSymbolRef
    522   ]
    523 
    524 pRegWithModifier :: Parser (Reg, Maybe (Either Shift Extend))
    525 pRegWithModifier = do
    526   r <- pReg
    527   modifier <- optional (pComma *> pShiftOrExtend)
    528   pure (r, modifier)
    529 
    530 pShiftOrExtend :: Parser (Either Shift Extend)
    531 pShiftOrExtend = choice
    532   [ Left <$> pShift
    533   , Right <$> pExtend
    534   ]
    535 
    536 pExtend :: Parser Extend
    537 pExtend = lexeme $ do
    538   ext <- choice
    539     [ UXTW <$ string' "uxtw"
    540     , SXTW <$ string' "sxtw"
    541     , UXTX <$ string' "uxtx"
    542     , SXTX <$ string' "sxtx"
    543     ]
    544   -- Optionally consume shift amount (e.g., #3) - ignored for taint analysis
    545   void $ optional (sc *> pImm)
    546   pure ext
    547 
    548 pPostIndex :: Parser Integer
    549 pPostIndex = do
    550   sc
    551   void (char ',')
    552   sc
    553   pImm