commit 36a32d4668eecafc049ab77f1f90824e1a16e244
parent 09d6199af719380169b380f837ee267675be0245
Author: Jared Tobin <jared@jtobin.io>
Date: Wed, 11 Feb 2026 18:24:16 +0400
fix: address reviewer comments on parser optimizations
- Guard pLabelOrInstrLine to only treat identifiers as mnemonics if they
start with an alphanumeric character or '.', preventing a behavior change
for lines starting with '_' without a colon
- Add try to all pReg alternatives so it doesn't consume input on total
failure, allowing pSymbolRef to handle register-like symbols (e.g.,
xfoo)
- Add try around pRegWithModifier in pAddrModeInner to allow fallback to
pSymbolRef for symbol-based index offsets
- Remove misleading test comment about Other mnemonic
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Diffstat:
2 files changed, 29 insertions(+), 23 deletions(-)
diff --git a/lib/Audit/AArch64/Parser.hs b/lib/Audit/AArch64/Parser.hs
@@ -93,13 +93,17 @@ pLabelOrInstrLine ln = do
void (optional eol)
pure (Line ln (Just name) mInstr)
Nothing -> do
- -- Instruction line: name is the mnemonic
- sc
- instr <- parseByMnemonic (T.toLower name)
- hspace
- void (optional pComment)
- void (optional eol)
- pure (Line ln Nothing (Just instr))
+ -- Instruction line: name must look like a valid mnemonic.
+ -- Guard: mnemonics start with alphanumeric or '.' (not '_' etc.)
+ case T.uncons name of
+ Just (c, _) | isAlphaNum c || c == '.' -> do
+ sc
+ instr <- parseByMnemonic (T.toLower name)
+ hspace
+ void (optional pComment)
+ void (optional eol)
+ pure (Line ln Nothing (Just instr))
+ _ -> fail "not a valid mnemonic"
pIdentifier :: Parser Text
pIdentifier = do
@@ -269,24 +273,26 @@ pComma :: Parser ()
pComma = void (lexeme (char ','))
-- Dispatch on first character to avoid trying all register types.
+-- All alternatives use try to ensure pReg doesn't consume on total failure,
+-- allowing callers to try pSymbolRef or other alternatives.
pReg :: Parser Reg
pReg = lexeme $ do
c <- lookAhead anySingle
case c of
- 'x' -> try pXReg <|> (XZR <$ string' "xzr")
- 'X' -> try pXReg <|> (XZR <$ string' "xzr")
- 'w' -> try pWReg <|> (WZR <$ string' "wzr")
- 'W' -> try pWReg <|> (WZR <$ string' "wzr")
- 'd' -> pDReg
- 'D' -> pDReg
- 's' -> try pSReg <|> (SP <$ string' "sp")
- 'S' -> try pSReg <|> (SP <$ string' "sp")
- 'q' -> pQReg
- 'Q' -> pQReg
- 'f' -> X29 <$ string' "fp"
- 'F' -> X29 <$ string' "fp"
- 'l' -> X30 <$ string' "lr"
- 'L' -> X30 <$ string' "lr"
+ 'x' -> try pXReg <|> try (XZR <$ string' "xzr")
+ 'X' -> try pXReg <|> try (XZR <$ string' "xzr")
+ 'w' -> try pWReg <|> try (WZR <$ string' "wzr")
+ 'W' -> try pWReg <|> try (WZR <$ string' "wzr")
+ 'd' -> try pDReg
+ 'D' -> try pDReg
+ 's' -> try pSReg <|> try (SP <$ string' "sp")
+ 'S' -> try pSReg <|> try (SP <$ string' "sp")
+ 'q' -> try pQReg
+ 'Q' -> try pQReg
+ 'f' -> try (X29 <$ string' "fp")
+ 'F' -> try (X29 <$ string' "fp")
+ 'l' -> try (X30 <$ string' "lr")
+ 'L' -> try (X30 <$ string' "lr")
_ -> fail "not a register"
pXReg :: Parser Reg
@@ -487,7 +493,7 @@ type AddrInner = Either (Either Integer Text) (Reg, Maybe (Either Shift Extend))
pAddrModeInner :: Parser AddrInner
pAddrModeInner = choice
[ Left . Left <$> pImm
- , Right <$> pRegWithModifier
+ , try (Right <$> pRegWithModifier)
, Left . Right <$> pSymbolRef
]
diff --git a/test/Main.hs b/test/Main.hs
@@ -113,7 +113,7 @@ parserTests = testGroup "Parser" [
let src = T.unlines
[ "mov x0, x1"
, "mov w0, w1"
- , "mov d0, d1" -- Other mnemonic treated as fallback
+ , "mov d0, d1"
, "mov s0, s1"
, "mov q0, q1"
, "mov x0, sp"