commit 09d6199af719380169b380f837ee267675be0245
parent 1f6ee9dd27249823cd3878d77c51eead36cb5e46
Author: Jared Tobin <jared@jtobin.io>
Date: Wed, 11 Feb 2026 18:04:38 +0400
perf: reduce backtracking in parser
- Merge pLabelLine/pInstrLine into pLabelOrInstrLine to parse leading
identifier once, then branch on ':' (eliminates double-parse per
instruction line)
- Merge pShiftedRegOp into pRegOrShiftedReg to parse register once,
then optionally parse shift (eliminates double-parse per operand)
- Dispatch pReg on first character instead of sequential try chain
(reduces failed attempts for most register types)
- Remove unnecessary try from pLiteralAddr in pAddrMode
Add parser tests for shifted/plain register operands, literal addresses,
all register types, and label/instruction line variants.
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Diffstat:
2 files changed, 110 insertions(+), 43 deletions(-)
diff --git a/lib/Audit/AArch64/Parser.hs b/lib/Audit/AArch64/Parser.hs
@@ -55,8 +55,7 @@ pLine = do
hspace -- Skip leading horizontal whitespace only
choice
[ pDirectiveLine ln
- , pLabelLine ln
- , pInstrLine ln
+ , pLabelOrInstrLine ln
, pEmptyLine ln
]
@@ -78,29 +77,29 @@ pDirectiveLine ln = do
void (optional eol)
pure (Line ln Nothing Nothing)
-pLabelLine :: Int -> Parser Line
-pLabelLine ln = do
- lbl <- try pLabel
- hspace
- mInstr <- optional pInstr
- hspace
- void (optional pComment)
- void (optional eol)
- pure (Line ln (Just lbl) mInstr)
-
-pInstrLine :: Int -> Parser Line
-pInstrLine ln = do
- instr <- pInstr
- hspace
- void (optional pComment)
- void (optional eol)
- pure (Line ln Nothing (Just instr))
-
-pLabel :: Parser Text
-pLabel = lexeme $ do
+-- Combined parser for label or instruction lines.
+-- Parses the leading identifier once, then branches on ':'.
+pLabelOrInstrLine :: Int -> Parser Line
+pLabelOrInstrLine ln = do
name <- pIdentifier
- void (char ':')
- pure name
+ mColon <- optional (char ':')
+ case mColon of
+ Just _ -> do
+ -- Label line: name is the label
+ sc
+ mInstr <- optional pInstr
+ hspace
+ void (optional pComment)
+ void (optional eol)
+ pure (Line ln (Just name) mInstr)
+ Nothing -> do
+ -- Instruction line: name is the mnemonic
+ sc
+ instr <- parseByMnemonic (T.toLower name)
+ hspace
+ void (optional pComment)
+ void (optional eol)
+ pure (Line ln Nothing (Just instr))
pIdentifier :: Parser Text
pIdentifier = do
@@ -269,9 +268,26 @@ pRemainingOperands = do
pComma :: Parser ()
pComma = void (lexeme (char ','))
+-- Dispatch on first character to avoid trying all register types.
pReg :: Parser Reg
-pReg = lexeme $ choice
- [ try pXReg, try pWReg, try pDReg, try pSReg, try pQReg, pSpecialReg ]
+pReg = lexeme $ do
+ c <- lookAhead anySingle
+ case c of
+ 'x' -> try pXReg <|> (XZR <$ string' "xzr")
+ 'X' -> try pXReg <|> (XZR <$ string' "xzr")
+ 'w' -> try pWReg <|> (WZR <$ string' "wzr")
+ 'W' -> try pWReg <|> (WZR <$ string' "wzr")
+ 'd' -> pDReg
+ 'D' -> pDReg
+ 's' -> try pSReg <|> (SP <$ string' "sp")
+ 'S' -> try pSReg <|> (SP <$ string' "sp")
+ 'q' -> pQReg
+ 'Q' -> pQReg
+ 'f' -> X29 <$ string' "fp"
+ 'F' -> X29 <$ string' "fp"
+ 'l' -> X30 <$ string' "lr"
+ 'L' -> X30 <$ string' "lr"
+ _ -> fail "not a register"
pXReg :: Parser Reg
pXReg = do
@@ -348,30 +364,22 @@ pQReg = do
28 -> pure Q28; 29 -> pure Q29; 30 -> pure Q30; 31 -> pure Q31
_ -> fail "invalid q register number"
-pSpecialReg :: Parser Reg
-pSpecialReg = choice
- [ SP <$ string' "sp"
- , XZR <$ string' "xzr"
- , WZR <$ string' "wzr"
- , X29 <$ string' "fp" -- fp is alias for x29
- , X30 <$ string' "lr" -- lr is alias for x30
- ]
-
pOperand :: Parser Operand
pOperand = choice
[ try (OpAddr <$> pAddrMode)
- , try pShiftedRegOp
+ , try pRegOrShiftedReg
, OpImm <$> pImm
- , OpReg <$> pReg
, OpLabel <$> pSymbolRef
]
-pShiftedRegOp :: Parser Operand
-pShiftedRegOp = do
+-- Parse register once, then optionally check for shift suffix.
+pRegOrShiftedReg :: Parser Operand
+pRegOrShiftedReg = do
r <- pReg
- pComma
- sh <- pShift
- pure (OpShiftedReg r sh)
+ mShift <- optional (pComma *> pShift)
+ pure $ case mShift of
+ Just sh -> OpShiftedReg r sh
+ Nothing -> OpReg r
pImm :: Parser Integer
pImm = lexeme $ do
@@ -435,9 +443,11 @@ pAddend = do
pCondCode :: Parser Text
pCondCode = lexeme $ takeWhile1P Nothing isAlphaNum
+-- Dispatch by first char: '=' for literal, '[' for bracket address.
+-- No try needed since char '=' doesn't consume on failure.
pAddrMode :: Parser AddrMode
pAddrMode = choice
- [ try pLiteralAddr
+ [ pLiteralAddr
, pBracketAddr
]
diff --git a/test/Main.hs b/test/Main.hs
@@ -84,6 +84,63 @@ parserTests = testGroup "Parser" [
Right lns -> case lineInstr (safeHead lns) of
Just (Ldr X8 (BaseSymbol X8 "_foo@GOTPAGEOFF")) -> pure ()
other -> assertFailure $ "unexpected: " ++ show other
+
+ , testCase "parse shifted register operand" $ do
+ let src = "add x0, x1, x2, lsl #3\n"
+ case parseAsm src of
+ Left e -> assertFailure $ "parse failed: " ++ show e
+ Right lns -> case lineInstr (safeHead lns) of
+ Just (Add X0 X1 (OpShiftedReg X2 (LSL 3))) -> pure ()
+ other -> assertFailure $ "unexpected: " ++ show other
+
+ , testCase "parse plain register operand" $ do
+ let src = "add x0, x1, x2\n"
+ case parseAsm src of
+ Left e -> assertFailure $ "parse failed: " ++ show e
+ Right lns -> case lineInstr (safeHead lns) of
+ Just (Add X0 X1 (OpReg X2)) -> pure ()
+ other -> assertFailure $ "unexpected: " ++ show other
+
+ , testCase "parse literal address" $ do
+ let src = "ldr x0, =foo\n"
+ case parseAsm src of
+ Left e -> assertFailure $ "parse failed: " ++ show e
+ Right lns -> case lineInstr (safeHead lns) of
+ Just (Ldr X0 (Literal "foo")) -> pure ()
+ other -> assertFailure $ "unexpected: " ++ show other
+
+ , testCase "parse all register types" $ do
+ let src = T.unlines
+ [ "mov x0, x1"
+ , "mov w0, w1"
+ , "mov d0, d1" -- Other mnemonic treated as fallback
+ , "mov s0, s1"
+ , "mov q0, q1"
+ , "mov x0, sp"
+ , "mov x0, xzr"
+ , "mov w0, wzr"
+ , "mov x0, fp"
+ , "mov x0, lr"
+ ]
+ case parseAsm src of
+ Left e -> assertFailure $ "parse failed: " ++ show e
+ Right lns -> assertEqual "parsed all lines" 10 (length lns)
+
+ , testCase "parse label with instruction" $ do
+ let src = "foo: mov x0, x1\n"
+ case parseAsm src of
+ Left e -> assertFailure $ "parse failed: " ++ show e
+ Right lns -> case safeHead lns of
+ Line _ (Just "foo") (Just (Mov X0 (OpReg X1))) -> pure ()
+ other -> assertFailure $ "unexpected: " ++ show other
+
+ , testCase "parse instruction without label" $ do
+ let src = " mov x0, x1\n"
+ case parseAsm src of
+ Left e -> assertFailure $ "parse failed: " ++ show e
+ Right lns -> case safeHead lns of
+ Line _ Nothing (Just (Mov X0 (OpReg X1))) -> pure ()
+ other -> assertFailure $ "unexpected: " ++ show other
]
safeHead :: [a] -> a