auditor

An aarch64 constant-time memory access auditing tool.
git clone git://git.ppad.tech/auditor.git
Log | Files | Refs | README | LICENSE

commit 09d6199af719380169b380f837ee267675be0245
parent 1f6ee9dd27249823cd3878d77c51eead36cb5e46
Author: Jared Tobin <jared@jtobin.io>
Date:   Wed, 11 Feb 2026 18:04:38 +0400

perf: reduce backtracking in parser

- Merge pLabelLine/pInstrLine into pLabelOrInstrLine to parse leading
  identifier once, then branch on ':' (eliminates double-parse per
  instruction line)
- Merge pShiftedRegOp into pRegOrShiftedReg to parse register once,
  then optionally parse shift (eliminates double-parse per operand)
- Dispatch pReg on first character instead of sequential try chain
  (reduces failed attempts for most register types)
- Remove unnecessary try from pLiteralAddr in pAddrMode

Add parser tests for shifted/plain register operands, literal addresses,
all register types, and label/instruction line variants.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

Diffstat:
Mlib/Audit/AArch64/Parser.hs | 96++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
Mtest/Main.hs | 57+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 110 insertions(+), 43 deletions(-)

diff --git a/lib/Audit/AArch64/Parser.hs b/lib/Audit/AArch64/Parser.hs @@ -55,8 +55,7 @@ pLine = do hspace -- Skip leading horizontal whitespace only choice [ pDirectiveLine ln - , pLabelLine ln - , pInstrLine ln + , pLabelOrInstrLine ln , pEmptyLine ln ] @@ -78,29 +77,29 @@ pDirectiveLine ln = do void (optional eol) pure (Line ln Nothing Nothing) -pLabelLine :: Int -> Parser Line -pLabelLine ln = do - lbl <- try pLabel - hspace - mInstr <- optional pInstr - hspace - void (optional pComment) - void (optional eol) - pure (Line ln (Just lbl) mInstr) - -pInstrLine :: Int -> Parser Line -pInstrLine ln = do - instr <- pInstr - hspace - void (optional pComment) - void (optional eol) - pure (Line ln Nothing (Just instr)) - -pLabel :: Parser Text -pLabel = lexeme $ do +-- Combined parser for label or instruction lines. +-- Parses the leading identifier once, then branches on ':'. +pLabelOrInstrLine :: Int -> Parser Line +pLabelOrInstrLine ln = do name <- pIdentifier - void (char ':') - pure name + mColon <- optional (char ':') + case mColon of + Just _ -> do + -- Label line: name is the label + sc + mInstr <- optional pInstr + hspace + void (optional pComment) + void (optional eol) + pure (Line ln (Just name) mInstr) + Nothing -> do + -- Instruction line: name is the mnemonic + sc + instr <- parseByMnemonic (T.toLower name) + hspace + void (optional pComment) + void (optional eol) + pure (Line ln Nothing (Just instr)) pIdentifier :: Parser Text pIdentifier = do @@ -269,9 +268,26 @@ pRemainingOperands = do pComma :: Parser () pComma = void (lexeme (char ',')) +-- Dispatch on first character to avoid trying all register types. pReg :: Parser Reg -pReg = lexeme $ choice - [ try pXReg, try pWReg, try pDReg, try pSReg, try pQReg, pSpecialReg ] +pReg = lexeme $ do + c <- lookAhead anySingle + case c of + 'x' -> try pXReg <|> (XZR <$ string' "xzr") + 'X' -> try pXReg <|> (XZR <$ string' "xzr") + 'w' -> try pWReg <|> (WZR <$ string' "wzr") + 'W' -> try pWReg <|> (WZR <$ string' "wzr") + 'd' -> pDReg + 'D' -> pDReg + 's' -> try pSReg <|> (SP <$ string' "sp") + 'S' -> try pSReg <|> (SP <$ string' "sp") + 'q' -> pQReg + 'Q' -> pQReg + 'f' -> X29 <$ string' "fp" + 'F' -> X29 <$ string' "fp" + 'l' -> X30 <$ string' "lr" + 'L' -> X30 <$ string' "lr" + _ -> fail "not a register" pXReg :: Parser Reg pXReg = do @@ -348,30 +364,22 @@ pQReg = do 28 -> pure Q28; 29 -> pure Q29; 30 -> pure Q30; 31 -> pure Q31 _ -> fail "invalid q register number" -pSpecialReg :: Parser Reg -pSpecialReg = choice - [ SP <$ string' "sp" - , XZR <$ string' "xzr" - , WZR <$ string' "wzr" - , X29 <$ string' "fp" -- fp is alias for x29 - , X30 <$ string' "lr" -- lr is alias for x30 - ] - pOperand :: Parser Operand pOperand = choice [ try (OpAddr <$> pAddrMode) - , try pShiftedRegOp + , try pRegOrShiftedReg , OpImm <$> pImm - , OpReg <$> pReg , OpLabel <$> pSymbolRef ] -pShiftedRegOp :: Parser Operand -pShiftedRegOp = do +-- Parse register once, then optionally check for shift suffix. +pRegOrShiftedReg :: Parser Operand +pRegOrShiftedReg = do r <- pReg - pComma - sh <- pShift - pure (OpShiftedReg r sh) + mShift <- optional (pComma *> pShift) + pure $ case mShift of + Just sh -> OpShiftedReg r sh + Nothing -> OpReg r pImm :: Parser Integer pImm = lexeme $ do @@ -435,9 +443,11 @@ pAddend = do pCondCode :: Parser Text pCondCode = lexeme $ takeWhile1P Nothing isAlphaNum +-- Dispatch by first char: '=' for literal, '[' for bracket address. +-- No try needed since char '=' doesn't consume on failure. pAddrMode :: Parser AddrMode pAddrMode = choice - [ try pLiteralAddr + [ pLiteralAddr , pBracketAddr ] diff --git a/test/Main.hs b/test/Main.hs @@ -84,6 +84,63 @@ parserTests = testGroup "Parser" [ Right lns -> case lineInstr (safeHead lns) of Just (Ldr X8 (BaseSymbol X8 "_foo@GOTPAGEOFF")) -> pure () other -> assertFailure $ "unexpected: " ++ show other + + , testCase "parse shifted register operand" $ do + let src = "add x0, x1, x2, lsl #3\n" + case parseAsm src of + Left e -> assertFailure $ "parse failed: " ++ show e + Right lns -> case lineInstr (safeHead lns) of + Just (Add X0 X1 (OpShiftedReg X2 (LSL 3))) -> pure () + other -> assertFailure $ "unexpected: " ++ show other + + , testCase "parse plain register operand" $ do + let src = "add x0, x1, x2\n" + case parseAsm src of + Left e -> assertFailure $ "parse failed: " ++ show e + Right lns -> case lineInstr (safeHead lns) of + Just (Add X0 X1 (OpReg X2)) -> pure () + other -> assertFailure $ "unexpected: " ++ show other + + , testCase "parse literal address" $ do + let src = "ldr x0, =foo\n" + case parseAsm src of + Left e -> assertFailure $ "parse failed: " ++ show e + Right lns -> case lineInstr (safeHead lns) of + Just (Ldr X0 (Literal "foo")) -> pure () + other -> assertFailure $ "unexpected: " ++ show other + + , testCase "parse all register types" $ do + let src = T.unlines + [ "mov x0, x1" + , "mov w0, w1" + , "mov d0, d1" -- Other mnemonic treated as fallback + , "mov s0, s1" + , "mov q0, q1" + , "mov x0, sp" + , "mov x0, xzr" + , "mov w0, wzr" + , "mov x0, fp" + , "mov x0, lr" + ] + case parseAsm src of + Left e -> assertFailure $ "parse failed: " ++ show e + Right lns -> assertEqual "parsed all lines" 10 (length lns) + + , testCase "parse label with instruction" $ do + let src = "foo: mov x0, x1\n" + case parseAsm src of + Left e -> assertFailure $ "parse failed: " ++ show e + Right lns -> case safeHead lns of + Line _ (Just "foo") (Just (Mov X0 (OpReg X1))) -> pure () + other -> assertFailure $ "unexpected: " ++ show other + + , testCase "parse instruction without label" $ do + let src = " mov x0, x1\n" + case parseAsm src of + Left e -> assertFailure $ "parse failed: " ++ show e + Right lns -> case safeHead lns of + Line _ Nothing (Just (Mov X0 (OpReg X1))) -> pure () + other -> assertFailure $ "unexpected: " ++ show other ] safeHead :: [a] -> a