commit 76a0c0083d39c841e48ff418c21a0f92f1edcd73
parent 5c94996fc63c8005b8eba620933adf31620e82c6
Author: Jared Tobin <jared@jtobin.io>
Date: Sun, 17 May 2026 21:09:31 -0230
Merge branch 'fix/parse-shifted-immediate'
Teach the AArch64 parser the shifted-immediate operand form
(e.g. `cmp x8, #16, lsl #12`). LLVM emits this when comparing
against a constant that exceeds the 12-bit unsigned immediate range
but fits once shifted left by 12 bits; it was previously a parse
error, preventing analysis of any assembly that crossed that threshold.
Encountered while auditing ppad-bolt8: the `require (len <= 65535)`
check in `encrypt` and the matching `decrypt_frame_partial` length
check lower to `cmp x8, #16, lsl #12 ; =65536`, halting the parser
at line 24546.
Changes:
* Parser.hs: extend `pOperand` so an immediate may carry an
optional trailing `, lsl/lsr/asr #k`. The shift is folded into
the literal at parse time (e.g. `#16, lsl #12` becomes the
integer 65536) so downstream analysis sees the effective value
without any new operand shape. Reuses the existing `Shift` ADT
via `pShift` rather than introducing a parallel type.
* Only the bare-immediate `pOperand` call site changes; immediates
inside addressing modes, Movz/Movk/Movn, Svc, etc. continue to
use `pImm` directly and are unaffected.
* AArch64 only encodes uimm12 shifted-immediates with `lsl #0` or
`lsl #12`; LSR/ASR variants are not produced by real assemblers
for this form. They are accepted defensively (folded as floor
division, which is exactly ASR and coincides with LSR for the
non-negative values these fields hold) and Haddock documents why.
* test/Main.hs: regression test
`cmp x8, #16, lsl #12` -> `Cmp X8 (OpImm 65536)`.
All 116 tests pass (was 115).
Diffstat:
2 files changed, 33 insertions(+), 1 deletion(-)
diff --git a/lib/Audit/AArch64/Parser.hs b/lib/Audit/AArch64/Parser.hs
@@ -398,10 +398,28 @@ pOperand :: Parser Operand
pOperand = choice
[ try (OpAddr <$> pAddrMode)
, try pRegOrShiftedReg
- , OpImm <$> pImm
+ , OpImm <$> pImmShifted
, OpLabel <$> pSymbolRef
]
+-- | Like 'pImm', but also accepts the AArch64 shifted-immediate form
+-- (e.g., @#16, lsl #12@) and folds the shift into the returned value,
+-- so @#16, lsl #12@ yields @65536@.
+--
+-- In practice AArch64 only encodes @lsl #0@ or @lsl #12@ on uimm12
+-- fields (e.g., add/adds/sub/subs/cmp/cmn shifted-immediate); LSR/ASR
+-- never appear in this position. We accept them defensively and fold
+-- both as floor division by @2 ^ k@: that is exactly an arithmetic
+-- right shift, and it coincides with a logical right shift for the
+-- non-negative values these fields hold.
+--
+-- A negative shift amount is rejected as a parse failure, because
+-- @2 ^ k@ throws for negative @k@.
+pImmShifted :: Parser Integer
+pImmShifted = do
+  n <- pImm
+  -- 'try' so that a comma not followed by a shift is left unconsumed.
+  mSh <- optional (try (pComma *> pShift))
+  case mSh of
+    Nothing -> pure n
+    Just (LSL k) -> (n *) <$> pow2 k
+    Just (LSR k) -> (n `div`) <$> pow2 k
+    Just (ASR k) -> (n `div`) <$> pow2 k
+  where
+    -- 2 ^ k, failing the parse instead of throwing when k is negative.
+    -- NOTE(review): whether 'pShift' can actually yield a negative
+    -- amount is not visible from here; the guard is defensive.
+    pow2 k
+      | k < 0 = fail "negative shift amount in shifted immediate"
+      | otherwise = pure (2 ^ k)
+
-- Parse register once, then optionally check for shift suffix.
pRegOrShiftedReg :: Parser Operand
pRegOrShiftedReg = do
diff --git a/test/Main.hs b/test/Main.hs
@@ -129,6 +129,20 @@ parserTests = testGroup "Parser" [
other -> assertFailure $
"unexpected: " ++ show other
+ , testCase "parse shifted immediate operand" $ do
+ -- regression: `cmp x8, #16, lsl #12` is the uimm12
+ -- shifted-immediate form (16 << 12 = 65536). The
+ -- parser must fold the shift into the literal.
+ let src = "cmp x8, #16, lsl #12\n"
+ case parseAsm src of
+ Left e -> assertFailure $
+ "parse failed: " ++ show e
+ Right lns ->
+ case lineInstr (safeHead lns) of
+ Just (Cmp X8 (OpImm 65536)) -> pure ()
+ other -> assertFailure $
+ "unexpected: " ++ show other
+
, testCase "parse plain register operand" $ do
let src = "add x0, x1, x2\n"
case parseAsm src of