commit 4ad0780e31db624f9220e922e60b6e5aecfe3618
parent ef4009793589243d2e89c42e7ce930d56bd202f4
Author: Jared Tobin <jared@jtobin.io>
Date: Tue, 10 Feb 2026 12:44:02 +0400
feat: add BaseSymbol addressing mode for GOT/PIC relocations
Parser now handles [base, symbol@PAGEOFF] addressing used in
position-independent code and GOT relocations. This unblocks
parsing real GHC aarch64 assembly output.
- Add BaseSymbol Reg Text constructor to AddrMode
- Extend pAddrModeInner to parse symbol references after comma
- Update taint analysis and violation checks for new mode
- Add parser test for ldr with symbol offset
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Diffstat:
5 files changed, 36 insertions(+), 10 deletions(-)
diff --git a/lib/Audit/AArch64/Check.hs b/lib/Audit/AArch64/Check.hs
@@ -104,6 +104,10 @@ checkAddrMode sym ln instr addr st = case addr of
checkBase sym ln instr base st ++
checkIndex sym ln instr idx st
+ BaseSymbol base _ ->
+ -- Symbol offset is constant (compile-time), only check base
+ checkBase sym ln instr base st
+
PreIndex base _ ->
checkBase sym ln instr base st
diff --git a/lib/Audit/AArch64/Parser.hs b/lib/Audit/AArch64/Parser.hs
@@ -421,18 +421,26 @@ pBracketAddr = lexeme $ do
case (innerMode, preIdx, postIdx) of
(Nothing, False, Nothing) -> pure (BaseImm base 0)
(Nothing, False, Just imm) -> pure (PostIndex base imm)
- (Just (Left imm), True, Nothing) -> pure (PreIndex base imm)
- (Just (Left imm), False, Nothing) -> pure (BaseImm base imm)
+ (Just (Left (Left imm)), True, Nothing) -> pure (PreIndex base imm)
+ (Just (Left (Left imm)), False, Nothing) -> pure (BaseImm base imm)
+ (Just (Left (Right sym)), False, Nothing) -> pure (BaseSymbol base sym)
(Just (Right (r, Nothing)), False, Nothing) -> pure (BaseReg base r)
(Just (Right (r, Just (Left sh))), False, Nothing) -> pure (BaseRegShift base r sh)
(Just (Right (r, Just (Right ext))), False, Nothing) -> pure (BaseRegExtend base r ext)
_ -> fail "invalid addressing mode"
+-- | Inner part of [base, ...] after the comma.
+-- Left (Left imm) = immediate offset
+-- Left (Right sym) = symbol offset (e.g., @PAGEOFF)
+-- Right (reg, mod) = register with optional shift/extend
+type AddrInner = Either (Either Integer Text) (Reg, Maybe (Either Shift Extend))
+
-- Parse the inner part of [base, ...] after the comma
-pAddrModeInner :: Parser (Either Integer (Reg, Maybe (Either Shift Extend)))
+pAddrModeInner :: Parser AddrInner
pAddrModeInner = choice
- [ Left <$> pImm
+ [ Left . Left <$> pImm
, Right <$> pRegWithModifier
+ , Left . Right <$> pSymbolRef
]
pRegWithModifier :: Parser (Reg, Maybe (Either Shift Extend))
diff --git a/lib/Audit/AArch64/Taint.hs b/lib/Audit/AArch64/Taint.hs
@@ -187,13 +187,14 @@ operandTaint op st = case op of
-- | Get taint of address base register.
addrBaseTaint :: AddrMode -> TaintState -> Taint
addrBaseTaint addr st = case addr of
- BaseImm r _ -> getTaint r st
- BaseReg r _ -> getTaint r st
- BaseRegShift r _ _ -> getTaint r st
+ BaseImm r _ -> getTaint r st
+ BaseReg r _ -> getTaint r st
+ BaseRegShift r _ _ -> getTaint r st
BaseRegExtend r _ _ -> getTaint r st
- PreIndex r _ -> getTaint r st
- PostIndex r _ -> getTaint r st
- Literal _ -> Public
+ BaseSymbol r _ -> getTaint r st
+ PreIndex r _ -> getTaint r st
+ PostIndex r _ -> getTaint r st
+ Literal _ -> Public
-- | Join two taints.
join2 :: Taint -> Taint -> Taint
diff --git a/lib/Audit/AArch64/Types.hs b/lib/Audit/AArch64/Types.hs
@@ -116,6 +116,7 @@ data AddrMode
| BaseReg !Reg !Reg -- ^ [xN, xM]
| BaseRegShift !Reg !Reg !Shift -- ^ [xN, xM, lsl #k]
| BaseRegExtend !Reg !Reg !Extend -- ^ [xN, xM, sxtw]
+ | BaseSymbol !Reg !Text -- ^ [xN, symbol@PAGEOFF]
| PreIndex !Reg !Integer -- ^ [xN, #imm]!
| PostIndex !Reg !Integer -- ^ [xN], #imm
| Literal !Text -- ^ =label or PC-relative
diff --git a/test/Main.hs b/test/Main.hs
@@ -70,8 +70,20 @@ parserTests = testGroup "Parser" [
-- Should have 4 lines, first two are directives (no instr)
let instrs = filter ((/= Nothing) . lineInstr) lns
assertEqual "instruction count" 1 (length instrs)
+
+ , testCase "parse ldr base+symbol" $ do
+ let src = "ldr x8, [x8, _foo@GOTPAGEOFF]\n"
+ case parseAsm src of
+ Left e -> assertFailure $ "parse failed: " ++ show e
+ Right lns -> case lineInstr (safeHead lns) of
+ Just (Ldr X8 (BaseSymbol X8 "_foo@GOTPAGEOFF")) -> pure ()
+ other -> assertFailure $ "unexpected: " ++ show other
]
+safeHead :: [a] -> a
+safeHead (x:_) = x
+safeHead [] = error "safeHead: empty list"
+
-- Taint tests
taintTests :: TestTree