commit 70ad704ffd5bd9000441e69539fbfc3e34f74104
parent 1bfe17d14e2dd2ecf12e2e937aab38a2b71c1bc0
Author: Jared Tobin <jared@jtobin.io>
Date: Sat, 14 Feb 2026 13:46:02 +0400
feat: add --zsymbol flag for human-readable symbol input
Adds z-encoding support to convert human-readable Haskell symbols to
GHC's internal z-encoded format. Input format uses colons as separators:
<package>:<Module.Path>:<identifier>
Automatically prepends _ and appends _info$def.
Example: pkg-1.0:Mod.Sub:sqrt_vartime ->
_pkgzm1zi0_ModziSub_sqrtzuvartime_info$def
Encodes: - -> zm, . -> zi, _ -> zu, z -> zz, Z -> ZZ, $ -> zd, ' -> zq, # -> zh
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Diffstat:
| M | app/Main.hs | | | 81 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------- |
1 file changed, 65 insertions(+), 16 deletions(-)
diff --git a/app/Main.hs b/app/Main.hs
@@ -41,6 +41,7 @@ data Options = Options
, optNctDetail :: !Bool
, optShowGhcRuntime :: !Bool
, optSymbol :: !(Maybe Text)
+ , optZSymbol :: !(Maybe Text)
, optListSymbols :: !Bool
, optSymbolFilter :: !(Maybe Text)
, optCallers :: !Bool
@@ -102,6 +103,13 @@ optParser = Options
<> metavar "SYMBOL"
<> help "Analyze only this symbol and its callees (NCT scan mode)"
))
+ <*> optional (strOption
+ ( long "zsymbol"
+ <> short 'z'
+ <> metavar "SYMBOL"
+ <> help "Human-readable symbol, auto z-encoded with _info$def \
+ \(e.g., pkg-1.0:Mod.Sub:func)"
+ ))
<*> switch
( long "list-symbols"
<> short 'l'
@@ -129,11 +137,20 @@ optInfo = info (optParser <**> helper)
main :: IO ()
main = do
opts <- execParser optInfo
- if optListSymbols opts
- then listSymbols opts
- else if optParseOnly opts
+ -- Compute effective symbol from --symbol or --zsymbol
+ effSym <- case optZSymbol opts of
+ Just zs -> case zEncodeSymbol zs of
+ Left err -> do
+ TIO.putStrLn $ "Error: " <> err
+ exitFailure
+ Right encoded -> pure (Just encoded)
+ Nothing -> pure (optSymbol opts)
+ let opts' = opts { optSymbol = effSym }
+ if optListSymbols opts'
+ then listSymbols opts'
+ else if optParseOnly opts'
then do
- result <- parseFile (optInput opts)
+ result <- parseFile (optInput opts')
case result of
Left err -> do
TIO.putStrLn $ "Error: " <> err
@@ -141,27 +158,27 @@ main = do
Right n -> do
TIO.putStrLn $ "Parsed " <> T.pack (show n) <> " lines"
exitSuccess
- else if optScanNct opts
- then case optSymbol opts of
+ else if optScanNct opts'
+ then case optSymbol opts' of
Just sym -> do
- result <- scanNctForSymbol opts sym
+ result <- scanNctForSymbol opts' sym
case result of
Left err -> do
TIO.putStrLn $ "Error: " <> err
exitFailure
Right ssr ->
- outputNctSymbol opts ssr
+ outputNctSymbol opts' ssr
Nothing -> do
- result <- scanNctFile (optInput opts)
+ result <- scanNctFile (optInput opts')
case result of
Left err -> do
TIO.putStrLn $ "Error: " <> err
exitFailure
Right (lineMap, findings) ->
- outputNct opts lineMap findings
+ outputNct opts' lineMap findings
else do
-- Load taint config if provided
- mcfg <- case optTaintConfig opts of
+ mcfg <- case optTaintConfig opts' of
Nothing -> pure (Right emptyConfig)
Just path -> loadTaintConfig path
case mcfg of
@@ -169,16 +186,16 @@ main = do
TIO.putStrLn $ "Error loading taint config: " <> err
exitFailure
Right cfg -> do
- let auditor = selectAuditor opts cfg
- result <- auditor (optInput opts)
+ let auditor = selectAuditor opts' cfg
+ result <- auditor (optInput opts')
case result of
Left err -> do
TIO.putStrLn $ "Error: " <> err
exitFailure
Right ar ->
- if optJson opts
- then outputJson opts ar
- else outputText opts ar
+ if optJson opts'
+ then outputJson opts' ar
+ else outputText opts' ar
where
emptyConfig = TaintConfig Map.empty
@@ -408,3 +425,35 @@ reasonText r = case r of
UnknownBase reg -> "unknown base register " <> regName reg
UnknownIndex reg -> "unknown index register " <> regName reg
NonConstOffset -> "non-constant offset without masking"
+
+-- | Z-encode a human-readable Haskell symbol for GHC assembly lookup.
+--
+-- Input format: @\<package\>:\<Module.Path\>:\<identifier\>@
+--
+-- Output: @_\<z-pkg\>_\<z-mod\>_\<z-id\>_info$def@
+--
+-- Returns 'Left' with a message when the input does not split into
+-- exactly three colon-separated components, or when any of the three
+-- components is empty (an empty component would otherwise produce a
+-- symbol with doubled underscores that cannot name anything).
+--
+-- >>> zEncodeSymbol "pkg-1.0:Mod.Sub:sqrt_vartime"
+-- Right "_pkgzm1zi0_ModziSub_sqrtzuvartime_info$def"
+zEncodeSymbol :: Text -> Either Text Text
+zEncodeSymbol input =
+  case T.splitOn ":" input of
+    parts@[_, _, _]
+      -- Reject inputs such as "pkg::f" or "::" up front.
+      | any T.null parts ->
+          Left "Invalid symbol format: expected <package>:<Module.Path>:<id>, \
+               \got an empty component"
+      | otherwise ->
+          let encoded = T.intercalate "_" (map zEncodePart parts)
+          in  Right ("_" <> encoded <> "_info$def")
+    parts ->
+      Left $ "Invalid symbol format: expected <package>:<Module.Path>:<id>, \
+             \got " <> T.pack (show (length parts)) <> " parts"
+
+-- | Z-encode a single component (package, module path, or identifier).
+--
+-- Each character listed in the table below is replaced by its two-letter
+-- escape; every other character is copied through unchanged. The table
+-- follows @encode_ch@ in GHC's compiler/GHC/Utils/Encoding.hs, so
+-- operator names such as @<$>@ encode the same way GHC emits them.
+--
+-- NOTE(review): GHC also hex-escapes a leading digit and any character
+-- outside this table (@zXXXXU@ form), and special-cases tuple names.
+-- None of that is reproduced here -- confirm whether callers need it.
+zEncodePart :: Text -> Text
+zEncodePart = T.concatMap encodeChar
+  where
+    encodeChar :: Char -> Text
+    encodeChar c = case c of
+      -- Escapes introduced by an upper-case Z.
+      '('  -> "ZL"
+      ')'  -> "ZR"
+      '['  -> "ZM"
+      ']'  -> "ZN"
+      ':'  -> "ZC"
+      'Z'  -> "ZZ"
+      -- Escapes introduced by a lower-case z.
+      'z'  -> "zz"
+      '&'  -> "za"
+      '|'  -> "zb"
+      '^'  -> "zc"
+      '$'  -> "zd"
+      '='  -> "ze"
+      '>'  -> "zg"
+      '#'  -> "zh"
+      '.'  -> "zi"
+      '<'  -> "zl"
+      '-'  -> "zm"
+      '!'  -> "zn"
+      '+'  -> "zp"
+      '\'' -> "zq"
+      '\\' -> "zr"
+      '/'  -> "zs"
+      '*'  -> "zt"
+      '_'  -> "zu"
+      '%'  -> "zv"
+      -- Anything else is passed through as-is.
+      _    -> T.singleton c