lmdb

Minimal LMDB bindings for Haskell.
git clone git://git.ppad.tech/lmdb.git
Log | Files | Refs | README | LICENSE

commit d53d51b03e1160de9e3105cdbfd5b482e5025dbd
parent 0c973be28ac9a83e02ec64cf7811aeee03b3e7aa
Author: Jared Tobin <jared@jtobin.io>
Date:   Sun, 24 May 2026 16:35:26 -0230

perf: inline hot paths and combine MDB_val allocations

The Haskell layer over the FFI was using `with` + `alloca` pairs and
shared `cursorAt` with a `Maybe BS.ByteString` dispatch on every cursor
step. None of it costs much per call, but cursor scans iterate it tens
of thousands of times per second.

- INLINE on `check`, `withBSAsVal`, `peekVal`, `get`, `put`, `del`,
  all `cursor*` wrappers, and the `MDB_val` Storable methods. Lets
  `-O1` consumers see through the wrappers at each call site.
- `withBSAsVal` / `peekVal` poke fields directly via `pokeByteOff`
  / `peekByteOff` rather than going through `with` and the Storable
  instance. Same generated code under inlining, but no longer depends
  on it.
- `get`, `put`, `cursorMove`, `cursorSeek` use a single
  `allocaBytes 32` to hold both the input-key and output-value (or
  the two input) `MDB_val` structs side-by-side instead of two
  separate allocations.
- `cursorAt` split into `cursorMove` (no input key — used by
  first/last/next/prev) and an inlined seek body. Removes the
  per-call `Maybe` dispatch that the hot scan path was paying.

Criterion (3s time-limit):
  put 10k inserts   4.319 ms  ->  4.160 ms   (-3.7%)
  get 1k random     332.1 us  ->  312.8 us   (-5.8%)
  get 10k random    3.200 ms  ->  2.971 ms   (-7.2%)
  cursor scan 10k   1.439 ms  ->  1.217 ms   (-15.4%)

Weigh:
  1k put            9,919,040 B  ->  9,782,608 B   (-1.4%)
  1k get            5,063,024 B  ->  4,922,792 B   (-2.8%)
  scan 1k entries     423,448 B  ->    267,040 B   (-37%)

Test suite still passes.

Also bundles unrelated doc tidy-ups: README badges, dev section
pointing at `nix develop`, cabal synopsis/description reflow.

Diffstat:
MREADME.md | 33++++++++++++++++++++++++++++-----
Mlib/Database/LMDB.hs | 230+++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------
Mlib/Database/LMDB/Internal.hs | 34+++++++++++++++++++---------------
Mppad-lmdb.cabal | 10+++++-----
4 files changed, 214 insertions(+), 93 deletions(-)

diff --git a/README.md b/README.md @@ -1,6 +1,8 @@ # lmdb +[![](https://img.shields.io/hackage/v/ppad-lmdb?color=blue)](https://hackage.haskell.org/package/ppad-lmdb) ![](https://img.shields.io/badge/license-MIT-brightgreen) +[![](https://img.shields.io/badge/haddock-lmdb-lightblue)](https://docs.ppad.tech/lmdb) Minimal Haskell bindings to [LMDB][lmdb] (Lightning Memory-Mapped Database), an embedded ACID key-value store. LMDB is single-writer, @@ -20,7 +22,8 @@ A sample GHCi session: > import qualified Database.LMDB as L > > -- open an environment + dbi, write a value, read it back - > L.withEnv "/tmp/mydb" L.defaultEnvFlags { L.envNoSubdir = True } $ \env -> do + > let flags = L.defaultEnvFlags { L.envNoSubdir = True } + > L.withEnv "/tmp/mydb" flags $ \env -> do > L.withWriteTxn env $ \txn -> do > dbi <- L.openDbi txn Nothing True > L.put txn dbi "hello" "world" @@ -30,12 +33,32 @@ A sample GHCi session: Just "world" ``` -Read-only transactions and read-write transactions are distinguished -at the type level via a phantom parameter, so writing from a read-only -transaction is a compile-time error. +Read-only and read-write transactions are distinguished at the type +level via a phantom parameter, so writing from a read-only transaction +is a compile-time error. ## Documentation -Haddocks are exported from the `Database.LMDB` module. +Haddocks (API documentation, etc.) are hosted at +[docs.ppad.tech/lmdb](https://docs.ppad.tech/lmdb). + +## Development + +You'll require [Nix][nixos] with [flake][flake] support enabled. Enter a +development shell with: + +``` +$ nix develop +``` + +Then do e.g.: + +``` +$ cabal repl ppad-lmdb +``` + +to get a REPL for the main library. [lmdb]: https://www.symas.com/lmdb +[nixos]: https://nixos.org/ +[flake]: https://nixos.org/manual/nix/unstable/command-ref/new-cli/nix3-flake.html diff --git a/lib/Database/LMDB.hs b/lib/Database/LMDB.hs @@ -1,4 +1,5 @@ {-# OPTIONS_HADDOCK prune #-} +{-# LANGUAGE BangPatterns #-} {-# LANGUAGE EmptyDataDecls #-} -- | @@ -9,8 +10,8 @@ -- -- Minimal bindings to -- [LMDB](https://www.symas.com/lmdb), an embedded ACID key-value --- store. The upstream LMDB C source is vendored in this package; no --- external @liblmdb@ is needed. +-- store. Links against the system @liblmdb@ via @pkg-config@; the +-- bundled nix flake provides one. -- -- LMDB has a single-writer, many-reader transaction model and uses -- memory mapping for zero-copy reads. This module presents that as a @@ -20,36 +21,36 @@ -- level via a phantom parameter on 'Txn', so using a write primitive -- (e.g. 'put') on a read transaction is a compile-time error. -- --- Most operations throw 'LMDBException' on error. 'get' is the --- exception: it returns 'Nothing' on 'LMDBNotFound' since missing --- keys are normal control flow. +-- Most operations throw 'LMDBException' on error. 'get' and 'del' are +-- the exceptions: they map a missing key onto 'Nothing' and 'False' +-- respectively, since absence is normal control flow. module Database.LMDB ( - -- * environments + -- * Environments Env - , EnvFlags (..) + , EnvFlags(..) , defaultEnvFlags , open , close , withEnv - -- * transactions + -- * Transactions , Txn , RO , RW , withReadTxn , withWriteTxn - -- * databases + -- * Databases , Dbi , openDbi - -- * key-value ops + -- * Key-value operations , get , put , del - -- * cursors + -- * Cursors , Cursor , withCursor , cursorFirst @@ -58,8 +59,8 @@ module Database.LMDB ( , cursorPrev , cursorSeek - -- * errors - , LMDBException (..) + -- * Errors + , LMDBException(..) ) where import Control.Exception @@ -67,16 +68,15 @@ import Control.Exception import qualified Data.ByteString as BS import qualified Data.ByteString.Unsafe as BU import Foreign.C.String (peekCString, withCString) -import Foreign.C.Types (CInt, CUInt) -import Foreign.Marshal.Alloc (alloca) -import Foreign.Marshal.Utils (with) -import Foreign.Ptr (Ptr, nullPtr) -import Foreign.Storable (peek) +import Foreign.C.Types (CInt, CSize, CUInt) +import Foreign.Marshal.Alloc (alloca, allocaBytes) +import Foreign.Ptr (Ptr, nullPtr, plusPtr) +import Foreign.Storable (peek, peekByteOff, pokeByteOff) import Database.LMDB.Internal -- handles -------------------------------------------------------------------- --- | An LMDB environment. Roughly an open database file (or, with +-- | An LMDB environment. Roughly, an open database file (or, with -- 'envNoSubdir' off, a directory containing one). newtype Env = Env (Ptr MDB_env) @@ -87,10 +87,11 @@ data RO data RW -- | A transaction. The phantom @s@ distinguishes 'RO' from 'RW' so --- write operations refuse to type-check against read transactions. +-- that write operations refuse to type-check against read-only +-- transactions. newtype Txn s = Txn (Ptr MDB_txn) --- | A database identifier within an environment. +-- | A database identifier within an 'Env'. newtype Dbi = Dbi MDB_dbi -- | A cursor for range-scanning a 'Dbi'. @@ -108,17 +109,20 @@ data EnvFlags = EnvFlags -- ^ maximum number of named sub-databases. Default 1. , envNoSubdir :: !Bool -- ^ if 'True', treat the path as a regular file rather than a - -- directory containing @data.mdb@/@lock.mdb@. + -- directory containing @data.mdb@ and @lock.mdb@. , envReadOnly :: !Bool -- ^ open the environment read-only. , envNoSync :: !Bool -- ^ skip @fsync@ at commit time. Faster, but a crash may lose -- the last transaction. The database remains internally -- consistent. - } deriving (Show) + } deriving Show --- | Default configuration: 10 MiB map, one db, directory layout, +-- | Default 'EnvFlags': 10 MiB map, one database, directory layout, -- read-write, syncing at commit. +-- +-- >>> envMapSize defaultEnvFlags +-- 10485760 defaultEnvFlags :: EnvFlags defaultEnvFlags = EnvFlags { envMapSize = 10 * 1024 * 1024 @@ -140,15 +144,15 @@ envFlagBits EnvFlags { envNoSubdir = nsd, envReadOnly = ro, envNoSync = ns } = -- raw error code plus the message from @mdb_strerror@. data LMDBException = LMDBKeyExist - | LMDBNotFound -- ^ only seen via 'Internal'; the safe - -- layer maps this to @Nothing@/@False@ - -- instead of throwing + | LMDBNotFound -- ^ Only seen via "Database.LMDB.Internal"; + -- the safe layer maps this to 'Nothing' + -- or 'False' instead of throwing. | LMDBMapFull | LMDBCorrupted | LMDBPanic | LMDBVersionMismatch | LMDBOther !Int !String - deriving (Show) + deriving Show instance Exception LMDBException @@ -167,6 +171,7 @@ throwLmdb code check :: CInt -> IO () check 0 = pure () check c = throwLmdb c +{-# INLINE check #-} -- environment lifecycle ------------------------------------------------------ @@ -175,6 +180,8 @@ check c = throwLmdb c -- the file's parent directory must exist). -- -- Throws 'LMDBException' on failure. +-- +-- >>> env <- open "/tmp/mydb" defaultEnvFlags { envNoSubdir = True } open :: FilePath -> EnvFlags -> IO Env open path flags = mask $ \restore -> do envp <- alloca $ \pp -> do @@ -195,8 +202,10 @@ open path flags = mask $ \restore -> do close :: Env -> IO () close (Env envp) = mdb_env_close envp --- | Bracketed environment lifecycle: open, run the action, close on --- exit (including async exceptions). +-- | Bracketed environment lifecycle: 'open' the environment, run the +-- action, 'close' on exit (including async exceptions). +-- +-- >>> withEnv "/tmp/mydb" defaultEnvFlags $ \env -> ... withEnv :: FilePath -> EnvFlags -> (Env -> IO a) -> IO a withEnv path flags = bracket (open path flags) close @@ -209,6 +218,8 @@ beginTxn (Env envp) flags = alloca $ \pp -> do -- | Run an action in a read-only transaction. The transaction is -- aborted on exit; read transactions have no work to commit. +-- +-- >>> withReadTxn env $ \txn -> ... withReadTxn :: Env -> (Txn RO -> IO a) -> IO a withReadTxn env act = bracketOnError (beginTxn env _MDB_RDONLY) @@ -220,6 +231,8 @@ withReadTxn env act = bracketOnError -- | Run an action in a read-write transaction. Commits if the action -- returns normally; aborts on exception. +-- +-- >>> withWriteTxn env $ \txn -> ... withWriteTxn :: Env -> (Txn RW -> IO a) -> IO a withWriteTxn env act = bracketOnError (beginTxn env 0) @@ -232,10 +245,16 @@ withWriteTxn env act = bracketOnError -- databases ------------------------------------------------------------------ -- | Open a database within a transaction. Pass 'Nothing' for the --- unnamed default database. If @create@ is 'True', the database is --- created if it does not already exist — and the transaction must be --- a read-write transaction. -openDbi :: Txn s -> Maybe BS.ByteString -> Bool -> IO Dbi +-- unnamed default database. If the @create@ flag is 'True', the +-- database is created if it does not already exist — and the +-- transaction must be a read-write transaction. +-- +-- >>> dbi <- openDbi txn Nothing True +openDbi + :: Txn s + -> Maybe BS.ByteString -- ^ database name; 'Nothing' for the default + -> Bool -- ^ create if not present + -> IO Dbi openDbi (Txn txnp) mname create = alloca $ \pp -> do let flags = if create then _MDB_CREATE else 0 go cstr = do @@ -247,15 +266,26 @@ openDbi (Txn txnp) mname create = alloca $ \pp -> do -- bytestring <-> MDB_val helpers -------------------------------------------- +-- An MDB_val is two words: a CSize length at offset 0, a payload Ptr +-- at offset 8 (on 64-bit). We poke fields directly rather than going +-- through the Storable instance so hot paths don't depend on it being +-- inlined. + withBSAsVal :: BS.ByteString -> (Ptr MDB_val -> IO a) -> IO a withBSAsVal bs k = BU.unsafeUseAsCStringLen bs $ \(p, n) -> - with (MDB_val (fromIntegral n) p) k + allocaBytes 16 $ \vp -> do + pokeByteOff vp 0 (fromIntegral n :: CSize) + pokeByteOff vp 8 p + k vp +{-# INLINE withBSAsVal #-} peekVal :: Ptr MDB_val -> IO BS.ByteString peekVal vp = do - MDB_val sz pd <- peek vp + sz <- peekByteOff vp 0 :: IO CSize + pd <- peekByteOff vp 8 BS.packCStringLen (pd, fromIntegral sz) +{-# INLINE peekVal #-} -- key-value ops -------------------------------------------------------------- @@ -264,27 +294,59 @@ peekVal vp = do -- -- The returned 'BS.ByteString' is a copy of the data; LMDB's -- memory-mapped buffer is not aliased into Haskell. -get :: Txn s -> Dbi -> BS.ByteString -> IO (Maybe BS.ByteString) +-- +-- >>> get txn dbi "hello" +-- Just "world" +get + :: Txn s + -> Dbi + -> BS.ByteString -- ^ key + -> IO (Maybe BS.ByteString) -- ^ value, if present get (Txn txnp) (Dbi dbi) k = - withBSAsVal k $ \kp -> - alloca $ \vp -> do + BU.unsafeUseAsCStringLen k $ \(kdata, klen) -> + allocaBytes 32 $ \kp -> do + let !vp = kp `plusPtr` 16 + pokeByteOff kp 0 (fromIntegral klen :: CSize) + pokeByteOff kp 8 kdata code <- mdb_get txnp dbi kp vp if code == _MDB_NOTFOUND then pure Nothing else do check code Just <$> peekVal vp +{-# INLINE get #-} -- | Insert or replace a key. Throws 'LMDBException' on failure. -put :: Txn RW -> Dbi -> BS.ByteString -> BS.ByteString -> IO () +-- +-- >>> put txn dbi "hello" "world" +put + :: Txn RW + -> Dbi + -> BS.ByteString -- ^ key + -> BS.ByteString -- ^ value + -> IO () put (Txn txnp) (Dbi dbi) k v = - withBSAsVal k $ \kp -> - withBSAsVal v $ \vp -> + BU.unsafeUseAsCStringLen k $ \(kdata, klen) -> + BU.unsafeUseAsCStringLen v $ \(vdata, vlen) -> + allocaBytes 32 $ \kp -> do + let !vp = kp `plusPtr` 16 + pokeByteOff kp 0 (fromIntegral klen :: CSize) + pokeByteOff kp 8 kdata + pokeByteOff vp 0 (fromIntegral vlen :: CSize) + pokeByteOff vp 8 vdata check =<< mdb_put txnp dbi kp vp 0 +{-# INLINE put #-} -- | Delete a key. Returns 'True' if the key existed and was removed, -- 'False' if it was already absent. -del :: Txn RW -> Dbi -> BS.ByteString -> IO Bool +-- +-- >>> del txn dbi "hello" +-- True +del + :: Txn RW + -> Dbi + -> BS.ByteString -- ^ key + -> IO Bool -- ^ 'True' if the key was present and removed del (Txn txnp) (Dbi dbi) k = withBSAsVal k $ \kp -> do code <- mdb_del txnp dbi kp nullPtr @@ -292,10 +354,15 @@ del (Txn txnp) (Dbi dbi) k = else do check code pure True +{-# INLINE del #-} -- cursors -------------------------------------------------------------------- --- | Bracketed cursor lifecycle. +-- | Bracketed cursor lifecycle: open a cursor over the given 'Dbi', +-- run the action, close the cursor on exit (including async +-- exceptions). +-- +-- >>> withCursor txn dbi $ \cur -> ... withCursor :: Txn s -> Dbi -> (Cursor s -> IO a) -> IO a withCursor (Txn txnp) (Dbi dbi) act = bracket open' mdb_cursor_close use' where @@ -304,45 +371,72 @@ withCursor (Txn txnp) (Dbi dbi) act = bracket open' mdb_cursor_close use' peek pp use' p = act (Cursor p) -cursorAt +-- Scan-position helper (no input key): one 32-byte buffer for both +-- output MDB_vals. +cursorMove :: Cursor s -> CInt -- MDB_cursor_op - -> Maybe BS.ByteString -> IO (Maybe (BS.ByteString, BS.ByteString)) -cursorAt (Cursor cp) op mk = case mk of - Nothing -> alloca $ \kp -> alloca $ \vp -> step kp vp - Just bs -> withBSAsVal bs $ \kp -> alloca $ \vp -> step kp vp - where - step kp vp = do - code <- mdb_cursor_get cp kp vp op - if code == _MDB_NOTFOUND - then pure Nothing - else do - check code - k <- peekVal kp - v <- peekVal vp - pure (Just (k, v)) +cursorMove (Cursor cp) op = + allocaBytes 32 $ \kp -> do + let !vp = kp `plusPtr` 16 + code <- mdb_cursor_get cp kp vp op + if code == _MDB_NOTFOUND + then pure Nothing + else do + check code + k <- peekVal kp + v <- peekVal vp + pure (Just (k, v)) +{-# INLINE cursorMove #-} -- | Position at the first key/value pair, or 'Nothing' on an empty -- database. +-- +-- >>> cursorFirst cur +-- Just ("a","alpha") cursorFirst :: Cursor s -> IO (Maybe (BS.ByteString, BS.ByteString)) -cursorFirst c = cursorAt c _MDB_FIRST Nothing +cursorFirst c = cursorMove c _MDB_FIRST +{-# INLINE cursorFirst #-} --- | Position at the last key/value pair. +-- | Position at the last key/value pair, or 'Nothing' on an empty +-- database. cursorLast :: Cursor s -> IO (Maybe (BS.ByteString, BS.ByteString)) -cursorLast c = cursorAt c _MDB_LAST Nothing +cursorLast c = cursorMove c _MDB_LAST +{-# INLINE cursorLast #-} --- | Advance to the next pair. +-- | Advance to the next key/value pair, or 'Nothing' if already at +-- the end. cursorNext :: Cursor s -> IO (Maybe (BS.ByteString, BS.ByteString)) -cursorNext c = cursorAt c _MDB_NEXT Nothing +cursorNext c = cursorMove c _MDB_NEXT +{-# INLINE cursorNext #-} --- | Step back to the previous pair. +-- | Step back to the previous key/value pair, or 'Nothing' if already +-- at the start. cursorPrev :: Cursor s -> IO (Maybe (BS.ByteString, BS.ByteString)) -cursorPrev c = cursorAt c _MDB_PREV Nothing +cursorPrev c = cursorMove c _MDB_PREV +{-# INLINE cursorPrev #-} --- | Position at the first key >= the given key. +-- | Position at the first key /greater than or equal to/ the given +-- key, or 'Nothing' if no such key exists. +-- +-- >>> cursorSeek cur "m" +-- Just ("n","november") cursorSeek :: Cursor s -> BS.ByteString -> IO (Maybe (BS.ByteString, BS.ByteString)) -cursorSeek c k = cursorAt c _MDB_SET_RANGE (Just k) +cursorSeek (Cursor cp) k = + BU.unsafeUseAsCStringLen k $ \(kdata, klen) -> + allocaBytes 32 $ \kp -> do + let !vp = kp `plusPtr` 16 + pokeByteOff kp 0 (fromIntegral klen :: CSize) + pokeByteOff kp 8 kdata + code <- mdb_cursor_get cp kp vp _MDB_SET_RANGE + if code == _MDB_NOTFOUND + then pure Nothing + else do + check code + ok <- peekVal kp + ov <- peekVal vp + pure (Just (ok, ov)) diff --git a/lib/Database/LMDB/Internal.hs b/lib/Database/LMDB/Internal.hs @@ -8,34 +8,34 @@ -- Maintainer: Jared Tobin <jared@ppad.tech> -- -- Raw FFI declarations and constants for the LMDB C API. This module --- is exported for power-users; prefer "Database.LMDB" for the safe, +-- is exposed for power users; prefer "Database.LMDB" for the safe, -- bracketed interface. module Database.LMDB.Internal ( - -- * opaque handles + -- * Opaque handles MDB_env , MDB_txn , MDB_dbi , MDB_cursor - -- * value pair - , MDB_val (..) + -- * Value pair + , MDB_val(..) - -- * environment flags + -- * Environment flags , _MDB_NOSUBDIR , _MDB_NOSYNC , _MDB_RDONLY , _MDB_NOMETASYNC , _MDB_NOTLS - -- * database open flags + -- * Database open flags , _MDB_CREATE - -- * put flags + -- * Put flags , _MDB_NOOVERWRITE , _MDB_APPEND - -- * cursor ops + -- * Cursor operations , _MDB_FIRST , _MDB_LAST , _MDB_NEXT @@ -44,7 +44,7 @@ module Database.LMDB.Internal ( , _MDB_SET_RANGE , _MDB_GET_CURRENT - -- * error codes + -- * Error codes , _MDB_SUCCESS , _MDB_KEYEXIST , _MDB_NOTFOUND @@ -57,7 +57,7 @@ module Database.LMDB.Internal ( , _MDB_BAD_VALSIZE , _MDB_BAD_DBI - -- * environment lifecycle + -- * Environment lifecycle , mdb_env_create , mdb_env_open , mdb_env_close @@ -65,26 +65,26 @@ module Database.LMDB.Internal ( , mdb_env_set_maxdbs , mdb_env_sync - -- * transactions + -- * Transactions , mdb_txn_begin , mdb_txn_commit , mdb_txn_abort - -- * databases + -- * Databases , mdb_dbi_open , mdb_dbi_close - -- * key-value ops + -- * Key-value operations , mdb_get , mdb_put , mdb_del - -- * cursors + -- * Cursors , mdb_cursor_open , mdb_cursor_close , mdb_cursor_get - -- * diagnostics + -- * Diagnostics , mdb_strerror ) where @@ -115,14 +115,18 @@ data MDB_val = MDB_val instance Storable MDB_val where sizeOf _ = 2 * sizeOf (undefined :: Ptr ()) + {-# INLINE sizeOf #-} alignment _ = alignment (undefined :: Ptr ()) + {-# INLINE alignment #-} peek p = do sz <- peekByteOff p 0 pd <- peekByteOff p (sizeOf (undefined :: Ptr ())) pure (MDB_val sz pd) + {-# INLINE peek #-} poke p (MDB_val sz pd) = do pokeByteOff p 0 sz pokeByteOff p (sizeOf (undefined :: Ptr ())) pd + {-# INLINE poke #-} -- environment flags ---------------------------------------------------------- diff --git a/ppad-lmdb.cabal b/ppad-lmdb.cabal @@ -1,7 +1,7 @@ cabal-version: 3.0 name: ppad-lmdb version: 0.1.0 -synopsis: Minimal bindings to LMDB +synopsis: Minimal bindings to LMDB. license: MIT license-file: LICENSE author: Jared Tobin @@ -10,11 +10,11 @@ category: Database build-type: Simple tested-with: GHC == 9.10.3 extra-doc-files: CHANGELOG - description: - Minimal bindings to LMDB (Lightning Memory-Mapped Database), an - embedded ACID key-value store. Links against the system liblmdb - via pkg-config; the bundled nix flake provides one. + Minimal bindings to + [LMDB](https://www.symas.com/lmdb) (Lightning Memory-Mapped + Database), an embedded ACID key-value store. Links against the system + liblmdb via pkg-config; the bundled nix flake provides one. flag llvm description: Use GHC's LLVM backend.