lmdb

Minimal LMDB bindings for Haskell.
git clone git://git.ppad.tech/lmdb.git
Log | Files | Refs | README | LICENSE

commit 0c973be28ac9a83e02ec64cf7811aeee03b3e7aa
Author: Jared Tobin <jared@jtobin.io>
Date:   Sun, 24 May 2026 16:17:57 -0230

init: minimal LMDB bindings

A first cut at ppad-lmdb. Wraps the system liblmdb (via
pkg-config, provided by pkgs.lmdb in the flake) and exposes a
safe, bracketed interface keyed on strict ByteStrings.

Surface (Database.LMDB):
- Env / open / close / withEnv with an EnvFlags record
- Phantom-typed transactions: withReadTxn (Txn RO) and
  withWriteTxn (Txn RW). Writing through an RO Txn fails to
  type-check
- openDbi, get / put / del
- Cursors: withCursor, cursorFirst / Last / Next / Prev,
  cursorSeek (MDB_SET_RANGE)
- LMDBException with the common cases tagged plus a catch-all

Database.LMDB.Internal exposes the raw foreign-import-capi
declarations and flag constants for power users.

Tests (tasty): six unit cases (roundtrip, missing key, del
truthiness, write-txn rollback on exception, cursor walk,
cursorSeek) plus two QuickCheck properties (put/get roundtrip,
cursor enumerates Map.toAscList).

Benches: criterion bench/Main.hs covers put/get/cursor at 1k/10k
entries; weigh bench/Weight.hs reports per-op allocations.

Note: sanitize flag intentionally omitted - we vendor no C, so
clang doesn't add the ASan runtime to the link line and the flag
would only give a false signal of safety.

Diffstat:
A.gitignore | 6++++++
ACHANGELOG | 6++++++
ALICENSE | 20++++++++++++++++++++
AREADME.md | 41+++++++++++++++++++++++++++++++++++++++++
Abench/Main.hs | 84+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Abench/Weight.hs | 70++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aflake.lock | 88+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aflake.nix | 66++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/Database/LMDB.hs | 348+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alib/Database/LMDB/Internal.hs | 276+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Appad-lmdb.cabal | 92+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atest/Main.hs | 181+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
12 files changed, 1278 insertions(+), 0 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -0,0 +1,6 @@ +dist-newstyle/ +result +result-doc +*.hi +*.o +.direnv/ diff --git a/CHANGELOG b/CHANGELOG @@ -0,0 +1,6 @@ +# Changelog + +- 0.1.0 (unreleased) + * Initial release: minimal bindings to OpenLDAP LMDB. Supports + environments, transactions (phantom-typed read-only vs read-write), + databases, get / put / del, and cursor-based range scans. diff --git a/LICENSE b/LICENSE @@ -0,0 +1,20 @@ +Copyright (c) 2026 Jared Tobin + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/README.md b/README.md @@ -0,0 +1,41 @@ +# lmdb + +![](https://img.shields.io/badge/license-MIT-brightgreen) + +Minimal Haskell bindings to [LMDB][lmdb] (Lightning Memory-Mapped +Database), an embedded ACID key-value store. LMDB is single-writer, +zero-copy on reads, MMAP-backed, and has no server process; this +library exposes just enough of its C API to use it as a persistent +key-value blob store. + +Links against the system `liblmdb` via `pkg-config`. The bundled +`flake.nix` provides one via `pkgs.lmdb`. + +## Usage + +A sample GHCi session: + +``` + > :set -XOverloadedStrings + > import qualified Database.LMDB as L + > + > -- open an environment + dbi, write a value, read it back + > L.withEnv "/tmp/mydb" L.defaultEnvFlags { L.envNoSubdir = True } $ \env -> do + > L.withWriteTxn env $ \txn -> do + > dbi <- L.openDbi txn Nothing True + > L.put txn dbi "hello" "world" + > L.withReadTxn env $ \txn -> do + > dbi <- L.openDbi txn Nothing False + > L.get txn dbi "hello" + Just "world" +``` + +Read-only transactions and read-write transactions are distinguished +at the type level via a phantom parameter, so writing from a read-only +transaction is a compile-time error. + +## Documentation + +Haddocks are exported from the `Database.LMDB` module. + +[lmdb]: https://www.symas.com/lmdb diff --git a/bench/Main.hs b/bench/Main.hs @@ -0,0 +1,84 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE OverloadedStrings #-} + +module Main where + +import Control.Monad (forM_) +import Criterion.Main +import qualified Data.ByteString as BS +import qualified Data.ByteString.Builder as BB +import qualified Data.ByteString.Lazy as BSL +import qualified Database.LMDB as L +import System.Directory + ( createDirectory + , doesDirectoryExist + , getTemporaryDirectory + , removeDirectoryRecursive + ) +import System.FilePath ((</>)) + +main :: IO () +main = do + tmpRoot <- getTemporaryDirectory + let dir = tmpRoot </> "ppad-lmdb-bench" + exists <- doesDirectoryExist dir + if exists then removeDirectoryRecursive dir else pure () + createDirectory dir + let flags = L.defaultEnvFlags + { L.envMapSize = 128 * 1024 * 1024 + , L.envMaxDbs = 4 + } + L.withEnv dir flags $ \env -> do + -- pre-populate + L.withWriteTxn env $ \txn -> do + dbi <- L.openDbi txn Nothing True + forM_ (kvs 10000) $ \(k, v) -> L.put txn dbi k v + defaultMain + [ bgroup "put" + [ bench "1k inserts (one txn)" $ whnfIO (insert env 1000) + , bench "10k inserts (one txn)" $ whnfIO (insert env 10000) + ] + , bgroup "get" + [ bench "1k random hits" $ whnfIO (readN env 1000) + , bench "10k random hits" $ whnfIO (readN env 10000) + ] + , bgroup "cursor" + [ bench "scan 10k entries" $ whnfIO (scanAll env) + ] + ] + removeDirectoryRecursive dir + +-- key/value helpers ---------------------------------------------------------- + +kvs :: Int -> [(BS.ByteString, BS.ByteString)] +kvs n = [ (mkKey i, mkVal i) | i <- [0 .. n - 1] ] + +mkKey :: Int -> BS.ByteString +mkKey i = BSL.toStrict (BB.toLazyByteString (BB.string7 "k" <> BB.intDec i)) + +mkVal :: Int -> BS.ByteString +mkVal i = BSL.toStrict (BB.toLazyByteString (BB.string7 "v" <> BB.intDec i)) + +-- workloads ------------------------------------------------------------------ + +insert :: L.Env -> Int -> IO () +insert env n = L.withWriteTxn env $ \txn -> do + dbi <- L.openDbi txn Nothing False + forM_ (kvs n) $ \(k, v) -> L.put txn dbi k v + +readN :: L.Env -> Int -> IO () +readN env n = L.withReadTxn env $ \txn -> do + dbi <- L.openDbi txn Nothing False + forM_ [0 .. n - 1] $ \i -> do + _ <- L.get txn dbi (mkKey i) + pure () + +scanAll :: L.Env -> IO Int +scanAll env = L.withReadTxn env $ \txn -> do + dbi <- L.openDbi txn Nothing False + L.withCursor txn dbi $ \cur -> do + let go !acc Nothing = pure acc + go !acc (Just _) = L.cursorNext cur >>= go (acc + 1) + mfirst <- L.cursorFirst cur + go (0 :: Int) mfirst + diff --git a/bench/Weight.hs b/bench/Weight.hs @@ -0,0 +1,70 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE OverloadedStrings #-} + +module Main where + +import Control.Monad (forM_) +import qualified Data.ByteString as BS +import qualified Data.ByteString.Builder as BB +import qualified Data.ByteString.Lazy as BSL +import qualified Database.LMDB as L +import System.Directory + ( createDirectory + , doesDirectoryExist + , getTemporaryDirectory + , removeDirectoryRecursive + ) +import System.FilePath ((</>)) +import Weigh + +main :: IO () +main = do + tmpRoot <- getTemporaryDirectory + let dir = tmpRoot </> "ppad-lmdb-weigh" + exists <- doesDirectoryExist dir + if exists then removeDirectoryRecursive dir else pure () + createDirectory dir + let envFlags = L.defaultEnvFlags + { L.envMapSize = 64 * 1024 * 1024 + , L.envMaxDbs = 4 + } + L.withEnv dir envFlags $ \env -> do + L.withWriteTxn env $ \txn -> do + dbi <- L.openDbi txn Nothing True + forM_ (kvs 1000) $ \(k, v) -> L.put txn dbi k v + mainWith $ do + io "1k put (one txn)" (insert env) 1000 + io "1k get (one txn)" (readN env) 1000 + io "scan 1k entries" scanAll env + existsAfter <- doesDirectoryExist dir + if existsAfter then removeDirectoryRecursive dir else pure () + +kvs :: Int -> [(BS.ByteString, BS.ByteString)] +kvs n = [ (mkKey i, mkVal i) | i <- [0 .. n - 1] ] + +mkKey :: Int -> BS.ByteString +mkKey i = BSL.toStrict (BB.toLazyByteString (BB.string7 "k" <> BB.intDec i)) + +mkVal :: Int -> BS.ByteString +mkVal i = BSL.toStrict (BB.toLazyByteString (BB.string7 "v" <> BB.intDec i)) + +insert :: L.Env -> Int -> IO () +insert env n = L.withWriteTxn env $ \txn -> do + dbi <- L.openDbi txn Nothing False + forM_ (kvs n) $ \(k, v) -> L.put txn dbi k v + +readN :: L.Env -> Int -> IO () +readN env n = L.withReadTxn env $ \txn -> do + dbi <- L.openDbi txn Nothing False + forM_ [0 .. n - 1] $ \i -> do + _ <- L.get txn dbi (mkKey i) + pure () + +scanAll :: L.Env -> IO Int +scanAll env = L.withReadTxn env $ \txn -> do + dbi <- L.openDbi txn Nothing False + L.withCursor txn dbi $ \cur -> do + mfirst <- L.cursorFirst cur + let go !acc Nothing = pure acc + go !acc (Just _) = L.cursorNext cur >>= go (acc + 1) + go (0 :: Int) mfirst diff --git a/flake.lock b/flake.lock @@ -0,0 +1,88 @@ +{ + "nodes": { + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1731533236, + "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1766840161, + "narHash": "sha256-Ss/LHpJJsng8vz1Pe33RSGIWUOcqM1fjrehjUkdrWio=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "3edc4a30ed3903fdf6f90c837f961fa6b49582d1", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixpkgs-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "ppad-nixpkgs": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs" + }, + "locked": { + "lastModified": 1766932084, + "narHash": "sha256-GvVsbTfW+B7IQ9K/QP2xcXJAm1lhBin1jYZWNjOzT+o=", + "ref": "master", + "rev": "353e61763b959b960a55321a85423501e3e9ed7a", + "revCount": 2, + "type": "git", + "url": "git://git.ppad.tech/nixpkgs.git" + }, + "original": { + "ref": "master", + "type": "git", + "url": "git://git.ppad.tech/nixpkgs.git" + } + }, + "root": { + "inputs": { + "flake-utils": [ + "ppad-nixpkgs", + "flake-utils" + ], + "nixpkgs": [ + "ppad-nixpkgs", + "nixpkgs" + ], + "ppad-nixpkgs": "ppad-nixpkgs" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix @@ -0,0 +1,66 @@ +{ + description = "Minimal LMDB bindings"; + + inputs = { + ppad-nixpkgs = { + type = "git"; + url = "git://git.ppad.tech/nixpkgs.git"; + ref = "master"; + }; + flake-utils.follows = "ppad-nixpkgs/flake-utils"; + nixpkgs.follows = "ppad-nixpkgs/nixpkgs"; + }; + + outputs = { self, nixpkgs, flake-utils, ppad-nixpkgs }: + flake-utils.lib.eachDefaultSystem (system: + let + lib = "ppad-lmdb"; + + pkgs = import nixpkgs { inherit system; }; + hlib = pkgs.haskell.lib; + llvm = pkgs.llvmPackages_19.llvm; + clang = pkgs.llvmPackages_19.clang; + + hpkgs = pkgs.haskell.packages.ghc910.extend (new: old: { + ${lib} = new.callCabal2nix lib ./. { }; + }); + + cc = pkgs.stdenv.cc; + ghc = hpkgs.ghc; + cabal = hpkgs.cabal-install; + lmdb = pkgs.lmdb; + pkgconfig = pkgs.pkg-config; + in + { + packages.default = hpkgs.${lib}; + + packages.haddock = hpkgs.${lib}.doc; + + devShells.default = hpkgs.shellFor { + packages = p: [ + (hlib.doBenchmark p.${lib}) + ]; + + buildInputs = [ + cabal + cc + llvm + lmdb + pkgconfig + ]; + + doBenchmark = true; + + shellHook = '' + PS1="[${lib}] \w$ " + echo "entering ${system} shell, using" + echo "cc: $(${cc}/bin/cc --version | head -1)" + echo "ghc: $(${ghc}/bin/ghc --version)" + echo "cabal: $(${cabal}/bin/cabal --version | head -1)" + echo "llc: $(${llvm}/bin/llc --version | head -2 | tail -1)" + echo "lmdb: ${lmdb.version}" + ''; + }; + } + ); +} diff --git a/lib/Database/LMDB.hs b/lib/Database/LMDB.hs @@ -0,0 +1,348 @@ +{-# OPTIONS_HADDOCK prune #-} +{-# LANGUAGE EmptyDataDecls #-} + +-- | +-- Module: Database.LMDB +-- Copyright: (c) 2026 Jared Tobin +-- License: MIT +-- Maintainer: Jared Tobin <jared@ppad.tech> +-- +-- Minimal bindings to +-- [LMDB](https://www.symas.com/lmdb), an embedded ACID key-value +-- store. The upstream LMDB C source is vendored in this package; no +-- external @liblmdb@ is needed. +-- +-- LMDB has a single-writer, many-reader transaction model and uses +-- memory mapping for zero-copy reads. This module presents that as a +-- bracketed @IO@-only interface. +-- +-- Read-only and read-write transactions are distinguished at the type +-- level via a phantom parameter on 'Txn', so using a write primitive +-- (e.g. 'put') on a read transaction is a compile-time error. +-- +-- Most operations throw 'LMDBException' on error. 'get' is the +-- exception: it returns 'Nothing' on 'LMDBNotFound' since missing +-- keys are normal control flow. + +module Database.LMDB ( + -- * environments + Env + , EnvFlags (..) + , defaultEnvFlags + , open + , close + , withEnv + + -- * transactions + , Txn + , RO + , RW + , withReadTxn + , withWriteTxn + + -- * databases + , Dbi + , openDbi + + -- * key-value ops + , get + , put + , del + + -- * cursors + , Cursor + , withCursor + , cursorFirst + , cursorLast + , cursorNext + , cursorPrev + , cursorSeek + + -- * errors + , LMDBException (..) + ) where + +import Control.Exception + ( Exception, bracket, bracketOnError, mask, onException, throwIO) +import qualified Data.ByteString as BS +import qualified Data.ByteString.Unsafe as BU +import Foreign.C.String (peekCString, withCString) +import Foreign.C.Types (CInt, CUInt) +import Foreign.Marshal.Alloc (alloca) +import Foreign.Marshal.Utils (with) +import Foreign.Ptr (Ptr, nullPtr) +import Foreign.Storable (peek) +import Database.LMDB.Internal + +-- handles -------------------------------------------------------------------- + +-- | An LMDB environment. Roughly an open database file (or, with +-- 'envNoSubdir' off, a directory containing one). +newtype Env = Env (Ptr MDB_env) + +-- | Phantom tag for read-only transactions. +data RO + +-- | Phantom tag for read-write transactions. +data RW + +-- | A transaction. The phantom @s@ distinguishes 'RO' from 'RW' so +-- write operations refuse to type-check against read transactions. +newtype Txn s = Txn (Ptr MDB_txn) + +-- | A database identifier within an environment. +newtype Dbi = Dbi MDB_dbi + +-- | A cursor for range-scanning a 'Dbi'. +newtype Cursor s = Cursor (Ptr MDB_cursor) + +-- environment flags ---------------------------------------------------------- + +-- | Configuration for 'open'. +data EnvFlags = EnvFlags + { envMapSize :: !Int + -- ^ map size in bytes; LMDB will refuse writes beyond this. The + -- default of 10 MiB is intentionally small — set it to something + -- appropriate for your workload before opening. + , envMaxDbs :: !Int + -- ^ maximum number of named sub-databases. Default 1. + , envNoSubdir :: !Bool + -- ^ if 'True', treat the path as a regular file rather than a + -- directory containing @data.mdb@/@lock.mdb@. + , envReadOnly :: !Bool + -- ^ open the environment read-only. + , envNoSync :: !Bool + -- ^ skip @fsync@ at commit time. Faster, but a crash may lose + -- the last transaction. The database remains internally + -- consistent. + } deriving (Show) + +-- | Default configuration: 10 MiB map, one db, directory layout, +-- read-write, syncing at commit. +defaultEnvFlags :: EnvFlags +defaultEnvFlags = EnvFlags + { envMapSize = 10 * 1024 * 1024 + , envMaxDbs = 1 + , envNoSubdir = False + , envReadOnly = False + , envNoSync = False + } + +envFlagBits :: EnvFlags -> CUInt +envFlagBits EnvFlags { envNoSubdir = nsd, envReadOnly = ro, envNoSync = ns } = + (if nsd then _MDB_NOSUBDIR else 0) + + (if ro then _MDB_RDONLY else 0) + + (if ns then _MDB_NOSYNC else 0) + +-- error handling ------------------------------------------------------------- + +-- | A typed wrapper around LMDB error codes. 'LMDBOther' carries the +-- raw error code plus the message from @mdb_strerror@. +data LMDBException + = LMDBKeyExist + | LMDBNotFound -- ^ only seen via 'Internal'; the safe + -- layer maps this to @Nothing@/@False@ + -- instead of throwing + | LMDBMapFull + | LMDBCorrupted + | LMDBPanic + | LMDBVersionMismatch + | LMDBOther !Int !String + deriving (Show) + +instance Exception LMDBException + +throwLmdb :: CInt -> IO a +throwLmdb code + | code == _MDB_KEYEXIST = throwIO LMDBKeyExist + | code == _MDB_NOTFOUND = throwIO LMDBNotFound + | code == _MDB_MAP_FULL = throwIO LMDBMapFull + | code == _MDB_CORRUPTED = throwIO LMDBCorrupted + | code == _MDB_PANIC = throwIO LMDBPanic + | code == _MDB_VERSION_MISMATCH = throwIO LMDBVersionMismatch + | otherwise = do + msg <- mdb_strerror code >>= peekCString + throwIO (LMDBOther (fromIntegral code) msg) + +check :: CInt -> IO () +check 0 = pure () +check c = throwLmdb c + +-- environment lifecycle ------------------------------------------------------ + +-- | Open an LMDB environment at the given path. The path must already +-- exist (as a directory unless 'envNoSubdir' is set, in which case +-- the file's parent directory must exist). +-- +-- Throws 'LMDBException' on failure. +open :: FilePath -> EnvFlags -> IO Env +open path flags = mask $ \restore -> do + envp <- alloca $ \pp -> do + check =<< mdb_env_create pp + peek pp + let env = Env envp + flip onException (close env) . restore $ do + check =<< mdb_env_set_mapsize envp + (fromIntegral (envMapSize flags)) + check =<< mdb_env_set_maxdbs envp + (fromIntegral (envMaxDbs flags)) + withCString path $ \cpath -> + check =<< mdb_env_open envp cpath (envFlagBits flags) 0o644 + pure env + +-- | Close an environment. Must not be called while transactions are +-- live. +close :: Env -> IO () +close (Env envp) = mdb_env_close envp + +-- | Bracketed environment lifecycle: open, run the action, close on +-- exit (including async exceptions). +withEnv :: FilePath -> EnvFlags -> (Env -> IO a) -> IO a +withEnv path flags = bracket (open path flags) close + +-- transactions --------------------------------------------------------------- + +beginTxn :: Env -> CUInt -> IO (Ptr MDB_txn) +beginTxn (Env envp) flags = alloca $ \pp -> do + check =<< mdb_txn_begin envp nullPtr flags pp + peek pp + +-- | Run an action in a read-only transaction. The transaction is +-- aborted on exit; read transactions have no work to commit. +withReadTxn :: Env -> (Txn RO -> IO a) -> IO a +withReadTxn env act = bracketOnError + (beginTxn env _MDB_RDONLY) + mdb_txn_abort + (\txnp -> do + r <- act (Txn txnp) + mdb_txn_abort txnp + pure r) + +-- | Run an action in a read-write transaction. Commits if the action +-- returns normally; aborts on exception. +withWriteTxn :: Env -> (Txn RW -> IO a) -> IO a +withWriteTxn env act = bracketOnError + (beginTxn env 0) + mdb_txn_abort + (\txnp -> do + r <- act (Txn txnp) + check =<< mdb_txn_commit txnp + pure r) + +-- databases ------------------------------------------------------------------ + +-- | Open a database within a transaction. Pass 'Nothing' for the +-- unnamed default database. If @create@ is 'True', the database is +-- created if it does not already exist — and the transaction must be +-- a read-write transaction. +openDbi :: Txn s -> Maybe BS.ByteString -> Bool -> IO Dbi +openDbi (Txn txnp) mname create = alloca $ \pp -> do + let flags = if create then _MDB_CREATE else 0 + go cstr = do + check =<< mdb_dbi_open txnp cstr flags pp + Dbi <$> peek pp + case mname of + Nothing -> go nullPtr + Just nm -> BU.unsafeUseAsCString nm go + +-- bytestring <-> MDB_val helpers -------------------------------------------- + +withBSAsVal :: BS.ByteString -> (Ptr MDB_val -> IO a) -> IO a +withBSAsVal bs k = + BU.unsafeUseAsCStringLen bs $ \(p, n) -> + with (MDB_val (fromIntegral n) p) k + +peekVal :: Ptr MDB_val -> IO BS.ByteString +peekVal vp = do + MDB_val sz pd <- peek vp + BS.packCStringLen (pd, fromIntegral sz) + +-- key-value ops -------------------------------------------------------------- + +-- | Look up a key. Returns 'Nothing' if not found. Other LMDB errors +-- throw 'LMDBException'. +-- +-- The returned 'BS.ByteString' is a copy of the data; LMDB's +-- memory-mapped buffer is not aliased into Haskell. +get :: Txn s -> Dbi -> BS.ByteString -> IO (Maybe BS.ByteString) +get (Txn txnp) (Dbi dbi) k = + withBSAsVal k $ \kp -> + alloca $ \vp -> do + code <- mdb_get txnp dbi kp vp + if code == _MDB_NOTFOUND + then pure Nothing + else do + check code + Just <$> peekVal vp + +-- | Insert or replace a key. Throws 'LMDBException' on failure. +put :: Txn RW -> Dbi -> BS.ByteString -> BS.ByteString -> IO () +put (Txn txnp) (Dbi dbi) k v = + withBSAsVal k $ \kp -> + withBSAsVal v $ \vp -> + check =<< mdb_put txnp dbi kp vp 0 + +-- | Delete a key. Returns 'True' if the key existed and was removed, +-- 'False' if it was already absent. +del :: Txn RW -> Dbi -> BS.ByteString -> IO Bool +del (Txn txnp) (Dbi dbi) k = + withBSAsVal k $ \kp -> do + code <- mdb_del txnp dbi kp nullPtr + if code == _MDB_NOTFOUND then pure False + else do + check code + pure True + +-- cursors -------------------------------------------------------------------- + +-- | Bracketed cursor lifecycle. +withCursor :: Txn s -> Dbi -> (Cursor s -> IO a) -> IO a +withCursor (Txn txnp) (Dbi dbi) act = bracket open' mdb_cursor_close use' + where + open' = alloca $ \pp -> do + check =<< mdb_cursor_open txnp dbi pp + peek pp + use' p = act (Cursor p) + +cursorAt + :: Cursor s + -> CInt -- MDB_cursor_op + -> Maybe BS.ByteString + -> IO (Maybe (BS.ByteString, BS.ByteString)) +cursorAt (Cursor cp) op mk = case mk of + Nothing -> alloca $ \kp -> alloca $ \vp -> step kp vp + Just bs -> withBSAsVal bs $ \kp -> alloca $ \vp -> step kp vp + where + step kp vp = do + code <- mdb_cursor_get cp kp vp op + if code == _MDB_NOTFOUND + then pure Nothing + else do + check code + k <- peekVal kp + v <- peekVal vp + pure (Just (k, v)) + +-- | Position at the first key/value pair, or 'Nothing' on an empty +-- database. +cursorFirst :: Cursor s -> IO (Maybe (BS.ByteString, BS.ByteString)) +cursorFirst c = cursorAt c _MDB_FIRST Nothing + +-- | Position at the last key/value pair. +cursorLast :: Cursor s -> IO (Maybe (BS.ByteString, BS.ByteString)) +cursorLast c = cursorAt c _MDB_LAST Nothing + +-- | Advance to the next pair. +cursorNext :: Cursor s -> IO (Maybe (BS.ByteString, BS.ByteString)) +cursorNext c = cursorAt c _MDB_NEXT Nothing + +-- | Step back to the previous pair. +cursorPrev :: Cursor s -> IO (Maybe (BS.ByteString, BS.ByteString)) +cursorPrev c = cursorAt c _MDB_PREV Nothing + +-- | Position at the first key >= the given key. +cursorSeek + :: Cursor s + -> BS.ByteString + -> IO (Maybe (BS.ByteString, BS.ByteString)) +cursorSeek c k = cursorAt c _MDB_SET_RANGE (Just k) diff --git a/lib/Database/LMDB/Internal.hs b/lib/Database/LMDB/Internal.hs @@ -0,0 +1,276 @@ +{-# OPTIONS_HADDOCK not-home #-} +{-# LANGUAGE CApiFFI #-} + +-- | +-- Module: Database.LMDB.Internal +-- Copyright: (c) 2026 Jared Tobin +-- License: MIT +-- Maintainer: Jared Tobin <jared@ppad.tech> +-- +-- Raw FFI declarations and constants for the LMDB C API. This module +-- is exported for power-users; prefer "Database.LMDB" for the safe, +-- bracketed interface. + +module Database.LMDB.Internal ( + -- * opaque handles + MDB_env + , MDB_txn + , MDB_dbi + , MDB_cursor + + -- * value pair + , MDB_val (..) + + -- * environment flags + , _MDB_NOSUBDIR + , _MDB_NOSYNC + , _MDB_RDONLY + , _MDB_NOMETASYNC + , _MDB_NOTLS + + -- * database open flags + , _MDB_CREATE + + -- * put flags + , _MDB_NOOVERWRITE + , _MDB_APPEND + + -- * cursor ops + , _MDB_FIRST + , _MDB_LAST + , _MDB_NEXT + , _MDB_PREV + , _MDB_SET + , _MDB_SET_RANGE + , _MDB_GET_CURRENT + + -- * error codes + , _MDB_SUCCESS + , _MDB_KEYEXIST + , _MDB_NOTFOUND + , _MDB_MAP_FULL + , _MDB_CORRUPTED + , _MDB_PANIC + , _MDB_VERSION_MISMATCH + , _MDB_INVALID + , _MDB_BAD_TXN + , _MDB_BAD_VALSIZE + , _MDB_BAD_DBI + + -- * environment lifecycle + , mdb_env_create + , mdb_env_open + , mdb_env_close + , mdb_env_set_mapsize + , mdb_env_set_maxdbs + , mdb_env_sync + + -- * transactions + , mdb_txn_begin + , mdb_txn_commit + , mdb_txn_abort + + -- * databases + , mdb_dbi_open + , mdb_dbi_close + + -- * key-value ops + , mdb_get + , mdb_put + , mdb_del + + -- * cursors + , mdb_cursor_open + , mdb_cursor_close + , mdb_cursor_get + + -- * diagnostics + , mdb_strerror + ) where + +import Foreign.C.String (CString) +import Foreign.C.Types + (CInt(..), CUInt(..), CSize(..), CChar(..)) +import Foreign.Ptr (Ptr) +import Foreign.Storable (Storable(..)) +import System.Posix.Types (CMode(..)) + +-- opaque handles ------------------------------------------------------------- + +data MDB_env +data MDB_txn +data MDB_cursor + +-- | LMDB's database identifier is a plain @unsigned int@ value type, +-- not a pointer. +type MDB_dbi = CUInt + +-- value pair ----------------------------------------------------------------- + +-- | The key-or-value pair LMDB passes across the FFI boundary. +data MDB_val = MDB_val + { mvSize :: !CSize + , mvData :: !(Ptr CChar) + } + +instance Storable MDB_val where + sizeOf _ = 2 * sizeOf (undefined :: Ptr ()) + alignment _ = alignment (undefined :: Ptr ()) + peek p = do + sz <- peekByteOff p 0 + pd <- peekByteOff p (sizeOf (undefined :: Ptr ())) + pure (MDB_val sz pd) + poke p (MDB_val sz pd) = do + pokeByteOff p 0 sz + pokeByteOff p (sizeOf (undefined :: Ptr ())) pd + +-- environment flags ---------------------------------------------------------- + +_MDB_NOSUBDIR, _MDB_NOSYNC, _MDB_RDONLY, _MDB_NOMETASYNC, _MDB_NOTLS + :: CUInt +_MDB_NOSUBDIR = 0x4000 +_MDB_NOSYNC = 0x10000 +_MDB_RDONLY = 0x20000 +_MDB_NOMETASYNC = 0x40000 +_MDB_NOTLS = 0x200000 + +-- database open flags -------------------------------------------------------- + +_MDB_CREATE :: CUInt +_MDB_CREATE = 0x40000 + +-- put flags ------------------------------------------------------------------ + +_MDB_NOOVERWRITE, _MDB_APPEND :: CUInt +_MDB_NOOVERWRITE = 0x10 +_MDB_APPEND = 0x20000 + +-- cursor ops ----------------------------------------------------------------- + +-- These mirror the enum MDB_cursor_op declared in lmdb.h. We only +-- bind the operations the safe wrapper currently uses; add more as +-- needed. + +_MDB_FIRST, _MDB_LAST, _MDB_NEXT, _MDB_PREV + , _MDB_SET, _MDB_SET_RANGE, _MDB_GET_CURRENT :: CInt +_MDB_FIRST = 0 +_MDB_LAST = 6 +_MDB_NEXT = 8 +_MDB_PREV = 12 +_MDB_SET = 15 +_MDB_SET_RANGE = 17 -- index per MDB_cursor_op enum (post-DUP entries) +_MDB_GET_CURRENT = 4 + +-- error codes ---------------------------------------------------------------- + +_MDB_SUCCESS, _MDB_KEYEXIST, _MDB_NOTFOUND, _MDB_MAP_FULL + , _MDB_CORRUPTED, _MDB_PANIC, _MDB_VERSION_MISMATCH, _MDB_INVALID + , _MDB_BAD_TXN, _MDB_BAD_VALSIZE, _MDB_BAD_DBI :: CInt +_MDB_SUCCESS = 0 +_MDB_KEYEXIST = -30799 +_MDB_NOTFOUND = -30798 +_MDB_CORRUPTED = -30796 +_MDB_PANIC = -30795 +_MDB_VERSION_MISMATCH = -30794 +_MDB_INVALID = -30793 +_MDB_MAP_FULL = -30792 +_MDB_BAD_TXN = -30782 +_MDB_BAD_VALSIZE = -30781 +_MDB_BAD_DBI = -30780 + +-- foreign imports ------------------------------------------------------------ + +-- environment lifecycle + +foreign import capi "lmdb.h mdb_env_create" + mdb_env_create :: Ptr (Ptr MDB_env) -> IO CInt + +foreign import capi "lmdb.h mdb_env_open" + mdb_env_open + :: Ptr MDB_env -> CString -> CUInt -> CMode -> IO CInt + +foreign import capi "lmdb.h mdb_env_close" + mdb_env_close :: Ptr MDB_env -> IO () + +foreign import capi "lmdb.h mdb_env_set_mapsize" + mdb_env_set_mapsize :: Ptr MDB_env -> CSize -> IO CInt + +foreign import capi "lmdb.h mdb_env_set_maxdbs" + mdb_env_set_maxdbs :: Ptr MDB_env -> MDB_dbi -> IO CInt + +foreign import capi "lmdb.h mdb_env_sync" + mdb_env_sync :: Ptr MDB_env -> CInt -> IO CInt + +-- transactions + +foreign import capi "lmdb.h mdb_txn_begin" + mdb_txn_begin + :: Ptr MDB_env + -> Ptr MDB_txn -- parent (NULL for top-level) + -> CUInt -- flags + -> Ptr (Ptr MDB_txn) + -> IO CInt + +foreign import capi "lmdb.h mdb_txn_commit" + mdb_txn_commit :: Ptr MDB_txn -> IO CInt + +foreign import capi "lmdb.h mdb_txn_abort" + mdb_txn_abort :: Ptr MDB_txn -> IO () + +-- databases + +foreign import capi "lmdb.h mdb_dbi_open" + mdb_dbi_open + :: Ptr MDB_txn + -> CString -- name; NULL for the default unnamed db + -> CUInt -- flags + -> Ptr MDB_dbi + -> IO CInt + +foreign import capi "lmdb.h mdb_dbi_close" + mdb_dbi_close :: Ptr MDB_env -> MDB_dbi -> IO () + +-- key-value ops + +foreign import capi "lmdb.h mdb_get" + mdb_get + :: Ptr MDB_txn -> MDB_dbi -> Ptr MDB_val -> Ptr MDB_val -> IO CInt + +foreign import capi "lmdb.h mdb_put" + mdb_put + :: Ptr MDB_txn + -> MDB_dbi + -> Ptr MDB_val -- key + -> Ptr MDB_val -- data + -> CUInt -- flags + -> IO CInt + +foreign import capi "lmdb.h mdb_del" + mdb_del + :: Ptr MDB_txn + -> MDB_dbi + -> Ptr MDB_val -- key + -> Ptr MDB_val -- data; NULL except for MDB_DUPSORT + -> IO CInt + +-- cursors + +foreign import capi "lmdb.h mdb_cursor_open" + mdb_cursor_open + :: Ptr MDB_txn -> MDB_dbi -> Ptr (Ptr MDB_cursor) -> IO CInt + +foreign import capi "lmdb.h mdb_cursor_close" + mdb_cursor_close :: Ptr MDB_cursor -> IO () + +foreign import capi "lmdb.h mdb_cursor_get" + mdb_cursor_get + :: Ptr MDB_cursor + -> Ptr MDB_val -- key (in/out) + -> Ptr MDB_val -- data (out) + -> CInt -- MDB_cursor_op + -> IO CInt + +-- diagnostics + +foreign import capi "lmdb.h mdb_strerror" + mdb_strerror :: CInt -> IO CString diff --git a/ppad-lmdb.cabal b/ppad-lmdb.cabal @@ -0,0 +1,92 @@ +cabal-version: 3.0 +name: ppad-lmdb +version: 0.1.0 +synopsis: Minimal bindings to LMDB +license: MIT +license-file: LICENSE +author: Jared Tobin +maintainer: jared@ppad.tech +category: Database +build-type: Simple +tested-with: GHC == 9.10.3 +extra-doc-files: CHANGELOG + +description: + Minimal bindings to LMDB (Lightning Memory-Mapped Database), an + embedded ACID key-value store. Links against the system liblmdb + via pkg-config; the bundled nix flake provides one. + +flag llvm + description: Use GHC's LLVM backend. + default: False + manual: True + +source-repository head + type: git + location: git.ppad.tech/lmdb.git + +library + default-language: Haskell2010 + hs-source-dirs: lib + ghc-options: + -Wall + if flag(llvm) + ghc-options: -fllvm -O2 + exposed-modules: + Database.LMDB + Database.LMDB.Internal + build-depends: + base >= 4.9 && < 5 + , bytestring >= 0.9 && < 0.13 + pkgconfig-depends: lmdb + +test-suite lmdb-tests + type: exitcode-stdio-1.0 + default-language: Haskell2010 + hs-source-dirs: test + main-is: Main.hs + ghc-options: + -rtsopts -Wall -O2 + build-depends: + base + , bytestring + , containers + , directory + , filepath + , ppad-lmdb + , QuickCheck + , tasty + , tasty-hunit + , tasty-quickcheck + +benchmark lmdb-bench + type: exitcode-stdio-1.0 + default-language: Haskell2010 + hs-source-dirs: bench + main-is: Main.hs + ghc-options: + -rtsopts -O2 -Wall + build-depends: + base + , bytestring + , criterion + , directory + , filepath + , ppad-lmdb + +benchmark lmdb-weigh + type: exitcode-stdio-1.0 + default-language: Haskell2010 + hs-source-dirs: bench + main-is: Weight.hs + ghc-options: + -rtsopts -O2 -Wall -fno-warn-orphans + if flag(llvm) + ghc-options: -fllvm + build-depends: + base + , bytestring + , directory + , filepath + , ppad-lmdb + , weigh diff --git a/test/Main.hs b/test/Main.hs @@ -0,0 +1,181 @@ +{-# LANGUAGE OverloadedStrings #-} + +module Main where + +import qualified Control.Exception as E +import Control.Monad (forM_) +import qualified Data.ByteString as BS +import qualified Data.ByteString.Char8 as BSC +import qualified Data.Map.Strict as Map +import qualified Database.LMDB as L +import System.Directory + ( createDirectory + , doesDirectoryExist + , getTemporaryDirectory + , removeDirectoryRecursive + ) +import System.FilePath ((</>)) +import Test.Tasty +import Test.Tasty.HUnit +import Test.Tasty.QuickCheck as QC + +main :: IO () +main = defaultMain $ testGroup "ppad-lmdb" + [ unitTests + , propTests + ] + +-- helpers -------------------------------------------------------------------- + +-- | Run an action against a fresh temporary db directory. The +-- directory is created (clean) before the action and removed after. +withTmpEnv :: L.EnvFlags -> (L.Env -> IO a) -> IO a +withTmpEnv envFlags k = do + tmpRoot <- getTemporaryDirectory + let dir = tmpRoot </> "ppad-lmdb-test" + exists <- doesDirectoryExist dir + if exists then removeDirectoryRecursive dir else pure () + createDirectory dir + result <- L.withEnv dir envFlags k + removeDirectoryRecursive dir + pure result + +-- shared bigger map for tests +flags :: L.EnvFlags +flags = L.defaultEnvFlags + { L.envMapSize = 64 * 1024 * 1024 + , L.envMaxDbs = 4 + } + +writeKV + :: L.Env -> [(BS.ByteString, BS.ByteString)] -> IO () +writeKV env kvs = L.withWriteTxn env $ \txn -> do + dbi <- L.openDbi txn Nothing True + forM_ kvs $ \(k, v) -> L.put txn dbi k v + +-- unit tests ----------------------------------------------------------------- + +unitTests :: TestTree +unitTests = testGroup "unit" + [ testCase "put then get roundtrip" $ + withTmpEnv flags $ \env -> do + writeKV env [("hello", "world")] + v <- L.withReadTxn env $ \txn -> do + dbi <- L.openDbi txn Nothing False + L.get txn dbi "hello" + v @?= Just "world" + + , testCase "get on missing key returns Nothing" $ + withTmpEnv flags $ \env -> do + writeKV env [("a", "1")] + v <- L.withReadTxn env $ \txn -> do + dbi <- L.openDbi txn Nothing False + L.get txn dbi "absent" + v @?= Nothing + + , testCase "del returns True only when key existed" $ + withTmpEnv flags $ \env -> do + writeKV env [("a", "1"), ("b", "2")] + (existed, repeated, missing) <- L.withWriteTxn env $ \txn -> do + dbi <- L.openDbi txn Nothing False + e <- L.del txn dbi "a" + e2 <- L.del txn dbi "a" + m <- L.del txn dbi "never-existed" + pure (e, e2, m) + existed @?= True + repeated @?= False + missing @?= False + + , testCase "write txn aborted on exception" $ + withTmpEnv flags $ \env -> do + -- attempt that throws after a write + let attempt = L.withWriteTxn env $ \txn -> do + dbi <- L.openDbi txn Nothing True + L.put txn dbi "k" "v" + E.throwIO (userError "boom") + r <- E.try attempt :: IO (Either IOError ()) + case r of + Left _ -> pure () + Right _ -> assertFailure "expected the user error" + -- key must not be visible to a fresh read txn + v <- L.withReadTxn env $ \txn -> do + dbi <- L.openDbi txn Nothing True + L.get txn dbi "k" + v @?= Nothing + + , testCase "cursor walks keys in ascending order" $ + withTmpEnv flags $ \env -> do + let kvs = [ (BSC.pack [c], BSC.pack [c]) + | c <- "abcdefghij" ] + writeKV env kvs + seen <- L.withReadTxn env $ \txn -> do + dbi <- L.openDbi txn Nothing False + L.withCursor txn dbi (cursorWalk []) + seen @?= kvs + + , testCase "cursorSeek lands on first >= key" $ + withTmpEnv flags $ \env -> do + writeKV env [ ("a","1"), ("c","3"), ("e","5") ] + r <- L.withReadTxn env $ \txn -> do + dbi <- L.openDbi txn Nothing False + L.withCursor txn dbi $ \cur -> + L.cursorSeek cur "b" + r @?= Just ("c", "3") + ] + +-- walk a cursor, accumulating pairs from current position to end +cursorWalk + :: [(BS.ByteString, BS.ByteString)] + -> L.Cursor s + -> IO [(BS.ByteString, BS.ByteString)] +cursorWalk acc cur = do + step <- L.cursorFirst cur + go (reverse acc) step + where + go ys Nothing = pure (reverse ys) + go ys (Just kv) = do + next <- L.cursorNext cur + go (kv : ys) next + +-- properties ----------------------------------------------------------------- + +newtype Key = Key BS.ByteString deriving Show +newtype Val = Val BS.ByteString deriving Show + +-- LMDB rejects empty keys; restrict to non-empty bounded strings. +instance Arbitrary Key where + arbitrary = do + n <- QC.chooseInt (1, 32) + Key . BS.pack <$> QC.vectorOf n QC.arbitrary + +instance Arbitrary Val where + arbitrary = do + n <- QC.chooseInt (0, 64) + Val . BS.pack <$> QC.vectorOf n QC.arbitrary + +propTests :: TestTree +propTests = testGroup "props" + [ QC.testProperty "put / get roundtrip" $ \(Key k) (Val v) -> + QC.ioProperty $ withTmpEnv flags $ \env -> do + writeKV env [(k, v)] + got <- L.withReadTxn env $ \txn -> do + dbi <- L.openDbi txn Nothing False + L.get txn dbi k + pure (got == Just v) + + , QC.testProperty "cursor enumerates Map.toAscList" $ \kvs0 -> + let kvs = uniqueKeys [(k, v) | (Key k, Val v) <- kvs0] + in QC.ioProperty $ withTmpEnv flags $ \env -> do + writeKV env kvs + seen <- L.withReadTxn env $ \txn -> do + dbi <- L.openDbi txn Nothing False + L.withCursor txn dbi (cursorWalk []) + pure (seen == Map.toAscList (Map.fromList kvs)) + ] + +-- keep only the last write for each key, matching LMDB's overwrite +-- semantics under MDB_NOOVERWRITE-not-set +uniqueKeys + :: [(BS.ByteString, BS.ByteString)] + -> [(BS.ByteString, BS.ByteString)] +uniqueKeys = Map.toList . Map.fromList