bip39

BIP39 mnemonic codes in Haskell (docs.ppad.tech/bip39).
git clone git://git.ppad.tech/bip39.git
Log | Files | Refs | README | LICENSE

commit 7fa6968542ee4eee069d143be49570af43b6d20b
parent 4c2de18d0ac14594c060fe2d1e580ba8e97ce18c
Author: Jared Tobin <jared@jtobin.io>
Date:   Thu, 27 Feb 2025 12:14:20 +0400

lib: text api

Diffstat:
Mlib/Crypto/KDF/BIP39.hs | 90++++++++++++++++++++++++++++++++++++++++++-------------------------------------
Mtest/Main.hs | 13++++++++++---
Mtest/Vectors.hs | 6+++---
3 files changed, 61 insertions(+), 48 deletions(-)

diff --git a/lib/Crypto/KDF/BIP39.hs b/lib/Crypto/KDF/BIP39.hs @@ -34,31 +34,52 @@ import qualified Data.List as L import Prelude hiding (words) import qualified Data.Text as T import qualified Data.Text.Encoding as TE +import qualified Data.Text.ICU.Normalize2 as ICU import System.IO.Unsafe (unsafePerformIO) +-- XX get rid of this type newtype Mnemonic = Mnemonic BS.ByteString deriving Eq instance Show Mnemonic where show (Mnemonic bs) = show bs +newtype Wordlist = Wordlist (PA.Array T.Text) + fi :: (Integral a, Num b) => a -> b fi = fromIntegral {-# INLINE fi #-} +mnemonic :: BS.ByteString -> T.Text +mnemonic = _mnemonic english + +_mnemonic :: Wordlist -> BS.ByteString -> T.Text +_mnemonic (Wordlist wlist) entropy@(BI.PS _ _ l) + | l `rem` 4 /= 0 = error "ppad-bip39 (mnemonic): invalid entropy length" + | l < 16 = error "ppad-bip39 (mnemonic): invalid entropy length" + | l > 32 = error "ppad-bip39 (mnemonic): invalid entropy length" + | otherwise = + let has = SHA256.hash entropy + h = BU.unsafeHead has + n = l `quot` 4 + kek = h .&. (0b1111_1111 .<<. (8 - n)) -- top n bits + cat = entropy <> BS.singleton kek + in T.intercalate " " (words wlist cat) +{-# INLINE _mnemonic #-} + -- remaining, bits pool, number of bits in pool type Acc = (BS.ByteString, Word64, Int) -words :: PA.Array T.Text -> BS.ByteString -> [BS.ByteString] +words :: PA.Array T.Text -> BS.ByteString -> [T.Text] words wlist bs = L.unfoldr coalg (bs, 0, 0) where mask = 0b0111_1111_1111 - coalg :: Acc -> Maybe (BS.ByteString, Acc) + coalg :: Acc -> Maybe (T.Text, Acc) coalg (etc, acc, len) | len > 10 = let w11 = fi ((acc .>>. (len - 11)) .&. mask) -- take bits from pool nacc = acc .&. ((1 .<<. (len - 11)) - 1) -- adjust pool nlen = len - 11 -- track less bits - word = TE.encodeUtf8 (PA.indexArray wlist w11) + word = PA.indexArray wlist w11 in Just (word, (etc, nacc, nlen)) | not (BS.null etc) = let next = BU.unsafeHead etc @@ -70,96 +91,81 @@ words wlist bs = L.unfoldr coalg (bs, 0, 0) where Nothing {-# INLINE words #-} -mnemonic :: BS.ByteString -> Mnemonic -mnemonic = _mnemonic english - -_mnemonic :: PA.Array T.Text -> BS.ByteString -> Mnemonic -_mnemonic wlist entropy@(BI.PS _ _ l) - | l `rem` 4 /= 0 = error "ppad-bip39 (mnemonic): invalid entropy length" - | l < 16 = error "ppad-bip39 (mnemonic): invalid entropy length" - | l > 32 = error "ppad-bip39 (mnemonic): invalid entropy length" - | otherwise = - let has = SHA256.hash entropy - h = BU.unsafeHead has - n = l `quot` 4 - kek = h .&. (0b1111_1111 .<<. (8 - n)) -- top n bits - cat = entropy <> BS.singleton kek - in Mnemonic (BS.intercalate " " (words wlist cat)) -{-# INLINE _mnemonic #-} - -seed :: BS.ByteString -> BS.ByteString -> BS.ByteString -seed mnem pass = PBKDF.derive SHA512.hmac mnem salt 2048 64 where +-- XX check that this is a valid mnemonic! +seed :: T.Text -> BS.ByteString -> BS.ByteString +seed mnem pass = PBKDF.derive SHA512.hmac bs salt 2048 64 where salt = "mnemonic" <> pass + bs = TE.encodeUtf8 (ICU.nfkd mnem) -- wordlists ------------------------------------------------------------------ -english :: PA.Array T.Text +english :: Wordlist english = unsafePerformIO $ do wlist <- fmap TE.decodeUtf8 (BS.readFile "etc/english.txt") let ls = T.lines wlist - pure (PA.arrayFromList ls) + pure (Wordlist (PA.arrayFromList ls)) {-# NOINLINE english #-} -chinese_traditional :: PA.Array T.Text +chinese_traditional :: Wordlist chinese_traditional = unsafePerformIO $ do wlist <- fmap TE.decodeUtf8 (BS.readFile "etc/chinese_traditional.txt") let ls = T.lines wlist - pure (PA.arrayFromList ls) + pure (Wordlist (PA.arrayFromList ls)) {-# NOINLINE chinese_traditional #-} -chinese_simplified :: PA.Array T.Text +chinese_simplified :: Wordlist chinese_simplified = unsafePerformIO $ do wlist <- fmap TE.decodeUtf8 (BS.readFile "etc/chinese_simplified.txt") let ls = T.lines wlist - pure (PA.arrayFromList ls) + pure (Wordlist (PA.arrayFromList ls)) {-# NOINLINE chinese_simplified #-} -korean :: PA.Array T.Text +korean :: Wordlist korean = unsafePerformIO $ do wlist <- fmap TE.decodeUtf8 (BS.readFile "etc/korean.txt") let ls = T.lines wlist - pure (PA.arrayFromList ls) + pure (Wordlist (PA.arrayFromList ls)) {-# NOINLINE korean #-} -french :: PA.Array T.Text +french :: Wordlist french = unsafePerformIO $ do wlist <- fmap TE.decodeUtf8 (BS.readFile "etc/french.txt") let ls = T.lines wlist - pure (PA.arrayFromList ls) + pure (Wordlist (PA.arrayFromList ls)) {-# NOINLINE french #-} -spanish :: PA.Array T.Text +spanish :: Wordlist spanish = unsafePerformIO $ do wlist <- fmap TE.decodeUtf8 (BS.readFile "etc/spanish.txt") let ls = T.lines wlist - pure (PA.arrayFromList ls) + pure (Wordlist (PA.arrayFromList ls)) {-# NOINLINE spanish #-} -czech :: PA.Array T.Text +czech :: Wordlist czech = unsafePerformIO $ do wlist <- fmap TE.decodeUtf8 (BS.readFile "etc/czech.txt") let ls = T.lines wlist - pure (PA.arrayFromList ls) + pure (Wordlist (PA.arrayFromList ls)) {-# NOINLINE czech #-} -italian :: PA.Array T.Text +italian :: Wordlist italian = unsafePerformIO $ do wlist <- fmap TE.decodeUtf8 (BS.readFile "etc/italian.txt") let ls = T.lines wlist - pure (PA.arrayFromList ls) + pure (Wordlist (PA.arrayFromList ls)) {-# NOINLINE italian #-} -portuguese :: PA.Array T.Text +portuguese :: Wordlist portuguese = unsafePerformIO $ do wlist <- fmap TE.decodeUtf8 (BS.readFile "etc/portuguese.txt") let ls = T.lines wlist - pure (PA.arrayFromList ls) + pure (Wordlist (PA.arrayFromList ls)) {-# NOINLINE portuguese #-} -japanese :: PA.Array T.Text +japanese :: Wordlist japanese = unsafePerformIO $ do wlist <- fmap TE.decodeUtf8 (BS.readFile "etc/japanese.txt") let ls = T.lines wlist - pure (PA.arrayFromList ls) + pure (Wordlist (PA.arrayFromList ls)) {-# NOINLINE japanese #-} diff --git a/test/Main.hs b/test/Main.hs @@ -6,6 +6,7 @@ module Main where import qualified Crypto.HDKey.BIP32 as BIP32 import qualified Crypto.KDF.BIP39 as BIP39 import qualified Data.Aeson as A +import qualified Data.Text.ICU.Normalize2 as ICU import qualified Data.Text.IO as TIO import Test.Tasty import Test.Tasty.HUnit @@ -65,15 +66,21 @@ execute wlist V.Bip39Test {..} = do mnem = bt_mnemonic seed = bt_seed xprv = bt_xprv - BIP39.Mnemonic out_mnem = BIP39._mnemonic wl entr + out_mnem = BIP39._mnemonic wl entr + giv_seed = BIP39.seed mnem "TREZOR" out_seed = BIP39.seed out_mnem "TREZOR" out_xprv = case BIP32.master out_seed of Just hd -> BIP32.xprv hd Nothing -> error "bang (bip32)" t_msg = mempty testGroup t_msg [ - testCase "mnemonic" $ assertEqual mempty mnem out_mnem - , testCase "seed" $ assertEqual mempty seed out_seed + -- we always output (NFKD) normalized UTF8, but test inputs may not be + -- normalized in this fashion + testCase "mnemonic" $ assertEqual mempty (ICU.nfkd mnem) out_mnem + -- testing from the given mnemonic ensures we're normalizing properly + -- before seed calculation + , testCase "seed (from given mnemonic)" $ assertEqual mempty seed giv_seed + , testCase "seed (from derived mnemonic)" $ assertEqual mempty seed out_seed , testCase "xprv" $ assertEqual mempty xprv out_xprv ] where diff --git a/test/Vectors.hs b/test/Vectors.hs @@ -15,7 +15,7 @@ import qualified Data.Text.Encoding as TE import qualified Data.Vector as V data Vectors = Vectors { - v_english :: ![Bip39Test] + v_english :: ![Bip39Test] , v_chinese_traditional :: ![Bip39Test] , v_chinese_simplified :: ![Bip39Test] , v_french :: ![Bip39Test] @@ -42,7 +42,7 @@ instance A.FromJSON Vectors where data Bip39Test = Bip39Test { bt_entropy :: !BS.ByteString - , bt_mnemonic :: !BS.ByteString + , bt_mnemonic :: !T.Text , bt_seed :: !BS.ByteString , bt_xprv :: !BS.ByteString } deriving Show @@ -58,7 +58,7 @@ instance A.FromJSON Bip39Test where A.String t -> decodehex t _ -> error "bang (entropy)" bt_mnemonic = case m V.! 1 of - A.String t -> TE.encodeUtf8 t + A.String t -> t _ -> error "bang (mnemonic)" bt_seed = case m V.! 2 of A.String t -> decodehex t