commit 7fa6968542ee4eee069d143be49570af43b6d20b
parent 4c2de18d0ac14594c060fe2d1e580ba8e97ce18c
Author: Jared Tobin <jared@jtobin.io>
Date: Thu, 27 Feb 2025 12:14:20 +0400
lib: text api
Diffstat:
3 files changed, 61 insertions(+), 48 deletions(-)
diff --git a/lib/Crypto/KDF/BIP39.hs b/lib/Crypto/KDF/BIP39.hs
@@ -34,31 +34,52 @@ import qualified Data.List as L
import Prelude hiding (words)
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
+import qualified Data.Text.ICU.Normalize2 as ICU
import System.IO.Unsafe (unsafePerformIO)
+-- XX get rid of this type
newtype Mnemonic = Mnemonic BS.ByteString
deriving Eq
instance Show Mnemonic where
show (Mnemonic bs) = show bs
+newtype Wordlist = Wordlist (PA.Array T.Text)
+
fi :: (Integral a, Num b) => a -> b
fi = fromIntegral
{-# INLINE fi #-}
+mnemonic :: BS.ByteString -> T.Text
+mnemonic = _mnemonic english
+
+_mnemonic :: Wordlist -> BS.ByteString -> T.Text
+_mnemonic (Wordlist wlist) entropy@(BI.PS _ _ l)
+ | l `rem` 4 /= 0 = error "ppad-bip39 (mnemonic): invalid entropy length"
+ | l < 16 = error "ppad-bip39 (mnemonic): invalid entropy length"
+ | l > 32 = error "ppad-bip39 (mnemonic): invalid entropy length"
+ | otherwise =
+ let has = SHA256.hash entropy
+ h = BU.unsafeHead has
+ n = l `quot` 4
+ kek = h .&. (0b1111_1111 .<<. (8 - n)) -- top n bits
+ cat = entropy <> BS.singleton kek
+ in T.intercalate " " (words wlist cat)
+{-# INLINE _mnemonic #-}
+
-- remaining, bits pool, number of bits in pool
type Acc = (BS.ByteString, Word64, Int)
-words :: PA.Array T.Text -> BS.ByteString -> [BS.ByteString]
+words :: PA.Array T.Text -> BS.ByteString -> [T.Text]
words wlist bs = L.unfoldr coalg (bs, 0, 0) where
mask = 0b0111_1111_1111
- coalg :: Acc -> Maybe (BS.ByteString, Acc)
+ coalg :: Acc -> Maybe (T.Text, Acc)
coalg (etc, acc, len)
| len > 10 =
let w11 = fi ((acc .>>. (len - 11)) .&. mask) -- take bits from pool
nacc = acc .&. ((1 .<<. (len - 11)) - 1) -- adjust pool
nlen = len - 11 -- track less bits
- word = TE.encodeUtf8 (PA.indexArray wlist w11)
+ word = PA.indexArray wlist w11
in Just (word, (etc, nacc, nlen))
| not (BS.null etc) =
let next = BU.unsafeHead etc
@@ -70,96 +91,81 @@ words wlist bs = L.unfoldr coalg (bs, 0, 0) where
Nothing
{-# INLINE words #-}
-mnemonic :: BS.ByteString -> Mnemonic
-mnemonic = _mnemonic english
-
-_mnemonic :: PA.Array T.Text -> BS.ByteString -> Mnemonic
-_mnemonic wlist entropy@(BI.PS _ _ l)
- | l `rem` 4 /= 0 = error "ppad-bip39 (mnemonic): invalid entropy length"
- | l < 16 = error "ppad-bip39 (mnemonic): invalid entropy length"
- | l > 32 = error "ppad-bip39 (mnemonic): invalid entropy length"
- | otherwise =
- let has = SHA256.hash entropy
- h = BU.unsafeHead has
- n = l `quot` 4
- kek = h .&. (0b1111_1111 .<<. (8 - n)) -- top n bits
- cat = entropy <> BS.singleton kek
- in Mnemonic (BS.intercalate " " (words wlist cat))
-{-# INLINE _mnemonic #-}
-
-seed :: BS.ByteString -> BS.ByteString -> BS.ByteString
-seed mnem pass = PBKDF.derive SHA512.hmac mnem salt 2048 64 where
+-- XX check that this is a valid mnemonic!
+seed :: T.Text -> BS.ByteString -> BS.ByteString
+seed mnem pass = PBKDF.derive SHA512.hmac bs salt 2048 64 where
salt = "mnemonic" <> pass
+ bs = TE.encodeUtf8 (ICU.nfkd mnem)
-- wordlists ------------------------------------------------------------------
-english :: PA.Array T.Text
+english :: Wordlist
english = unsafePerformIO $ do
wlist <- fmap TE.decodeUtf8 (BS.readFile "etc/english.txt")
let ls = T.lines wlist
- pure (PA.arrayFromList ls)
+ pure (Wordlist (PA.arrayFromList ls))
{-# NOINLINE english #-}
-chinese_traditional :: PA.Array T.Text
+chinese_traditional :: Wordlist
chinese_traditional = unsafePerformIO $ do
wlist <- fmap TE.decodeUtf8 (BS.readFile "etc/chinese_traditional.txt")
let ls = T.lines wlist
- pure (PA.arrayFromList ls)
+ pure (Wordlist (PA.arrayFromList ls))
{-# NOINLINE chinese_traditional #-}
-chinese_simplified :: PA.Array T.Text
+chinese_simplified :: Wordlist
chinese_simplified = unsafePerformIO $ do
wlist <- fmap TE.decodeUtf8 (BS.readFile "etc/chinese_simplified.txt")
let ls = T.lines wlist
- pure (PA.arrayFromList ls)
+ pure (Wordlist (PA.arrayFromList ls))
{-# NOINLINE chinese_simplified #-}
-korean :: PA.Array T.Text
+korean :: Wordlist
korean = unsafePerformIO $ do
wlist <- fmap TE.decodeUtf8 (BS.readFile "etc/korean.txt")
let ls = T.lines wlist
- pure (PA.arrayFromList ls)
+ pure (Wordlist (PA.arrayFromList ls))
{-# NOINLINE korean #-}
-french :: PA.Array T.Text
+french :: Wordlist
french = unsafePerformIO $ do
wlist <- fmap TE.decodeUtf8 (BS.readFile "etc/french.txt")
let ls = T.lines wlist
- pure (PA.arrayFromList ls)
+ pure (Wordlist (PA.arrayFromList ls))
{-# NOINLINE french #-}
-spanish :: PA.Array T.Text
+spanish :: Wordlist
spanish = unsafePerformIO $ do
wlist <- fmap TE.decodeUtf8 (BS.readFile "etc/spanish.txt")
let ls = T.lines wlist
- pure (PA.arrayFromList ls)
+ pure (Wordlist (PA.arrayFromList ls))
{-# NOINLINE spanish #-}
-czech :: PA.Array T.Text
+czech :: Wordlist
czech = unsafePerformIO $ do
wlist <- fmap TE.decodeUtf8 (BS.readFile "etc/czech.txt")
let ls = T.lines wlist
- pure (PA.arrayFromList ls)
+ pure (Wordlist (PA.arrayFromList ls))
{-# NOINLINE czech #-}
-italian :: PA.Array T.Text
+italian :: Wordlist
italian = unsafePerformIO $ do
wlist <- fmap TE.decodeUtf8 (BS.readFile "etc/italian.txt")
let ls = T.lines wlist
- pure (PA.arrayFromList ls)
+ pure (Wordlist (PA.arrayFromList ls))
{-# NOINLINE italian #-}
-portuguese :: PA.Array T.Text
+portuguese :: Wordlist
portuguese = unsafePerformIO $ do
wlist <- fmap TE.decodeUtf8 (BS.readFile "etc/portuguese.txt")
let ls = T.lines wlist
- pure (PA.arrayFromList ls)
+ pure (Wordlist (PA.arrayFromList ls))
{-# NOINLINE portuguese #-}
-japanese :: PA.Array T.Text
+japanese :: Wordlist
japanese = unsafePerformIO $ do
wlist <- fmap TE.decodeUtf8 (BS.readFile "etc/japanese.txt")
let ls = T.lines wlist
- pure (PA.arrayFromList ls)
+ pure (Wordlist (PA.arrayFromList ls))
{-# NOINLINE japanese #-}
diff --git a/test/Main.hs b/test/Main.hs
@@ -6,6 +6,7 @@ module Main where
import qualified Crypto.HDKey.BIP32 as BIP32
import qualified Crypto.KDF.BIP39 as BIP39
import qualified Data.Aeson as A
+import qualified Data.Text.ICU.Normalize2 as ICU
import qualified Data.Text.IO as TIO
import Test.Tasty
import Test.Tasty.HUnit
@@ -65,15 +66,21 @@ execute wlist V.Bip39Test {..} = do
mnem = bt_mnemonic
seed = bt_seed
xprv = bt_xprv
- BIP39.Mnemonic out_mnem = BIP39._mnemonic wl entr
+ out_mnem = BIP39._mnemonic wl entr
+ giv_seed = BIP39.seed mnem "TREZOR"
out_seed = BIP39.seed out_mnem "TREZOR"
out_xprv = case BIP32.master out_seed of
Just hd -> BIP32.xprv hd
Nothing -> error "bang (bip32)"
t_msg = mempty
testGroup t_msg [
- testCase "mnemonic" $ assertEqual mempty mnem out_mnem
- , testCase "seed" $ assertEqual mempty seed out_seed
+ -- we always output (NFKD) normalized UTF8, but test inputs may not be
+ -- normalized in this fashion
+ testCase "mnemonic" $ assertEqual mempty (ICU.nfkd mnem) out_mnem
+ -- testing from the given mnemonic ensures we're normalizing properly
+ -- before seed calculation
+ , testCase "seed (from given mnemonic)" $ assertEqual mempty seed giv_seed
+ , testCase "seed (from derived mnemonic)" $ assertEqual mempty seed out_seed
, testCase "xprv" $ assertEqual mempty xprv out_xprv
]
where
diff --git a/test/Vectors.hs b/test/Vectors.hs
@@ -15,7 +15,7 @@ import qualified Data.Text.Encoding as TE
import qualified Data.Vector as V
data Vectors = Vectors {
- v_english :: ![Bip39Test]
+ v_english :: ![Bip39Test]
, v_chinese_traditional :: ![Bip39Test]
, v_chinese_simplified :: ![Bip39Test]
, v_french :: ![Bip39Test]
@@ -42,7 +42,7 @@ instance A.FromJSON Vectors where
data Bip39Test = Bip39Test {
bt_entropy :: !BS.ByteString
- , bt_mnemonic :: !BS.ByteString
+ , bt_mnemonic :: !T.Text
, bt_seed :: !BS.ByteString
, bt_xprv :: !BS.ByteString
} deriving Show
@@ -58,7 +58,7 @@ instance A.FromJSON Bip39Test where
A.String t -> decodehex t
_ -> error "bang (entropy)"
bt_mnemonic = case m V.! 1 of
- A.String t -> TE.encodeUtf8 t
+ A.String t -> t
_ -> error "bang (mnemonic)"
bt_seed = case m V.! 2 of
A.String t -> decodehex t