Python Bindings

Yokan provides comprehensive Python bindings for both server and client libraries, allowing you to build and interact with Yokan databases from Python applications.

Installation

To use Yokan’s Python bindings, install Yokan with the +python variant in Spack:

spack install mochi-yokan+python

The Python bindings are available in the mochi.yokan package.

Quick Start

Here’s a simple example of using Yokan from Python:

from mochi.margo import Engine
from mochi.yokan.server import Provider
from mochi.yokan.client import Client

# Bring up a Margo engine on TCP; the provider and the client below are
# both attached to it.
engine = Engine('tcp')

# Register a Yokan provider (ID 42) configured with a "map" database.
provider = Provider(engine=engine, provider_id=42,
                    config='{"database":{"type":"map"}}')

# The client and the database handle target that same provider:
# same address, same provider ID.
client = Client(engine=engine)
db = client.make_database_handle(address=engine.addr(), provider_id=42)

# Store one key/value pair.
db.put(key="greeting", value="Hello, Yokan!")

# Read it back: get() fills a caller-supplied buffer, so the value's length
# is queried first and a bytearray of exactly that size is allocated.
buffer = bytearray(db.length(key="greeting"))
db.get(key="greeting", value=buffer)
print(f"Retrieved: {buffer.decode()}")

# Report how many entries the database holds.
print(f"Total entries: {db.count()}")

# Shut the engine down.
engine.finalize()

This example demonstrates:

  1. Creating a Margo engine

  2. Starting a Yokan provider

  3. Creating a client and database handle

  4. Performing basic put/get operations

Server API

The mochi.yokan.server module provides the Provider class for creating Yokan providers:

from mochi.margo import Engine
from mochi.yokan.server import Provider
import json

# Create a Margo engine
engine = Engine('tcp')

# Configure a Yokan provider with the "map" backend.
# The provider ID is kept in one variable so that the value passed to
# Provider and the value printed below cannot drift apart.
provider_id = 42
config = {
    "database": {
        "type": "map"
    }
}

# Start the provider; the configuration is handed over as a JSON string
provider = Provider(
    engine=engine,
    provider_id=provider_id,
    config=json.dumps(config)
)

print(f"Yokan provider started at {engine.addr()}")
print(f"Provider ID: {provider_id}")
print("Provider is now accepting requests...")

# In a real application, you would wait here by calling
# engine.wait_for_finalize() instead of finalizing right away.
# For this example, we just finalize.
engine.finalize()

Provider configuration uses the same JSON format as the C++ API, supporting all backend types and configuration options.

Client API

The mochi.yokan.client module provides the Client class and the database handles used to interact with Yokan providers:

Basic Operations

from mochi.margo import Engine
from mochi.yokan.server import Provider
from mochi.yokan.client import Client

def main(engine):
    """Exercise put, exists, length, get and erase on a single key.

    A provider and a client are both created on ``engine``; the engine is
    finalized before returning.
    """
    provider = Provider(
        engine=engine,
        provider_id=42,
        config='{"database":{"type":"map"}}',
    )
    client = Client(engine=engine)
    db = client.make_database_handle(address=engine.addr(), provider_id=42)

    key = "user:1"

    # put: store the value under the key
    db.put(key=key, value="Alice")
    print("Stored user:1 = Alice")

    # exists: ask whether the key is present
    present = db.exists(key=key)
    print(f"Key exists: {present}")

    # length: query the size of the stored value
    size = db.length(key=key)
    print(f"Value length: {size} bytes")

    # get: the value is written into a caller-provided buffer of that size
    buffer = bytearray(size)
    db.get(key=key, value=buffer)
    print(f"Retrieved: {buffer.decode()}")

    # erase: remove the pair...
    db.erase(key=key)
    print("Key erased")

    # ...and check that it is gone
    still_present = db.exists(key=key)
    print(f"Key exists after erase: {still_present}")

    engine.finalize()

with Engine('tcp') as engine:
    main(engine)

The client API supports:

  • put(key, value, mode=0): Store a key/value pair

  • get(key, value, mode=0): Retrieve a value by key

  • exists(key, mode=0): Check if a key exists

  • length(key, mode=0): Get the size of a value (returns None if key doesn’t exist)

  • erase(key, mode=0): Delete a key/value pair

  • count(mode=0): Count total key/value pairs

Note

The get method does not return the value. Instead, an appropriately sized buffer must be passed in, and the value is written into it. This is because Yokan optimizes operations by doing RDMA directly to the target memory.

Batch Operations

For efficiency, use batch operations when working with multiple keys:

from mochi.margo import Engine
from mochi.yokan.server import Provider
from mochi.yokan.client import Client

engine = Engine('tcp')
provider = Provider(
    engine=engine,
    provider_id=42,
    config='{"database":{"type":"map"}}',
)
client = Client(engine=engine)
db = client.make_database_handle(address=engine.addr(), provider_id=42)

# Store several pairs with a single call; put_multi takes (key, value) pairs
users = {"user:1": "Alice", "user:2": "Bob", "user:3": "Carol"}
keys = list(users)
db.put_multi(list(users.items()))
print(f"Stored {len(keys)} key/value pairs")

# Probe a mix of stored and never-stored keys in one call
probe_keys = [*keys, "user:4", "user:5"]
found = db.exists_multi(keys=probe_keys)
print(f"Existence check: {found}")  # [True, True, True, False, False]

# Fetch the size of every stored value
sizes = db.length_multi(keys=keys)
print(f"Value lengths: {sizes}")

# get_multi fills caller-provided buffers: one per key, sized from the
# lengths obtained above
buffers = [bytearray(size) for size in sizes]
db.get_multi(list(zip(keys, buffers)))
print(f"Retrieved: {buffers}")

# Remove all the stored keys in one call
db.erase_multi(keys=keys)
print(f"Erased {len(keys)} keys")

# Confirm what is left in the database
remaining = db.count()
print(f"Remaining entries: {remaining}")

engine.finalize()

Batch operations include:

  • put_multi(pairs, mode=0)

  • get_multi(pairs, mode=0)

  • exists_multi(keys, mode=0)

  • length_multi(keys, mode=0)

  • erase_multi(keys, mode=0)

Note

The put_multi and get_multi methods take a list of pairs, with the first element of the pair being the key, and the second being the value, or a destination buffer for the value.

List Operations

Yokan provides powerful list operations for iterating through key/value pairs:

from mochi.margo import Engine
from mochi.yokan.server import Provider
from mochi.yokan.client import Client
import mochi.yokan.mode as mode

# Single-process setup: the provider and the client share one engine.
engine = Engine('tcp')
provider = Provider(engine=engine, provider_id=42,
                   config='{"database":{"type":"map"}}')
client = Client(engine=engine)
db = client.make_database_handle(address=engine.addr(), provider_id=42)

# Populate database with test data: keys "item:000" .. "item:019"
# (zero-padded to 3 digits) with values "value_0" .. "value_19"
for i in range(20):
    key = f"item:{i:03d}"
    value = f"value_{i}"
    db.put(key=key, value=value)

# List up to 5 keys with a given prefix.
# Keys are written into the 5 preallocated 16-byte buffers; the returned
# lengths give the number of meaningful bytes in each buffer, which is why
# each buffer is sliced with k[:l] before decoding.
# NOTE(review): what is returned when fewer than 5 keys match is not shown
# here -- confirm against the API reference.
keys = [bytearray(16) for i  in range(5)]
lengths = db.list_keys(
    keys,
    from_key="item:006",  # Starting point
    filter="item:00",     # Prefix
    mode=mode.YOKAN_MODE_DEFAULT
)
print(f"Keys starting with 'item:00'")
for k, l in zip(keys, lengths):
    print(f"  - {k[:l].decode()}")

# List keys and values together.
# Each entry of keyvals is a [key buffer, value buffer] pair, and each entry
# of the result unpacks as a (key size, value size) pair used to slice them.
keyvals = [[bytearray(16), bytearray(16)] for i in range(5)]
lengths = db.list_keyvals(
    keyvals,
    from_key="item:003",
    filter="",
    mode=mode.YOKAN_MODE_INCLUSIVE  # Include the starting key
)
print(f"\nKey/value pairs from 'item:003':")
for (key, value), (ksize, vsize) in zip(keyvals, lengths):
    print(f"  - {key[:ksize].decode()} = {value[:vsize].decode()}")

# List keys with a filter (remove prefix from results).
# Same call as the first listing, except for the NO_PREFIX mode.
keys = [bytearray(16) for i  in range(5)]
lengths = db.list_keys(
    keys,
    from_key="item:006",
    filter="item:00",
    mode=mode.YOKAN_MODE_NO_PREFIX
)
print(f"Keys starting with 'item:00', with prefix removed")
for k, l in zip(keys, lengths):
    print(f"  - {k[:l].decode()}")

# List keys and values together, using a single buffer for each.
# All keys go into one 128-byte buffer and all values into another; the
# result is iterated as (key length, value length) pairs.
keys = bytearray(128)
values = bytearray(128)
lengths = db.list_keyvals_packed(
    keys, values,
    count=5,
    from_key="item:003",
    filter="",
    mode=mode.YOKAN_MODE_INCLUSIVE  # Include the starting key
)
print(f"Packed key/value retrieved:")
# Walk both packed buffers in lockstep: each entry starts where the previous
# one ended, so the offsets advance by the lengths just consumed.
# These slices are printed as bytearrays (no decode), unlike the loops above.
key_offset = 0
val_offset = 0
for kl, vl in lengths:
    print(f"  - {keys[key_offset:key_offset+kl]} = {values[val_offset:val_offset+vl]}")
    key_offset += kl
    val_offset += vl

engine.finalize()

The Buffer Protocol

All functions in the Python bindings accept either strings or any object that implements the buffer protocol.

Strings may only be used as input (e.g. the key in put and get, and the value in put, but not the value in get).

Working with Modes

Yokan’s mode system is available in Python through the mochi.yokan.mode module:

from mochi.margo import Engine
from mochi.yokan.exception import Exception
from mochi.yokan.server import Provider
from mochi.yokan.client import Client
import mochi.yokan.mode as mode

engine = Engine('tcp')
provider = Provider(engine=engine, provider_id=42,
                    config='{"database":{"type":"map"}}')
client = Client(engine=engine)
db = client.make_database_handle(address=engine.addr(), provider_id=42)

# APPEND mode: Append to existing value
db.put(key="log", value="Entry 1\n")
db.put(key="log", value="Entry 2\n", mode=mode.YOKAN_MODE_APPEND)
db.put(key="log", value="Entry 3\n", mode=mode.YOKAN_MODE_APPEND)
value = bytearray(24)  # 3 entries of 8 bytes each
db.get(key="log", value=value)
print(f"Appended log:\n{value.decode()}")

# CONSUME mode: Get and erase in one operation
db.put(key="task", value="process_this")
value = bytearray(12)  # len("process_this")
db.get(key="task", value=value, mode=mode.YOKAN_MODE_CONSUME)
print(f"Consumed: {value}")
print(f"Still exists: {db.exists(key='task')}")  # False

# NEW_ONLY mode: Only put if key doesn't exist.
# The success message lives in the else clause so that it is printed only
# when the first put really succeeded, never after a failure.
try:
    db.put(key="counter", value="1", mode=mode.YOKAN_MODE_NEW_ONLY)
except Exception:
    print("This should not be reached")
else:
    print("First put succeeded")
# The key now exists, so a second NEW_ONLY put is expected to raise
try:
    db.put(key="counter", value="2", mode=mode.YOKAN_MODE_NEW_ONLY)
except Exception as e:
    print(f"Second put failed (expected): {e}")

# EXIST_ONLY mode: Only put if key already exists
try:
    db.put(key="new_key", value="value", mode=mode.YOKAN_MODE_EXIST_ONLY)
except Exception:
    print("Put failed on non-existent key (expected)")


# Combining modes: flags are OR-ed together into a single mode value
db.put(key="multi_mode", value="data")
value = bytearray(4)  # len("data")
db.get(key="multi_mode", value=value, mode=mode.YOKAN_MODE_CONSUME | mode.YOKAN_MODE_NO_RDMA)
print(f"Consumed with NO_RDMA: {value}")

engine.finalize()

Available modes have the same names as in C/C++. Modes can be combined using bitwise OR: mode.YOKAN_MODE_WAIT | mode.YOKAN_MODE_CONSUME

Document Store Operations

Yokan also supports document storage with the collection API:

from mochi.margo import Engine
from mochi.yokan.server import Provider
from mochi.yokan.client import Client
import json

# Single-process setup: the provider and the client share one engine.
engine = Engine('tcp')
provider = Provider(engine=engine, provider_id=42,
                   config='{"database":{"type":"map"}}')
client = Client(engine=engine)
db = client.make_database_handle(address=engine.addr(), provider_id=42)

# Create a collection
db.create_collection(name="users")

# Check that the collection exists
exists = db.collection_exists(name="users")
print(f"Collection 'users' exists: {exists}")

# Get a collection handle
coll = db.open_collection(name="users")

# Store JSON documents
doc1 = {"name": "Alice", "age": 30, "city": "Boston"}
doc2 = {"name": "Bob", "age": 25, "city": "New York"}
doc3 = {"name": "Carol", "age": 35, "city": "Boston"}

# Store documents and get their IDs.
# Documents are passed as JSON strings; store() returns the new document's ID.
id1 = coll.store(document=json.dumps(doc1))
id2 = coll.store(document=json.dumps(doc2))
id3 = coll.store(document=json.dumps(doc3))

print(f"Stored 3 documents with IDs: {id1}, {id2}, {id3}")

# Load a document by ID.
# The document is written into a preallocated 64-byte buffer; the value
# returned by load() is used to slice off the meaningful bytes.
loaded_doc = bytearray(64)
length = coll.load(id=id1, buffer=loaded_doc)
print(f"Loaded document: {loaded_doc[:length].decode()}")

# Update a document (same ID, new content)
updated_doc = {"name": "Alice", "age": 31, "city": "Boston"}
coll.update(id=id1, document=json.dumps(updated_doc))
print(f"\nUpdated document {id1}")

# List all documents, starting from ID 0, into three 64-byte buffers.
# Each entry of the result unpacks as a pair: the first element is printed
# as the document's identifier, the second is used as the length that
# slices the matching buffer.
buffers = [bytearray(64) for i in range(3)]
doc_lengths = coll.list_docs(start_id=0, buffers=buffers)
print(doc_lengths)  # raw result, printed as-is
print(f"\nAll documents:")
for (i, l), b in zip(doc_lengths, buffers):
    print(f"{i} - {b[:l]}")

# Erase a document
coll.erase(id=id2)
print(f"Erased document {id2}")

# Count documents
count = coll.size()
print(f"Total documents: {count}")

engine.finalize()

Collections provide:

  • JSON document storage

  • Query capabilities

  • Document IDs

  • Batch operations

Error Handling

The Python bindings raise mochi.yokan.exception.Exception for both client and server errors:

from mochi.margo import Engine
from mochi.yokan.exception import Exception
from mochi.yokan.server import Provider
from mochi.yokan.client import Client
import mochi.yokan.mode as mode

engine = Engine('tcp')
provider = Provider(
    engine=engine,
    provider_id=42,
    config='{"database":{"type":"map"}}',
)
client = Client(engine=engine)
db = client.make_database_handle(address=engine.addr(), provider_id=42)

# Scratch buffer shared by the get() calls below
buffer = bytearray(16)

# Example 1: get() on a key that was never stored
try:
    db.get(key="nonexistent_key", value=buffer)
except Exception as err:
    print(f"Expected error - key not found: {err}")

# Example 2: test for the key first, and only get() when it is present
if not db.exists(key="safe_key"):
    print("Key doesn't exist")
else:
    db.get(key="safe_key", value=buffer)

# Example 3: Handling unsupported modes
# (the get() below is left commented out; it stands for a call using a mode
# that the backend does not support)
try:
    db.put(key="test", value="data")
    # Some backends may not support all modes
    # db.get(key="test", value=buffer, mode=mode.SOME_UNSUPPORTED_MODE)
    print("Mode check: using supported modes only")
except Exception as err:
    print(f"Mode not supported: {err}")

engine.finalize()

This Exception class has a code integer attribute that corresponds to one of the YOKAN_ERR_* error codes. These error codes are available as global variables in the mochi.yokan.exception module.

Note

The length method returns None instead of raising an exception when a key doesn’t exist. Similarly, length_multi returns None for missing keys in its result list. Use exists or check for None to handle missing keys gracefully.

Always wrap Yokan operations in try/except blocks to handle:

  • Missing keys (for operations like get and erase)

  • Network errors

  • Unsupported modes

  • Invalid configurations

Performance Tips

  1. Use batch operations when working with multiple keys to reduce network round-trips

  2. Use buffer protocol objects instead of strings for large binary data

  3. Preallocate buffers when retrieving multiple values to avoid allocations

  4. Choose appropriate batch sizes for list operations based on your data size

  5. Use YOKAN_MODE_NO_RDMA for small key/value pairs to avoid RDMA overhead

  6. Consider backend selection based on your workload (in-memory vs persistent)

  7. Use CONSUME mode when you need get-and-delete atomicity

Building Higher-Level APIs

The Python bindings are designed to be low-level and performance-oriented. We encourage users to build higher-level APIs for their use cases:

from mochi.margo import Engine
from mochi.yokan.exception import Exception
from mochi.yokan.server import Provider
from mochi.yokan.client import Client
import json
from typing import Any, Optional, Dict

class YokanDict:
    """
    Higher-level dictionary-like interface to Yokan.
    Provides Pythonic access with automatic JSON serialization.

    Values are serialized with ``json.dumps`` on write and parsed with
    ``json.loads`` on read. Every key is prefixed with ``prefix`` before
    being sent to the database.

    Args:
        db: Database handle providing put, get, length, exists, erase
            and put_multi.
        prefix: String prepended to every key (empty for no prefix).
    """

    def __init__(self, db, prefix: str = ""):
        self.db = db
        self.prefix = prefix

    def _make_key(self, key: str) -> str:
        """Add prefix to key (the key is returned untouched without prefix)."""
        return f"{self.prefix}{key}" if self.prefix else key

    def __setitem__(self, key: str, value: Any):
        """Set a value (automatically serializes to JSON)."""
        self.db.put(key=self._make_key(key), value=json.dumps(value))

    def __getitem__(self, key: str) -> Any:
        """Get a value (automatically deserializes from JSON).

        Raises:
            KeyError: If the database reports no length for the key.
        """
        full_key = self._make_key(key)
        # get() needs a buffer of the right size, so the length is queried
        # first; a length of None means the key is absent.
        length = self.db.length(key=full_key)
        if length is None:
            raise KeyError(key)
        value = bytearray(length)
        # The two calls are separate requests: an error raised by get()
        # itself propagates to the caller unchanged.
        self.db.get(key=full_key, value=value)
        return json.loads(value.decode())

    def __delitem__(self, key: str):
        """Delete a value.

        Raises:
            KeyError: If the erase operation fails. The underlying error is
                chained as ``__cause__`` so that failures other than a
                missing key remain diagnosable.
        """
        try:
            self.db.erase(key=self._make_key(key))
        except Exception as err:
            raise KeyError(key) from err

    def __contains__(self, key: str) -> bool:
        """Check if key exists."""
        return self.db.exists(key=self._make_key(key))

    def get(self, key: str, default: Any = None) -> Any:
        """Get a value, returning ``default`` when the key is missing."""
        try:
            return self[key]
        except KeyError:
            return default

    def update(self, items: Dict[str, Any]):
        """Update multiple items at once with a single batch put."""
        keyvals = [(self._make_key(k), json.dumps(v)) for k, v in items.items()]
        # Nothing to store: skip the request entirely.
        if not keyvals:
            return
        self.db.put_multi(keyvals)

    def clear(self):
        """Clear all items with this prefix.

        Not implemented: this is a placeholder that currently does nothing.
        """
        # Note: This is a simplified implementation
        # A real implementation would use list operations
        pass


# Example usage: one engine hosting both the provider and the client
engine = Engine('tcp')
provider = Provider(
    engine=engine,
    provider_id=42,
    config='{"database":{"type":"map"}}',
)
client = Client(engine=engine)
db = client.make_database_handle(address=engine.addr(), provider_id=42)

# Wrap the database handle; every key gets the "app:" prefix
store = YokanDict(db, prefix="app:")

# Item assignment stores JSON-serialized values
store["user:1"] = {"name": "Alice", "age": 30}
store["user:2"] = {"name": "Bob", "age": 25}
store["config"] = {"theme": "dark", "notifications": True}

print("Stored data using dict-like interface")

# Item access reads the value back and deserializes it
alice = store["user:1"]
print(f"User 1: {alice}")

# Membership test, then read
if "config" in store:
    settings = store["config"]
    print(f"Config: {settings}")

# "user:3" has not been stored yet, so the fallback is returned
fallback = {"name": "Unknown", "age": 0}
carol = store.get("user:3", fallback)
print(f"User 3 (default): {carol}")

# Store several entries in one batch
new_users = {
    "user:3": {"name": "Carol", "age": 35},
    "user:4": {"name": "Dave", "age": 40},
}
store.update(new_users)
print("Batch updated 2 users")

engine.finalize()

This allows you to:

  • Add application-specific abstractions

  • Implement custom caching

  • Add type checking and validation

  • Create domain-specific interfaces