Document storage

Yokan provides a second set of client functions to use a database as a document store. Contrary to a key/value store, a document store associates values (or “documents”) with a unique 64-bit identifier chosen by the database; identifiers are assigned incrementally, starting from 0. Documents are organized into named collections within a database.

All the Yokan backends that are sorted offer a document-storage interface on top of them.

Important

It is recommended not to use both the document storage and the key/value storage interfaces on the same database. Since the document storage functionalities rely on storing key/value pairs, modifying those pairs directly without a good understanding of how Yokan maps documents to key/value pairs could corrupt the document storage.

Manipulating collections

The following code shows how to create a collection, check whether a collection exists, get the collection size, the last identifier, and drop a collection.

client.c (show/hide)

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <margo.h>
#include <yokan/client.h>
#include <yokan/database.h>
#include <yokan/collection.h>

/*
 * Collection-management example.
 *
 * Connects to a Yokan provider, then creates a collection, checks that
 * it exists, queries its size and its last identifier, and drops it.
 *
 * Command line: <address> <provider id>
 * Returns 0 on success; every step is checked with assert().
 */
int main(int argc, char** argv)
{
    if(argc != 3) {
        fprintf(stderr, "Usage: %s <address> <provider id>\n", argv[0]);
        exit(-1);
    }

    /* initialize margo in client mode */
    margo_instance_id mid = margo_init("na+sm", MARGO_CLIENT_MODE, 0, 0);
    assert(mid);

    /* look up the address of the server given on the command line */
    uint16_t provider_id = atoi(argv[2]);
    hg_addr_t server_addr = HG_ADDR_NULL;
    hg_return_t hret = margo_addr_lookup(mid, argv[1], &server_addr);
    assert(hret == HG_SUCCESS);

    yk_return_t ret;

    /* create the Yokan client and a handle to the remote database */
    yk_client_t client = YOKAN_CLIENT_NULL;
    ret = yk_client_init(mid, &client);
    assert(ret == YOKAN_SUCCESS);

    yk_database_handle_t db_handle = YOKAN_DATABASE_HANDLE_NULL;
    ret = yk_database_handle_create(
        client, server_addr, provider_id, true, &db_handle);
    assert(ret == YOKAN_SUCCESS);

    /* create a collection in the database */
    ret = yk_collection_create(db_handle, "my_collection",
            YOKAN_MODE_DEFAULT);
    assert(ret == YOKAN_SUCCESS);

    /* check that the collection exists */
    uint8_t flag = 0;
    ret = yk_collection_exists(db_handle, "my_collection",
            YOKAN_MODE_DEFAULT, &flag);
    assert(ret == YOKAN_SUCCESS);
    assert(flag);

    /* get the size of the collection (nothing was stored, so 0) */
    size_t size = 0;
    ret = yk_collection_size(db_handle, "my_collection",
            YOKAN_MODE_DEFAULT, &size);
    assert(ret == YOKAN_SUCCESS);
    assert(size == 0);

    /* get the last id from the collection
     * (i.e. the id the next stored document will have);
     * the collection is empty, so this is expected to be 0
     */
    yk_id_t last_id = 0;
    ret = yk_collection_last_id(db_handle, "my_collection",
            YOKAN_MODE_DEFAULT, &last_id);
    assert(ret == YOKAN_SUCCESS);
    assert(last_id == 0);

    /* drop the collection */
    ret = yk_collection_drop(db_handle, "my_collection",
            YOKAN_MODE_DEFAULT);
    assert(ret == YOKAN_SUCCESS);

    /* release resources in reverse order of creation */
    ret = yk_database_handle_release(db_handle);
    assert(ret == YOKAN_SUCCESS);

    ret = yk_client_finalize(client);
    assert(ret == YOKAN_SUCCESS);

    margo_finalize(mid);

    return 0;
}

These functions are extensively documented in the yokan/collection.h header. Contrary to database creation and management functions, collection creation and management functions are available in the client library.

Accessing single documents

The following code shows how to store, load, update, get the length of, and erase a document.

client.c (show/hide)

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <margo.h>
#include <yokan/client.h>
#include <yokan/database.h>
#include <yokan/collection.h>

/*
 * Single-document example.
 *
 * Connects to a Yokan provider, creates a collection, then stores,
 * loads, measures, updates, and erases one document.
 *
 * Command line: <address> <provider id>
 * Returns 0 on success; every step is checked with assert().
 */
int main(int argc, char** argv)
{
    if(argc != 3) {
        fprintf(stderr, "Usage: %s <address> <provider id>\n", argv[0]);
        exit(-1);
    }

    /* initialize margo in client mode */
    margo_instance_id mid = margo_init("na+sm", MARGO_CLIENT_MODE, 0, 0);
    assert(mid);

    /* look up the address of the server given on the command line */
    uint16_t provider_id = atoi(argv[2]);
    hg_addr_t server_addr = HG_ADDR_NULL;
    hg_return_t hret = margo_addr_lookup(mid, argv[1], &server_addr);
    assert(hret == HG_SUCCESS);

    yk_return_t ret;

    /* create the Yokan client and a handle to the remote database */
    yk_client_t client = YOKAN_CLIENT_NULL;
    ret = yk_client_init(mid, &client);
    assert(ret == YOKAN_SUCCESS);

    yk_database_handle_t db_handle = YOKAN_DATABASE_HANDLE_NULL;
    ret = yk_database_handle_create(
        client, server_addr, provider_id, true, &db_handle);
    assert(ret == YOKAN_SUCCESS);

    /* create a collection in the database */
    ret = yk_collection_create(db_handle, "my_collection",
            YOKAN_MODE_DEFAULT);
    assert(ret == YOKAN_SUCCESS);

    /* the document is stored without its NUL terminator */
    const char* document = "This is a document";
    size_t doc_size = strlen(document);

    /* store the document, getting a yk_id_t back */
    yk_id_t id;
    ret = yk_doc_store(db_handle, "my_collection",
            YOKAN_MODE_DEFAULT, document, doc_size, &id);
    assert(ret == YOKAN_SUCCESS);
    /* yk_id_t is a 64-bit unsigned integer; "%lu" is only correct where
     * long is 64 bits wide, so print through unsigned long long */
    printf("Document has id %llu\n", (unsigned long long)id);

    /* load the document back; the buffer is zero-filled beforehand so
     * that the loaded bytes can be compared as a C string */
    char buffer[128] = {0};
    size_t buf_size = sizeof buffer;
    ret = yk_doc_load(db_handle, "my_collection",
            YOKAN_MODE_DEFAULT, id, buffer, &buf_size);
    assert(ret == YOKAN_SUCCESS);
    assert(strcmp(document, buffer) == 0);

    /* get the length of a document */
    size_t length = 0;
    ret = yk_doc_length(db_handle, "my_collection",
            YOKAN_MODE_DEFAULT, id, &length);
    assert(ret == YOKAN_SUCCESS);
    assert(length == doc_size);

    /* update the content of the document */
    const char* updated_document = "Updated document";
    doc_size = strlen(updated_document);
    ret = yk_doc_update(db_handle, "my_collection",
            YOKAN_MODE_DEFAULT, id, updated_document, doc_size);
    assert(ret == YOKAN_SUCCESS);

    /* erase a document */
    ret = yk_doc_erase(db_handle, "my_collection",
            YOKAN_MODE_DEFAULT, id);
    assert(ret == YOKAN_SUCCESS);

    /* release resources in reverse order of creation */
    ret = yk_database_handle_release(db_handle);
    assert(ret == YOKAN_SUCCESS);

    ret = yk_client_finalize(client);
    assert(ret == YOKAN_SUCCESS);

    margo_finalize(mid);

    return 0;
}

The yk_id_t type is typedef-ed as a uint64_t.

Note

For small documents (less than a few KB), it is recommended to use the YOKAN_MODE_NO_RDMA mode, which will pack the document into the RPC message instead of relying on RDMA.

Note

Just like in the key/value storage interface, if you have multiple documents to access, we recommend using the functions below, which are designed to work on batches of documents.

Accessing multiple documents

The following shows how to access a batch of documents when the buffers used for each document are not contiguous in memory.

client-multi.c (show/hide)

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <margo.h>
#include <yokan/client.h>
#include <yokan/database.h>
#include <yokan/collection.h>

/*
 * Batch ("multi") document example.
 *
 * Connects to a Yokan provider, creates a collection, then stores,
 * loads, measures, updates, erases, and lists several documents at
 * once, each document living in its own (non-contiguous) buffer.
 *
 * Command line: <address> <provider id>
 * Returns 0 on success; every step is checked with assert().
 */
int main(int argc, char** argv)
{
    if(argc != 3) {
        fprintf(stderr, "Usage: %s <address> <provider id>\n", argv[0]);
        exit(-1);
    }

    /* initialize margo in client mode */
    margo_instance_id mid = margo_init("na+sm", MARGO_CLIENT_MODE, 0, 0);
    assert(mid);

    /* look up the address of the server given on the command line */
    uint16_t provider_id = atoi(argv[2]);
    hg_addr_t server_addr = HG_ADDR_NULL;
    hg_return_t hret = margo_addr_lookup(mid, argv[1], &server_addr);
    assert(hret == HG_SUCCESS);

    yk_return_t ret;

    /* create the Yokan client and a handle to the remote database */
    yk_client_t client = YOKAN_CLIENT_NULL;
    ret = yk_client_init(mid, &client);
    assert(ret == YOKAN_SUCCESS);

    yk_database_handle_t db_handle = YOKAN_DATABASE_HANDLE_NULL;
    ret = yk_database_handle_create(
        client, server_addr, provider_id, true, &db_handle);
    assert(ret == YOKAN_SUCCESS);

    /* create a collection in the database */
    ret = yk_collection_create(db_handle, "my_collection",
            YOKAN_MODE_DEFAULT);
    assert(ret == YOKAN_SUCCESS);

    /* four documents, each in its own buffer; sizes exclude the NUL */
    const char* docs[4] = {
        "This is a document",
        "This is another one",
        "And another one",
        "And the last"};
    size_t doc_sizes[4];
    for(unsigned i = 0; i < 4; i++)
        doc_sizes[i] = strlen(docs[i]);

    /* store the documents, getting yk_id_t back */
    yk_id_t ids[4];
    ret = yk_doc_store_multi(db_handle, "my_collection",
            YOKAN_MODE_DEFAULT, 4, (const void * const*)docs,
            doc_sizes, ids);
    assert(ret == YOKAN_SUCCESS);

    /* load the documents back into zero-filled heap buffers */
    char* buffers[4];
    size_t buf_sizes[4];
    for(unsigned i = 0; i < 4; i++) {
        buffers[i] = calloc(1, 128);
        assert(buffers[i]);
        buf_sizes[i] = 128;
    }
    ret = yk_doc_load_multi(db_handle, "my_collection",
            YOKAN_MODE_DEFAULT, 4, ids, (void * const*)buffers, buf_sizes);
    assert(ret == YOKAN_SUCCESS);

    /* get the length of a bunch of documents */
    size_t lengths[4];
    ret = yk_doc_length_multi(db_handle, "my_collection",
            YOKAN_MODE_DEFAULT, 4, ids, lengths);
    assert(ret == YOKAN_SUCCESS);

    /* update the content of the first and third documents; use the ids
     * returned by the store call rather than assuming their values */
    const char* updated_docs[2] = {
        "Updated first document",
        "New third document"
    };
    yk_id_t ids_to_update[2] = { ids[0], ids[2] };
    size_t updated_doc_sizes[2] = { strlen(updated_docs[0]), strlen(updated_docs[1]) };
    ret = yk_doc_update_multi(db_handle, "my_collection",
            YOKAN_MODE_DEFAULT, 2, ids_to_update,
            (const void * const*)updated_docs, updated_doc_sizes);
    assert(ret == YOKAN_SUCCESS);

    /* erase the first, second and fourth documents */
    yk_id_t ids_to_erase[3] = { ids[0], ids[1], ids[3] };
    ret = yk_doc_erase_multi(db_handle, "my_collection",
            YOKAN_MODE_DEFAULT, 3, ids_to_erase);
    assert(ret == YOKAN_SUCCESS);

    /* list up to 2 documents starting from the second id, reusing the
     * first two buffers (their capacity is reset to 128 first) */
    buf_sizes[0] = 128;
    buf_sizes[1] = 128;
    yk_id_t listed_ids[2];
    ret = yk_doc_list(db_handle, "my_collection",
            YOKAN_MODE_INCLUSIVE, ids[1], NULL, 0, 2,
            listed_ids, (void * const*)buffers, buf_sizes);
    assert(ret == YOKAN_SUCCESS);

    /* the load/list buffers were heap-allocated above */
    for(unsigned i = 0; i < 4; i++)
        free(buffers[i]);

    /* release resources in reverse order of creation */
    ret = yk_database_handle_release(db_handle);
    assert(ret == YOKAN_SUCCESS);

    ret = yk_client_finalize(client);
    assert(ret == YOKAN_SUCCESS);

    margo_finalize(mid);

    return 0;
}

A second set of functions is provided to work with documents packed contiguously in memory. We strongly advise using these “packed” functions for better performance.

client-packed.c (show/hide)

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <margo.h>
#include <yokan/client.h>
#include <yokan/database.h>
#include <yokan/collection.h>

/*
 * Batch ("packed") document example.
 *
 * Connects to a Yokan provider, creates a collection, then stores,
 * loads, updates, and lists several documents at once, the documents
 * being laid out back to back in a single contiguous buffer.
 *
 * Command line: <address> <provider id>
 * Returns 0 on success; every step is checked with assert().
 */
int main(int argc, char** argv)
{
    if(argc != 3) {
        fprintf(stderr, "Usage: %s <address> <provider id>\n", argv[0]);
        exit(-1);
    }

    /* initialize margo in client mode */
    margo_instance_id mid = margo_init("na+sm", MARGO_CLIENT_MODE, 0, 0);
    assert(mid);

    /* look up the address of the server given on the command line */
    uint16_t provider_id = atoi(argv[2]);
    hg_addr_t server_addr = HG_ADDR_NULL;
    hg_return_t hret = margo_addr_lookup(mid, argv[1], &server_addr);
    assert(hret == HG_SUCCESS);

    yk_return_t ret;

    /* create the Yokan client and a handle to the remote database */
    yk_client_t client = YOKAN_CLIENT_NULL;
    ret = yk_client_init(mid, &client);
    assert(ret == YOKAN_SUCCESS);

    yk_database_handle_t db_handle = YOKAN_DATABASE_HANDLE_NULL;
    ret = yk_database_handle_create(
        client, server_addr, provider_id, true, &db_handle);
    assert(ret == YOKAN_SUCCESS);

    /* create a collection in the database */
    ret = yk_collection_create(db_handle, "my_collection",
            YOKAN_MODE_DEFAULT);
    assert(ret == YOKAN_SUCCESS);

    /* four documents concatenated into one string literal; doc_sizes
     * gives the length of each piece (NUL terminator not included) */
    const char* docs =
        "This is a document"
        "This is another one"
        "And another one"
        "And the last";
    size_t doc_sizes[4] = {18, 19, 15, 12};

    /* store the documents, getting yk_id_t back */
    yk_id_t ids[4];
    ret = yk_doc_store_packed(db_handle, "my_collection",
            YOKAN_MODE_DEFAULT, 4, (const void*)docs,
            doc_sizes, ids);
    assert(ret == YOKAN_SUCCESS);

    /* load the documents back into a single 4*128-byte buffer */
    char buffer[4*128];
    size_t buf_sizes[4];
    ret = yk_doc_load_packed(db_handle, "my_collection",
            YOKAN_MODE_DEFAULT, 4, ids, sizeof buffer, (void*)buffer, buf_sizes);
    assert(ret == YOKAN_SUCCESS);

    /* update the content of the first and third documents; use the ids
     * returned by the store call rather than assuming their values */
    const char* updated_docs =
        "Updated first document"
        "New third document";
    size_t updated_doc_sizes[2] = {22, 18};
    yk_id_t ids_to_update[2] = { ids[0], ids[2] };
    ret = yk_doc_update_packed(db_handle, "my_collection",
            YOKAN_MODE_DEFAULT, 2, ids_to_update,
            (const void*)updated_docs, updated_doc_sizes);
    assert(ret == YOKAN_SUCCESS);

    /* list up to 2 documents starting from the second id, reusing the
     * same contiguous buffer */
    yk_id_t listed_ids[2];
    ret = yk_doc_list_packed(db_handle, "my_collection",
            YOKAN_MODE_INCLUSIVE, ids[1], NULL, 0, 2,
            listed_ids, sizeof buffer, (void*)buffer, buf_sizes);
    assert(ret == YOKAN_SUCCESS);

    /* release resources in reverse order of creation */
    ret = yk_database_handle_release(db_handle);
    assert(ret == YOKAN_SUCCESS);

    ret = yk_client_finalize(client);
    assert(ret == YOKAN_SUCCESS);

    margo_finalize(mid);

    return 0;
}

Important

The YOKAN_MODE_NO_RDMA mode will not work with the _multi version of these functions, but will work with the _packed version. This mode may be more efficient, in particular when documents are small or when the server is facing a high degree of concurrency.