Bulk Write Operations#

This tutorial explains how to take advantage of MongoDB C driver bulk write operation features. Executing write operations in batches reduces the number of network round trips, increasing write throughput.

Bulk Insert#

First we need to fetch a bulk operation handle from the mongoc_collection_t.

mongoc_bulk_operation_t *bulk =
   mongoc_collection_create_bulk_operation_with_opts (collection, NULL);

We can now start inserting documents to the bulk operation. These will be buffered until we execute the operation.

The bulk operation will coalesce insertions as a single batch for each consecutive call to mongoc_bulk_operation_insert(). This creates a pipelined effect when possible.

To execute the bulk operation and receive the result we call mongoc_bulk_operation_execute().

bulk1.c#
#include <assert.h>
#include <mongoc/mongoc.h>
#include <stdio.h>

static void
bulk1 (mongoc_collection_t *collection)
{
   mongoc_bulk_operation_t *bulk;
   bson_error_t error;
   bson_t *doc;
   bson_t reply;
   char *str;
   bool ret;
   int i;

   bulk = mongoc_collection_create_bulk_operation_with_opts (collection, NULL);

   for (i = 0; i < 10000; i++) {
      doc = BCON_NEW ("i", BCON_INT32 (i));
      mongoc_bulk_operation_insert (bulk, doc);
      bson_destroy (doc);
   }

   ret = mongoc_bulk_operation_execute (bulk, &reply, &error);

   str = bson_as_canonical_extended_json (&reply, NULL);
   printf ("%s\n", str);
   bson_free (str);

   if (!ret) {
      fprintf (stderr, "Error: %s\n", error.message);
   }

   bson_destroy (&reply);
   mongoc_bulk_operation_destroy (bulk);
}

int
main (void)
{
   mongoc_client_t *client;
   mongoc_collection_t *collection;
   const char *uri_string = "mongodb://localhost/?appname=bulk1-example";
   mongoc_uri_t *uri;
   bson_error_t error;

   mongoc_init ();

   uri = mongoc_uri_new_with_error (uri_string, &error);
   if (!uri) {
      fprintf (stderr,
               "failed to parse URI: %s\n"
               "error message:       %s\n",
               uri_string,
               error.message);
      return EXIT_FAILURE;
   }

   client = mongoc_client_new_from_uri (uri);
   if (!client) {
      return EXIT_FAILURE;
   }

   mongoc_client_set_error_api (client, 2);
   collection = mongoc_client_get_collection (client, "test", "test");

   bulk1 (collection);

   mongoc_uri_destroy (uri);
   mongoc_collection_destroy (collection);
   mongoc_client_destroy (client);

   mongoc_cleanup ();

   return EXIT_SUCCESS;
}

Example reply document:

{"nInserted"   : 10000,
 "nMatched"    : 0,
 "nModified"   : 0,
 "nRemoved"    : 0,
 "nUpserted"   : 0,
 "writeErrors" : []
 "writeConcernErrors" : [] }

Mixed Bulk Write Operations#

MongoDB C driver also supports executing mixed bulk write operations. A batch of insert, update, and remove operations can be executed together using the bulk write operations API.

Ordered Bulk Write Operations#

Ordered bulk write operations are batched and sent to the server in the order provided for serial execution. The reply document describes the type and count of operations performed.

bulk2.c#
#include <assert.h>
#include <mongoc/mongoc.h>
#include <stdio.h>

static void
bulk2 (mongoc_collection_t *collection)
{
   mongoc_bulk_operation_t *bulk;
   bson_error_t error;
   bson_t *query;
   bson_t *doc;
   bson_t *opts;
   bson_t reply;
   char *str;
   bool ret;
   int i;

   bulk = mongoc_collection_create_bulk_operation_with_opts (collection, NULL);

   /* Remove everything */
   query = bson_new ();
   mongoc_bulk_operation_remove (bulk, query);
   bson_destroy (query);

   /* Add a few documents */
   for (i = 1; i < 4; i++) {
      doc = BCON_NEW ("_id", BCON_INT32 (i));
      mongoc_bulk_operation_insert (bulk, doc);
      bson_destroy (doc);
   }

   /* {_id: 1} => {$set: {foo: "bar"}} */
   query = BCON_NEW ("_id", BCON_INT32 (1));
   doc = BCON_NEW ("$set", "{", "foo", BCON_UTF8 ("bar"), "}");
   mongoc_bulk_operation_update_many_with_opts (bulk, query, doc, NULL, &error);
   bson_destroy (query);
   bson_destroy (doc);

   /* {_id: 4} => {'$inc': {'j': 1}} (upsert) */
   opts = BCON_NEW ("upsert", BCON_BOOL (true));
   query = BCON_NEW ("_id", BCON_INT32 (4));
   doc = BCON_NEW ("$inc", "{", "j", BCON_INT32 (1), "}");
   mongoc_bulk_operation_update_many_with_opts (bulk, query, doc, opts, &error);
   bson_destroy (query);
   bson_destroy (doc);
   bson_destroy (opts);

   /* replace {j:1} with {j:2} */
   query = BCON_NEW ("j", BCON_INT32 (1));
   doc = BCON_NEW ("j", BCON_INT32 (2));
   mongoc_bulk_operation_replace_one_with_opts (bulk, query, doc, NULL, &error);
   bson_destroy (query);
   bson_destroy (doc);

   ret = mongoc_bulk_operation_execute (bulk, &reply, &error);

   str = bson_as_canonical_extended_json (&reply, NULL);
   printf ("%s\n", str);
   bson_free (str);

   if (!ret) {
      printf ("Error: %s\n", error.message);
   }

   bson_destroy (&reply);
   mongoc_bulk_operation_destroy (bulk);
}

int
main (void)
{
   mongoc_client_t *client;
   mongoc_collection_t *collection;
   const char *uri_string = "mongodb://localhost/?appname=bulk2-example";
   mongoc_uri_t *uri;
   bson_error_t error;

   mongoc_init ();

   uri = mongoc_uri_new_with_error (uri_string, &error);
   if (!uri) {
      fprintf (stderr,
               "failed to parse URI: %s\n"
               "error message:       %s\n",
               uri_string,
               error.message);
      return EXIT_FAILURE;
   }

   client = mongoc_client_new_from_uri (uri);
   if (!client) {
      return EXIT_FAILURE;
   }

   mongoc_client_set_error_api (client, 2);
   collection = mongoc_client_get_collection (client, "test", "test");

   bulk2 (collection);

   mongoc_uri_destroy (uri);
   mongoc_collection_destroy (collection);
   mongoc_client_destroy (client);

   mongoc_cleanup ();

   return EXIT_SUCCESS;
}

Example reply document:

{ "nInserted"   : 3,
  "nMatched"    : 2,
  "nModified"   : 2,
  "nRemoved"    : 10000,
  "nUpserted"   : 1,
  "upserted"    : [{"index" : 5, "_id" : 4}],
  "writeErrors" : []
  "writeConcernErrors" : [] }

The index field in the upserted array is the 0-based index of the upsert operation; in this example, the sixth operation of the overall bulk operation was an upsert, so its index is 5.

Unordered Bulk Write Operations#

Unordered bulk write operations are batched and sent to the server in arbitrary order where they may be executed in parallel. Any errors that occur are reported after all operations are attempted.

In the next example the first and third operations fail due to the unique constraint on _id. Since we are doing unordered execution the second and fourth operations succeed.

bulk3.c#
#include <assert.h>
#include <mongoc/mongoc.h>
#include <stdio.h>

static void
bulk3 (mongoc_collection_t *collection)
{
   bson_t opts = BSON_INITIALIZER;
   mongoc_bulk_operation_t *bulk;
   bson_error_t error;
   bson_t *query;
   bson_t *doc;
   bson_t reply;
   char *str;
   bool ret;

   /* false indicates unordered */
   BSON_APPEND_BOOL (&opts, "ordered", false);
   bulk = mongoc_collection_create_bulk_operation_with_opts (collection, &opts);
   bson_destroy (&opts);

   /* Add a document */
   doc = BCON_NEW ("_id", BCON_INT32 (1));
   mongoc_bulk_operation_insert (bulk, doc);
   bson_destroy (doc);

   /* remove {_id: 2} */
   query = BCON_NEW ("_id", BCON_INT32 (2));
   mongoc_bulk_operation_remove_one (bulk, query);
   bson_destroy (query);

   /* insert {_id: 3} */
   doc = BCON_NEW ("_id", BCON_INT32 (3));
   mongoc_bulk_operation_insert (bulk, doc);
   bson_destroy (doc);

   /* replace {_id:4} {'i': 1} */
   query = BCON_NEW ("_id", BCON_INT32 (4));
   doc = BCON_NEW ("i", BCON_INT32 (1));
   mongoc_bulk_operation_replace_one (bulk, query, doc, false);
   bson_destroy (query);
   bson_destroy (doc);

   ret = mongoc_bulk_operation_execute (bulk, &reply, &error);

   str = bson_as_canonical_extended_json (&reply, NULL);
   printf ("%s\n", str);
   bson_free (str);

   if (!ret) {
      printf ("Error: %s\n", error.message);
   }

   bson_destroy (&reply);
   mongoc_bulk_operation_destroy (bulk);
   bson_destroy (&opts);
}

int
main (void)
{
   mongoc_client_t *client;
   mongoc_collection_t *collection;
   const char *uri_string = "mongodb://localhost/?appname=bulk3-example";
   mongoc_uri_t *uri;
   bson_error_t error;

   mongoc_init ();

   uri = mongoc_uri_new_with_error (uri_string, &error);
   if (!uri) {
      fprintf (stderr,
               "failed to parse URI: %s\n"
               "error message:       %s\n",
               uri_string,
               error.message);
      return EXIT_FAILURE;
   }

   client = mongoc_client_new_from_uri (uri);
   if (!client) {
      return EXIT_FAILURE;
   }

   mongoc_client_set_error_api (client, 2);
   collection = mongoc_client_get_collection (client, "test", "test");

   bulk3 (collection);

   mongoc_uri_destroy (uri);
   mongoc_collection_destroy (collection);
   mongoc_client_destroy (client);

   mongoc_cleanup ();

   return EXIT_SUCCESS;
}

Example reply document:

{ "nInserted"    : 0,
  "nMatched"     : 1,
  "nModified"    : 1,
  "nRemoved"     : 1,
  "nUpserted"    : 0,
  "writeErrors"  : [
    { "index"  : 0,
      "code"   : 11000,
      "errmsg" : "E11000 duplicate key error index: test.test.$_id_ dup key: { : 1 }" },
    { "index"  : 2,
      "code"   : 11000,
      "errmsg" : "E11000 duplicate key error index: test.test.$_id_ dup key: { : 3 }" } ],
  "writeConcernErrors" : [] }

Error: E11000 duplicate key error index: test.test.$_id_ dup key: { : 1 }

The bson_error_t domain is MONGOC_ERROR_COMMAND and its code is 11000.

Bulk Operation Bypassing Document Validation#

This feature is only available when using MongoDB 3.2 and later.

By default bulk operations are validated against the schema, if any is defined. In certain cases however it may be necessary to bypass the document validation.

bulk5.c#
#include <assert.h>
#include <mongoc/mongoc.h>
#include <stdio.h>

static void
bulk5_fail (mongoc_collection_t *collection)
{
   mongoc_bulk_operation_t *bulk;
   bson_error_t error;
   bson_t *doc;
   bson_t reply;
   char *str;
   bool ret;

   bulk = mongoc_collection_create_bulk_operation_with_opts (collection, NULL);

   /* Two inserts */
   doc = BCON_NEW ("_id", BCON_INT32 (31));
   mongoc_bulk_operation_insert (bulk, doc);
   bson_destroy (doc);

   doc = BCON_NEW ("_id", BCON_INT32 (32));
   mongoc_bulk_operation_insert (bulk, doc);
   bson_destroy (doc);

   /* The above documents do not comply to the schema validation rules
    * we created previously, so this will result in an error */
   ret = mongoc_bulk_operation_execute (bulk, &reply, &error);

   str = bson_as_canonical_extended_json (&reply, NULL);
   printf ("%s\n", str);
   bson_free (str);

   if (!ret) {
      printf ("Error: %s\n", error.message);
   }

   bson_destroy (&reply);
   mongoc_bulk_operation_destroy (bulk);
}

static void
bulk5_success (mongoc_collection_t *collection)
{
   mongoc_bulk_operation_t *bulk;
   bson_error_t error;
   bson_t *doc;
   bson_t reply;
   char *str;
   bool ret;

   bulk = mongoc_collection_create_bulk_operation_with_opts (collection, NULL);

   /* Allow this document to bypass document validation.
    * NOTE: When authentication is enabled, the authenticated user must have
    * either the "dbadmin" or "restore" roles to bypass document validation */
   mongoc_bulk_operation_set_bypass_document_validation (bulk, true);

   /* Two inserts */
   doc = BCON_NEW ("_id", BCON_INT32 (31));
   mongoc_bulk_operation_insert (bulk, doc);
   bson_destroy (doc);

   doc = BCON_NEW ("_id", BCON_INT32 (32));
   mongoc_bulk_operation_insert (bulk, doc);
   bson_destroy (doc);

   ret = mongoc_bulk_operation_execute (bulk, &reply, &error);

   str = bson_as_canonical_extended_json (&reply, NULL);
   printf ("%s\n", str);
   bson_free (str);

   if (!ret) {
      printf ("Error: %s\n", error.message);
   }

   bson_destroy (&reply);
   mongoc_bulk_operation_destroy (bulk);
}

int
main (void)
{
   bson_t *options;
   bson_error_t error;
   mongoc_client_t *client;
   mongoc_collection_t *collection;
   mongoc_database_t *database;
   const char *uri_string = "mongodb://localhost/?appname=bulk5-example";
   mongoc_uri_t *uri;

   mongoc_init ();

   uri = mongoc_uri_new_with_error (uri_string, &error);
   if (!uri) {
      fprintf (stderr,
               "failed to parse URI: %s\n"
               "error message:       %s\n",
               uri_string,
               error.message);
      return EXIT_FAILURE;
   }

   client = mongoc_client_new_from_uri (uri);
   if (!client) {
      return EXIT_FAILURE;
   }

   mongoc_client_set_error_api (client, 2);
   database = mongoc_client_get_database (client, "testasdf");

   /* Create schema validator */
   options = BCON_NEW (
      "validator", "{", "number", "{", "$gte", BCON_INT32 (5), "}", "}");
   collection =
      mongoc_database_create_collection (database, "collname", options, &error);

   if (collection) {
      bulk5_fail (collection);
      bulk5_success (collection);
      mongoc_collection_destroy (collection);
   } else {
      fprintf (stderr, "Couldn't create collection: '%s'\n", error.message);
   }

   bson_free (options);
   mongoc_uri_destroy (uri);
   mongoc_database_destroy (database);
   mongoc_client_destroy (client);

   mongoc_cleanup ();

   return EXIT_SUCCESS;
}

Running the above example will result in:

{ "nInserted" : 0,
  "nMatched" : 0,
  "nModified" : 0,
  "nRemoved" : 0,
  "nUpserted" : 0,
  "writeErrors" : [
    { "index" : 0,
      "code" : 121,
      "errmsg" : "Document failed validation" } ] }

Error: Document failed validation

{ "nInserted" : 2,
  "nMatched" : 0,
  "nModified" : 0,
  "nRemoved" : 0,
  "nUpserted" : 0,
  "writeErrors" : [] }

The bson_error_t domain is MONGOC_ERROR_COMMAND.

Bulk Operation Write Concerns#

By default bulk operations are executed with the write_concern of the collection they are executed against. A custom write concern can be passed to the mongoc_collection_create_bulk_operation_with_opts() method. Write concern errors (e.g. wtimeout) will be reported after all operations are attempted, regardless of execution order.

bulk4.c#
#include <assert.h>
#include <mongoc/mongoc.h>
#include <stdio.h>

static void
bulk4 (mongoc_collection_t *collection)
{
   bson_t opts = BSON_INITIALIZER;
   mongoc_write_concern_t *wc;
   mongoc_bulk_operation_t *bulk;
   bson_error_t error;
   bson_t *doc;
   bson_t reply;
   char *str;
   bool ret;

   wc = mongoc_write_concern_new ();
   mongoc_write_concern_set_w (wc, 4);
   mongoc_write_concern_set_wtimeout_int64 (wc, 100); /* milliseconds */
   mongoc_write_concern_append (wc, &opts);

   bulk = mongoc_collection_create_bulk_operation_with_opts (collection, &opts);

   /* Two inserts */
   doc = BCON_NEW ("_id", BCON_INT32 (10));
   mongoc_bulk_operation_insert (bulk, doc);
   bson_destroy (doc);

   doc = BCON_NEW ("_id", BCON_INT32 (11));
   mongoc_bulk_operation_insert (bulk, doc);
   bson_destroy (doc);

   ret = mongoc_bulk_operation_execute (bulk, &reply, &error);

   str = bson_as_canonical_extended_json (&reply, NULL);
   printf ("%s\n", str);
   bson_free (str);

   if (!ret) {
      printf ("Error: %s\n", error.message);
   }

   bson_destroy (&reply);
   mongoc_bulk_operation_destroy (bulk);
   mongoc_write_concern_destroy (wc);
   bson_destroy (&opts);
}

int
main (void)
{
   mongoc_client_t *client;
   mongoc_collection_t *collection;
   const char *uri_string = "mongodb://localhost/?appname=bulk4-example";
   mongoc_uri_t *uri;
   bson_error_t error;

   mongoc_init ();

   uri = mongoc_uri_new_with_error (uri_string, &error);
   if (!uri) {
      fprintf (stderr,
               "failed to parse URI: %s\n"
               "error message:       %s\n",
               uri_string,
               error.message);
      return EXIT_FAILURE;
   }

   client = mongoc_client_new_from_uri (uri);
   if (!client) {
      return EXIT_FAILURE;
   }

   mongoc_client_set_error_api (client, 2);
   collection = mongoc_client_get_collection (client, "test", "test");

   bulk4 (collection);

   mongoc_uri_destroy (uri);
   mongoc_collection_destroy (collection);
   mongoc_client_destroy (client);

   mongoc_cleanup ();

   return EXIT_SUCCESS;
}

Example reply document and error message:

{ "nInserted"    : 2,
  "nMatched"     : 0,
  "nModified"    : 0,
  "nRemoved"     : 0,
  "nUpserted"    : 0,
  "writeErrors"  : [],
  "writeConcernErrors" : [
    { "code"   : 64,
      "errmsg" : "waiting for replication timed out" }
] }

Error: waiting for replication timed out

The bson_error_t domain is MONGOC_ERROR_WRITE_CONCERN if there are write concern errors and no write errors. Write errors indicate failed operations, so they take precedence over write concern errors, which mean merely that the write concern is not satisfied yet.

Setting Collation Order#

This feature is only available when using MongoDB 3.4 and later.

bulk-collation.c#
#include <mongoc/mongoc.h>
#include <stdio.h>

static void
bulk_collation (mongoc_collection_t *collection)
{
   mongoc_bulk_operation_t *bulk;
   bson_t *opts;
   bson_t *doc;
   bson_t *selector;
   bson_t *update;
   bson_error_t error;
   bson_t reply;
   char *str;
   uint32_t ret;

   /* insert {_id: "one"} and {_id: "One"} */
   bulk = mongoc_collection_create_bulk_operation_with_opts (collection, NULL);
   doc = BCON_NEW ("_id", BCON_UTF8 ("one"));
   mongoc_bulk_operation_insert (bulk, doc);
   bson_destroy (doc);

   doc = BCON_NEW ("_id", BCON_UTF8 ("One"));
   mongoc_bulk_operation_insert (bulk, doc);
   bson_destroy (doc);

   /* "One" normally sorts before "one"; make "one" come first */
   opts = BCON_NEW ("collation",
                    "{",
                    "locale",
                    BCON_UTF8 ("en_US"),
                    "caseFirst",
                    BCON_UTF8 ("lower"),
                    "}");

   /* set x=1 on the document with _id "One", which now sorts after "one" */
   update = BCON_NEW ("$set", "{", "x", BCON_INT64 (1), "}");
   selector = BCON_NEW ("_id", "{", "$gt", BCON_UTF8 ("one"), "}");
   mongoc_bulk_operation_update_one_with_opts (
      bulk, selector, update, opts, &error);

   ret = mongoc_bulk_operation_execute (bulk, &reply, &error);

   str = bson_as_canonical_extended_json (&reply, NULL);
   printf ("%s\n", str);
   bson_free (str);

   if (!ret) {
      printf ("Error: %s\n", error.message);
   }

   bson_destroy (&reply);
   bson_destroy (update);
   bson_destroy (selector);
   bson_destroy (opts);
   mongoc_bulk_operation_destroy (bulk);
}

int
main (void)
{
   mongoc_client_t *client;
   mongoc_collection_t *collection;
   const char *uri_string = "mongodb://localhost/?appname=bulk-collation";
   mongoc_uri_t *uri;
   bson_error_t error;

   mongoc_init ();

   uri = mongoc_uri_new_with_error (uri_string, &error);
   if (!uri) {
      fprintf (stderr,
               "failed to parse URI: %s\n"
               "error message:       %s\n",
               uri_string,
               error.message);
      return EXIT_FAILURE;
   }

   client = mongoc_client_new_from_uri (uri);
   if (!client) {
      return EXIT_FAILURE;
   }

   mongoc_client_set_error_api (client, 2);
   collection = mongoc_client_get_collection (client, "db", "collection");
   bulk_collation (collection);

   mongoc_uri_destroy (uri);
   mongoc_collection_destroy (collection);
   mongoc_client_destroy (client);

   mongoc_cleanup ();

   return EXIT_SUCCESS;
}

Running the above example will result in:

{ "nInserted" : 2,
   "nMatched" : 1,
   "nModified" : 1,
   "nRemoved" : 0,
   "nUpserted" : 0,
   "writeErrors" : [  ]
}

Unacknowledged Bulk Writes#

Set “w” to zero for an unacknowledged write. The driver sends unacknowledged writes using the legacy opcodes OP_INSERT, OP_UPDATE, and OP_DELETE.

bulk6.c#
#include <mongoc/mongoc.h>
#include <stdio.h>

static void
bulk6 (mongoc_collection_t *collection)
{
   bson_t opts = BSON_INITIALIZER;
   mongoc_write_concern_t *wc;
   mongoc_bulk_operation_t *bulk;
   bson_error_t error;
   bson_t *doc;
   bson_t *selector;
   bson_t reply;
   char *str;
   bool ret;

   wc = mongoc_write_concern_new ();
   mongoc_write_concern_set_w (wc, 0);
   mongoc_write_concern_append (wc, &opts);

   bulk = mongoc_collection_create_bulk_operation_with_opts (collection, &opts);

   doc = BCON_NEW ("_id", BCON_INT32 (10));
   mongoc_bulk_operation_insert (bulk, doc);
   bson_destroy (doc);

   selector = BCON_NEW ("_id", BCON_INT32 (11));
   mongoc_bulk_operation_remove_one (bulk, selector);
   bson_destroy (selector);

   ret = mongoc_bulk_operation_execute (bulk, &reply, &error);

   str = bson_as_canonical_extended_json (&reply, NULL);
   printf ("%s\n", str);
   bson_free (str);

   if (!ret) {
      printf ("Error: %s\n", error.message);
   }

   bson_destroy (&reply);
   mongoc_bulk_operation_destroy (bulk);
   mongoc_write_concern_destroy (wc);
   bson_destroy (&opts);
}

int
main (void)
{
   mongoc_client_t *client;
   mongoc_collection_t *collection;
   const char *uri_string = "mongodb://localhost/?appname=bulk6-example";
   mongoc_uri_t *uri;
   bson_error_t error;

   mongoc_init ();

   uri = mongoc_uri_new_with_error (uri_string, &error);
   if (!uri) {
      fprintf (stderr,
               "failed to parse URI: %s\n"
               "error message:       %s\n",
               uri_string,
               error.message);
      return EXIT_FAILURE;
   }

   client = mongoc_client_new_from_uri (uri);
   if (!client) {
      return EXIT_FAILURE;
   }

   mongoc_client_set_error_api (client, 2);
   collection = mongoc_client_get_collection (client, "test", "test");

   bulk6 (collection);

   mongoc_uri_destroy (uri);
   mongoc_collection_destroy (collection);
   mongoc_client_destroy (client);

   mongoc_cleanup ();

   return EXIT_SUCCESS;
}

The reply document is empty:

{ }

Further Reading#

See the Driver Bulk API Spec, which describes bulk write operations for all MongoDB drivers.