  /**
   * Extracts the collection name from a fully qualified namespace ({@code db.collection})
   * belonging to the monitored database. Returns {@code null} when the namespace cannot be
   * parsed.
   */
  private String getCollectionFromNamespace(String namespace) {
    if (namespace.startsWith(definition.getMongoDb())
        && CharMatcher.is('.').countIn(namespace) == 1) {
      return namespace.substring(definition.getMongoDb().length() + 1);
    }
    logger.info("Cannot get collection from namespace [{}]", namespace);
    return null;
  }
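  /**
   * Applies the configured include / exclude field filters to a document. For GridFS files the
   * filter is applied to the file metadata rather than to the file object itself.
   */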
 private DBObject applyFieldFilter(DBObject object) {
   if (object instanceof GridFSFile) {
     GridFSFile file = (GridFSFile) object;
     DBObject metadata = file.getMetaData();
     if (metadata != null) {
       file.setMetaData(applyFieldFilter(metadata));
     }
   } else {
     object = MongoDBHelper.applyExcludeFields(object, definition.getExcludeFields());
     object = MongoDBHelper.applyIncludeFields(object, definition.getIncludeFields());
   }
   return object;
 }
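  /**
   * Verifies that the expected timestamp {@code time} matches the first entry returned by
   * {@code cursor}. A mismatch means the river is out of sync with the oplog: the river status is
   * set to {@code RIVER_STALE} and a {@link SlurperException} is thrown.
   */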
 private void isRiverStale(DBCursor cursor, Timestamp<?> time) throws SlurperException {
   if (cursor == null || time == null) {
     return;
   }
   if (definition.getInitialTimestamp() != null && time.equals(definition.getInitialTimestamp())) {
     return;
   }
   DBObject entry = cursor.next();
   Timestamp<?> oplogTimestamp = Timestamp.on(entry);
   if (!time.equals(oplogTimestamp)) {
     MongoDBRiverHelper.setRiverStatus(client, definition.getRiverName(), Status.RIVER_STALE);
     throw new SlurperException("River out of sync with oplog.rs collection");
   }
 }
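  /**
   * Handles an admin command oplog entry. Currently only {@code renameCollection} is acted upon:
   * when all collections are imported and a collection is renamed into the monitored database, an
   * initial import of the renamed collection is triggered.
   */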
 private void processAdminCommandOplogEntry(
     final DBObject entry, final Timestamp<?> startTimestamp) throws InterruptedException {
   if (logger.isTraceEnabled()) {
     logger.trace("processAdminCommandOplogEntry - [{}]", entry);
   }
   DBObject object = (DBObject) entry.get(MongoDBRiver.OPLOG_OBJECT);
   if (definition.isImportAllCollections()) {
     if (object.containsField(MongoDBRiver.OPLOG_RENAME_COLLECTION_COMMAND_OPERATION)
         && object.containsField(MongoDBRiver.OPLOG_TO)) {
       String to = object.get(MongoDBRiver.OPLOG_TO).toString();
       if (to.startsWith(definition.getMongoDb())) {
         String newCollection = getCollectionFromNamespace(to);
         DBCollection coll = slurpedDb.getCollection(newCollection);
         doInitialImport(coll);
       }
     }
   }
 }
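   /**
    * Puts a new entry onto the shared stream. A {@code DROP_DATABASE} operation is expanded into
    * one {@code DROP_COLLECTION} entry per collection when all collections are imported, or a
    * single entry for the configured collection otherwise.
    */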
  private void addToStream(
      final Operation operation,
      final Timestamp<?> currentTimestamp,
      final DBObject data,
      final String collection)
      throws InterruptedException {
    if (logger.isTraceEnabled()) {
      logger.trace(
          "addToStream - operation [{}], currentTimestamp [{}], data [{}], collection [{}]",
          operation,
          currentTimestamp,
          data,
          collection);
    }

    if (operation == Operation.DROP_DATABASE) {
      if (definition.isImportAllCollections()) {
        for (String name : slurpedDb.getCollectionNames()) {
          context
              .getStream()
              .put(
                  new MongoDBRiver.QueueEntry(
                      currentTimestamp, Operation.DROP_COLLECTION, data, name));
        }
      } else {
        context
            .getStream()
            .put(
                new MongoDBRiver.QueueEntry(
                    currentTimestamp, Operation.DROP_COLLECTION, data, collection));
      }
    } else {
      context
          .getStream()
          .put(new MongoDBRiver.QueueEntry(currentTimestamp, operation, data, collection));
    }
  }
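  /**
   * Creates a slurper connected to the given MongoDB servers. Pre-computes the GridFS and command
   * oplog namespaces and builds the {@code findKeys} projection from the configured exclude
   * fields (or, failing that, the include fields).
   */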
 public Slurper(
     List<ServerAddress> mongoServers,
     MongoDBRiverDefinition definition,
     SharedContext context,
     Client client) {
   this.definition = definition;
   this.context = context;
   this.client = client;
   this.mongo = new MongoClient(mongoServers, definition.getMongoClientOptions());
   this.findKeys = new BasicDBObject();
   this.gridfsOplogNamespace =
       definition.getMongoOplogNamespace() + MongoDBRiver.GRIDFS_FILES_SUFFIX;
   this.cmdOplogNamespace = definition.getMongoDb() + "." + MongoDBRiver.OPLOG_NAMESPACE_COMMAND;
   if (definition.getExcludeFields() != null) {
     for (String key : definition.getExcludeFields()) {
       findKeys.put(key, 0);
     }
   } else if (definition.getIncludeFields() != null) {
     for (String key : definition.getIncludeFields()) {
       findKeys.put(key, 1);
     }
   }
 }
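   /**
    * Main slurper loop: performs the initial import when required, then tails the oplog and
    * pushes every relevant entry onto the shared stream until the river is stopped or an
    * unrecoverable error occurs.
    */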
  @Override
  public void run() {
    while (context.getStatus() == Status.RUNNING) {
      try {
        if (!assignCollections()) {
           break; // failed to assign oplogCollection or slurpedCollection
        }

        Timestamp<?> startTimestamp = null;
        if (!definition.isSkipInitialImport()) {
          if (!riverHasIndexedFromOplog() && definition.getInitialTimestamp() == null) {
            if (!isIndexEmpty()) {
              MongoDBRiverHelper.setRiverStatus(
                  client, definition.getRiverName(), Status.INITIAL_IMPORT_FAILED);
              break;
            }
            if (definition.isImportAllCollections()) {
              for (String name : slurpedDb.getCollectionNames()) {
                DBCollection collection = slurpedDb.getCollection(name);
                startTimestamp = doInitialImport(collection);
              }
            } else {
              DBCollection collection = slurpedDb.getCollection(definition.getMongoCollection());
              startTimestamp = doInitialImport(collection);
            }
          }
        } else {
          logger.info("Skip initial import from collection {}", definition.getMongoCollection());
        }

        // Slurp from oplog
        DBCursor cursor = null;
        try {
          cursor = oplogCursor(startTimestamp);
          if (cursor == null) {
            cursor = processFullOplog();
          }
          while (cursor.hasNext()) {
            DBObject item = cursor.next();
            startTimestamp = processOplogEntry(item, startTimestamp);
          }
          logger.debug("Before waiting for 500 ms");
          Thread.sleep(500);
        } catch (MongoException.CursorNotFound e) {
          logger.info(
              "Cursor {} has been closed. About to open a new cusor.", cursor.getCursorId());
          logger.debug("Total document inserted [{}]", totalDocuments.get());
        } catch (SlurperException sEx) {
          logger.warn("Exception in slurper", sEx);
          break;
        } catch (Exception ex) {
          logger.warn("Exception while looping in cursor", ex);
          Thread.currentThread().interrupt();
          break;
        } finally {
          if (cursor != null) {
            logger.trace("Closing oplog cursor");
            cursor.close();
          }
        }
      } catch (MongoInterruptedException mIEx) {
        logger.warn("Mongo driver has been interrupted", mIEx);
        if (mongo != null) {
          mongo.close();
          mongo = null;
        }
        Thread.currentThread().interrupt();
        break;
      } catch (MongoException e) {
        logger.error("Mongo gave an exception", e);
        try {
          Thread.sleep(5000);
         } catch (InterruptedException iEx) {
           // Restore the interrupt status so the outer loop can shut down cleanly.
           Thread.currentThread().interrupt();
         }
      } catch (NoSuchElementException e) {
        logger.warn("A mongoDB cursor bug ?", e);
      } catch (InterruptedException e) {
        logger.info("river-mongodb slurper interrupted");
        Thread.currentThread().interrupt();
        break;
      }
    }
  }
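  /**
   * Convenience overload of {@code addInsertToStream} that targets the collection configured in
   * the river definition.
   */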
 private String addInsertToStream(final Timestamp<?> currentTimestamp, final DBObject data)
     throws InterruptedException {
   return addInsertToStream(currentTimestamp, data, definition.getMongoCollection());
 }
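   /**
    * Decides whether an oplog entry should be processed. Entries are rejected when they are
    * no-ops, come from chunk migration or sharding, touch GridFS chunks, are older than
    * {@code startTimestamp}, target a namespace outside the monitored database / collection, use
    * an unsupported operation, or do not match the optional oplog filter.
    */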
  private boolean isValidOplogEntry(final DBObject entry, final Timestamp<?> startTimestamp) {
    if (MongoDBRiver.OPLOG_NOOP_OPERATION.equals(entry.get(MongoDBRiver.OPLOG_OPERATION))) {
      logger.debug("[No-op Oplog Entry] - can be ignored. {}", entry);
      return false;
    }
    String namespace = (String) entry.get(MongoDBRiver.OPLOG_NAMESPACE);
     // Initial support for sharded collections -
     // https://jira.mongodb.org/browse/SERVER-4333
     // Not interested in operations from migration or sharding
    if (entry.containsField(MongoDBRiver.OPLOG_FROM_MIGRATE)
        && ((BasicBSONObject) entry).getBoolean(MongoDBRiver.OPLOG_FROM_MIGRATE)) {
      logger.debug(
          "[Invalid Oplog Entry] - from migration or sharding operation. Can be ignored. {}",
          entry);
      return false;
    }
     // Not interested in chunks - skip them all
    if (namespace.endsWith(MongoDBRiver.GRIDFS_CHUNKS_SUFFIX)) {
      return false;
    }

    if (startTimestamp != null) {
      Timestamp<?> oplogTimestamp = Timestamp.on(entry);
      if (Timestamp.compare(oplogTimestamp, startTimestamp) < 0) {
        logger.debug(
            "[Invalid Oplog Entry] - entry timestamp [{}] before startTimestamp [{}]",
            entry,
            startTimestamp);
        return false;
      }
    }

    boolean validNamespace = false;
    if (definition.isMongoGridFS()) {
      validNamespace = gridfsOplogNamespace.equals(namespace);
    } else {
      if (definition.isImportAllCollections()) {
         // Skip temporary collections generated by map/reduce
        if (namespace.startsWith(definition.getMongoDb())
            && !namespace.startsWith(definition.getMongoDb() + ".tmp.mr")) {
          validNamespace = true;
        }
      } else {
        if (definition.getMongoOplogNamespace().equals(namespace)) {
          validNamespace = true;
        }
      }
      if (cmdOplogNamespace.equals(namespace)) {
        validNamespace = true;
      }

      if (MongoDBRiver.OPLOG_ADMIN_COMMAND.equals(namespace)) {
        validNamespace = true;
      }
    }
    if (!validNamespace) {
      logger.debug("[Invalid Oplog Entry] - namespace [{}] is not valid", namespace);
      return false;
    }
    String operation = (String) entry.get(MongoDBRiver.OPLOG_OPERATION);
    if (!oplogOperations.contains(operation)) {
      logger.debug("[Invalid Oplog Entry] - operation [{}] is not valid", operation);
      return false;
    }

    // TODO: implement a better solution
    if (definition.getMongoOplogFilter() != null) {
      DBObject object = (DBObject) entry.get(MongoDBRiver.OPLOG_OBJECT);
      BasicDBObject filter = definition.getMongoOplogFilter();
      if (!filterMatch(filter, object)) {
        logger.debug(
            "[Invalid Oplog Entry] - filter [{}] does not match object [{}]", filter, object);
        return false;
      }
    }
    return true;
  }
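   /**
    * Converts a single oplog entry into a queue entry on the shared stream (handling drops, admin
    * commands, GridFS files, deletes, updates and inserts) and returns the timestamp the slurper
    * should resume from.
    */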
  private Timestamp<?> processOplogEntry(final DBObject entry, final Timestamp<?> startTimestamp)
      throws InterruptedException {
    // To support transactions, TokuMX wraps one or more operations in a single oplog entry, in a
    // list.
    // As long as clients are not transaction-aware, we can pretty safely assume there will only be
    // one operation in the list.
    // Supporting genuine multi-operation transactions will require a bit more logic here.
    flattenOps(entry);

    if (!isValidOplogEntry(entry, startTimestamp)) {
      return startTimestamp;
    }
    Operation operation = Operation.fromString(entry.get(MongoDBRiver.OPLOG_OPERATION).toString());
    String namespace = entry.get(MongoDBRiver.OPLOG_NAMESPACE).toString();
    String collection = null;
    Timestamp<?> oplogTimestamp = Timestamp.on(entry);
    DBObject object = (DBObject) entry.get(MongoDBRiver.OPLOG_OBJECT);

    if (definition.isImportAllCollections()) {
      if (namespace.startsWith(definition.getMongoDb()) && !namespace.equals(cmdOplogNamespace)) {
        collection = getCollectionFromNamespace(namespace);
      }
    } else {
      collection = definition.getMongoCollection();
    }

    if (namespace.equals(cmdOplogNamespace)) {
      if (object.containsField(MongoDBRiver.OPLOG_DROP_COMMAND_OPERATION)) {
        operation = Operation.DROP_COLLECTION;
        if (definition.isImportAllCollections()) {
          collection = object.get(MongoDBRiver.OPLOG_DROP_COMMAND_OPERATION).toString();
          if (collection.startsWith("tmp.mr.")) {
            return startTimestamp;
          }
        }
      }
      if (object.containsField(MongoDBRiver.OPLOG_DROP_DATABASE_COMMAND_OPERATION)) {
        operation = Operation.DROP_DATABASE;
      }
    }

    logger.trace("namespace: {} - operation: {}", namespace, operation);
    if (namespace.equals(MongoDBRiver.OPLOG_ADMIN_COMMAND)) {
      if (operation == Operation.COMMAND) {
        processAdminCommandOplogEntry(entry, startTimestamp);
        return startTimestamp;
      }
    }

    if (logger.isTraceEnabled()) {
      logger.trace("MongoDB object deserialized: {}", object.toString());
      logger.trace("collection: {}", collection);
      logger.trace("oplog entry - namespace [{}], operation [{}]", namespace, operation);
      logger.trace("oplog processing item {}", entry);
    }

    String objectId = getObjectIdFromOplogEntry(entry);
    if (operation == Operation.DELETE) {
      // Include only _id in data, as vanilla MongoDB does, so transformation scripts won't be
      // broken by Toku
      if (object.containsField(MongoDBRiver.MONGODB_ID_FIELD)) {
        if (object.keySet().size() > 1) {
          entry.put(
              MongoDBRiver.OPLOG_OBJECT,
              object = new BasicDBObject(MongoDBRiver.MONGODB_ID_FIELD, objectId));
        }
      } else {
        throw new NullPointerException(MongoDBRiver.MONGODB_ID_FIELD);
      }
    }

    if (definition.isMongoGridFS()
        && namespace.endsWith(MongoDBRiver.GRIDFS_FILES_SUFFIX)
        && (operation == Operation.INSERT || operation == Operation.UPDATE)) {
      if (objectId == null) {
        throw new NullPointerException(MongoDBRiver.MONGODB_ID_FIELD);
      }
      GridFS grid = new GridFS(mongo.getDB(definition.getMongoDb()), collection);
      GridFSDBFile file = grid.findOne(new ObjectId(objectId));
      if (file != null) {
        logger.info("Caught file: {} - {}", file.getId(), file.getFilename());
        object = file;
      } else {
        logger.warn("Cannot find file from id: {}", objectId);
      }
    }

    if (object instanceof GridFSDBFile) {
      if (objectId == null) {
        throw new NullPointerException(MongoDBRiver.MONGODB_ID_FIELD);
      }
      if (logger.isTraceEnabled()) {
        logger.trace("Add attachment: {}", objectId);
      }
      addToStream(operation, oplogTimestamp, applyFieldFilter(object), collection);
    } else {
      if (operation == Operation.UPDATE) {
        DBObject update = (DBObject) entry.get(MongoDBRiver.OPLOG_UPDATE);
        logger.debug("Updated item: {}", update);
        addQueryToStream(operation, oplogTimestamp, update, collection);
      } else {
        if (operation == Operation.INSERT) {
          addInsertToStream(oplogTimestamp, applyFieldFilter(object), collection);
        } else {
          addToStream(operation, oplogTimestamp, applyFieldFilter(object), collection);
        }
      }
    }
    return oplogTimestamp;
  }
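   /**
    * Authenticates against the admin and local databases when credentials are configured, then
    * resolves the {@code oplog.rs} collection and the database to slurp from. Returns
    * {@code false} when local authentication fails or the oplog collection cannot be found.
    */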
  protected boolean assignCollections() {
    DB adminDb = mongo.getDB(MongoDBRiver.MONGODB_ADMIN_DATABASE);
    oplogDb = mongo.getDB(MongoDBRiver.MONGODB_LOCAL_DATABASE);

    if (!definition.getMongoAdminUser().isEmpty()
        && !definition.getMongoAdminPassword().isEmpty()) {
      logger.info(
          "Authenticate {} with {}",
          MongoDBRiver.MONGODB_ADMIN_DATABASE,
          definition.getMongoAdminUser());

      CommandResult cmd =
          adminDb.authenticateCommand(
              definition.getMongoAdminUser(), definition.getMongoAdminPassword().toCharArray());
      if (!cmd.ok()) {
        logger.error(
            "Autenticatication failed for {}: {}",
            MongoDBRiver.MONGODB_ADMIN_DATABASE,
            cmd.getErrorMessage());
        // Can still try with mongoLocal credential if provided.
        // return false;
      }
      oplogDb = adminDb.getMongo().getDB(MongoDBRiver.MONGODB_LOCAL_DATABASE);
    }

    if (!definition.getMongoLocalUser().isEmpty()
        && !definition.getMongoLocalPassword().isEmpty()
        && !oplogDb.isAuthenticated()) {
      logger.info(
          "Authenticate {} with {}",
          MongoDBRiver.MONGODB_LOCAL_DATABASE,
          definition.getMongoLocalUser());
      CommandResult cmd =
          oplogDb.authenticateCommand(
              definition.getMongoLocalUser(), definition.getMongoLocalPassword().toCharArray());
      if (!cmd.ok()) {
        logger.error(
            "Autenticatication failed for {}: {}",
            MongoDBRiver.MONGODB_LOCAL_DATABASE,
            cmd.getErrorMessage());
        return false;
      }
    }

    Set<String> collections = oplogDb.getCollectionNames();
    if (!collections.contains(MongoDBRiver.OPLOG_COLLECTION)) {
       logger.error(
           "Cannot find {} collection. Please check this link: http://goo.gl/2x5IW",
           MongoDBRiver.OPLOG_COLLECTION);
      return false;
    }
    oplogCollection = oplogDb.getCollection(MongoDBRiver.OPLOG_COLLECTION);

    slurpedDb = mongo.getDB(definition.getMongoDb());
    if (!definition.getMongoAdminUser().isEmpty()
        && !definition.getMongoAdminPassword().isEmpty()
        && adminDb.isAuthenticated()) {
      slurpedDb = adminDb.getMongo().getDB(definition.getMongoDb());
    }

    // Not necessary as local user has access to all databases.
    // http://docs.mongodb.org/manual/reference/local-database/
    // if (!mongoDbUser.isEmpty() && !mongoDbPassword.isEmpty()
    // && !slurpedDb.isAuthenticated()) {
    // logger.info("Authenticate {} with {}", mongoDb, mongoDbUser);
    // CommandResult cmd = slurpedDb.authenticateCommand(mongoDbUser,
    // mongoDbPassword.toCharArray());
    // if (!cmd.ok()) {
    // logger.error("Authentication failed for {}: {}",
    // mongoDb, cmd.getErrorMessage());
    // return false;
    // }
    // }
    // slurpedCollection =
    // slurpedDb.getCollection(definition.getMongoCollection());
    // if (definition.isImportAllCollections()) {
    // for (String collection : slurpedDb.getCollectionNames()) {
    // slurpedCollections.put(collection,
    // slurpedDb.getCollection(collection));
    // }
    // } else {
    // slurpedCollections.put(definition.getMongoCollection(),
    // slurpedDb.getCollection(definition.getMongoCollection()));
    // }

    return true;
  }
   /**
    * Does an initial sync the same way MongoDB does.
    * https://groups.google.com/forum/?fromgroups=#!topic/mongodb-user/sOKlhD_E2ns
    *
    * @param collection the collection to import
    * @return the last oplog timestamp before the import began
    * @throws InterruptedException if the blocking queue stream is interrupted while waiting
    */
  protected Timestamp<?> doInitialImport(DBCollection collection) throws InterruptedException {
    // TODO: ensure the index type is empty
    // DBCollection slurpedCollection =
    // slurpedDb.getCollection(definition.getMongoCollection());

    logger.info("MongoDBRiver is beginning initial import of " + collection.getFullName());
    Timestamp<?> startTimestamp = getCurrentOplogTimestamp();
    boolean inProgress = true;
    String lastId = null;
    while (inProgress) {
      DBCursor cursor = null;
      try {
        if (definition.isDisableIndexRefresh()) {
          updateIndexRefresh(definition.getIndexName(), -1L);
        }
        if (!definition.isMongoGridFS()) {
          logger.info("Collection {} - count: {}", collection.getName(), collection.count());
          long count = 0;
          cursor =
              collection.find(
                  getFilterForInitialImport(definition.getMongoCollectionFilter(), lastId));
          while (cursor.hasNext()) {
            DBObject object = cursor.next();
            count++;
            if (cursor.hasNext()) {
              lastId = addInsertToStream(null, applyFieldFilter(object), collection.getName());
            } else {
              logger.debug("Last entry for initial import - add timestamp: {}", startTimestamp);
              lastId =
                  addInsertToStream(startTimestamp, applyFieldFilter(object), collection.getName());
            }
          }
          inProgress = false;
          logger.info("Number documents indexed: {}", count);
        } else {
           // TODO: To be optimized.
           // https://github.com/mongodb/mongo-java-driver/pull/48#issuecomment-25241988
           // Possible option: get the object id list from the .fs collection,
           // then call GridFS.findOne.
          GridFS grid =
              new GridFS(mongo.getDB(definition.getMongoDb()), definition.getMongoCollection());

          cursor = grid.getFileList();
          while (cursor.hasNext()) {
            DBObject object = cursor.next();
            if (object instanceof GridFSDBFile) {
              GridFSDBFile file =
                  grid.findOne(new ObjectId(object.get(MongoDBRiver.MONGODB_ID_FIELD).toString()));
              if (cursor.hasNext()) {
                lastId = addInsertToStream(null, file);
              } else {
                logger.debug("Last entry for initial import - add timestamp: {}", startTimestamp);
                lastId = addInsertToStream(startTimestamp, file);
              }
            }
          }
          inProgress = false;
        }
      } catch (MongoException.CursorNotFound e) {
        logger.info(
            "Initial import - Cursor {} has been closed. About to open a new cusor.",
            cursor.getCursorId());
        logger.debug("Total document inserted [{}]", totalDocuments.get());
      } finally {
        if (cursor != null) {
          logger.trace("Closing initial import cursor");
          cursor.close();
        }
        if (definition.isDisableIndexRefresh()) {
          updateIndexRefresh(definition.getIndexName(), TimeValue.timeValueSeconds(1));
        }
      }
    }
    return startTimestamp;
  }