/**
 * Extracts the collection name from a {@code <database>.<collection>} namespace.
 *
 * <p>The namespace is only accepted when it starts with the configured database name followed by
 * a dot and contains exactly one dot overall; namespaces whose collection part itself contains a
 * dot are therefore rejected.
 *
 * @param namespace fully qualified namespace as found in the oplog; must not be {@code null}
 * @return the collection part of the namespace, or {@code null} (after logging at info level)
 *     when the namespace does not have the expected shape
 */
private String getCollectionFromNamespace(String namespace) {
  // Match on "<db>." rather than on "<db>" alone: otherwise a database whose name merely starts
  // with the configured one (e.g. "testing.coll" for database "test") would be accepted and a
  // truncated collection name returned.
  String databasePrefix = definition.getMongoDb() + ".";
  if (namespace.startsWith(databasePrefix) && CharMatcher.is('.').countIn(namespace) == 1) {
    return namespace.substring(databasePrefix.length());
  }
  logger.info("Cannot get collection from namespace [{}]", namespace);
  return null;
}
/**
 * Applies the configured exclude/include field lists to a document.
 *
 * <p>For a {@code GridFSFile} only its metadata (when present) is filtered, recursively, and the
 * same file instance is returned. Any other object is passed through the exclude filter first
 * and the include filter second.
 *
 * @param object the document or GridFS file to filter
 * @return the filtered object
 */
private DBObject applyFieldFilter(DBObject object) {
  if (!(object instanceof GridFSFile)) {
    DBObject withoutExcluded =
        MongoDBHelper.applyExcludeFields(object, definition.getExcludeFields());
    return MongoDBHelper.applyIncludeFields(withoutExcluded, definition.getIncludeFields());
  }

  GridFSFile gridFile = (GridFSFile) object;
  DBObject meta = gridFile.getMetaData();
  if (meta != null) {
    gridFile.setMetaData(applyFieldFilter(meta));
  }
  return object;
}
/**
 * Verifies that the next entry of the given oplog cursor carries exactly the expected timestamp.
 *
 * <p>Nothing is checked when either argument is {@code null}, or when {@code time} equals the
 * initial timestamp configured in the river definition. Otherwise one entry is consumed from the
 * cursor; if its timestamp differs from {@code time} the river status is set to
 * {@code RIVER_STALE} and a {@link SlurperException} is thrown.
 *
 * @param cursor oplog cursor; one entry is read from it when the check runs
 * @param time timestamp the next oplog entry is expected to have
 * @throws SlurperException if the next oplog entry does not carry {@code time}
 */
private void isRiverStale(DBCursor cursor, Timestamp<?> time) throws SlurperException {
  if (cursor == null || time == null) {
    return;
  }

  boolean startsAtConfiguredTimestamp =
      definition.getInitialTimestamp() != null && time.equals(definition.getInitialTimestamp());
  if (startsAtConfiguredTimestamp) {
    return;
  }

  Timestamp<?> nextEntryTimestamp = Timestamp.on(cursor.next());
  if (time.equals(nextEntryTimestamp)) {
    return;
  }

  MongoDBRiverHelper.setRiverStatus(client, definition.getRiverName(), Status.RIVER_STALE);
  throw new SlurperException("River out of sync with oplog.rs collection");
}
/**
 * Handles a command oplog entry recorded against the admin command namespace.
 *
 * <p>Only collection renames are acted upon, and only when the river imports all collections:
 * if the rename target lies in the configured database, an initial import of the target
 * collection is run.
 *
 * @param entry the oplog entry to process
 * @param startTimestamp current start timestamp (not used by this method)
 * @throws InterruptedException if the initial import is interrupted while queueing documents
 */
private void processAdminCommandOplogEntry(
    final DBObject entry, final Timestamp<?> startTimestamp) throws InterruptedException {
  if (logger.isTraceEnabled()) {
    logger.trace("processAdminCommandOplogEntry - [{}]", entry);
  }
  DBObject object = (DBObject) entry.get(MongoDBRiver.OPLOG_OBJECT);
  if (!definition.isImportAllCollections() || object == null) {
    return;
  }

  boolean isRename =
      object.containsField(MongoDBRiver.OPLOG_RENAME_COLLECTION_COMMAND_OPERATION)
          && object.containsField(MongoDBRiver.OPLOG_TO);
  if (!isRename) {
    return;
  }

  String to = object.get(MongoDBRiver.OPLOG_TO).toString();
  if (!to.startsWith(definition.getMongoDb())) {
    return;
  }

  String newCollection = getCollectionFromNamespace(to);
  if (newCollection == null) {
    // The namespace could not be resolved to a collection of the slurped database (the helper
    // has already logged it). Passing null on to getCollection() would fail, so skip the import.
    return;
  }
  DBCollection coll = slurpedDb.getCollection(newCollection);
  doInitialImport(coll);
}
/**
 * Queues an operation on the shared stream.
 *
 * <p>A {@code DROP_DATABASE} operation is never queued as such: it is translated into
 * {@code DROP_COLLECTION} entries, one per collection of the slurped database when all
 * collections are imported, otherwise a single one for {@code collection}. Every other operation
 * is queued unchanged.
 *
 * @param operation the operation to queue
 * @param currentTimestamp timestamp attached to the queue entry
 * @param data payload attached to the queue entry
 * @param collection collection the operation applies to
 * @throws InterruptedException if interrupted while waiting to put an entry on the stream
 */
private void addToStream(
    final Operation operation,
    final Timestamp<?> currentTimestamp,
    final DBObject data,
    final String collection)
    throws InterruptedException {
  if (logger.isTraceEnabled()) {
    logger.trace(
        "addToStream - operation [{}], currentTimestamp [{}], data [{}], collection [{}]",
        operation,
        currentTimestamp,
        data,
        collection);
  }

  // Common case first: anything that is not a database drop goes through untouched.
  if (operation != Operation.DROP_DATABASE) {
    context
        .getStream()
        .put(new MongoDBRiver.QueueEntry(currentTimestamp, operation, data, collection));
    return;
  }

  if (!definition.isImportAllCollections()) {
    context
        .getStream()
        .put(
            new MongoDBRiver.QueueEntry(
                currentTimestamp, Operation.DROP_COLLECTION, data, collection));
    return;
  }

  for (String droppedCollection : slurpedDb.getCollectionNames()) {
    context
        .getStream()
        .put(
            new MongoDBRiver.QueueEntry(
                currentTimestamp, Operation.DROP_COLLECTION, data, droppedCollection));
  }
}
/**
 * Creates a slurper bound to the given MongoDB servers and river definition.
 *
 * <p>Opens a {@code MongoClient} with the client options from the definition, derives the GridFS
 * and command oplog namespaces, and prepares the {@code findKeys} projection: every excluded
 * field is mapped to {@code 0}; only when no exclude list is defined are the included fields
 * mapped to {@code 1}.
 *
 * @param mongoServers addresses of the MongoDB servers to connect to
 * @param definition river definition driving this slurper
 * @param context shared context (status and stream)
 * @param client client handed to the river helper calls
 */
public Slurper(
    List<ServerAddress> mongoServers,
    MongoDBRiverDefinition definition,
    SharedContext context,
    Client client) {
  this.definition = definition;
  this.context = context;
  this.client = client;
  this.mongo = new MongoClient(mongoServers, definition.getMongoClientOptions());

  this.gridfsOplogNamespace =
      definition.getMongoOplogNamespace() + MongoDBRiver.GRIDFS_FILES_SUFFIX;
  this.cmdOplogNamespace = definition.getMongoDb() + "." + MongoDBRiver.OPLOG_NAMESPACE_COMMAND;

  this.findKeys = new BasicDBObject();
  if (definition.getExcludeFields() == null) {
    // Include fields are only honoured when no exclude list is configured at all.
    if (definition.getIncludeFields() != null) {
      for (String includedField : definition.getIncludeFields()) {
        findKeys.put(includedField, 1);
      }
    }
  } else {
    for (String excludedField : definition.getExcludeFields()) {
      findKeys.put(excludedField, 0);
    }
  }
}
@Override public void run() { while (context.getStatus() == Status.RUNNING) { try { if (!assignCollections()) { break; // failed to assign oplogCollection or // slurpedCollection } Timestamp<?> startTimestamp = null; if (!definition.isSkipInitialImport()) { if (!riverHasIndexedFromOplog() && definition.getInitialTimestamp() == null) { if (!isIndexEmpty()) { MongoDBRiverHelper.setRiverStatus( client, definition.getRiverName(), Status.INITIAL_IMPORT_FAILED); break; } if (definition.isImportAllCollections()) { for (String name : slurpedDb.getCollectionNames()) { DBCollection collection = slurpedDb.getCollection(name); startTimestamp = doInitialImport(collection); } } else { DBCollection collection = slurpedDb.getCollection(definition.getMongoCollection()); startTimestamp = doInitialImport(collection); } } } else { logger.info("Skip initial import from collection {}", definition.getMongoCollection()); } // Slurp from oplog DBCursor cursor = null; try { cursor = oplogCursor(startTimestamp); if (cursor == null) { cursor = processFullOplog(); } while (cursor.hasNext()) { DBObject item = cursor.next(); startTimestamp = processOplogEntry(item, startTimestamp); } logger.debug("Before waiting for 500 ms"); Thread.sleep(500); } catch (MongoException.CursorNotFound e) { logger.info( "Cursor {} has been closed. 
About to open a new cusor.", cursor.getCursorId()); logger.debug("Total document inserted [{}]", totalDocuments.get()); } catch (SlurperException sEx) { logger.warn("Exception in slurper", sEx); break; } catch (Exception ex) { logger.warn("Exception while looping in cursor", ex); Thread.currentThread().interrupt(); break; } finally { if (cursor != null) { logger.trace("Closing oplog cursor"); cursor.close(); } } } catch (MongoInterruptedException mIEx) { logger.warn("Mongo driver has been interrupted", mIEx); if (mongo != null) { mongo.close(); mongo = null; } Thread.currentThread().interrupt(); break; } catch (MongoException e) { logger.error("Mongo gave an exception", e); try { Thread.sleep(5000); } catch (InterruptedException iEx) { } } catch (NoSuchElementException e) { logger.warn("A mongoDB cursor bug ?", e); } catch (InterruptedException e) { logger.info("river-mongodb slurper interrupted"); Thread.currentThread().interrupt(); break; } } }
/**
 * Convenience overload that queues an insert for the collection configured in the river
 * definition.
 *
 * @param currentTimestamp timestamp to attach to the queued insert, may be {@code null}
 * @param data document to insert
 * @return whatever the three-argument overload returns for this insert
 * @throws InterruptedException if interrupted while queueing
 */
private String addInsertToStream(final Timestamp<?> currentTimestamp, final DBObject data)
    throws InterruptedException {
  final String configuredCollection = definition.getMongoCollection();
  return addInsertToStream(currentTimestamp, data, configuredCollection);
}
private boolean isValidOplogEntry(final DBObject entry, final Timestamp<?> startTimestamp) { if (MongoDBRiver.OPLOG_NOOP_OPERATION.equals(entry.get(MongoDBRiver.OPLOG_OPERATION))) { logger.debug("[No-op Oplog Entry] - can be ignored. {}", entry); return false; } String namespace = (String) entry.get(MongoDBRiver.OPLOG_NAMESPACE); // Initial support for sharded collection - // https://jira.mongodb.org/browse/SERVER-4333 // Not interested in operation from migration or sharding if (entry.containsField(MongoDBRiver.OPLOG_FROM_MIGRATE) && ((BasicBSONObject) entry).getBoolean(MongoDBRiver.OPLOG_FROM_MIGRATE)) { logger.debug( "[Invalid Oplog Entry] - from migration or sharding operation. Can be ignored. {}", entry); return false; } // Not interested by chunks - skip all if (namespace.endsWith(MongoDBRiver.GRIDFS_CHUNKS_SUFFIX)) { return false; } if (startTimestamp != null) { Timestamp<?> oplogTimestamp = Timestamp.on(entry); if (Timestamp.compare(oplogTimestamp, startTimestamp) < 0) { logger.debug( "[Invalid Oplog Entry] - entry timestamp [{}] before startTimestamp [{}]", entry, startTimestamp); return false; } } boolean validNamespace = false; if (definition.isMongoGridFS()) { validNamespace = gridfsOplogNamespace.equals(namespace); } else { if (definition.isImportAllCollections()) { // Skip temp entry generated by map / reduce if (namespace.startsWith(definition.getMongoDb()) && !namespace.startsWith(definition.getMongoDb() + ".tmp.mr")) { validNamespace = true; } } else { if (definition.getMongoOplogNamespace().equals(namespace)) { validNamespace = true; } } if (cmdOplogNamespace.equals(namespace)) { validNamespace = true; } if (MongoDBRiver.OPLOG_ADMIN_COMMAND.equals(namespace)) { validNamespace = true; } } if (!validNamespace) { logger.debug("[Invalid Oplog Entry] - namespace [{}] is not valid", namespace); return false; } String operation = (String) entry.get(MongoDBRiver.OPLOG_OPERATION); if (!oplogOperations.contains(operation)) { logger.debug("[Invalid Oplog 
Entry] - operation [{}] is not valid", operation); return false; } // TODO: implement a better solution if (definition.getMongoOplogFilter() != null) { DBObject object = (DBObject) entry.get(MongoDBRiver.OPLOG_OBJECT); BasicDBObject filter = definition.getMongoOplogFilter(); if (!filterMatch(filter, object)) { logger.debug( "[Invalid Oplog Entry] - filter [{}] does not match object [{}]", filter, object); return false; } } return true; }
private Timestamp<?> processOplogEntry(final DBObject entry, final Timestamp<?> startTimestamp) throws InterruptedException { // To support transactions, TokuMX wraps one or more operations in a single oplog entry, in a // list. // As long as clients are not transaction-aware, we can pretty safely assume there will only be // one operation in the list. // Supporting genuine multi-operation transactions will require a bit more logic here. flattenOps(entry); if (!isValidOplogEntry(entry, startTimestamp)) { return startTimestamp; } Operation operation = Operation.fromString(entry.get(MongoDBRiver.OPLOG_OPERATION).toString()); String namespace = entry.get(MongoDBRiver.OPLOG_NAMESPACE).toString(); String collection = null; Timestamp<?> oplogTimestamp = Timestamp.on(entry); DBObject object = (DBObject) entry.get(MongoDBRiver.OPLOG_OBJECT); if (definition.isImportAllCollections()) { if (namespace.startsWith(definition.getMongoDb()) && !namespace.equals(cmdOplogNamespace)) { collection = getCollectionFromNamespace(namespace); } } else { collection = definition.getMongoCollection(); } if (namespace.equals(cmdOplogNamespace)) { if (object.containsField(MongoDBRiver.OPLOG_DROP_COMMAND_OPERATION)) { operation = Operation.DROP_COLLECTION; if (definition.isImportAllCollections()) { collection = object.get(MongoDBRiver.OPLOG_DROP_COMMAND_OPERATION).toString(); if (collection.startsWith("tmp.mr.")) { return startTimestamp; } } } if (object.containsField(MongoDBRiver.OPLOG_DROP_DATABASE_COMMAND_OPERATION)) { operation = Operation.DROP_DATABASE; } } logger.trace("namespace: {} - operation: {}", namespace, operation); if (namespace.equals(MongoDBRiver.OPLOG_ADMIN_COMMAND)) { if (operation == Operation.COMMAND) { processAdminCommandOplogEntry(entry, startTimestamp); return startTimestamp; } } if (logger.isTraceEnabled()) { logger.trace("MongoDB object deserialized: {}", object.toString()); logger.trace("collection: {}", collection); logger.trace("oplog entry - namespace [{}], 
operation [{}]", namespace, operation); logger.trace("oplog processing item {}", entry); } String objectId = getObjectIdFromOplogEntry(entry); if (operation == Operation.DELETE) { // Include only _id in data, as vanilla MongoDB does, so transformation scripts won't be // broken by Toku if (object.containsField(MongoDBRiver.MONGODB_ID_FIELD)) { if (object.keySet().size() > 1) { entry.put( MongoDBRiver.OPLOG_OBJECT, object = new BasicDBObject(MongoDBRiver.MONGODB_ID_FIELD, objectId)); } } else { throw new NullPointerException(MongoDBRiver.MONGODB_ID_FIELD); } } if (definition.isMongoGridFS() && namespace.endsWith(MongoDBRiver.GRIDFS_FILES_SUFFIX) && (operation == Operation.INSERT || operation == Operation.UPDATE)) { if (objectId == null) { throw new NullPointerException(MongoDBRiver.MONGODB_ID_FIELD); } GridFS grid = new GridFS(mongo.getDB(definition.getMongoDb()), collection); GridFSDBFile file = grid.findOne(new ObjectId(objectId)); if (file != null) { logger.info("Caught file: {} - {}", file.getId(), file.getFilename()); object = file; } else { logger.warn("Cannot find file from id: {}", objectId); } } if (object instanceof GridFSDBFile) { if (objectId == null) { throw new NullPointerException(MongoDBRiver.MONGODB_ID_FIELD); } if (logger.isTraceEnabled()) { logger.trace("Add attachment: {}", objectId); } addToStream(operation, oplogTimestamp, applyFieldFilter(object), collection); } else { if (operation == Operation.UPDATE) { DBObject update = (DBObject) entry.get(MongoDBRiver.OPLOG_UPDATE); logger.debug("Updated item: {}", update); addQueryToStream(operation, oplogTimestamp, update, collection); } else { if (operation == Operation.INSERT) { addInsertToStream(oplogTimestamp, applyFieldFilter(object), collection); } else { addToStream(operation, oplogTimestamp, applyFieldFilter(object), collection); } } } return oplogTimestamp; }
protected boolean assignCollections() { DB adminDb = mongo.getDB(MongoDBRiver.MONGODB_ADMIN_DATABASE); oplogDb = mongo.getDB(MongoDBRiver.MONGODB_LOCAL_DATABASE); if (!definition.getMongoAdminUser().isEmpty() && !definition.getMongoAdminPassword().isEmpty()) { logger.info( "Authenticate {} with {}", MongoDBRiver.MONGODB_ADMIN_DATABASE, definition.getMongoAdminUser()); CommandResult cmd = adminDb.authenticateCommand( definition.getMongoAdminUser(), definition.getMongoAdminPassword().toCharArray()); if (!cmd.ok()) { logger.error( "Autenticatication failed for {}: {}", MongoDBRiver.MONGODB_ADMIN_DATABASE, cmd.getErrorMessage()); // Can still try with mongoLocal credential if provided. // return false; } oplogDb = adminDb.getMongo().getDB(MongoDBRiver.MONGODB_LOCAL_DATABASE); } if (!definition.getMongoLocalUser().isEmpty() && !definition.getMongoLocalPassword().isEmpty() && !oplogDb.isAuthenticated()) { logger.info( "Authenticate {} with {}", MongoDBRiver.MONGODB_LOCAL_DATABASE, definition.getMongoLocalUser()); CommandResult cmd = oplogDb.authenticateCommand( definition.getMongoLocalUser(), definition.getMongoLocalPassword().toCharArray()); if (!cmd.ok()) { logger.error( "Autenticatication failed for {}: {}", MongoDBRiver.MONGODB_LOCAL_DATABASE, cmd.getErrorMessage()); return false; } } Set<String> collections = oplogDb.getCollectionNames(); if (!collections.contains(MongoDBRiver.OPLOG_COLLECTION)) { logger.error( "Cannot find " + MongoDBRiver.OPLOG_COLLECTION + " collection. Please check this link: http://goo.gl/2x5IW"); return false; } oplogCollection = oplogDb.getCollection(MongoDBRiver.OPLOG_COLLECTION); slurpedDb = mongo.getDB(definition.getMongoDb()); if (!definition.getMongoAdminUser().isEmpty() && !definition.getMongoAdminPassword().isEmpty() && adminDb.isAuthenticated()) { slurpedDb = adminDb.getMongo().getDB(definition.getMongoDb()); } // Not necessary as local user has access to all databases. 
// http://docs.mongodb.org/manual/reference/local-database/ // if (!mongoDbUser.isEmpty() && !mongoDbPassword.isEmpty() // && !slurpedDb.isAuthenticated()) { // logger.info("Authenticate {} with {}", mongoDb, mongoDbUser); // CommandResult cmd = slurpedDb.authenticateCommand(mongoDbUser, // mongoDbPassword.toCharArray()); // if (!cmd.ok()) { // logger.error("Authentication failed for {}: {}", // mongoDb, cmd.getErrorMessage()); // return false; // } // } // slurpedCollection = // slurpedDb.getCollection(definition.getMongoCollection()); // if (definition.isImportAllCollections()) { // for (String collection : slurpedDb.getCollectionNames()) { // slurpedCollections.put(collection, // slurpedDb.getCollection(collection)); // } // } else { // slurpedCollections.put(definition.getMongoCollection(), // slurpedDb.getCollection(definition.getMongoCollection())); // } return true; }
/** * Does an initial sync the same way MongoDB does. https://groups.google.com/ * forum/?fromgroups=#!topic/mongodb-user/sOKlhD_E2ns * * @return the last oplog timestamp before the import began * @throws InterruptedException if the blocking queue stream is interrupted while waiting */ protected Timestamp<?> doInitialImport(DBCollection collection) throws InterruptedException { // TODO: ensure the index type is empty // DBCollection slurpedCollection = // slurpedDb.getCollection(definition.getMongoCollection()); logger.info("MongoDBRiver is beginning initial import of " + collection.getFullName()); Timestamp<?> startTimestamp = getCurrentOplogTimestamp(); boolean inProgress = true; String lastId = null; while (inProgress) { DBCursor cursor = null; try { if (definition.isDisableIndexRefresh()) { updateIndexRefresh(definition.getIndexName(), -1L); } if (!definition.isMongoGridFS()) { logger.info("Collection {} - count: {}", collection.getName(), collection.count()); long count = 0; cursor = collection.find( getFilterForInitialImport(definition.getMongoCollectionFilter(), lastId)); while (cursor.hasNext()) { DBObject object = cursor.next(); count++; if (cursor.hasNext()) { lastId = addInsertToStream(null, applyFieldFilter(object), collection.getName()); } else { logger.debug("Last entry for initial import - add timestamp: {}", startTimestamp); lastId = addInsertToStream(startTimestamp, applyFieldFilter(object), collection.getName()); } } inProgress = false; logger.info("Number documents indexed: {}", count); } else { // TODO: To be optimized. 
// https://github.com/mongodb/mongo-java-driver/pull/48#issuecomment-25241988 // possible option: Get the object id list from .fs // collection // then call GriDFS.findOne GridFS grid = new GridFS(mongo.getDB(definition.getMongoDb()), definition.getMongoCollection()); cursor = grid.getFileList(); while (cursor.hasNext()) { DBObject object = cursor.next(); if (object instanceof GridFSDBFile) { GridFSDBFile file = grid.findOne(new ObjectId(object.get(MongoDBRiver.MONGODB_ID_FIELD).toString())); if (cursor.hasNext()) { lastId = addInsertToStream(null, file); } else { logger.debug("Last entry for initial import - add timestamp: {}", startTimestamp); lastId = addInsertToStream(startTimestamp, file); } } } inProgress = false; } } catch (MongoException.CursorNotFound e) { logger.info( "Initial import - Cursor {} has been closed. About to open a new cusor.", cursor.getCursorId()); logger.debug("Total document inserted [{}]", totalDocuments.get()); } finally { if (cursor != null) { logger.trace("Closing initial import cursor"); cursor.close(); } if (definition.isDisableIndexRefresh()) { updateIndexRefresh(definition.getIndexName(), TimeValue.timeValueSeconds(1)); } } } return startTimestamp; }