@Override public void run() { while (context.getStatus() == Status.RUNNING) { try { if (!assignCollections()) { break; // failed to assign oplogCollection or // slurpedCollection } Timestamp<?> startTimestamp = null; if (!definition.isSkipInitialImport()) { if (!riverHasIndexedFromOplog() && definition.getInitialTimestamp() == null) { if (!isIndexEmpty()) { MongoDBRiverHelper.setRiverStatus( client, definition.getRiverName(), Status.INITIAL_IMPORT_FAILED); break; } if (definition.isImportAllCollections()) { for (String name : slurpedDb.getCollectionNames()) { DBCollection collection = slurpedDb.getCollection(name); startTimestamp = doInitialImport(collection); } } else { DBCollection collection = slurpedDb.getCollection(definition.getMongoCollection()); startTimestamp = doInitialImport(collection); } } } else { logger.info("Skip initial import from collection {}", definition.getMongoCollection()); } // Slurp from oplog DBCursor cursor = null; try { cursor = oplogCursor(startTimestamp); if (cursor == null) { cursor = processFullOplog(); } while (cursor.hasNext()) { DBObject item = cursor.next(); startTimestamp = processOplogEntry(item, startTimestamp); } logger.debug("Before waiting for 500 ms"); Thread.sleep(500); } catch (MongoException.CursorNotFound e) { logger.info( "Cursor {} has been closed. 
About to open a new cusor.", cursor.getCursorId()); logger.debug("Total document inserted [{}]", totalDocuments.get()); } catch (SlurperException sEx) { logger.warn("Exception in slurper", sEx); break; } catch (Exception ex) { logger.warn("Exception while looping in cursor", ex); Thread.currentThread().interrupt(); break; } finally { if (cursor != null) { logger.trace("Closing oplog cursor"); cursor.close(); } } } catch (MongoInterruptedException mIEx) { logger.warn("Mongo driver has been interrupted", mIEx); if (mongo != null) { mongo.close(); mongo = null; } Thread.currentThread().interrupt(); break; } catch (MongoException e) { logger.error("Mongo gave an exception", e); try { Thread.sleep(5000); } catch (InterruptedException iEx) { } } catch (NoSuchElementException e) { logger.warn("A mongoDB cursor bug ?", e); } catch (InterruptedException e) { logger.info("river-mongodb slurper interrupted"); Thread.currentThread().interrupt(); break; } } }
private Timestamp<?> processOplogEntry(final DBObject entry, final Timestamp<?> startTimestamp) throws InterruptedException { // To support transactions, TokuMX wraps one or more operations in a single oplog entry, in a // list. // As long as clients are not transaction-aware, we can pretty safely assume there will only be // one operation in the list. // Supporting genuine multi-operation transactions will require a bit more logic here. flattenOps(entry); if (!isValidOplogEntry(entry, startTimestamp)) { return startTimestamp; } Operation operation = Operation.fromString(entry.get(MongoDBRiver.OPLOG_OPERATION).toString()); String namespace = entry.get(MongoDBRiver.OPLOG_NAMESPACE).toString(); String collection = null; Timestamp<?> oplogTimestamp = Timestamp.on(entry); DBObject object = (DBObject) entry.get(MongoDBRiver.OPLOG_OBJECT); if (definition.isImportAllCollections()) { if (namespace.startsWith(definition.getMongoDb()) && !namespace.equals(cmdOplogNamespace)) { collection = getCollectionFromNamespace(namespace); } } else { collection = definition.getMongoCollection(); } if (namespace.equals(cmdOplogNamespace)) { if (object.containsField(MongoDBRiver.OPLOG_DROP_COMMAND_OPERATION)) { operation = Operation.DROP_COLLECTION; if (definition.isImportAllCollections()) { collection = object.get(MongoDBRiver.OPLOG_DROP_COMMAND_OPERATION).toString(); if (collection.startsWith("tmp.mr.")) { return startTimestamp; } } } if (object.containsField(MongoDBRiver.OPLOG_DROP_DATABASE_COMMAND_OPERATION)) { operation = Operation.DROP_DATABASE; } } logger.trace("namespace: {} - operation: {}", namespace, operation); if (namespace.equals(MongoDBRiver.OPLOG_ADMIN_COMMAND)) { if (operation == Operation.COMMAND) { processAdminCommandOplogEntry(entry, startTimestamp); return startTimestamp; } } if (logger.isTraceEnabled()) { logger.trace("MongoDB object deserialized: {}", object.toString()); logger.trace("collection: {}", collection); logger.trace("oplog entry - namespace [{}], 
operation [{}]", namespace, operation); logger.trace("oplog processing item {}", entry); } String objectId = getObjectIdFromOplogEntry(entry); if (operation == Operation.DELETE) { // Include only _id in data, as vanilla MongoDB does, so transformation scripts won't be // broken by Toku if (object.containsField(MongoDBRiver.MONGODB_ID_FIELD)) { if (object.keySet().size() > 1) { entry.put( MongoDBRiver.OPLOG_OBJECT, object = new BasicDBObject(MongoDBRiver.MONGODB_ID_FIELD, objectId)); } } else { throw new NullPointerException(MongoDBRiver.MONGODB_ID_FIELD); } } if (definition.isMongoGridFS() && namespace.endsWith(MongoDBRiver.GRIDFS_FILES_SUFFIX) && (operation == Operation.INSERT || operation == Operation.UPDATE)) { if (objectId == null) { throw new NullPointerException(MongoDBRiver.MONGODB_ID_FIELD); } GridFS grid = new GridFS(mongo.getDB(definition.getMongoDb()), collection); GridFSDBFile file = grid.findOne(new ObjectId(objectId)); if (file != null) { logger.info("Caught file: {} - {}", file.getId(), file.getFilename()); object = file; } else { logger.warn("Cannot find file from id: {}", objectId); } } if (object instanceof GridFSDBFile) { if (objectId == null) { throw new NullPointerException(MongoDBRiver.MONGODB_ID_FIELD); } if (logger.isTraceEnabled()) { logger.trace("Add attachment: {}", objectId); } addToStream(operation, oplogTimestamp, applyFieldFilter(object), collection); } else { if (operation == Operation.UPDATE) { DBObject update = (DBObject) entry.get(MongoDBRiver.OPLOG_UPDATE); logger.debug("Updated item: {}", update); addQueryToStream(operation, oplogTimestamp, update, collection); } else { if (operation == Operation.INSERT) { addInsertToStream(oplogTimestamp, applyFieldFilter(object), collection); } else { addToStream(operation, oplogTimestamp, applyFieldFilter(object), collection); } } } return oplogTimestamp; }
/**
 * Convenience overload that targets the collection configured on the river definition.
 *
 * @param currentTimestamp timestamp forwarded to the three-argument overload; may be
 *     {@code null} (callers in this file pass {@code null} for all but the last imported entry)
 * @param data the document forwarded to the three-argument overload
 * @return whatever the three-argument overload returns (callers in this file store it as the
 *     last processed id)
 * @throws InterruptedException propagated from the three-argument overload
 */
private String addInsertToStream(final Timestamp<?> currentTimestamp, final DBObject data)
    throws InterruptedException {
  final String configuredCollection = definition.getMongoCollection();
  return addInsertToStream(currentTimestamp, data, configuredCollection);
}
/** * Does an initial sync the same way MongoDB does. https://groups.google.com/ * forum/?fromgroups=#!topic/mongodb-user/sOKlhD_E2ns * * @return the last oplog timestamp before the import began * @throws InterruptedException if the blocking queue stream is interrupted while waiting */ protected Timestamp<?> doInitialImport(DBCollection collection) throws InterruptedException { // TODO: ensure the index type is empty // DBCollection slurpedCollection = // slurpedDb.getCollection(definition.getMongoCollection()); logger.info("MongoDBRiver is beginning initial import of " + collection.getFullName()); Timestamp<?> startTimestamp = getCurrentOplogTimestamp(); boolean inProgress = true; String lastId = null; while (inProgress) { DBCursor cursor = null; try { if (definition.isDisableIndexRefresh()) { updateIndexRefresh(definition.getIndexName(), -1L); } if (!definition.isMongoGridFS()) { logger.info("Collection {} - count: {}", collection.getName(), collection.count()); long count = 0; cursor = collection.find( getFilterForInitialImport(definition.getMongoCollectionFilter(), lastId)); while (cursor.hasNext()) { DBObject object = cursor.next(); count++; if (cursor.hasNext()) { lastId = addInsertToStream(null, applyFieldFilter(object), collection.getName()); } else { logger.debug("Last entry for initial import - add timestamp: {}", startTimestamp); lastId = addInsertToStream(startTimestamp, applyFieldFilter(object), collection.getName()); } } inProgress = false; logger.info("Number documents indexed: {}", count); } else { // TODO: To be optimized. 
// https://github.com/mongodb/mongo-java-driver/pull/48#issuecomment-25241988 // possible option: Get the object id list from .fs // collection // then call GriDFS.findOne GridFS grid = new GridFS(mongo.getDB(definition.getMongoDb()), definition.getMongoCollection()); cursor = grid.getFileList(); while (cursor.hasNext()) { DBObject object = cursor.next(); if (object instanceof GridFSDBFile) { GridFSDBFile file = grid.findOne(new ObjectId(object.get(MongoDBRiver.MONGODB_ID_FIELD).toString())); if (cursor.hasNext()) { lastId = addInsertToStream(null, file); } else { logger.debug("Last entry for initial import - add timestamp: {}", startTimestamp); lastId = addInsertToStream(startTimestamp, file); } } } inProgress = false; } } catch (MongoException.CursorNotFound e) { logger.info( "Initial import - Cursor {} has been closed. About to open a new cusor.", cursor.getCursorId()); logger.debug("Total document inserted [{}]", totalDocuments.get()); } finally { if (cursor != null) { logger.trace("Closing initial import cursor"); cursor.close(); } if (definition.isDisableIndexRefresh()) { updateIndexRefresh(definition.getIndexName(), TimeValue.timeValueSeconds(1)); } } } return startTimestamp; }