@SuppressWarnings("unchecked") private void doDelta() { addStatusMessage("Delta Dump started"); VariableResolverImpl resolver = getVariableResolver(); if (document.deleteQuery != null) { writer.deleteByQuery(document.deleteQuery); } addStatusMessage("Identifying Delta"); LOG.info("Starting delta collection."); Set<Map<String, Object>> deletedKeys = new HashSet<Map<String, Object>>(); Set<Map<String, Object>> allPks = collectDelta(root, resolver, deletedKeys); if (stop.get()) return; addStatusMessage("Deltas Obtained"); addStatusMessage("Building documents"); if (!deletedKeys.isEmpty()) { allPks.removeAll(deletedKeys); deleteAll(deletedKeys); // Make sure that documents are not re-created } deletedKeys = null; statusMessages.put("Total Changed Documents", allPks.size()); VariableResolverImpl vri = getVariableResolver(); Iterator<Map<String, Object>> pkIter = allPks.iterator(); while (pkIter.hasNext()) { Map<String, Object> map = pkIter.next(); vri.addNamespace(DataConfig.IMPORTER_NS_SHORT + ".delta", map); buildDocument(vri, null, map, root, true, null); pkIter.remove(); // check for abort if (stop.get()) break; } if (!stop.get()) { LOG.info("Delta Import completed successfully"); } }
public VariableResolverImpl getVariableResolver() {
  try {
    VariableResolverImpl resolver = null;
    if (dataImporter != null && dataImporter.getCore() != null) {
      resolver = new VariableResolverImpl(
          dataImporter.getCore().getResourceLoader().getCoreProperties());
    } else {
      resolver = new VariableResolverImpl();
    }
    Map<String, Object> indexerNamespace = new HashMap<String, Object>();
    if (persistedProperties.getProperty(LAST_INDEX_TIME) != null) {
      indexerNamespace.put(LAST_INDEX_TIME, persistedProperties.getProperty(LAST_INDEX_TIME));
    } else {
      // set epoch
      indexerNamespace.put(LAST_INDEX_TIME, DataImporter.DATE_TIME_FORMAT.get().format(EPOCH));
    }
    indexerNamespace.put(INDEX_START_TIME, dataImporter.getIndexStartTime());
    indexerNamespace.put("request", requestParameters.requestParams);
    indexerNamespace.put("functions", functionsNamespace);
    for (DataConfig.Entity entity : dataImporter.getConfig().document.entities) {
      String key = entity.name + "." + SolrWriter.LAST_INDEX_KEY;
      String lastIndex = persistedProperties.getProperty(key);
      if (lastIndex != null) {
        indexerNamespace.put(key, lastIndex);
      } else {
        indexerNamespace.put(key, DataImporter.DATE_TIME_FORMAT.get().format(EPOCH));
      }
    }
    resolver.addNamespace(DataConfig.IMPORTER_NS_SHORT, indexerNamespace);
    resolver.addNamespace(DataConfig.IMPORTER_NS, indexerNamespace);
    return resolver;
  } catch (Exception e) {
    wrapAndThrow(SEVERE, e);
    // unreachable statement
    return null;
  }
}
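/**
 * Drains {@code entityProcessor.nextModifiedParentRowKey()} for the given entity, adding each
 * parent row key to {@code parentKeyList}. The entity namespace registered by the caller is
 * removed from the resolver before returning.
 */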
private void getModifiedParentRows(
    VariableResolverImpl resolver,
    String entity,
    EntityProcessor entityProcessor,
    Set<Map<String, Object>> parentKeyList) {
  try {
    while (true) {
      Map<String, Object> parentRow = entityProcessor.nextModifiedParentRowKey();
      if (parentRow == null) break;

      parentKeyList.add(parentRow);
      importStatistics.rowsCount.incrementAndGet();
      // check for abort
      if (stop.get()) return;
    }
  } finally {
    resolver.removeNamespace(entity);
  }
}
/**
 * Collects unique keys of all Solr documents for which one or more source tables have been
 * changed since the last indexed time.
 *
 * <p>Note: In our definition, the unique key of a Solr document is the primary key of the top
 * level entity (unless skipped using rootEntity="false") in the Solr document in data-config.xml
 *
 * @return a set of keys for which Solr documents should be updated.
 */
@SuppressWarnings("unchecked")
public Set<Map<String, Object>> collectDelta(
    DataConfig.Entity entity,
    VariableResolverImpl resolver,
    Set<Map<String, Object>> deletedRows) {
  // someone called abort
  if (stop.get()) return new HashSet();

  EntityProcessor entityProcessor = getEntityProcessor(entity);
  ContextImpl context1 =
      new ContextImpl(entity, resolver, null, Context.FIND_DELTA, session, null, this);
  entityProcessor.init(context1);

  Set<Map<String, Object>> myModifiedPks = new HashSet<Map<String, Object>>();

  if (entity.entities != null) {
    for (DataConfig.Entity entity1 : entity.entities) {
      // this ensures that we start from the leaf nodes
      myModifiedPks.addAll(collectDelta(entity1, resolver, deletedRows));
      // someone called abort
      if (stop.get()) return new HashSet();
    }
  }

  // identifying the modified rows for this entity
  Map<String, Map<String, Object>> deltaSet = new HashMap<String, Map<String, Object>>();
  LOG.info("Running ModifiedRowKey() for Entity: " + entity.name);
  // get the modified rows in this entity
  while (true) {
    Map<String, Object> row = entityProcessor.nextModifiedRowKey();
    if (row == null) break;

    deltaSet.put(row.get(entity.getPk()).toString(), row);
    importStatistics.rowsCount.incrementAndGet();
    // check for abort
    if (stop.get()) return new HashSet();
  }

  // get the deleted rows for this entity
  Set<Map<String, Object>> deletedSet = new HashSet<Map<String, Object>>();
  while (true) {
    Map<String, Object> row = entityProcessor.nextDeletedRowKey();
    if (row == null) break;

    deletedSet.add(row);
    // Remove deleted rows from the delta rows
    String deletedRowPk = row.get(entity.getPk()).toString();
    if (deltaSet.containsKey(deletedRowPk)) {
      deltaSet.remove(deletedRowPk);
    }
    importStatistics.rowsCount.incrementAndGet();
    // check for abort
    if (stop.get()) return new HashSet();
  }

  LOG.info("Completed ModifiedRowKey for Entity: " + entity.name
      + " rows obtained : " + deltaSet.size());
  LOG.info("Completed DeletedRowKey for Entity: " + entity.name
      + " rows obtained : " + deletedSet.size());

  myModifiedPks.addAll(deltaSet.values());
  Set<Map<String, Object>> parentKeyList = new HashSet<Map<String, Object>>();
  // All that we have captured is useless (in a sub-entity) if no rows in the parent are modified
  // because of these; propagate the changes up the chain.
  if (entity.parentEntity != null) {
    // identifying parent rows changed because of modified rows
    for (Map<String, Object> row : myModifiedPks) {
      getModifiedParentRows(
          resolver.addNamespace(entity.name, row), entity.name, entityProcessor, parentKeyList);
      // check for abort
      if (stop.get()) return new HashSet();
    }
    // running the same for deleted rows
    for (Map<String, Object> row : deletedSet) {
      getModifiedParentRows(
          resolver.addNamespace(entity.name, row), entity.name, entityProcessor, parentKeyList);
      // check for abort
      if (stop.get()) return new HashSet();
    }
  }
  LOG.info("Completed parentDeltaQuery for Entity: " + entity.name);
  if (entity.isDocRoot) deletedRows.addAll(deletedSet);

  // Do not use entity.isDocRoot here because one of the descendant entities may set
  // rootEntity="true"
  return entity.parentEntity == null
      ? myModifiedPks
      : new HashSet<Map<String, Object>>(parentKeyList);
}
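/**
 * Builds Solr documents for the given entity by repeatedly asking its entity processor for rows,
 * recursing into child entities, and uploading each completed root document via the writer.
 * {@code pk} is non-null only during a delta import, where it identifies the changed root row;
 * when it is null the entity is processed in full-dump mode.
 */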
@SuppressWarnings("unchecked") private void buildDocument( VariableResolverImpl vr, DocWrapper doc, Map<String, Object> pk, DataConfig.Entity entity, boolean isRoot, ContextImpl parentCtx) { EntityProcessorWrapper entityProcessor = getEntityProcessor(entity); ContextImpl ctx = new ContextImpl( entity, vr, null, pk == null ? Context.FULL_DUMP : Context.DELTA_DUMP, session, parentCtx, this); entityProcessor.init(ctx); Context.CURRENT_CONTEXT.set(ctx); if (requestParameters.start > 0) { writer.log(SolrWriter.DISABLE_LOGGING, null, null); } if (verboseDebug) { writer.log(SolrWriter.START_ENTITY, entity.name, null); } int seenDocCount = 0; try { while (true) { if (stop.get()) return; if (importStatistics.docCount.get() > (requestParameters.start + requestParameters.rows)) break; try { seenDocCount++; if (seenDocCount > requestParameters.start) { writer.log(SolrWriter.ENABLE_LOGGING, null, null); } if (verboseDebug && entity.isDocRoot) { writer.log(SolrWriter.START_DOC, entity.name, null); } if (doc == null && entity.isDocRoot) { doc = new DocWrapper(); ctx.setDoc(doc); DataConfig.Entity e = entity; while (e.parentEntity != null) { addFields( e.parentEntity, doc, (Map<String, Object>) vr.resolve(e.parentEntity.name), vr); e = e.parentEntity; } } Map<String, Object> arow = entityProcessor.nextRow(); if (arow == null) { break; } // Support for start parameter in debug mode if (entity.isDocRoot) { if (seenDocCount <= requestParameters.start) continue; if (seenDocCount > requestParameters.start + requestParameters.rows) { LOG.info("Indexing stopped at docCount = " + importStatistics.docCount); break; } } if (verboseDebug) { writer.log(SolrWriter.ENTITY_OUT, entity.name, arow); } importStatistics.rowsCount.incrementAndGet(); if (doc != null) { handleSpecialCommands(arow, doc); addFields(entity, doc, arow, vr); } if (entity.entities != null) { vr.addNamespace(entity.name, arow); for (DataConfig.Entity child : entity.entities) { buildDocument(vr, doc, child.isDocRoot ? pk : null, child, false, ctx); } vr.removeNamespace(entity.name); } /*The child entities would have changed the CURRENT_CONTEXT. So when they are done, set it back to the old. * */ Context.CURRENT_CONTEXT.set(ctx); if (entity.isDocRoot) { if (stop.get()) return; if (!doc.isEmpty()) { boolean result = writer.upload(doc); doc = null; if (result) { importStatistics.docCount.incrementAndGet(); } else { importStatistics.failedDocCount.incrementAndGet(); } } } } catch (DataImportHandlerException e) { if (verboseDebug) { writer.log(SolrWriter.ENTITY_EXCEPTION, entity.name, e); } if (e.getErrCode() == DataImportHandlerException.SKIP_ROW) { continue; } if (isRoot) { if (e.getErrCode() == DataImportHandlerException.SKIP) { importStatistics.skipDocCount.getAndIncrement(); doc = null; } else { LOG.error("Exception while processing: " + entity.name + " document : " + doc, e); } if (e.getErrCode() == DataImportHandlerException.SEVERE) throw e; } else throw e; } catch (Throwable t) { if (verboseDebug) { writer.log(SolrWriter.ENTITY_EXCEPTION, entity.name, t); } throw new DataImportHandlerException(DataImportHandlerException.SEVERE, t); } finally { if (verboseDebug) { writer.log(SolrWriter.ROW_END, entity.name, null); if (entity.isDocRoot) writer.log(SolrWriter.END_DOC, null, null); Context.CURRENT_CONTEXT.remove(); } } } } finally { if (verboseDebug) { writer.log(SolrWriter.END_ENTITY, null, null); } entityProcessor.destroy(); } }