private void deleteAll(Set<Map<String, Object>> deletedKeys) {
  LOG.info("Deleting stale documents");
  Iterator<Map<String, Object>> iter = deletedKeys.iterator();
  while (iter.hasNext()) {
    Map<String, Object> map = iter.next();
    String keyName = root.isDocRoot ? root.getPk() : root.getSchemaPk();
    Object key = map.get(keyName);
    if (key == null) {
      LOG.warn("no key was available for deleted pk query. keyName = " + keyName);
      continue;
    }
    writer.deleteDoc(key);
    importStatistics.deletedDocCount.incrementAndGet();
    iter.remove();
  }
}
/**
 * Collects the unique keys of all Solr documents for which one or more source tables have
 * changed since the last index time.
 *
 * <p>Note: In our definition, the unique key of a Solr document is the primary key of the
 * top-level entity (unless skipped using docRoot=false) in the Solr document in data-config.xml
 *
 * @return the set of keys for which Solr documents should be updated.
 */
@SuppressWarnings("unchecked")
public Set<Map<String, Object>> collectDelta(
    DataConfig.Entity entity,
    VariableResolverImpl resolver,
    Set<Map<String, Object>> deletedRows) {
  // someone called abort
  if (stop.get()) return new HashSet();

  EntityProcessor entityProcessor = getEntityProcessor(entity);
  ContextImpl context1 =
      new ContextImpl(entity, resolver, null, Context.FIND_DELTA, session, null, this);
  entityProcessor.init(context1);

  Set<Map<String, Object>> myModifiedPks = new HashSet<Map<String, Object>>();
  if (entity.entities != null) {
    for (DataConfig.Entity entity1 : entity.entities) {
      // this ensures that we start from the leaf nodes
      myModifiedPks.addAll(collectDelta(entity1, resolver, deletedRows));
      // someone called abort
      if (stop.get()) return new HashSet();
    }
  }

  // identify the modified rows for this entity
  Map<String, Map<String, Object>> deltaSet = new HashMap<String, Map<String, Object>>();
  LOG.info("Running ModifiedRowKey() for Entity: " + entity.name);
  // get the modified rows in this entity
  while (true) {
    Map<String, Object> row = entityProcessor.nextModifiedRowKey();
    if (row == null) break;
    deltaSet.put(row.get(entity.getPk()).toString(), row);
    importStatistics.rowsCount.incrementAndGet();
    // check for abort
    if (stop.get()) return new HashSet();
  }

  // get the deleted rows for this entity
  Set<Map<String, Object>> deletedSet = new HashSet<Map<String, Object>>();
  while (true) {
    Map<String, Object> row = entityProcessor.nextDeletedRowKey();
    if (row == null) break;
    deletedSet.add(row);
    // remove deleted rows from the delta rows
    String deletedRowPk = row.get(entity.getPk()).toString();
    if (deltaSet.containsKey(deletedRowPk)) {
      deltaSet.remove(deletedRowPk);
    }
    importStatistics.rowsCount.incrementAndGet();
    // check for abort
    if (stop.get()) return new HashSet();
  }

  LOG.info(
      "Completed ModifiedRowKey for Entity: " + entity.name + " rows obtained : " + deltaSet.size());
  LOG.info(
      "Completed DeletedRowKey for Entity: " + entity.name + " rows obtained : " + deletedSet.size());

  myModifiedPks.addAll(deltaSet.values());
  Set<Map<String, Object>> parentKeyList = new HashSet<Map<String, Object>>();
  // everything captured in a sub-entity is useless unless some rows in the parent are modified
  // because of these, so propagate the changes up the chain
  if (entity.parentEntity != null) {
    // identify parent rows affected by the modified rows
    for (Map<String, Object> row : myModifiedPks) {
      getModifiedParentRows(
          resolver.addNamespace(entity.name, row), entity.name, entityProcessor, parentKeyList);
      // check for abort
      if (stop.get()) return new HashSet();
    }
    // do the same for the deleted rows
    for (Map<String, Object> row : deletedSet) {
      getModifiedParentRows(
          resolver.addNamespace(entity.name, row), entity.name, entityProcessor, parentKeyList);
      // check for abort
      if (stop.get()) return new HashSet();
    }
  }

  LOG.info("Completed parentDeltaQuery for Entity: " + entity.name);
  if (entity.isDocRoot) deletedRows.addAll(deletedSet);

  // Do not use entity.isDocRoot here because one of the descendant entities may set rootEntity="true"
  return entity.parentEntity == null
      ? myModifiedPks
      : new HashSet<Map<String, Object>>(parentKeyList);
}
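// A minimal sketch of how a delta-import pass could combine the two methods above, assuming the
// root entity and a VariableResolverImpl are in scope at the call site (the driver shape below is
// an assumption, not the full delta-import flow):
//
//   Set<Map<String, Object>> deletedKeys = new HashSet<Map<String, Object>>();
//   Set<Map<String, Object>> modifiedPks = collectDelta(root, resolver, deletedKeys);
//   deleteAll(deletedKeys);                      // purge stale documents first
//   for (Map<String, Object> pk : modifiedPks) {
//     // build and re-index the Solr document identified by this key
//   }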