Example #1
0
 /**
  * Deletes the Solr documents identified by the given keys and removes each
  * successfully processed key from the set.
  *
  * <p>Rows whose key column resolves to {@code null} are logged with a warning
  * and intentionally left in the set untouched.
  *
  * @param deletedKeys rows (column-name to value maps) whose primary-key column
  *     identifies a Solr document to delete
  */
 private void deleteAll(Set<Map<String, Object>> deletedKeys) {
   LOG.info("Deleting stale documents ");
   // The key column is the same for every row; resolve it once instead of per iteration.
   String keyName = root.isDocRoot ? root.getPk() : root.getSchemaPk();
   Iterator<Map<String, Object>> iter = deletedKeys.iterator();
   while (iter.hasNext()) {
     Map<String, Object> map = iter.next();
     Object key = map.get(keyName);
     if (key == null) {
       // Message typo fixed ("deleteted" -> "deleted").
       LOG.warn("no key was available for deleted pk query. keyName = " + keyName);
       continue;
     }
     writer.deleteDoc(key);
     importStatistics.deletedDocCount.incrementAndGet();
     iter.remove();
   }
 }
Example #2
0
 /**
  * Returns the wrapped {@link EntityProcessor} for the given entity, creating
  * and caching it on first use.
  *
  * <p>When the entity declares no processor class ({@code entity.proc} is
  * {@code null}), a {@link SqlEntityProcessor} is used as the default.
  *
  * @param entity the data-config entity whose processor is required
  * @return the cached processor wrapper for {@code entity}
  */
 private EntityProcessorWrapper getEntityProcessor(DataConfig.Entity entity) {
   if (entity.processor != null) {
     return entity.processor;
   }
   EntityProcessor delegate = null;
   if (entity.proc != null) {
     try {
       delegate = (EntityProcessor) loadClass(entity.proc, dataImporter.getCore()).newInstance();
     } catch (Exception e) {
       wrapAndThrow(
           SEVERE, e, "Unable to load EntityProcessor implementation for entity:" + entity.name);
     }
   } else {
     delegate = new SqlEntityProcessor();
   }
   entity.processor = new EntityProcessorWrapper(delegate, this);
   return entity.processor;
 }
Example #3
0
  /**
   * Collects unique keys of all Solr documents for whom one or more source tables have been changed
   * since the last indexed time.
   *
   * <p>Note: In our definition, unique key of Solr document is the primary key of the top level
   * entity (unless skipped using docRoot=false) in the Solr document in data-config.xml
   *
   * @param entity the entity (and, recursively, its child entities) to collect deltas for
   * @param resolver variable resolver used when evaluating the delta queries
   * @param deletedRows out-parameter accumulating deleted rows of doc-root entities
   * @return the set of keys for which Solr documents should be updated; an empty set if an abort
   *     was requested
   */
  public Set<Map<String, Object>> collectDelta(
      DataConfig.Entity entity,
      VariableResolverImpl resolver,
      Set<Map<String, Object>> deletedRows) {
    // someone called abort
    if (stop.get()) return new HashSet<Map<String, Object>>();

    EntityProcessor entityProcessor = getEntityProcessor(entity);
    ContextImpl context1 =
        new ContextImpl(entity, resolver, null, Context.FIND_DELTA, session, null, this);
    entityProcessor.init(context1);

    Set<Map<String, Object>> myModifiedPks = new HashSet<Map<String, Object>>();

    if (entity.entities != null) {

      for (DataConfig.Entity entity1 : entity.entities) {
        // this ensures that we start from the leaf nodes
        myModifiedPks.addAll(collectDelta(entity1, resolver, deletedRows));
        // someone called abort
        if (stop.get()) return new HashSet<Map<String, Object>>();
      }
    }
    // identifying the modified rows for this entity, keyed by primary-key string

    Map<String, Map<String, Object>> deltaSet = new HashMap<String, Map<String, Object>>();
    LOG.info("Running ModifiedRowKey() for Entity: " + entity.name);
    // get the modified rows in this entity
    while (true) {
      Map<String, Object> row = entityProcessor.nextModifiedRowKey();

      if (row == null) break;

      deltaSet.put(row.get(entity.getPk()).toString(), row);
      importStatistics.rowsCount.incrementAndGet();
      // check for abort
      if (stop.get()) return new HashSet<Map<String, Object>>();
    }
    // get the deleted rows for this entity
    Set<Map<String, Object>> deletedSet = new HashSet<Map<String, Object>>();
    while (true) {
      Map<String, Object> row = entityProcessor.nextDeletedRowKey();
      if (row == null) break;

      deletedSet.add(row);

      // A row that was both modified and deleted must only be treated as deleted.
      // Map.remove is a no-op for absent keys, so no containsKey pre-check is needed.
      deltaSet.remove(row.get(entity.getPk()).toString());

      importStatistics.rowsCount.incrementAndGet();
      // check for abort
      if (stop.get()) return new HashSet<Map<String, Object>>();
    }

    LOG.info(
        "Completed ModifiedRowKey for Entity: "
            + entity.name
            + " rows obtained : "
            + deltaSet.size());
    LOG.info(
        "Completed DeletedRowKey for Entity: "
            + entity.name
            + " rows obtained : "
            + deletedSet.size());

    myModifiedPks.addAll(deltaSet.values());
    Set<Map<String, Object>> parentKeyList = new HashSet<Map<String, Object>>();
    // all that we have captured is useless (in a sub-entity) if no rows in the parent is modified
    // because of these
    // propogate up the changes in the chain
    if (entity.parentEntity != null) {
      // identifying deleted rows with deltas

      for (Map<String, Object> row : myModifiedPks) {
        getModifiedParentRows(
            resolver.addNamespace(entity.name, row), entity.name, entityProcessor, parentKeyList);
        // check for abort
        if (stop.get()) return new HashSet<Map<String, Object>>();
      }
      // running the same for deletedrows
      for (Map<String, Object> row : deletedSet) {
        getModifiedParentRows(
            resolver.addNamespace(entity.name, row), entity.name, entityProcessor, parentKeyList);
        // check for abort
        if (stop.get()) return new HashSet<Map<String, Object>>();
      }
    }
    LOG.info("Completed parentDeltaQuery for Entity: " + entity.name);
    if (entity.isDocRoot) deletedRows.addAll(deletedSet);

    // Do not use entity.isDocRoot here because one of descendant entities may set rootEntity="true"
    return entity.parentEntity == null
        ? myModifiedPks
        : new HashSet<Map<String, Object>>(parentKeyList);
  }