示例#1
0
 public VariableResolverImpl getVariableResolver() {
   try {
     VariableResolverImpl resolver = null;
     if (dataImporter != null && dataImporter.getCore() != null) {
       resolver =
           new VariableResolverImpl(
               dataImporter.getCore().getResourceLoader().getCoreProperties());
     } else resolver = new VariableResolverImpl();
     Map<String, Object> indexerNamespace = new HashMap<String, Object>();
     if (persistedProperties.getProperty(LAST_INDEX_TIME) != null) {
       indexerNamespace.put(LAST_INDEX_TIME, persistedProperties.getProperty(LAST_INDEX_TIME));
     } else {
       // set epoch
       indexerNamespace.put(LAST_INDEX_TIME, DataImporter.DATE_TIME_FORMAT.get().format(EPOCH));
     }
     indexerNamespace.put(INDEX_START_TIME, dataImporter.getIndexStartTime());
     indexerNamespace.put("request", requestParameters.requestParams);
     indexerNamespace.put("functions", functionsNamespace);
     for (DataConfig.Entity entity : dataImporter.getConfig().document.entities) {
       String key = entity.name + "." + SolrWriter.LAST_INDEX_KEY;
       String lastIndex = persistedProperties.getProperty(key);
       if (lastIndex != null) {
         indexerNamespace.put(key, lastIndex);
       } else {
         indexerNamespace.put(key, DataImporter.DATE_TIME_FORMAT.get().format(EPOCH));
       }
     }
     resolver.addNamespace(DataConfig.IMPORTER_NS_SHORT, indexerNamespace);
     resolver.addNamespace(DataConfig.IMPORTER_NS, indexerNamespace);
     return resolver;
   } catch (Exception e) {
     wrapAndThrow(SEVERE, e);
     // unreachable statement
     return null;
   }
 }
示例#2
0
  @SuppressWarnings("unchecked")
  private void doDelta() {
    addStatusMessage("Delta Dump started");
    VariableResolverImpl resolver = getVariableResolver();

    if (document.deleteQuery != null) {
      writer.deleteByQuery(document.deleteQuery);
    }

    addStatusMessage("Identifying Delta");
    LOG.info("Starting delta collection.");
    Set<Map<String, Object>> deletedKeys = new HashSet<Map<String, Object>>();
    Set<Map<String, Object>> allPks = collectDelta(root, resolver, deletedKeys);
    if (stop.get()) return;
    addStatusMessage("Deltas Obtained");
    addStatusMessage("Building documents");
    if (!deletedKeys.isEmpty()) {
      allPks.removeAll(deletedKeys);
      deleteAll(deletedKeys);
      // Make sure that documents are not re-created
    }
    deletedKeys = null;

    statusMessages.put("Total Changed Documents", allPks.size());
    VariableResolverImpl vri = getVariableResolver();
    Iterator<Map<String, Object>> pkIter = allPks.iterator();
    while (pkIter.hasNext()) {
      Map<String, Object> map = pkIter.next();
      vri.addNamespace(DataConfig.IMPORTER_NS_SHORT + ".delta", map);
      buildDocument(vri, null, map, root, true, null);
      pkIter.remove();
      // check for abort
      if (stop.get()) break;
    }

    if (!stop.get()) {
      LOG.info("Delta Import completed successfully");
    }
  }
示例#3
0
  /**
   * Collects unique keys of all Solr documents for whom one or more source tables have been changed
   * since the last indexed time.
   *
   * <p>Note: In our definition, unique key of Solr document is the primary key of the top level
   * entity (unless skipped using docRoot=false) in the Solr document in data-config.xml
   *
   * @return an iterator to the list of keys for which Solr documents should be updated.
   */
  @SuppressWarnings("unchecked")
  public Set<Map<String, Object>> collectDelta(
      DataConfig.Entity entity,
      VariableResolverImpl resolver,
      Set<Map<String, Object>> deletedRows) {
    // someone called abort
    if (stop.get()) return new HashSet();

    EntityProcessor entityProcessor = getEntityProcessor(entity);
    ContextImpl context1 =
        new ContextImpl(entity, resolver, null, Context.FIND_DELTA, session, null, this);
    entityProcessor.init(context1);

    Set<Map<String, Object>> myModifiedPks = new HashSet<Map<String, Object>>();

    if (entity.entities != null) {

      for (DataConfig.Entity entity1 : entity.entities) {
        // this ensures that we start from the leaf nodes
        myModifiedPks.addAll(collectDelta(entity1, resolver, deletedRows));
        // someone called abort
        if (stop.get()) return new HashSet();
      }
    }
    // identifying the modified rows for this entity

    Map<String, Map<String, Object>> deltaSet = new HashMap<String, Map<String, Object>>();
    LOG.info("Running ModifiedRowKey() for Entity: " + entity.name);
    // get the modified rows in this entity
    while (true) {
      Map<String, Object> row = entityProcessor.nextModifiedRowKey();

      if (row == null) break;

      deltaSet.put(row.get(entity.getPk()).toString(), row);
      importStatistics.rowsCount.incrementAndGet();
      // check for abort
      if (stop.get()) return new HashSet();
    }
    // get the deleted rows for this entity
    Set<Map<String, Object>> deletedSet = new HashSet<Map<String, Object>>();
    while (true) {
      Map<String, Object> row = entityProcessor.nextDeletedRowKey();
      if (row == null) break;

      deletedSet.add(row);

      // Remove deleted rows from the delta rows
      String deletedRowPk = row.get(entity.getPk()).toString();
      if (deltaSet.containsKey(deletedRowPk)) {
        deltaSet.remove(deletedRowPk);
      }

      importStatistics.rowsCount.incrementAndGet();
      // check for abort
      if (stop.get()) return new HashSet();
    }

    LOG.info(
        "Completed ModifiedRowKey for Entity: "
            + entity.name
            + " rows obtained : "
            + deltaSet.size());
    LOG.info(
        "Completed DeletedRowKey for Entity: "
            + entity.name
            + " rows obtained : "
            + deletedSet.size());

    myModifiedPks.addAll(deltaSet.values());
    Set<Map<String, Object>> parentKeyList = new HashSet<Map<String, Object>>();
    // all that we have captured is useless (in a sub-entity) if no rows in the parent is modified
    // because of these
    // propogate up the changes in the chain
    if (entity.parentEntity != null) {
      // identifying deleted rows with deltas

      for (Map<String, Object> row : myModifiedPks) {
        getModifiedParentRows(
            resolver.addNamespace(entity.name, row), entity.name, entityProcessor, parentKeyList);
        // check for abort
        if (stop.get()) return new HashSet();
      }
      // running the same for deletedrows
      for (Map<String, Object> row : deletedSet) {
        getModifiedParentRows(
            resolver.addNamespace(entity.name, row), entity.name, entityProcessor, parentKeyList);
        // check for abort
        if (stop.get()) return new HashSet();
      }
    }
    LOG.info("Completed parentDeltaQuery for Entity: " + entity.name);
    if (entity.isDocRoot) deletedRows.addAll(deletedSet);

    // Do not use entity.isDocRoot here because one of descendant entities may set rootEntity="true"
    return entity.parentEntity == null
        ? myModifiedPks
        : new HashSet<Map<String, Object>>(parentKeyList);
  }
示例#4
0
  @SuppressWarnings("unchecked")
  private void buildDocument(
      VariableResolverImpl vr,
      DocWrapper doc,
      Map<String, Object> pk,
      DataConfig.Entity entity,
      boolean isRoot,
      ContextImpl parentCtx) {

    EntityProcessorWrapper entityProcessor = getEntityProcessor(entity);

    ContextImpl ctx =
        new ContextImpl(
            entity,
            vr,
            null,
            pk == null ? Context.FULL_DUMP : Context.DELTA_DUMP,
            session,
            parentCtx,
            this);
    entityProcessor.init(ctx);
    Context.CURRENT_CONTEXT.set(ctx);

    if (requestParameters.start > 0) {
      writer.log(SolrWriter.DISABLE_LOGGING, null, null);
    }

    if (verboseDebug) {
      writer.log(SolrWriter.START_ENTITY, entity.name, null);
    }

    int seenDocCount = 0;

    try {
      while (true) {
        if (stop.get()) return;
        if (importStatistics.docCount.get() > (requestParameters.start + requestParameters.rows))
          break;
        try {
          seenDocCount++;

          if (seenDocCount > requestParameters.start) {
            writer.log(SolrWriter.ENABLE_LOGGING, null, null);
          }

          if (verboseDebug && entity.isDocRoot) {
            writer.log(SolrWriter.START_DOC, entity.name, null);
          }
          if (doc == null && entity.isDocRoot) {
            doc = new DocWrapper();
            ctx.setDoc(doc);
            DataConfig.Entity e = entity;
            while (e.parentEntity != null) {
              addFields(
                  e.parentEntity, doc, (Map<String, Object>) vr.resolve(e.parentEntity.name), vr);
              e = e.parentEntity;
            }
          }

          Map<String, Object> arow = entityProcessor.nextRow();
          if (arow == null) {
            break;
          }

          // Support for start parameter in debug mode
          if (entity.isDocRoot) {
            if (seenDocCount <= requestParameters.start) continue;
            if (seenDocCount > requestParameters.start + requestParameters.rows) {
              LOG.info("Indexing stopped at docCount = " + importStatistics.docCount);
              break;
            }
          }

          if (verboseDebug) {
            writer.log(SolrWriter.ENTITY_OUT, entity.name, arow);
          }
          importStatistics.rowsCount.incrementAndGet();
          if (doc != null) {
            handleSpecialCommands(arow, doc);
            addFields(entity, doc, arow, vr);
          }
          if (entity.entities != null) {
            vr.addNamespace(entity.name, arow);
            for (DataConfig.Entity child : entity.entities) {
              buildDocument(vr, doc, child.isDocRoot ? pk : null, child, false, ctx);
            }
            vr.removeNamespace(entity.name);
          }
          /*The child entities would have changed the CURRENT_CONTEXT. So when they are done, set it back to the old.
           *
           */
          Context.CURRENT_CONTEXT.set(ctx);

          if (entity.isDocRoot) {
            if (stop.get()) return;
            if (!doc.isEmpty()) {
              boolean result = writer.upload(doc);
              doc = null;
              if (result) {
                importStatistics.docCount.incrementAndGet();
              } else {
                importStatistics.failedDocCount.incrementAndGet();
              }
            }
          }

        } catch (DataImportHandlerException e) {
          if (verboseDebug) {
            writer.log(SolrWriter.ENTITY_EXCEPTION, entity.name, e);
          }
          if (e.getErrCode() == DataImportHandlerException.SKIP_ROW) {
            continue;
          }
          if (isRoot) {
            if (e.getErrCode() == DataImportHandlerException.SKIP) {
              importStatistics.skipDocCount.getAndIncrement();
              doc = null;
            } else {
              LOG.error("Exception while processing: " + entity.name + " document : " + doc, e);
            }
            if (e.getErrCode() == DataImportHandlerException.SEVERE) throw e;
          } else throw e;
        } catch (Throwable t) {
          if (verboseDebug) {
            writer.log(SolrWriter.ENTITY_EXCEPTION, entity.name, t);
          }
          throw new DataImportHandlerException(DataImportHandlerException.SEVERE, t);
        } finally {
          if (verboseDebug) {
            writer.log(SolrWriter.ROW_END, entity.name, null);
            if (entity.isDocRoot) writer.log(SolrWriter.END_DOC, null, null);
            Context.CURRENT_CONTEXT.remove();
          }
        }
      }
    } finally {
      if (verboseDebug) {
        writer.log(SolrWriter.END_ENTITY, null, null);
      }
      entityProcessor.destroy();
    }
  }