Example #1
  /** Test method for {@link EvaluatorBag#getDateFormatEvaluator()}. */
  @Test
  public void testGetDateFormatEvaluator() {
    Evaluator dateFormatEval = EvaluatorBag.getDateFormatEvaluator();
    ContextImpl context =
        new ContextImpl(null, resolver, null, Context.FULL_DUMP, Collections.EMPTY_MAP, null, null);
    Context.CURRENT_CONTEXT.set(context);
    try {
      Calendar calendar = new GregorianCalendar();
      calendar.add(Calendar.DAY_OF_YEAR, -2);

      assertEquals(
          new SimpleDateFormat("yyyy-MM-dd HH:mm").format(calendar.getTime()),
          dateFormatEval.evaluate("'NOW-2DAYS','yyyy-MM-dd HH:mm'", Context.CURRENT_CONTEXT.get()));

      calendar = new GregorianCalendar();
      Date date = calendar.getTime();

      Map<String, Object> map = new HashMap<String, Object>();
      map.put("key", date);
      resolver.addNamespace("A", map);

      assertEquals(
          new SimpleDateFormat("yyyy-MM-dd HH:mm").format(date),
          dateFormatEval.evaluate("A.key, 'yyyy-MM-dd HH:mm'", Context.CURRENT_CONTEXT.get()));
    } finally {
      Context.CURRENT_CONTEXT.remove();
    }
  }
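
The same evaluator is normally reached through token replacement rather than a direct evaluate() call. A minimal sketch, assuming the evaluator is registered in the functions namespace under the name formatDate (the registered name may differ across versions):

    // Hypothetical usage inside the try block above; "formatDate" is an assumed
    // registration name for the date-format evaluator.
    resolver.addNamespace(
        "dataimporter.functions",
        EvaluatorBag.getFunctionsNamespace(Collections.EMPTY_LIST, null));
    String formatted =
        resolver.replaceTokens(
            "${dataimporter.functions.formatDate('NOW-2DAYS', 'yyyy-MM-dd HH:mm')}");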
Example #2
  @Test
  @SuppressWarnings("unchecked")
  public void testTransformRow_MultipleNumbers() throws Exception {
    List fields = new ArrayList();
    fields.add(createMap(DataImporter.COLUMN, "inputs"));
    fields.add(
        createMap(
            DataImporter.COLUMN,
            "outputs",
            RegexTransformer.SRC_COL_NAME,
            "inputs",
            NumberFormatTransformer.FORMAT_STYLE,
            NumberFormatTransformer.NUMBER));

    List inputs = new ArrayList();
    inputs.add("123" + GROUPING_SEP + "567");
    inputs.add("245" + GROUPING_SEP + "678");
    Map row = createMap("inputs", inputs);

    VariableResolverImpl resolver = new VariableResolverImpl();
    resolver.addNamespace("e", row);

    Context context = getContext(null, resolver, null, Context.FULL_DUMP, fields, null);
    new NumberFormatTransformer().transformRow(row, context);

    List output = new ArrayList();
    output.add(new Long(123567));
    output.add(new Long(245678));
    Map outputRow = createMap("inputs", inputs, "outputs", output);

    assertEquals(outputRow, row);
  }
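
For reference, the grouping-separator parsing that NumberFormatTransformer presumably delegates to is plain java.text.NumberFormat. A self-contained sketch; Locale.US (where the grouping separator is ',') is assumed here, which is also why the test above builds its inputs from GROUPING_SEP instead of hard-coding a separator:

  import java.text.NumberFormat;
  import java.text.ParseException;
  import java.util.Locale;

  public class NumberParseSketch {
    public static void main(String[] args) throws ParseException {
      // Locale-sensitive parse: with Locale.US the grouping separator is ','.
      NumberFormat nf = NumberFormat.getNumberInstance(Locale.US);
      System.out.println(nf.parse("123,567").longValue()); // 123567
    }
  }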
Example #3
 @Test
 public void testEscapeSolrQueryFunction() {
   final VariableResolverImpl resolver = new VariableResolverImpl();
   ContextImpl context =
       new ContextImpl(null, resolver, null, Context.FULL_DUMP, Collections.EMPTY_MAP, null, null);
   Context.CURRENT_CONTEXT.set(context);
   try {
     Map m = new HashMap();
     m.put("query", "c:t");
     resolver.addNamespace(
         "dataimporter.functions",
         EvaluatorBag.getFunctionsNamespace(Collections.EMPTY_LIST, null));
     resolver.addNamespace("e", m);
     String s = resolver.replaceTokens("${dataimporter.functions.escapeQueryChars(e.query)}");
     org.junit.Assert.assertEquals("c\\:t", s);
   } finally {
     Context.CURRENT_CONTEXT.remove();
   }
 }
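
The escaping asserted above matches SolrJ's ClientUtils.escapeQueryChars, which this function most likely delegates to. A standalone sketch:

  import org.apache.solr.client.solrj.util.ClientUtils;

  public class EscapeSketch {
    public static void main(String[] args) {
      // Backslash-escapes Lucene query syntax characters such as ':'.
      System.out.println(ClientUtils.escapeQueryChars("c:t")); // c\:t
    }
  }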
Example #4
 @Test
 public void parseParams() {
   Map m = new HashMap();
   m.put("b", "B");
   VariableResolverImpl vr = new VariableResolverImpl();
   vr.addNamespace("a", m);
   List l = EvaluatorBag.parseParams(" 1 , a.b, 'hello!', 'ds,o,u\'za',", vr);
   Assert.assertEquals(new Double(1), l.get(0));
   Assert.assertEquals("B", ((EvaluatorBag.VariableWrapper) l.get(1)).resolve());
   Assert.assertEquals("hello!", l.get(2));
   Assert.assertEquals("ds,o,u'za", l.get(3));
 }
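
One detail worth noting: variable parameters come back as VariableWrapper instances rather than resolved values, which (judging by the resolve() call above) defers the lookup until call time. A hedged sketch of that assumption:

  // Assumption: resolution is deferred, so a namespace added after parsing
  // is still visible to resolve().
  VariableResolverImpl vr2 = new VariableResolverImpl();
  List parsed = EvaluatorBag.parseParams("a.b", vr2);
  Map late = new HashMap();
  late.put("b", "late");
  vr2.addNamespace("a", late); // added only after parseParams ran
  Object value = ((EvaluatorBag.VariableWrapper) parsed.get(0)).resolve(); // "late"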
Example #5
 public VariableResolverImpl getVariableResolver() {
   try {
     VariableResolverImpl resolver = null;
     if (dataImporter != null && dataImporter.getCore() != null) {
       resolver =
           new VariableResolverImpl(
               dataImporter.getCore().getResourceLoader().getCoreProperties());
      } else {
        resolver = new VariableResolverImpl();
      }
     Map<String, Object> indexerNamespace = new HashMap<String, Object>();
     if (persistedProperties.getProperty(LAST_INDEX_TIME) != null) {
       indexerNamespace.put(LAST_INDEX_TIME, persistedProperties.getProperty(LAST_INDEX_TIME));
     } else {
       // set epoch
       indexerNamespace.put(LAST_INDEX_TIME, DataImporter.DATE_TIME_FORMAT.get().format(EPOCH));
     }
     indexerNamespace.put(INDEX_START_TIME, dataImporter.getIndexStartTime());
     indexerNamespace.put("request", requestParameters.requestParams);
     indexerNamespace.put("functions", functionsNamespace);
     for (DataConfig.Entity entity : dataImporter.getConfig().document.entities) {
       String key = entity.name + "." + SolrWriter.LAST_INDEX_KEY;
       String lastIndex = persistedProperties.getProperty(key);
       if (lastIndex != null) {
         indexerNamespace.put(key, lastIndex);
       } else {
         indexerNamespace.put(key, DataImporter.DATE_TIME_FORMAT.get().format(EPOCH));
       }
     }
     resolver.addNamespace(DataConfig.IMPORTER_NS_SHORT, indexerNamespace);
     resolver.addNamespace(DataConfig.IMPORTER_NS, indexerNamespace);
     return resolver;
   } catch (Exception e) {
     wrapAndThrow(SEVERE, e);
     // unreachable statement
     return null;
   }
 }
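
A minimal usage sketch of the resolver this method returns. The literal prefixes "dataimporter" and "dih" are assumptions about IMPORTER_NS and IMPORTER_NS_SHORT, as is "last_index_time" for LAST_INDEX_TIME:

  VariableResolverImpl vr = getVariableResolver();
  // Both registrations share one map, so either prefix resolves the same value.
  Object last = vr.resolve("dataimporter.last_index_time"); // assumed IMPORTER_NS
  String sql =
      vr.replaceTokens(
          "SELECT id FROM item WHERE updated_at > '${dih.last_index_time}'"); // assumed IMPORTER_NS_SHORT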
Example #6
  private void runTests(Map<String, String> tests, Evaluator evaluator) {
    ContextImpl ctx =
        new ContextImpl(null, resolver, null, Context.FULL_DUMP, Collections.EMPTY_MAP, null, null);
    Context.CURRENT_CONTEXT.set(ctx);
    try {
      for (Map.Entry<String, String> entry : tests.entrySet()) {
        Map<String, Object> values = new HashMap<String, Object>();
        values.put("key", entry.getKey());
        resolver.addNamespace("A", values);

        String expected = entry.getValue();
        String actual = evaluator.evaluate("A.key", ctx);
        assertEquals(expected, actual);
      }
    } finally {
      Context.CURRENT_CONTEXT.remove();
    }
  }
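
The harness would be driven along these lines; getUrlEvaluator() and the expected "a+b" encoding are assumptions about which EvaluatorBag factory is being exercised and what it produces:

  Map<String, String> tests = new HashMap<String, String>();
  tests.put("a b", "a+b"); // assumed URL-encoded output
  runTests(tests, EvaluatorBag.getUrlEvaluator()); // assumed factory name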
Example #7
  @SuppressWarnings("unchecked")
  private void doDelta() {
    addStatusMessage("Delta Dump started");
    VariableResolverImpl resolver = getVariableResolver();

    if (document.deleteQuery != null) {
      writer.deleteByQuery(document.deleteQuery);
    }

    addStatusMessage("Identifying Delta");
    LOG.info("Starting delta collection.");
    Set<Map<String, Object>> deletedKeys = new HashSet<Map<String, Object>>();
    Set<Map<String, Object>> allPks = collectDelta(root, resolver, deletedKeys);
    if (stop.get()) return;
    addStatusMessage("Deltas Obtained");
    addStatusMessage("Building documents");
    if (!deletedKeys.isEmpty()) {
      allPks.removeAll(deletedKeys);
      deleteAll(deletedKeys);
      // Make sure that documents are not re-created
    }
    deletedKeys = null;

    statusMessages.put("Total Changed Documents", allPks.size());
    VariableResolverImpl vri = getVariableResolver();
    Iterator<Map<String, Object>> pkIter = allPks.iterator();
    while (pkIter.hasNext()) {
      Map<String, Object> map = pkIter.next();
      vri.addNamespace(DataConfig.IMPORTER_NS_SHORT + ".delta", map);
      buildDocument(vri, null, map, root, true, null);
      pkIter.remove();
      // check for abort
      if (stop.get()) break;
    }

    if (!stop.get()) {
      LOG.info("Delta Import completed successfully");
    }
  }
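
Each loop iteration publishes the current primary-key row under the short importer namespace plus ".delta", which is what lets a deltaImportQuery reference the changed key. A sketch; the column name "id" and the SQL are illustrative only, and "dih" is the assumed value of IMPORTER_NS_SHORT:

  Map<String, Object> pkRow = new HashMap<String, Object>();
  pkRow.put("id", 42);
  vri.addNamespace(DataConfig.IMPORTER_NS_SHORT + ".delta", pkRow);
  String sql = vri.replaceTokens("SELECT * FROM item WHERE id = '${dih.delta.id}'");
  // sql -> SELECT * FROM item WHERE id = '42'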
Example #8
  /**
   * Collects the unique keys of all Solr documents for which one or more source tables have
   * changed since the last index time.
   *
   * <p>Note: by our definition, the unique key of a Solr document is the primary key of the
   * top-level entity (unless skipped using rootEntity="false") in data-config.xml.
   *
   * @return the set of keys for which Solr documents should be updated.
   */
  @SuppressWarnings("unchecked")
  public Set<Map<String, Object>> collectDelta(
      DataConfig.Entity entity,
      VariableResolverImpl resolver,
      Set<Map<String, Object>> deletedRows) {
    // someone called abort
    if (stop.get()) return new HashSet();

    EntityProcessor entityProcessor = getEntityProcessor(entity);
    ContextImpl context1 =
        new ContextImpl(entity, resolver, null, Context.FIND_DELTA, session, null, this);
    entityProcessor.init(context1);

    Set<Map<String, Object>> myModifiedPks = new HashSet<Map<String, Object>>();

    if (entity.entities != null) {

      for (DataConfig.Entity entity1 : entity.entities) {
        // this ensures that we start from the leaf nodes
        myModifiedPks.addAll(collectDelta(entity1, resolver, deletedRows));
        // someone called abort
        if (stop.get()) return new HashSet();
      }
    }
    // identifying the modified rows for this entity

    Map<String, Map<String, Object>> deltaSet = new HashMap<String, Map<String, Object>>();
    LOG.info("Running ModifiedRowKey() for Entity: " + entity.name);
    // get the modified rows in this entity
    while (true) {
      Map<String, Object> row = entityProcessor.nextModifiedRowKey();

      if (row == null) break;

      deltaSet.put(row.get(entity.getPk()).toString(), row);
      importStatistics.rowsCount.incrementAndGet();
      // check for abort
      if (stop.get()) return new HashSet();
    }
    // get the deleted rows for this entity
    Set<Map<String, Object>> deletedSet = new HashSet<Map<String, Object>>();
    while (true) {
      Map<String, Object> row = entityProcessor.nextDeletedRowKey();
      if (row == null) break;

      deletedSet.add(row);

      // Remove deleted rows from the delta rows
      String deletedRowPk = row.get(entity.getPk()).toString();
      if (deltaSet.containsKey(deletedRowPk)) {
        deltaSet.remove(deletedRowPk);
      }

      importStatistics.rowsCount.incrementAndGet();
      // check for abort
      if (stop.get()) return new HashSet();
    }

    LOG.info(
        "Completed ModifiedRowKey for Entity: "
            + entity.name
            + " rows obtained : "
            + deltaSet.size());
    LOG.info(
        "Completed DeletedRowKey for Entity: "
            + entity.name
            + " rows obtained : "
            + deletedSet.size());

    myModifiedPks.addAll(deltaSet.values());
    Set<Map<String, Object>> parentKeyList = new HashSet<Map<String, Object>>();
    // Everything captured for a sub-entity is useless unless it modified rows in its
    // parent; propagate the changes up the chain.
    if (entity.parentEntity != null) {
      // find the parent rows affected by this entity's modified rows

      for (Map<String, Object> row : myModifiedPks) {
        getModifiedParentRows(
            resolver.addNamespace(entity.name, row), entity.name, entityProcessor, parentKeyList);
        // check for abort
        if (stop.get()) return new HashSet();
      }
      // do the same for the deleted rows
      for (Map<String, Object> row : deletedSet) {
        getModifiedParentRows(
            resolver.addNamespace(entity.name, row), entity.name, entityProcessor, parentKeyList);
        // check for abort
        if (stop.get()) return new HashSet();
      }
    }
    LOG.info("Completed parentDeltaQuery for Entity: " + entity.name);
    if (entity.isDocRoot) deletedRows.addAll(deletedSet);

    // Do not use entity.isDocRoot here because a descendant entity may set rootEntity="true"
    return entity.parentEntity == null
        ? myModifiedPks
        : new HashSet<Map<String, Object>>(parentKeyList);
  }
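
The delta/deleted reconciliation in the middle of this method is worth isolating: a key that shows up as both modified and deleted is dropped from the modified set, since the delete supersedes re-indexing. A self-contained sketch:

  import java.util.*;

  public class DeltaReconcileSketch {
    public static void main(String[] args) {
      Map<String, String> deltaSet = new HashMap<String, String>();
      deltaSet.put("1", "row1");
      deltaSet.put("2", "row2");
      Set<String> deletedPks = new HashSet<String>(Arrays.asList("2"));
      for (String pk : deletedPks) {
        deltaSet.remove(pk); // the delete wins over the re-index
      }
      System.out.println(deltaSet.keySet()); // [1]
    }
  }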
Example #9
  @SuppressWarnings("unchecked")
  private void buildDocument(
      VariableResolverImpl vr,
      DocWrapper doc,
      Map<String, Object> pk,
      DataConfig.Entity entity,
      boolean isRoot,
      ContextImpl parentCtx) {

    EntityProcessorWrapper entityProcessor = getEntityProcessor(entity);

    ContextImpl ctx =
        new ContextImpl(
            entity,
            vr,
            null,
            pk == null ? Context.FULL_DUMP : Context.DELTA_DUMP,
            session,
            parentCtx,
            this);
    entityProcessor.init(ctx);
    Context.CURRENT_CONTEXT.set(ctx);

    if (requestParameters.start > 0) {
      writer.log(SolrWriter.DISABLE_LOGGING, null, null);
    }

    if (verboseDebug) {
      writer.log(SolrWriter.START_ENTITY, entity.name, null);
    }

    int seenDocCount = 0;

    try {
      while (true) {
        if (stop.get()) return;
        if (importStatistics.docCount.get() > (requestParameters.start + requestParameters.rows))
          break;
        try {
          seenDocCount++;

          if (seenDocCount > requestParameters.start) {
            writer.log(SolrWriter.ENABLE_LOGGING, null, null);
          }

          if (verboseDebug && entity.isDocRoot) {
            writer.log(SolrWriter.START_DOC, entity.name, null);
          }
          if (doc == null && entity.isDocRoot) {
            doc = new DocWrapper();
            ctx.setDoc(doc);
            DataConfig.Entity e = entity;
            while (e.parentEntity != null) {
              addFields(
                  e.parentEntity, doc, (Map<String, Object>) vr.resolve(e.parentEntity.name), vr);
              e = e.parentEntity;
            }
          }

          Map<String, Object> arow = entityProcessor.nextRow();
          if (arow == null) {
            break;
          }

          // Support for start parameter in debug mode
          if (entity.isDocRoot) {
            if (seenDocCount <= requestParameters.start) continue;
            if (seenDocCount > requestParameters.start + requestParameters.rows) {
              LOG.info("Indexing stopped at docCount = " + importStatistics.docCount);
              break;
            }
          }

          if (verboseDebug) {
            writer.log(SolrWriter.ENTITY_OUT, entity.name, arow);
          }
          importStatistics.rowsCount.incrementAndGet();
          if (doc != null) {
            handleSpecialCommands(arow, doc);
            addFields(entity, doc, arow, vr);
          }
          if (entity.entities != null) {
            vr.addNamespace(entity.name, arow);
            for (DataConfig.Entity child : entity.entities) {
              buildDocument(vr, doc, child.isDocRoot ? pk : null, child, false, ctx);
            }
            vr.removeNamespace(entity.name);
          }
          // The child entities may have changed CURRENT_CONTEXT; now that they
          // are done, set it back to this entity's context.
          Context.CURRENT_CONTEXT.set(ctx);

          if (entity.isDocRoot) {
            if (stop.get()) return;
            if (!doc.isEmpty()) {
              boolean result = writer.upload(doc);
              doc = null;
              if (result) {
                importStatistics.docCount.incrementAndGet();
              } else {
                importStatistics.failedDocCount.incrementAndGet();
              }
            }
          }

        } catch (DataImportHandlerException e) {
          if (verboseDebug) {
            writer.log(SolrWriter.ENTITY_EXCEPTION, entity.name, e);
          }
          if (e.getErrCode() == DataImportHandlerException.SKIP_ROW) {
            continue;
          }
          if (isRoot) {
            if (e.getErrCode() == DataImportHandlerException.SKIP) {
              importStatistics.skipDocCount.getAndIncrement();
              doc = null;
            } else {
              LOG.error("Exception while processing: " + entity.name + " document : " + doc, e);
            }
            if (e.getErrCode() == DataImportHandlerException.SEVERE) throw e;
          } else throw e;
        } catch (Throwable t) {
          if (verboseDebug) {
            writer.log(SolrWriter.ENTITY_EXCEPTION, entity.name, t);
          }
          throw new DataImportHandlerException(DataImportHandlerException.SEVERE, t);
        } finally {
          if (verboseDebug) {
            writer.log(SolrWriter.ROW_END, entity.name, null);
            if (entity.isDocRoot) writer.log(SolrWriter.END_DOC, null, null);
            Context.CURRENT_CONTEXT.remove();
          }
        }
      }
    } finally {
      if (verboseDebug) {
        writer.log(SolrWriter.END_ENTITY, null, null);
      }
      entityProcessor.destroy();
    }
  }
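
The debug-mode start/rows windowing above reduces to this self-contained sketch:

  public class PagingSketch {
    public static void main(String[] args) {
      int start = 2, rows = 3, seenDocCount = 0;
      for (int doc = 0; doc < 10; doc++) {
        seenDocCount++;
        if (seenDocCount <= start) continue;    // skip the first `start` docs
        if (seenDocCount > start + rows) break; // stop once the window is full
        System.out.println("index doc " + doc); // prints docs 2, 3, 4
      }
    }
  }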