Example #1
 @SuppressWarnings("unchecked")
 private void finish(Properties lastIndexTimeProps) {
   LOG.info("Import completed successfully");
   statusMessages.put(
       "",
       "Indexing completed. Added/Updated: "
           + importStatistics.docCount
           + " documents. Deleted "
           + importStatistics.deletedDocCount
           + " documents.");
   if (requestParameters.commit) {
     writer.commit(requestParameters.optimize);
     addStatusMessage("Committed");
     if (requestParameters.optimize) addStatusMessage("Optimized");
   }
   try {
     writer.persist(lastIndexTimeProps);
   } catch (Exception e) {
     LOG.error("Could not write property file", e);
     statusMessages.put(
         "error",
         "Could not write property file. Delta imports will not work. "
             + "Make sure your conf directory is writable");
   }
 }
Example #2
  /**
   * If the where clause is present, the cache maps each SQL query to a Map of key vs. List of
   * rows. Only used by cache implementations.
   *
   * @param query the query string for which cached data is to be returned
   * @return the cached row corresponding to the given query after all variables have been resolved
   */
 protected Map<String, Object> getIdCacheData(String query) {
   Map<Object, List<Map<String, Object>>> rowIdVsRows = cacheWithWhereClause.get(query);
   List<Map<String, Object>> rows = null;
   Object key = context.resolve(cacheVariableName);
   if (key == null) {
     throw new DataImportHandlerException(
         DataImportHandlerException.WARN,
         "The cache lookup value : "
             + cacheVariableName
             + " is resolved to be null in the entity :"
             + context.getEntityAttribute("name"));
   }
   if (rowIdVsRows != null) {
     rows = rowIdVsRows.get(key);
     if (rows == null) return null;
     dataSourceRowCache = new ArrayList<Map<String, Object>>(rows);
     return getFromRowCacheTransformed();
   } else {
     rows = getAllNonCachedRows();
     if (rows.isEmpty()) {
       return null;
     } else {
       rowIdVsRows = new HashMap<Object, List<Map<String, Object>>>();
       for (Map<String, Object> row : rows) {
         Object k = row.get(cachePk);
         if (k == null) {
           throw new DataImportHandlerException(
               DataImportHandlerException.WARN,
               "No value available for the cache key : "
                   + cachePk
                   + " in the entity : "
                   + context.getEntityAttribute("name"));
         }
         if (!k.getClass().equals(key.getClass())) {
           throw new DataImportHandlerException(
               DataImportHandlerException.WARN,
               "The key in the cache type : "
                   + k.getClass().getName()
                   + "is not same as the lookup value type "
                   + key.getClass().getName()
                   + " in the entity "
                   + context.getEntityAttribute("name"));
         }
         if (rowIdVsRows.get(k) == null) rowIdVsRows.put(k, new ArrayList<Map<String, Object>>());
         rowIdVsRows.get(k).add(row);
       }
       cacheWithWhereClause.put(query, rowIdVsRows);
       if (!rowIdVsRows.containsKey(key)) return null;
       dataSourceRowCache = new ArrayList<Map<String, Object>>(rowIdVsRows.get(key));
       if (dataSourceRowCache.isEmpty()) {
         dataSourceRowCache = null;
         return null;
       }
       return getFromRowCacheTransformed();
     }
   }
 }
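A minimal, self-contained sketch (all names hypothetical) of the two-level cache layout the javadoc above describes: the outer map is keyed by the SQL query, the inner map by the resolved lookup key, and each value is the list of matching rows.

 import java.util.*;

 public class CacheLayoutSketch {
   public static void main(String[] args) {
     // query -> (key -> rows); each row is a column-name -> value map
     Map<String, Map<Object, List<Map<String, Object>>>> cacheWithWhereClause =
         new HashMap<String, Map<Object, List<Map<String, Object>>>>();

     Map<String, Object> row = new HashMap<String, Object>();
     row.put("id", 42);
     row.put("name", "widget");

     Map<Object, List<Map<String, Object>>> rowIdVsRows =
         new HashMap<Object, List<Map<String, Object>>>();
     rowIdVsRows.put(42, new ArrayList<Map<String, Object>>());
     rowIdVsRows.get(42).add(row);

     String query = "select * from item where id=${item.id}";
     cacheWithWhereClause.put(query, rowIdVsRows);

     // a hit mirrors getIdCacheData: fetch the per-query map, then index by key
     System.out.println(cacheWithWhereClause.get(query).get(42)); // e.g. [{name=widget, id=42}]
   }
 }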
Example #3
  private void handleSpecialCommands(Map<String, Object> arow, DocWrapper doc) {
    Object value = arow.get("$deleteDocById");
    if (value != null) {
      if (value instanceof Collection) {
        Collection<?> collection = (Collection<?>) value;
        for (Object o : collection) {
          writer.deleteDoc(o.toString());
        }
      } else {
        writer.deleteDoc(value);
      }
    }
    value = arow.get("$deleteDocByQuery");
    if (value != null) {
      if (value instanceof Collection) {
        Collection<?> collection = (Collection<?>) value;
        for (Object o : collection) {
          writer.deleteByQuery(o.toString());
        }
      } else {
        writer.deleteByQuery(value.toString());
      }
    }
    value = arow.get("$docBoost");
    if (value != null) {
      float boost;
      if (value instanceof Number) {
        boost = ((Number) value).floatValue();
      } else {
        boost = Float.parseFloat(value.toString());
      }
      doc.setDocumentBoost(boost);
    }

    value = arow.get("$skipDoc");
    if (value != null) {
      if (Boolean.parseBoolean(value.toString())) {
        throw new DataImportHandlerException(
            DataImportHandlerException.SKIP, "Document skipped :" + arow);
      }
    }

    value = arow.get("$skipRow");
    if (value != null) {
      if (Boolean.parseBoolean(value.toString())) {
        throw new DataImportHandlerException(DataImportHandlerException.SKIP_ROW);
      }
    }
  }
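For context, a hedged sketch of the kind of row a transformer might emit to drive the $-commands handled above; all values shown are hypothetical.

 import java.util.*;

 public class SpecialCommandsRowSketch {
   public static void main(String[] args) {
     Map<String, Object> arow = new HashMap<String, Object>();
     arow.put("$deleteDocById", Arrays.asList("OLD-1", "OLD-2")); // collection form
     arow.put("$deleteDocByQuery", "category:discontinued");      // single-value form
     arow.put("$docBoost", "2.5"); // a Number, or a string parsed with Float.parseFloat
     arow.put("$skipRow", "true"); // parsed with Boolean.parseBoolean
     System.out.println(arow);
   }
 }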
Example #4
 @SuppressWarnings("unchecked")
 private void addFields(
     DataConfig.Entity entity, DocWrapper doc, Map<String, Object> arow, VariableResolver vr) {
   for (Map.Entry<String, Object> entry : arow.entrySet()) {
     String key = entry.getKey();
     Object value = entry.getValue();
     if (value == null) continue;
     if (key.startsWith("$")) continue;
     List<DataConfig.Field> field = entity.colNameVsField.get(key);
     if (field == null && dataImporter.getSchema() != null) {
        // This can be a dynamic field or a field which does not have an entry in data-config (an
        // implicit field)
       SchemaField sf = dataImporter.getSchema().getFieldOrNull(key);
       if (sf == null) {
         sf = dataImporter.getConfig().lowerNameVsSchemaField.get(key.toLowerCase(Locale.ENGLISH));
       }
       if (sf != null) {
         addFieldToDoc(entry.getValue(), sf.getName(), 1.0f, sf.multiValued(), doc);
       }
        // else do nothing; if we add it, the update may fail
     } else {
       if (field != null) {
         for (DataConfig.Field f : field) {
           String name = f.getName();
           if (f.dynamicName) {
             name = vr.replaceTokens(name);
           }
           if (f.toWrite) addFieldToDoc(entry.getValue(), name, f.boost, f.multiValued, doc);
         }
       }
     }
   }
 }
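The lookup order above can be distilled into a standalone sketch (hypothetical maps standing in for data-config and the schema): an explicit field mapping wins, then an exact schema field, then a case-insensitive fallback keyed by the lower-cased name.

 import java.util.*;

 public class FieldLookupSketch {
   public static void main(String[] args) {
     Map<String, String> explicitMapping = new HashMap<String, String>(); // data-config fields
     Map<String, String> schemaFields = new HashMap<String, String>();    // exact schema names
     Map<String, String> lowerNameVsSchemaField = new HashMap<String, String>();

     schemaFields.put("Title", "Title");
     lowerNameVsSchemaField.put("title", "Title");

     String key = "TITLE"; // column name coming back from the data source
     String target = explicitMapping.get(key);
     if (target == null) target = schemaFields.get(key);
     if (target == null) target = lowerNameVsSchemaField.get(key.toLowerCase(Locale.ENGLISH));
     System.out.println(target); // Title
   }
 }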
Example #5
 private void deleteAll(Set<Map<String, Object>> deletedKeys) {
   LOG.info("Deleting stale documents ");
   Iterator<Map<String, Object>> iter = deletedKeys.iterator();
   while (iter.hasNext()) {
     Map<String, Object> map = iter.next();
     String keyName = root.isDocRoot ? root.getPk() : root.getSchemaPk();
     Object key = map.get(keyName);
     if (key == null) {
       LOG.warn("no key was available for deleteted pk query. keyName = " + keyName);
       continue;
     }
     writer.deleteDoc(key);
     importStatistics.deletedDocCount.incrementAndGet();
     iter.remove();
   }
 }
Example #6
 public Map<String, Object> getStatsSnapshot() {
   Map<String, Object> result = new HashMap<String, Object>();
   result.put("docCount", docCount.get());
   result.put("deletedDocCount", deletedDocCount.get());
   result.put("rowCount", rowsCount.get());
   result.put("queryCount", rowsCount.get());
   result.put("skipDocCount", skipDocCount.get());
   return result;
 }
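The counters behind this snapshot are concurrently updated; copying their current values into a plain map freezes them for status reporting. A minimal sketch assuming AtomicLong fields, as the increments elsewhere in the class suggest:

 import java.util.*;
 import java.util.concurrent.atomic.AtomicLong;

 public class StatsSketch {
   final AtomicLong docCount = new AtomicLong();
   final AtomicLong queryCount = new AtomicLong();

   Map<String, Object> snapshot() {
     Map<String, Object> result = new HashMap<String, Object>();
     result.put("docCount", docCount.get());     // plain long, no longer live
     result.put("queryCount", queryCount.get());
     return result;
   }

   public static void main(String[] args) {
     StatsSketch s = new StatsSketch();
     s.docCount.incrementAndGet();
     System.out.println(s.snapshot()); // e.g. {docCount=1, queryCount=0}
   }
 }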
Example #7
  @SuppressWarnings("unchecked")
  private void doDelta() {
    addStatusMessage("Delta Dump started");
    VariableResolverImpl resolver = getVariableResolver();

    if (document.deleteQuery != null) {
      writer.deleteByQuery(document.deleteQuery);
    }

    addStatusMessage("Identifying Delta");
    LOG.info("Starting delta collection.");
    Set<Map<String, Object>> deletedKeys = new HashSet<Map<String, Object>>();
    Set<Map<String, Object>> allPks = collectDelta(root, resolver, deletedKeys);
    if (stop.get()) return;
    addStatusMessage("Deltas Obtained");
    addStatusMessage("Building documents");
    if (!deletedKeys.isEmpty()) {
      allPks.removeAll(deletedKeys);
      deleteAll(deletedKeys);
      // Make sure that documents are not re-created
    }
    deletedKeys = null;

    statusMessages.put("Total Changed Documents", allPks.size());
    VariableResolverImpl vri = getVariableResolver();
    Iterator<Map<String, Object>> pkIter = allPks.iterator();
    while (pkIter.hasNext()) {
      Map<String, Object> map = pkIter.next();
      vri.addNamespace(DataConfig.IMPORTER_NS_SHORT + ".delta", map);
      buildDocument(vri, null, map, root, true, null);
      pkIter.remove();
      // check for abort
      if (stop.get()) break;
    }

    if (!stop.get()) {
      LOG.info("Delta Import completed successfully");
    }
  }
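One subtlety above: allPks.removeAll(deletedKeys) only works because java.util.Map defines equals() and hashCode() over its entries, so two pk maps produced by different queries compare equal when they hold the same keys and values. A small demonstration:

 import java.util.*;

 public class MapEqualitySketch {
   public static void main(String[] args) {
     Map<String, Object> fromDelta = new HashMap<String, Object>();
     fromDelta.put("id", 7);
     Map<String, Object> fromDeleted = new HashMap<String, Object>();
     fromDeleted.put("id", 7);

     Set<Map<String, Object>> allPks = new HashSet<Map<String, Object>>();
     allPks.add(fromDelta);
     allPks.removeAll(Collections.singleton(fromDeleted));
     System.out.println(allPks.isEmpty()); // true: the deleted pk was dropped
   }
 }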
Example #8
 public VariableResolverImpl getVariableResolver() {
   try {
     VariableResolverImpl resolver = null;
     if (dataImporter != null && dataImporter.getCore() != null) {
       resolver =
           new VariableResolverImpl(
               dataImporter.getCore().getResourceLoader().getCoreProperties());
     } else resolver = new VariableResolverImpl();
     Map<String, Object> indexerNamespace = new HashMap<String, Object>();
     if (persistedProperties.getProperty(LAST_INDEX_TIME) != null) {
       indexerNamespace.put(LAST_INDEX_TIME, persistedProperties.getProperty(LAST_INDEX_TIME));
     } else {
       // set epoch
       indexerNamespace.put(LAST_INDEX_TIME, DataImporter.DATE_TIME_FORMAT.get().format(EPOCH));
     }
     indexerNamespace.put(INDEX_START_TIME, dataImporter.getIndexStartTime());
     indexerNamespace.put("request", requestParameters.requestParams);
     indexerNamespace.put("functions", functionsNamespace);
     for (DataConfig.Entity entity : dataImporter.getConfig().document.entities) {
       String key = entity.name + "." + SolrWriter.LAST_INDEX_KEY;
       String lastIndex = persistedProperties.getProperty(key);
       if (lastIndex != null) {
         indexerNamespace.put(key, lastIndex);
       } else {
         indexerNamespace.put(key, DataImporter.DATE_TIME_FORMAT.get().format(EPOCH));
       }
     }
     resolver.addNamespace(DataConfig.IMPORTER_NS_SHORT, indexerNamespace);
     resolver.addNamespace(DataConfig.IMPORTER_NS, indexerNamespace);
     return resolver;
   } catch (Exception e) {
     wrapAndThrow(SEVERE, e);
     // unreachable statement
     return null;
   }
 }
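A map-backed sketch (a hypothetical stand-in, not the real VariableResolverImpl) of how the namespaces registered above let an expression such as dataimporter.last_index_time resolve during delta queries:

 import java.util.*;

 public class ResolverSketch {
   private final Map<String, Object> flat = new HashMap<String, Object>();

   void addNamespace(String name, Map<String, Object> values) {
     for (Map.Entry<String, Object> e : values.entrySet()) {
       flat.put(name + "." + e.getKey(), e.getValue());
     }
   }

   Object resolve(String dottedName) {
     return flat.get(dottedName);
   }

   public static void main(String[] args) {
     ResolverSketch r = new ResolverSketch();
     Map<String, Object> indexer = new HashMap<String, Object>();
     indexer.put("last_index_time", "1970-01-01 00:00:00"); // the epoch fallback
     r.addNamespace("dataimporter", indexer);
     System.out.println(r.resolve("dataimporter.last_index_time"));
   }
 }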
Example #9
  /**
   * Collects the unique keys of all Solr documents for which one or more source tables have
   * changed since the last index time.
   *
   * <p>Note: in our definition, the unique key of a Solr document is the primary key of the top
   * level entity (unless skipped using docRoot=false) in data-config.xml.
   *
   * @return the set of keys for which Solr documents should be updated.
   */
  @SuppressWarnings("unchecked")
  public Set<Map<String, Object>> collectDelta(
      DataConfig.Entity entity,
      VariableResolverImpl resolver,
      Set<Map<String, Object>> deletedRows) {
    // someone called abort
    if (stop.get()) return new HashSet<Map<String, Object>>();

    EntityProcessor entityProcessor = getEntityProcessor(entity);
    ContextImpl context1 =
        new ContextImpl(entity, resolver, null, Context.FIND_DELTA, session, null, this);
    entityProcessor.init(context1);

    Set<Map<String, Object>> myModifiedPks = new HashSet<Map<String, Object>>();

    if (entity.entities != null) {

      for (DataConfig.Entity entity1 : entity.entities) {
        // this ensures that we start from the leaf nodes
        myModifiedPks.addAll(collectDelta(entity1, resolver, deletedRows));
        // someone called abort
        if (stop.get()) return new HashSet<Map<String, Object>>();
      }
    }
    // identifying the modified rows for this entity

    Map<String, Map<String, Object>> deltaSet = new HashMap<String, Map<String, Object>>();
    LOG.info("Running ModifiedRowKey() for Entity: " + entity.name);
    // get the modified rows in this entity
    while (true) {
      Map<String, Object> row = entityProcessor.nextModifiedRowKey();

      if (row == null) break;

      deltaSet.put(row.get(entity.getPk()).toString(), row);
      importStatistics.rowsCount.incrementAndGet();
      // check for abort
      if (stop.get()) return new HashSet<Map<String, Object>>();
    }
    // get the deleted rows for this entity
    Set<Map<String, Object>> deletedSet = new HashSet<Map<String, Object>>();
    while (true) {
      Map<String, Object> row = entityProcessor.nextDeletedRowKey();
      if (row == null) break;

      deletedSet.add(row);

      // Remove deleted rows from the delta rows
      String deletedRowPk = row.get(entity.getPk()).toString();
      deltaSet.remove(deletedRowPk);

      importStatistics.rowsCount.incrementAndGet();
      // check for abort
      if (stop.get()) return new HashSet<Map<String, Object>>();
    }

    LOG.info(
        "Completed ModifiedRowKey for Entity: "
            + entity.name
            + " rows obtained : "
            + deltaSet.size());
    LOG.info(
        "Completed DeletedRowKey for Entity: "
            + entity.name
            + " rows obtained : "
            + deletedSet.size());

    myModifiedPks.addAll(deltaSet.values());
    Set<Map<String, Object>> parentKeyList = new HashSet<Map<String, Object>>();
    // all that we have captured is useless (in a sub-entity) if no rows in the parent are
    // modified because of these changes, so propagate the changes up the chain
    if (entity.parentEntity != null) {
      // identifying deleted rows with deltas

      for (Map<String, Object> row : myModifiedPks) {
        getModifiedParentRows(
            resolver.addNamespace(entity.name, row), entity.name, entityProcessor, parentKeyList);
        // check for abort
        if (stop.get()) return new HashSet<Map<String, Object>>();
      }
      // running the same for deleted rows
      for (Map<String, Object> row : deletedSet) {
        getModifiedParentRows(
            resolver.addNamespace(entity.name, row), entity.name, entityProcessor, parentKeyList);
        // check for abort
        if (stop.get()) return new HashSet<Map<String, Object>>();
      }
    }
    LOG.info("Completed parentDeltaQuery for Entity: " + entity.name);
    if (entity.isDocRoot) deletedRows.addAll(deletedSet);

    // Do not use entity.isDocRoot here because a descendant entity may set rootEntity="true"
    return entity.parentEntity == null
        ? myModifiedPks
        : new HashSet<Map<String, Object>>(parentKeyList);
  }
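A toy sketch of the traversal order only: children are recursed into before the current entity collects its own modified rows, so the walk is leaf-first and only the root's pks are ultimately returned. Entity names here are hypothetical.

 import java.util.*;

 public class LeafFirstSketch {
   static void collect(String entity, Map<String, List<String>> children, List<String> order) {
     for (String child : children.getOrDefault(entity, Collections.<String>emptyList())) {
       collect(child, children, order); // leaves first
     }
     order.add(entity); // then the entity itself
   }

   public static void main(String[] args) {
     Map<String, List<String>> children = new HashMap<String, List<String>>();
     children.put("item", Arrays.asList("feature", "supplier"));
     List<String> order = new ArrayList<String>();
     collect("item", children, order);
     System.out.println(order); // [feature, supplier, item]
   }
 }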
Example #10
 public Object getSessionAttribute(String key) {
   return session == null ? null : session.get(key);
 }
Example #11
 public void setSessionAttribute(String key, Object val) {
   if (session == null) session = new HashMap<String, Object>();
   session.put(key, val);
 }
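A hedged usage sketch of the two methods above: the lazily created session map serves as shared scratch space between entity processors within one import run.

 import java.util.*;

 public class SessionSketch {
   private Map<String, Object> session;

   public void setSessionAttribute(String key, Object val) {
     if (session == null) session = new HashMap<String, Object>();
     session.put(key, val);
   }

   public Object getSessionAttribute(String key) {
     return session == null ? null : session.get(key);
   }

   public static void main(String[] args) {
     SessionSketch s = new SessionSketch();
     System.out.println(s.getSessionAttribute("catalog.version")); // null before any write
     s.setSessionAttribute("catalog.version", "v42");
     System.out.println(s.getSessionAttribute("catalog.version")); // v42
   }
 }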
Example #12
 @SuppressWarnings("unchecked")
 public void addStatusMessage(String msg) {
   statusMessages.put(msg, DataImporter.DATE_TIME_FORMAT.get().format(new Date()));
 }
Example #13
 void rollback() {
   writer.rollback();
   statusMessages.put("", "Indexing failed. Rolled back all changes.");
   addStatusMessage("Rolledback");
 }
Example #14
  @SuppressWarnings("unchecked")
  public void execute() {
    dataImporter.store(DataImporter.STATUS_MSGS, statusMessages);
    document = dataImporter.getConfig().document;
    final AtomicLong startTime = new AtomicLong(System.currentTimeMillis());
    statusMessages.put(
        TIME_ELAPSED,
        new Object() {
          public String toString() {
            return getTimeElapsedSince(startTime.get());
          }
        });

    statusMessages.put(DataImporter.MSG.TOTAL_QUERIES_EXECUTED, importStatistics.queryCount);
    statusMessages.put(DataImporter.MSG.TOTAL_ROWS_EXECUTED, importStatistics.rowsCount);
    statusMessages.put(DataImporter.MSG.TOTAL_DOC_PROCESSED, importStatistics.docCount);
    statusMessages.put(DataImporter.MSG.TOTAL_DOCS_SKIPPED, importStatistics.skipDocCount);

    List<String> entities = requestParameters.entities;

    // Trigger onImportStart
    if (document.onImportStart != null) {
      invokeEventListener(document.onImportStart);
    }
    AtomicBoolean fullCleanDone = new AtomicBoolean(false);
    // we must not do a delete of *:* multiple times if there are multiple root entities to be run
    Properties lastIndexTimeProps = new Properties();
    lastIndexTimeProps.setProperty(
        LAST_INDEX_KEY,
        DataImporter.DATE_TIME_FORMAT.get().format(dataImporter.getIndexStartTime()));
    for (DataConfig.Entity e : document.entities) {
      if (entities != null && !entities.contains(e.name)) continue;
      lastIndexTimeProps.setProperty(
          e.name + "." + LAST_INDEX_KEY, DataImporter.DATE_TIME_FORMAT.get().format(new Date()));
      root = e;
      String delQuery = e.allAttributes.get("preImportDeleteQuery");
      if (dataImporter.getStatus() == DataImporter.Status.RUNNING_DELTA_DUMP) {
        cleanByQuery(delQuery, fullCleanDone);
        doDelta();
        delQuery = e.allAttributes.get("postImportDeleteQuery");
        if (delQuery != null) {
          fullCleanDone.set(false);
          cleanByQuery(delQuery, fullCleanDone);
        }
      } else {
        cleanByQuery(delQuery, fullCleanDone);
        doFullDump();
        delQuery = e.allAttributes.get("postImportDeleteQuery");
        if (delQuery != null) {
          fullCleanDone.set(false);
          cleanByQuery(delQuery, fullCleanDone);
        }
      }
      statusMessages.remove(DataImporter.MSG.TOTAL_DOC_PROCESSED);
    }

    if (stop.get()) {
      // Don't commit if aborted using command=abort
      statusMessages.put("Aborted", DataImporter.DATE_TIME_FORMAT.get().format(new Date()));
      rollback();
    } else {
      // Do not commit unnecessarily if this is a delta-import and no documents were created or
      // deleted
      if (!requestParameters.clean) {
        if (importStatistics.docCount.get() > 0 || importStatistics.deletedDocCount.get() > 0) {
          finish(lastIndexTimeProps);
        }
      } else {
        // Finished operation normally, commit now
        finish(lastIndexTimeProps);
      }

      if (writer != null) {
        writer.finish();
      }

      if (document.onImportEnd != null) {
        invokeEventListener(document.onImportEnd);
      }
    }

    statusMessages.remove(TIME_ELAPSED);
    statusMessages.put(DataImporter.MSG.TOTAL_DOC_PROCESSED, "" + importStatistics.docCount.get());
    if (importStatistics.failedDocCount.get() > 0)
      statusMessages.put(
          DataImporter.MSG.TOTAL_FAILED_DOCS, "" + importStatistics.failedDocCount.get());

    statusMessages.put("Time taken ", getTimeElapsedSince(startTime.get()));
    LOG.info("Time taken = " + getTimeElapsedSince(startTime.get()));
  }
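The TIME_ELAPSED entry above relies on a small trick worth isolating: the stored value is an anonymous Object whose toString() recomputes the elapsed time, so any status page that renders the map always shows a live figure without the map ever being updated. A minimal sketch:

 public class LiveElapsedSketch {
   public static void main(String[] args) throws InterruptedException {
     final long start = System.currentTimeMillis();
     Object liveElapsed = new Object() {
       @Override
       public String toString() {
         return (System.currentTimeMillis() - start) + " ms";
       }
     };
     System.out.println(liveElapsed); // ~0 ms
     Thread.sleep(50);
     System.out.println(liveElapsed); // ~50 ms: recomputed on every render
   }
 }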