@SuppressWarnings("unchecked") private void finish(Properties lastIndexTimeProps) { LOG.info("Import completed successfully"); statusMessages.put( "", "Indexing completed. Added/Updated: " + importStatistics.docCount + " documents. Deleted " + importStatistics.deletedDocCount + " documents."); if (requestParameters.commit) { writer.commit(requestParameters.optimize); addStatusMessage("Committed"); if (requestParameters.optimize) addStatusMessage("Optimized"); } try { writer.persist(lastIndexTimeProps); } catch (Exception e) { LOG.error("Could not write property file", e); statusMessages.put( "error", "Could not write property file. Delta imports will not work. " + "Make sure your conf directory is writable"); } }
/**
 * If a where clause is present, the cache maps each SQL query to a Map of key vs. List of rows.
 * Only used by cache implementations.
 *
 * @param query the query string for which cached data is to be returned
 * @return the cached row corresponding to the given query after all variables have been resolved
 */
protected Map<String, Object> getIdCacheData(String query) {
  Map<Object, List<Map<String, Object>>> rowIdVsRows = cacheWithWhereClause.get(query);
  List<Map<String, Object>> rows = null;
  Object key = context.resolve(cacheVariableName);
  if (key == null) {
    throw new DataImportHandlerException(
        DataImportHandlerException.WARN,
        "The cache lookup value : "
            + cacheVariableName
            + " is resolved to be null in the entity :"
            + context.getEntityAttribute("name"));
  }
  if (rowIdVsRows != null) {
    rows = rowIdVsRows.get(key);
    if (rows == null) return null;
    dataSourceRowCache = new ArrayList<Map<String, Object>>(rows);
    return getFromRowCacheTransformed();
  } else {
    rows = getAllNonCachedRows();
    if (rows.isEmpty()) {
      return null;
    } else {
      rowIdVsRows = new HashMap<Object, List<Map<String, Object>>>();
      for (Map<String, Object> row : rows) {
        Object k = row.get(cachePk);
        if (k == null) {
          throw new DataImportHandlerException(
              DataImportHandlerException.WARN,
              "No value available for the cache key : "
                  + cachePk
                  + " in the entity : "
                  + context.getEntityAttribute("name"));
        }
        if (!k.getClass().equals(key.getClass())) {
          throw new DataImportHandlerException(
              DataImportHandlerException.WARN,
              "The key in the cache type : "
                  + k.getClass().getName()
                  + " is not the same as the lookup value type "
                  + key.getClass().getName()
                  + " in the entity "
                  + context.getEntityAttribute("name"));
        }
        if (rowIdVsRows.get(k) == null) rowIdVsRows.put(k, new ArrayList<Map<String, Object>>());
        rowIdVsRows.get(k).add(row);
      }
      cacheWithWhereClause.put(query, rowIdVsRows);
      if (!rowIdVsRows.containsKey(key)) return null;
      dataSourceRowCache = new ArrayList<Map<String, Object>>(rowIdVsRows.get(key));
      if (dataSourceRowCache.isEmpty()) {
        dataSourceRowCache = null;
        return null;
      }
      return getFromRowCacheTransformed();
    }
  }
}
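/**
 * Handles the special pseudo-columns a row may carry ($deleteDocById, $deleteDocByQuery,
 * $docBoost, $skipDoc, $skipRow). Deletes are forwarded to the writer, $docBoost sets the
 * document boost, and the skip commands abort the current document or row by throwing a
 * DataImportHandlerException.
 */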
private void handleSpecialCommands(Map<String, Object> arow, DocWrapper doc) {
  Object value = arow.get("$deleteDocById");
  if (value != null) {
    if (value instanceof Collection) {
      Collection collection = (Collection) value;
      for (Object o : collection) {
        writer.deleteDoc(o.toString());
      }
    } else {
      writer.deleteDoc(value);
    }
  }
  value = arow.get("$deleteDocByQuery");
  if (value != null) {
    if (value instanceof Collection) {
      Collection collection = (Collection) value;
      for (Object o : collection) {
        writer.deleteByQuery(o.toString());
      }
    } else {
      writer.deleteByQuery(value.toString());
    }
  }
  value = arow.get("$docBoost");
  if (value != null) {
    float value1 = 1.0f;
    if (value instanceof Number) {
      value1 = ((Number) value).floatValue();
    } else {
      value1 = Float.parseFloat(value.toString());
    }
    doc.setDocumentBoost(value1);
  }
  value = arow.get("$skipDoc");
  if (value != null) {
    if (Boolean.parseBoolean(value.toString())) {
      throw new DataImportHandlerException(
          DataImportHandlerException.SKIP, "Document skipped :" + arow);
    }
  }
  value = arow.get("$skipRow");
  if (value != null) {
    if (Boolean.parseBoolean(value.toString())) {
      throw new DataImportHandlerException(DataImportHandlerException.SKIP_ROW);
    }
  }
}
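/**
 * Copies the columns of a row into the document, mapping each column through the fields declared
 * in data-config.xml, or falling back to a matching (possibly dynamic or implicit) schema field
 * when no explicit mapping exists. Columns whose name starts with "$" or whose value is null are
 * ignored.
 */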
@SuppressWarnings("unchecked") private void addFields( DataConfig.Entity entity, DocWrapper doc, Map<String, Object> arow, VariableResolver vr) { for (Map.Entry<String, Object> entry : arow.entrySet()) { String key = entry.getKey(); Object value = entry.getValue(); if (value == null) continue; if (key.startsWith("$")) continue; List<DataConfig.Field> field = entity.colNameVsField.get(key); if (field == null && dataImporter.getSchema() != null) { // This can be a dynamic field or a field which does not have an entry in data-config ( an // implicit field) SchemaField sf = dataImporter.getSchema().getFieldOrNull(key); if (sf == null) { sf = dataImporter.getConfig().lowerNameVsSchemaField.get(key.toLowerCase(Locale.ENGLISH)); } if (sf != null) { addFieldToDoc(entry.getValue(), sf.getName(), 1.0f, sf.multiValued(), doc); } // else do nothing. if we add it it may fail } else { if (field != null) { for (DataConfig.Field f : field) { String name = f.getName(); if (f.dynamicName) { name = vr.replaceTokens(name); } if (f.toWrite) addFieldToDoc(entry.getValue(), name, f.boost, f.multiValued, doc); } } } } }
private void deleteAll(Set<Map<String, Object>> deletedKeys) {
  LOG.info("Deleting stale documents ");
  Iterator<Map<String, Object>> iter = deletedKeys.iterator();
  while (iter.hasNext()) {
    Map<String, Object> map = iter.next();
    String keyName = root.isDocRoot ? root.getPk() : root.getSchemaPk();
    Object key = map.get(keyName);
    if (key == null) {
      LOG.warn("no key was available for deleted pk query. keyName = " + keyName);
      continue;
    }
    writer.deleteDoc(key);
    importStatistics.deletedDocCount.incrementAndGet();
    iter.remove();
  }
}
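/** Returns a point-in-time snapshot of the import counters as a simple map. */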
public Map<String, Object> getStatsSnapshot() {
  Map<String, Object> result = new HashMap<String, Object>();
  result.put("docCount", docCount.get());
  result.put("deletedDocCount", deletedDocCount.get());
  result.put("rowCount", rowsCount.get());
  result.put("queryCount", queryCount.get());
  result.put("skipDocCount", skipDocCount.get());
  return result;
}
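/**
 * Runs a delta import: collects the modified and deleted keys starting from the root entity,
 * removes stale documents, and then rebuilds a document for every remaining changed key.
 */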
@SuppressWarnings("unchecked") private void doDelta() { addStatusMessage("Delta Dump started"); VariableResolverImpl resolver = getVariableResolver(); if (document.deleteQuery != null) { writer.deleteByQuery(document.deleteQuery); } addStatusMessage("Identifying Delta"); LOG.info("Starting delta collection."); Set<Map<String, Object>> deletedKeys = new HashSet<Map<String, Object>>(); Set<Map<String, Object>> allPks = collectDelta(root, resolver, deletedKeys); if (stop.get()) return; addStatusMessage("Deltas Obtained"); addStatusMessage("Building documents"); if (!deletedKeys.isEmpty()) { allPks.removeAll(deletedKeys); deleteAll(deletedKeys); // Make sure that documents are not re-created } deletedKeys = null; statusMessages.put("Total Changed Documents", allPks.size()); VariableResolverImpl vri = getVariableResolver(); Iterator<Map<String, Object>> pkIter = allPks.iterator(); while (pkIter.hasNext()) { Map<String, Object> map = pkIter.next(); vri.addNamespace(DataConfig.IMPORTER_NS_SHORT + ".delta", map); buildDocument(vri, null, map, root, true, null); pkIter.remove(); // check for abort if (stop.get()) break; } if (!stop.get()) { LOG.info("Delta Import completed successfully"); } }
public VariableResolverImpl getVariableResolver() {
  try {
    VariableResolverImpl resolver = null;
    if (dataImporter != null && dataImporter.getCore() != null) {
      resolver =
          new VariableResolverImpl(
              dataImporter.getCore().getResourceLoader().getCoreProperties());
    } else resolver = new VariableResolverImpl();
    Map<String, Object> indexerNamespace = new HashMap<String, Object>();
    if (persistedProperties.getProperty(LAST_INDEX_TIME) != null) {
      indexerNamespace.put(LAST_INDEX_TIME, persistedProperties.getProperty(LAST_INDEX_TIME));
    } else {
      // set epoch
      indexerNamespace.put(LAST_INDEX_TIME, DataImporter.DATE_TIME_FORMAT.get().format(EPOCH));
    }
    indexerNamespace.put(INDEX_START_TIME, dataImporter.getIndexStartTime());
    indexerNamespace.put("request", requestParameters.requestParams);
    indexerNamespace.put("functions", functionsNamespace);
    for (DataConfig.Entity entity : dataImporter.getConfig().document.entities) {
      String key = entity.name + "." + SolrWriter.LAST_INDEX_KEY;
      String lastIndex = persistedProperties.getProperty(key);
      if (lastIndex != null) {
        indexerNamespace.put(key, lastIndex);
      } else {
        indexerNamespace.put(key, DataImporter.DATE_TIME_FORMAT.get().format(EPOCH));
      }
    }
    resolver.addNamespace(DataConfig.IMPORTER_NS_SHORT, indexerNamespace);
    resolver.addNamespace(DataConfig.IMPORTER_NS, indexerNamespace);
    return resolver;
  } catch (Exception e) {
    wrapAndThrow(SEVERE, e);
    // unreachable statement
    return null;
  }
}
/**
 * Collects unique keys of all Solr documents for which one or more source tables have been
 * changed since the last indexed time.
 *
 * <p>Note: In our definition, the unique key of a Solr document is the primary key of the top
 * level entity (unless skipped using docRoot=false) in the Solr document in data-config.xml
 *
 * @return an iterator to the list of keys for which Solr documents should be updated.
 */
@SuppressWarnings("unchecked")
public Set<Map<String, Object>> collectDelta(
    DataConfig.Entity entity,
    VariableResolverImpl resolver,
    Set<Map<String, Object>> deletedRows) {
  // someone called abort
  if (stop.get()) return new HashSet();

  EntityProcessor entityProcessor = getEntityProcessor(entity);
  ContextImpl context1 =
      new ContextImpl(entity, resolver, null, Context.FIND_DELTA, session, null, this);
  entityProcessor.init(context1);

  Set<Map<String, Object>> myModifiedPks = new HashSet<Map<String, Object>>();

  if (entity.entities != null) {
    for (DataConfig.Entity entity1 : entity.entities) {
      // this ensures that we start from the leaf nodes
      myModifiedPks.addAll(collectDelta(entity1, resolver, deletedRows));
      // someone called abort
      if (stop.get()) return new HashSet();
    }
  }

  // identifying the modified rows for this entity
  Map<String, Map<String, Object>> deltaSet = new HashMap<String, Map<String, Object>>();
  LOG.info("Running ModifiedRowKey() for Entity: " + entity.name);
  // get the modified rows in this entity
  while (true) {
    Map<String, Object> row = entityProcessor.nextModifiedRowKey();
    if (row == null) break;

    deltaSet.put(row.get(entity.getPk()).toString(), row);
    importStatistics.rowsCount.incrementAndGet();
    // check for abort
    if (stop.get()) return new HashSet();
  }
  // get the deleted rows for this entity
  Set<Map<String, Object>> deletedSet = new HashSet<Map<String, Object>>();
  while (true) {
    Map<String, Object> row = entityProcessor.nextDeletedRowKey();
    if (row == null) break;

    deletedSet.add(row);

    // Remove deleted rows from the delta rows
    String deletedRowPk = row.get(entity.getPk()).toString();
    if (deltaSet.containsKey(deletedRowPk)) {
      deltaSet.remove(deletedRowPk);
    }

    importStatistics.rowsCount.incrementAndGet();
    // check for abort
    if (stop.get()) return new HashSet();
  }

  LOG.info(
      "Completed ModifiedRowKey for Entity: "
          + entity.name
          + " rows obtained : "
          + deltaSet.size());
  LOG.info(
      "Completed DeletedRowKey for Entity: "
          + entity.name
          + " rows obtained : "
          + deletedSet.size());

  myModifiedPks.addAll(deltaSet.values());
  Set<Map<String, Object>> parentKeyList = new HashSet<Map<String, Object>>();
  // all that we have captured is useless (in a sub-entity) if no rows in the parent are modified
  // because of these changes, so propagate the changes up the chain
  if (entity.parentEntity != null) {
    // identifying deleted rows with deltas
    for (Map<String, Object> row : myModifiedPks) {
      getModifiedParentRows(
          resolver.addNamespace(entity.name, row), entity.name, entityProcessor, parentKeyList);
      // check for abort
      if (stop.get()) return new HashSet();
    }
    // running the same for deleted rows
    for (Map<String, Object> row : deletedSet) {
      getModifiedParentRows(
          resolver.addNamespace(entity.name, row), entity.name, entityProcessor, parentKeyList);
      // check for abort
      if (stop.get()) return new HashSet();
    }
  }
  LOG.info("Completed parentDeltaQuery for Entity: " + entity.name);
  if (entity.isDocRoot) deletedRows.addAll(deletedSet);

  // Do not use entity.isDocRoot here because one of the descendant entities may set
  // rootEntity="true"
  return entity.parentEntity == null
      ? myModifiedPks
      : new HashSet<Map<String, Object>>(parentKeyList);
}
public Object getSessionAttribute(String key) {
  return session == null ? null : session.get(key);
}
public void setSessionAttribute(String key, Object val) {
  if (session == null) session = new HashMap<String, Object>();
  session.put(key, val);
}
@SuppressWarnings("unchecked") public void addStatusMessage(String msg) { statusMessages.put(msg, DataImporter.DATE_TIME_FORMAT.get().format(new Date())); }
void rollback() {
  writer.rollback();
  statusMessages.put("", "Indexing failed. Rolled back all changes.");
  addStatusMessage("Rolledback");
}
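/**
 * Entry point for an import run. Sets up the status messages, runs a full or delta dump for each
 * requested root entity (honouring pre- and post-import delete queries), and finally commits or
 * rolls back depending on whether the run was aborted.
 */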
@SuppressWarnings("unchecked") public void execute() { dataImporter.store(DataImporter.STATUS_MSGS, statusMessages); document = dataImporter.getConfig().document; final AtomicLong startTime = new AtomicLong(System.currentTimeMillis()); statusMessages.put( TIME_ELAPSED, new Object() { public String toString() { return getTimeElapsedSince(startTime.get()); } }); statusMessages.put(DataImporter.MSG.TOTAL_QUERIES_EXECUTED, importStatistics.queryCount); statusMessages.put(DataImporter.MSG.TOTAL_ROWS_EXECUTED, importStatistics.rowsCount); statusMessages.put(DataImporter.MSG.TOTAL_DOC_PROCESSED, importStatistics.docCount); statusMessages.put(DataImporter.MSG.TOTAL_DOCS_SKIPPED, importStatistics.skipDocCount); List<String> entities = requestParameters.entities; // Trigger onImportStart if (document.onImportStart != null) { invokeEventListener(document.onImportStart); } AtomicBoolean fullCleanDone = new AtomicBoolean(false); // we must not do a delete of *:* multiple times if there are multiple root entities to be run Properties lastIndexTimeProps = new Properties(); lastIndexTimeProps.setProperty( LAST_INDEX_KEY, DataImporter.DATE_TIME_FORMAT.get().format(dataImporter.getIndexStartTime())); for (DataConfig.Entity e : document.entities) { if (entities != null && !entities.contains(e.name)) continue; lastIndexTimeProps.setProperty( e.name + "." + LAST_INDEX_KEY, DataImporter.DATE_TIME_FORMAT.get().format(new Date())); root = e; String delQuery = e.allAttributes.get("preImportDeleteQuery"); if (dataImporter.getStatus() == DataImporter.Status.RUNNING_DELTA_DUMP) { cleanByQuery(delQuery, fullCleanDone); doDelta(); delQuery = e.allAttributes.get("postImportDeleteQuery"); if (delQuery != null) { fullCleanDone.set(false); cleanByQuery(delQuery, fullCleanDone); } } else { cleanByQuery(delQuery, fullCleanDone); doFullDump(); delQuery = e.allAttributes.get("postImportDeleteQuery"); if (delQuery != null) { fullCleanDone.set(false); cleanByQuery(delQuery, fullCleanDone); } } statusMessages.remove(DataImporter.MSG.TOTAL_DOC_PROCESSED); } if (stop.get()) { // Dont commit if aborted using command=abort statusMessages.put("Aborted", DataImporter.DATE_TIME_FORMAT.get().format(new Date())); rollback(); } else { // Do not commit unnecessarily if this is a delta-import and no documents were created or // deleted if (!requestParameters.clean) { if (importStatistics.docCount.get() > 0 || importStatistics.deletedDocCount.get() > 0) { finish(lastIndexTimeProps); } } else { // Finished operation normally, commit now finish(lastIndexTimeProps); } if (writer != null) { writer.finish(); } if (document.onImportEnd != null) { invokeEventListener(document.onImportEnd); } } statusMessages.remove(TIME_ELAPSED); statusMessages.put(DataImporter.MSG.TOTAL_DOC_PROCESSED, "" + importStatistics.docCount.get()); if (importStatistics.failedDocCount.get() > 0) statusMessages.put( DataImporter.MSG.TOTAL_FAILED_DOCS, "" + importStatistics.failedDocCount.get()); statusMessages.put("Time taken ", getTimeElapsedSince(startTime.get())); LOG.info("Time taken = " + getTimeElapsedSince(startTime.get())); }