/**
 * Adds a value to the named field of a document, taking into account whether the field is
 * multi-valued.
 *
 * <p>Null values, and null elements of a collection, are never added. For a single-valued field
 * nothing is added if {@code doc.getField(name)} already returns a non-null field; when the value
 * is a collection, only its first non-null element is added.
 *
 * @param value the value to add; may be a single object, a {@link Collection} of objects, or null
 * @param name the name of the field
 * @param boost the boost handed to {@code doc.addField} together with each value
 * @param multiValued whether more than one value may be added for this field
 * @param doc the document that receives the field value(s)
 */
private void addFieldToDoc(
    Object value, String name, float boost, boolean multiValued, DocWrapper doc) {
  // Plain (non-collection) value, including null.
  if (!(value instanceof Collection)) {
    if (multiValued) {
      if (value != null) {
        doc.addField(name, value, boost);
      }
    } else if (doc.getField(name) == null && value != null) {
      doc.addField(name, value, boost);
    }
    return;
  }

  Collection<?> elements = (Collection<?>) value;

  // Multi-valued field: every non-null element is added.
  if (multiValued) {
    for (Object element : elements) {
      if (element != null) {
        doc.addField(name, element, boost);
      }
    }
    return;
  }

  // Single-valued field: keep an existing value, otherwise take the first non-null element.
  if (doc.getField(name) != null) {
    return;
  }
  for (Object element : elements) {
    if (element != null) {
      doc.addField(name, element, boost);
      return;
    }
  }
}
/**
 * Acts on the special {@code $}-prefixed keys that a row may carry. The keys are examined in this
 * order:
 *
 * <ul>
 *   <li>{@code $deleteDocById}: each value is passed to {@code writer.deleteDoc}; elements of a
 *       collection are converted with {@code toString()}, a single value is passed as is.
 *   <li>{@code $deleteDocByQuery}: each value, converted with {@code toString()}, is passed to
 *       {@code writer.deleteByQuery}.
 *   <li>{@code $docBoost}: a {@link Number} or a string parsable as a float; it is set as the
 *       document boost.
 *   <li>{@code $skipDoc}: if it parses as boolean {@code true}, a {@code SKIP} exception is
 *       thrown.
 *   <li>{@code $skipRow}: if it parses as boolean {@code true}, a {@code SKIP_ROW} exception is
 *       thrown.
 * </ul>
 *
 * @param arow the row whose special keys are inspected
 * @param doc the document whose boost is set when {@code $docBoost} is present
 * @throws DataImportHandlerException with code {@code SKIP} or {@code SKIP_ROW} when the row asks
 *     for the document or the row to be skipped
 */
private void handleSpecialCommands(Map<String, Object> arow, DocWrapper doc) {
  Object idsToDelete = arow.get("$deleteDocById");
  if (idsToDelete instanceof Collection) {
    for (Object id : (Collection<?>) idsToDelete) {
      writer.deleteDoc(id.toString());
    }
  } else if (idsToDelete != null) {
    // A single id is handed over unchanged, not as a string.
    writer.deleteDoc(idsToDelete);
  }

  Object queriesToDelete = arow.get("$deleteDocByQuery");
  if (queriesToDelete instanceof Collection) {
    for (Object query : (Collection<?>) queriesToDelete) {
      writer.deleteByQuery(query.toString());
    }
  } else if (queriesToDelete != null) {
    writer.deleteByQuery(queriesToDelete.toString());
  }

  Object docBoost = arow.get("$docBoost");
  if (docBoost != null) {
    float boost =
        docBoost instanceof Number
            ? ((Number) docBoost).floatValue()
            : Float.parseFloat(docBoost.toString());
    doc.setDocumentBoost(boost);
  }

  Object skipDoc = arow.get("$skipDoc");
  if (skipDoc != null && Boolean.parseBoolean(skipDoc.toString())) {
    throw new DataImportHandlerException(
        DataImportHandlerException.SKIP, "Document skipped :" + arow);
  }

  Object skipRow = arow.get("$skipRow");
  if (skipRow != null && Boolean.parseBoolean(skipRow.toString())) {
    throw new DataImportHandlerException(DataImportHandlerException.SKIP_ROW);
  }
}
@SuppressWarnings("unchecked") private void buildDocument( VariableResolverImpl vr, DocWrapper doc, Map<String, Object> pk, DataConfig.Entity entity, boolean isRoot, ContextImpl parentCtx) { EntityProcessorWrapper entityProcessor = getEntityProcessor(entity); ContextImpl ctx = new ContextImpl( entity, vr, null, pk == null ? Context.FULL_DUMP : Context.DELTA_DUMP, session, parentCtx, this); entityProcessor.init(ctx); Context.CURRENT_CONTEXT.set(ctx); if (requestParameters.start > 0) { writer.log(SolrWriter.DISABLE_LOGGING, null, null); } if (verboseDebug) { writer.log(SolrWriter.START_ENTITY, entity.name, null); } int seenDocCount = 0; try { while (true) { if (stop.get()) return; if (importStatistics.docCount.get() > (requestParameters.start + requestParameters.rows)) break; try { seenDocCount++; if (seenDocCount > requestParameters.start) { writer.log(SolrWriter.ENABLE_LOGGING, null, null); } if (verboseDebug && entity.isDocRoot) { writer.log(SolrWriter.START_DOC, entity.name, null); } if (doc == null && entity.isDocRoot) { doc = new DocWrapper(); ctx.setDoc(doc); DataConfig.Entity e = entity; while (e.parentEntity != null) { addFields( e.parentEntity, doc, (Map<String, Object>) vr.resolve(e.parentEntity.name), vr); e = e.parentEntity; } } Map<String, Object> arow = entityProcessor.nextRow(); if (arow == null) { break; } // Support for start parameter in debug mode if (entity.isDocRoot) { if (seenDocCount <= requestParameters.start) continue; if (seenDocCount > requestParameters.start + requestParameters.rows) { LOG.info("Indexing stopped at docCount = " + importStatistics.docCount); break; } } if (verboseDebug) { writer.log(SolrWriter.ENTITY_OUT, entity.name, arow); } importStatistics.rowsCount.incrementAndGet(); if (doc != null) { handleSpecialCommands(arow, doc); addFields(entity, doc, arow, vr); } if (entity.entities != null) { vr.addNamespace(entity.name, arow); for (DataConfig.Entity child : entity.entities) { buildDocument(vr, doc, child.isDocRoot ? 
pk : null, child, false, ctx); } vr.removeNamespace(entity.name); } /*The child entities would have changed the CURRENT_CONTEXT. So when they are done, set it back to the old. * */ Context.CURRENT_CONTEXT.set(ctx); if (entity.isDocRoot) { if (stop.get()) return; if (!doc.isEmpty()) { boolean result = writer.upload(doc); doc = null; if (result) { importStatistics.docCount.incrementAndGet(); } else { importStatistics.failedDocCount.incrementAndGet(); } } } } catch (DataImportHandlerException e) { if (verboseDebug) { writer.log(SolrWriter.ENTITY_EXCEPTION, entity.name, e); } if (e.getErrCode() == DataImportHandlerException.SKIP_ROW) { continue; } if (isRoot) { if (e.getErrCode() == DataImportHandlerException.SKIP) { importStatistics.skipDocCount.getAndIncrement(); doc = null; } else { LOG.error("Exception while processing: " + entity.name + " document : " + doc, e); } if (e.getErrCode() == DataImportHandlerException.SEVERE) throw e; } else throw e; } catch (Throwable t) { if (verboseDebug) { writer.log(SolrWriter.ENTITY_EXCEPTION, entity.name, t); } throw new DataImportHandlerException(DataImportHandlerException.SEVERE, t); } finally { if (verboseDebug) { writer.log(SolrWriter.ROW_END, entity.name, null); if (entity.isDocRoot) writer.log(SolrWriter.END_DOC, null, null); Context.CURRENT_CONTEXT.remove(); } } } } finally { if (verboseDebug) { writer.log(SolrWriter.END_ENTITY, null, null); } entityProcessor.destroy(); } }
private void runAThread(ThreadedEntityProcessorWrapper epw, EntityRow rows, String currProcess) throws Exception { currentEntityProcWrapper.set(epw); epw.threadedInit(context); initEntity(); try { epw.init(rows); DocWrapper docWrapper = this.docWrapper; Context.CURRENT_CONTEXT.set(context); for (; ; ) { if (DocBuilder.this.stop.get()) break; try { Map<String, Object> arow = epw.nextRow(); if (arow == null) { break; } else { importStatistics.rowsCount.incrementAndGet(); if (docWrapper == null && entity.isDocRoot) { docWrapper = new DocWrapper(); context.setDoc(docWrapper); DataConfig.Entity e = entity.parentEntity; for (EntityRow row = rows; row != null && e != null; row = row.tail, e = e.parentEntity) { addFields(e, docWrapper, row.row, epw.resolver); } } if (docWrapper != null) { handleSpecialCommands(arow, docWrapper); addFields(entity, docWrapper, arow, epw.resolver); } if (entity.entities != null) { EntityRow nextRow = new EntityRow(arow, rows, entity.name); for (DataConfig.Entity e : entity.entities) { epw.children.get(e).run(docWrapper, currProcess, nextRow); } } } if (entity.isDocRoot) { LOG.info("a row on docroot" + docWrapper); if (!docWrapper.isEmpty()) { LOG.info("adding a doc " + docWrapper); boolean result = writer.upload(docWrapper); docWrapper = null; if (result) { importStatistics.docCount.incrementAndGet(); } else { importStatistics.failedDocCount.incrementAndGet(); } } } } catch (DataImportHandlerException dihe) { exception = dihe; if (dihe.getErrCode() == SKIP_ROW || dihe.getErrCode() == SKIP) { importStatistics.skipDocCount.getAndIncrement(); exception = null; // should not propogate up continue; } if (entity.isDocRoot) { if (dihe.getErrCode() == DataImportHandlerException.SKIP) { importStatistics.skipDocCount.getAndIncrement(); exception = null; // should not propogate up } else { LOG.error( "Exception while processing: " + entity.name + " document : " + docWrapper, dihe); } if (dihe.getErrCode() == DataImportHandlerException.SEVERE) throw 
dihe; } else { // if this is not the docRoot then the execution has happened in the same thread. so // propogate up, // it will be handled at the docroot entityEnded.set(true); throw dihe; } entityEnded.set(true); } } } finally { epw.destroy(); currentEntityProcWrapper.remove(); Context.CURRENT_CONTEXT.remove(); } }