/** * This method is used by the index update mapper and process a document operation into the * current intermediate form. * * @param doc input document operation * @param analyzer the analyzer * @throws IOException */ public void process(DocumentAndOp doc, Analyzer analyzer) throws IOException { if (doc.getOp() == DocumentAndOp.Op.DELETE || doc.getOp() == DocumentAndOp.Op.UPDATE) { deleteList.add(doc.getTerm()); } if (doc.getOp() == DocumentAndOp.Op.INSERT || doc.getOp() == DocumentAndOp.Op.UPDATE) { if (writer == null) { // analyzer is null because we specify an analyzer with addDocument writer = createWriter(); } writer.addDocument(doc.getDocument(), analyzer); numDocs++; } }
/** * Map a key-value pair to a shard-and-intermediate form pair. Internally, the local analysis is * first applied to map the key-value pair to a document id-and-operation pair, then the * docid-and-operation pair is mapped to a shard-intermediate form pair. The intermediate form is * of the form of a single-document ram index and/or a single delete term. */ public void map( K key, V value, OutputCollector<Shard, IntermediateForm> output, Reporter reporter) throws IOException { synchronized (this) { localAnalysis.map(key, value, tmpCollector, reporter); if (tmpKey != null && tmpValue != null) { DocumentAndOp doc = tmpValue; IntermediateForm form = new IntermediateForm(); form.configure(iconf); form.process(doc, analyzer); form.closeWriter(); if (doc.getOp() == DocumentAndOp.Op.INSERT) { int chosenShard = distributionPolicy.chooseShardForInsert(tmpKey); if (chosenShard >= 0) { // insert into one shard output.collect(shards[chosenShard], form); } else { throw new IOException("Chosen shard for insert must be >= 0"); } } else if (doc.getOp() == DocumentAndOp.Op.DELETE) { int chosenShard = distributionPolicy.chooseShardForDelete(tmpKey); if (chosenShard >= 0) { // delete from one shard output.collect(shards[chosenShard], form); } else { // broadcast delete to all shards for (int i = 0; i < shards.length; i++) { output.collect(shards[i], form); } } } else { // UPDATE int insertToShard = distributionPolicy.chooseShardForInsert(tmpKey); int deleteFromShard = distributionPolicy.chooseShardForDelete(tmpKey); if (insertToShard >= 0) { if (insertToShard == deleteFromShard) { // update into one shard output.collect(shards[insertToShard], form); } else { // prepare a deletion form IntermediateForm deletionForm = new IntermediateForm(); deletionForm.configure(iconf); deletionForm.process( new DocumentAndOp(DocumentAndOp.Op.DELETE, doc.getTerm()), analyzer); deletionForm.closeWriter(); if (deleteFromShard >= 0) { // delete from one shard output.collect(shards[deleteFromShard], deletionForm); } else { // broadcast delete to all shards for (int i = 0; i < shards.length; i++) { output.collect(shards[i], deletionForm); } } // prepare an insertion form IntermediateForm insertionForm = new IntermediateForm(); insertionForm.configure(iconf); insertionForm.process( new DocumentAndOp(DocumentAndOp.Op.INSERT, doc.getDocument()), analyzer); insertionForm.closeWriter(); // insert into one shard output.collect(shards[insertToShard], insertionForm); } } else { throw new IOException("Chosen shard for insert must be >= 0"); } } } } }