  /* (non-Javadoc)
   * @see org.apache.hadoop.mapred.MapReduceBase#configure(org.apache.hadoop.mapred.JobConf)
   */
  public void configure(JobConf job) {
    iconf = new IndexUpdateConfiguration(job);

    analyzer = (Analyzer) ReflectionUtils.newInstance(
        iconf.getDocumentAnalyzerClass(), job);

    localAnalysis = (ILocalAnalysis) ReflectionUtils.newInstance(
        iconf.getLocalAnalysisClass(), job);
    localAnalysis.configure(job);

    shards = Shard.getIndexShards(iconf);

    distributionPolicy = (IDistributionPolicy) ReflectionUtils.newInstance(
        iconf.getDistributionPolicyClass(), job);
    distributionPolicy.init(shards);

    LOG.info("sea.document.analyzer = " + analyzer.getClass().getName());
    LOG.info("sea.local.analysis = " + localAnalysis.getClass().getName());
    LOG.info(shards.length + " shards = " + iconf.getIndexShards());
    LOG.info("sea.distribution.policy = "
        + distributionPolicy.getClass().getName());
  }
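  /*
   * For illustration only (not part of the original class): a minimal sketch
   * of an IDistributionPolicy like the one instantiated in configure() above,
   * assuming DocumentID exposes its identifier text via getText(). The class
   * name is hypothetical; Hadoop's shipped HashingDistributionPolicy follows
   * the same shape. The contract the map() method below relies on: map a
   * document id to a shard index in [0, numShards), or return a negative
   * value from chooseShardForDelete to request a broadcast delete.
   */
  static class ExampleHashingPolicy implements IDistributionPolicy {
    private int numShards;

    public void init(Shard[] shards) {
      numShards = shards.length;
    }

    public int chooseShardForInsert(DocumentID key) {
      // non-negative hash of the document id, modulo the shard count
      int hash = key.getText().hashCode();
      return (hash & Integer.MAX_VALUE) % numShards;
    }

    public int chooseShardForDelete(DocumentID key) {
      // same mapping as insert, so a delete is routed to the shard that
      // holds the document; returning -1 here would broadcast the delete
      return chooseShardForInsert(key);
    }
  }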
  /**
   * Map a key-value pair to a shard-and-intermediate-form pair. Internally,
   * the local analysis is first applied to map the key-value pair to a
   * document-id-and-operation pair; that pair is then mapped to a
   * shard-and-intermediate-form pair. The intermediate form consists of a
   * single-document RAM index and/or a single delete term.
   */
  public void map(K key, V value,
      OutputCollector<Shard, IntermediateForm> output, Reporter reporter)
      throws IOException {

    synchronized (this) {
      localAnalysis.map(key, value, tmpCollector, reporter);

      if (tmpKey != null && tmpValue != null) {
        DocumentAndOp doc = tmpValue;
        IntermediateForm form = new IntermediateForm();
        form.configure(iconf);
        form.process(doc, analyzer);
        form.closeWriter();

        if (doc.getOp() == DocumentAndOp.Op.INSERT) {
          int chosenShard = distributionPolicy.chooseShardForInsert(tmpKey);
          if (chosenShard >= 0) {
            // insert into one shard
            output.collect(shards[chosenShard], form);
          } else {
            throw new IOException("Chosen shard for insert must be >= 0");
          }

        } else if (doc.getOp() == DocumentAndOp.Op.DELETE) {
          int chosenShard = distributionPolicy.chooseShardForDelete(tmpKey);
          if (chosenShard >= 0) {
            // delete from one shard
            output.collect(shards[chosenShard], form);
          } else {
            // broadcast delete to all shards
            for (int i = 0; i < shards.length; i++) {
              output.collect(shards[i], form);
            }
          }

        } else { // UPDATE
          int insertToShard = distributionPolicy.chooseShardForInsert(tmpKey);
          int deleteFromShard =
              distributionPolicy.chooseShardForDelete(tmpKey);

          if (insertToShard >= 0) {
            if (insertToShard == deleteFromShard) {
              // update into one shard
              output.collect(shards[insertToShard], form);
            } else {
              // prepare a deletion form
              IntermediateForm deletionForm = new IntermediateForm();
              deletionForm.configure(iconf);
              deletionForm.process(
                  new DocumentAndOp(DocumentAndOp.Op.DELETE, doc.getTerm()),
                  analyzer);
              deletionForm.closeWriter();

              if (deleteFromShard >= 0) {
                // delete from one shard
                output.collect(shards[deleteFromShard], deletionForm);
              } else {
                // broadcast delete to all shards
                for (int i = 0; i < shards.length; i++) {
                  output.collect(shards[i], deletionForm);
                }
              }

              // prepare an insertion form
              IntermediateForm insertionForm = new IntermediateForm();
              insertionForm.configure(iconf);
              insertionForm.process(
                  new DocumentAndOp(DocumentAndOp.Op.INSERT, doc.getDocument()),
                  analyzer);
              insertionForm.closeWriter();

              // insert into one shard
              output.collect(shards[insertToShard], insertionForm);
            }
          } else {
            throw new IOException("Chosen shard for insert must be >= 0");
          }
        }
      }
    }
  }
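  /*
   * For illustration only: a minimal sketch of the first half of the pipeline
   * described in the Javadoc above - an ILocalAnalysis that turns a raw
   * (key, value) pair into a (DocumentID, DocumentAndOp) pair, which map()
   * then receives via tmpCollector as (tmpKey, tmpValue). This sketch treats
   * each input line as a new document to insert; the class name, the field
   * names ("id", "content"), the Lucene 2.x Field flags, and the usual
   * org.apache.hadoop.io / org.apache.lucene.document imports are assumptions,
   * not part of this class.
   */
  static class ExampleLocalAnalysis extends MapReduceBase
      implements ILocalAnalysis<LongWritable, Text> {

    public void map(LongWritable key, Text value,
        OutputCollector<DocumentID, DocumentAndOp> output, Reporter reporter)
        throws IOException {
      String id = Long.toString(key.get());

      // build the single document this insert operation carries
      Document doc = new Document();
      doc.add(new Field("id", id, Field.Store.YES, Field.Index.UN_TOKENIZED));
      doc.add(new Field("content", value.toString(), Field.Store.NO,
          Field.Index.TOKENIZED));

      // emit a docid-and-operation pair; an UPDATE would instead pair a
      // new Document with the delete Term of the old version
      DocumentID docID = new DocumentID();
      docID.getText().set(id);
      output.collect(docID, new DocumentAndOp(DocumentAndOp.Op.INSERT, doc));
    }
  }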
  /* (non-Javadoc)
   * @see org.apache.hadoop.mapred.MapReduceBase#close()
   */
  public void close() throws IOException {
    localAnalysis.close();
  }