@Override
protected void map(LongWritable key, Text value, Mapper.Context context)
    throws IOException, InterruptedException {
  String line = value.toString();
  StringTokenizer tokenizer = new StringTokenizer(line, "\t");
  if (tokenizer.countTokens() == 4) {
    // u.data record: userid \t itemid \t rating \t timestamp
    tokenizer.nextToken(); // skip userid
    String itemid = tokenizer.nextToken();
    String rating = tokenizer.nextToken();
    item.set(itemid);
    fields.set(rating);
    context.write(item, fields);
  } else {
    // u.item record: itemid|title|release date|video release date|IMDb URL|...
    tokenizer = new StringTokenizer(line, "|");
    String itemid = tokenizer.nextToken();
    String title = tokenizer.nextToken();
    String release = tokenizer.nextToken();
    // the video release date field is usually empty; StringTokenizer collapses
    // consecutive delimiters, so no extra nextToken() is needed to skip it
    String imdb = tokenizer.nextToken();
    fields.set(title + "\t" + release + "\t" + imdb);
    item.set(itemid);
    context.write(item, fields);
  }
  // TotalRecords counter
  Counter counter = context.getCounter("MyCounter", "TOTALRECORDS");
  counter.increment(1);
}
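// A minimal driver sketch showing how a join mapper like the one above could be
// wired into a job. The class names (MovieLensJoin, JoinMapper, JoinReducer) and
// the argument layout are hypothetical; the key/value types assume the Text
// item/fields pair emitted by the map() above.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MovieLensJoin {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "movielens join");
    job.setJarByClass(MovieLensJoin.class);
    job.setMapperClass(JoinMapper.class);    // the map() shown above
    job.setReducerClass(JoinReducer.class);  // merges ratings with item metadata per item id
    job.setOutputKeyClass(Text.class);       // item id
    job.setOutputValueClass(Text.class);     // rating, or title \t release \t imdb
    FileInputFormat.addInputPath(job, new Path(args[0]));  // directory holding u.data and u.item
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}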
@Override
protected void setup(Mapper.Context context) throws IOException, InterruptedException {
  super.setup(context);
  logger.info("in setup of " + context.getTaskAttemptID().toString());
  String fileName = ((FileSplit) context.getInputSplit()).getPath().toString();
  System.out.println("in stdout " + context.getTaskAttemptID().toString() + " " + fileName);
  System.err.println("in stderr " + context.getTaskAttemptID().toString());
}
private BWAAlnInstance(Mapper.Context context, String bin) throws IOException, URISyntaxException {
  super(context, bin);
  taskId = context.getTaskAttemptID().toString();
  taskId = taskId.substring(taskId.indexOf("m_")); // keep only the map-task portion of the attempt id
  ref = HalvadeFileUtils.downloadBWAIndex(context, taskId);
  alnCustomArgs = HalvadeConf.getCustomArgs(context.getConfiguration(), "bwa", "aln");
}
private void flush(final Mapper.Context context) throws IOException, InterruptedException {
  for (final FaunusVertex vertex : this.map.values()) {
    this.longWritable.set(vertex.getIdAsLong());
    context.write(this.longWritable, vertex);
    context.getCounter(Counters.VERTICES_EMITTED).increment(1L);
  }
  this.map.clear();
  this.counter = 0;
}
@Test(expected = IOException.class)
public final void testMapperForNullKeyValue() throws IOException, InterruptedException {
  Mapper.Context context = mock(Mapper.Context.class);
  Counters counters = new Counters();
  Counter counter = counters.findCounter(MergeRecordCounter.BAD_RECORD);
  when(context.getCounter(MergeRecordCounter.BAD_RECORD)).thenReturn(counter);
  MergeKeyMapper mapper = new MergeKeyMapper();
  Text val = new Text("valueOfKey");
  mapper.map(null, val, context); // a null key must raise the expected IOException
}
@Override
public void setup(final Mapper.Context context) throws IOException, InterruptedException {
  final FileSystem fs = FileSystem.get(context.getConfiguration());
  try {
    this.engine.eval(
        new InputStreamReader(fs.open(new Path(context.getConfiguration().get(SCRIPT_PATH)))));
    this.engine.put(ARGS, context.getConfiguration().getStrings(SCRIPT_ARGS));
    this.engine.eval(SETUP_ARGS);
  } catch (Exception e) {
    throw new InterruptedException(e.getMessage());
  }
  this.outputs = new SafeMapperOutputs(context);
}
@Override
public void setup(final Mapper.Context context) throws IOException, InterruptedException {
  this.map = new CounterMap<Object>();
  this.property = context.getConfiguration().get(PROPERTY);
  this.isVertex =
      context
          .getConfiguration()
          .getClass(CLASS, Element.class, Element.class)
          .equals(Vertex.class);
  this.handler =
      new WritableHandler(
          context.getConfiguration().getClass(TYPE, Text.class, WritableComparable.class));
  this.outputs = new SafeMapperOutputs(context);
}
public void map(LongWritable key, Text value, Mapper.Context context)
    throws IOException, InterruptedException {
  parser.parse(value);
  if (parser.isValidTemperature()) {
    int airTemperature = parser.getAirTemperature();
    // temperatures are recorded in tenths of a degree, so 1000 is the
    // 100-degree threshold tracked by Temperature.OVER_100
    if (airTemperature > 1000) {
      System.err.println("Temperature over 100 degrees for input: " + value);
      context.setStatus("Detected possibly corrupt record: see logs.");
      context.getCounter(Temperature.OVER_100).increment(1);
    }
    LOG.info("Map key: " + key);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Map value: " + value);
    }
    context.write(new Text(parser.getYear()), new IntWritable(airTemperature));
  }
}
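// Counters incremented in a mapper like the one above are typically read back in the
// driver once the job finishes. A minimal sketch, assuming `job` is the submitted
// org.apache.hadoop.mapreduce.Job and Temperature.OVER_100 is the enum from the map() above:
if (job.waitForCompletion(true)) {
  long overLimit = job.getCounters().findCounter(Temperature.OVER_100).getValue();
  System.out.println("Possibly corrupt records (over 100 degrees): " + overLimit);
}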
@Override
protected void cleanup(org.apache.hadoop.mapreduce.Mapper.Context context)
    throws IOException, InterruptedException {
  // tailSet(first()) is inclusive of the first element, so this emits
  // every buffered WordCount in sorted order
  for (WordCount entry : this.sortValues.tailSet(this.sortValues.first())) {
    context.write(entry.getKey(), entry.getValue());
  }
}
@Override
public void setup(final Mapper.Context context) throws IOException, InterruptedException {
  this.isVertex =
      context
          .getConfiguration()
          .getClass(CLASS, Element.class, Element.class)
          .equals(Vertex.class);
}
@Override
protected void setup(org.apache.hadoop.mapreduce.Mapper.Context context)
    throws IOException, InterruptedException {
  kMaxValues = context.getConfiguration().getInt("map.numberOfKWords", 10);
  this.sortValues = new TreeSet<WordCount>();
}
@Override
protected void map(LongWritable k1, Text v1, org.apache.hadoop.mapreduce.Mapper.Context context)
    throws IOException, InterruptedException {
  // parse a space-delimited access-log line: field 0 is the client IP,
  // field 3 the bracketed timestamp, field 6 the request path
  final String[] split = v1.toString().split(" ");
  String ip = split[0];
  String time = split[3].replace("[", "");
  String path = split[6];
  String v2 = "ip=" + ip + "|" + "time=" + time + "|" + "path=" + path;
  context.write(k1, new Text(v2));
}
@Test
public final void testMapperValidValues() throws IOException, InterruptedException {
  Mapper.Context context = mock(Mapper.Context.class);
  Counters counters = new Counters();
  Counter counter = counters.findCounter(MergeRecordCounter.TOTAL_RECORDS_NEW);
  when(context.getCounter(MergeRecordCounter.TOTAL_RECORDS_NEW)).thenReturn(counter);
  MergeKeyMapper mapper = new MergeKeyMapper();
  Text key = new Text("abc123");
  Text val = new Text("valueOfKey");
  mapper.isOld = false;
  mapper.map(key, val, context);

  HihoValue hihoValue = new HihoValue();
  hihoValue.setVal(val);
  hihoValue.setIsOld(false);
  HihoTuple hihoTuple = new HihoTuple();
  hihoTuple.setKey(key);
  verify(context).write(hihoTuple, hihoValue);
  assertEquals(1, context.getCounter(MergeRecordCounter.TOTAL_RECORDS_NEW).getValue());
}
@Override
protected void map(LongWritable key, Text lines, Mapper.Context context)
    throws IOException, InterruptedException {
  String line = lines.toString();
  String[] tokens = line.split(",");
  // tokens[0] = YYYY, tokens[1] = MM, tokens[2] = count
  String yearMonth = tokens[0] + "-" + tokens[1];
  int count = Integer.parseInt(tokens[2]);
  entry.setYearMonth(yearMonth);
  entry.setCount(count);
  value.set(tokens[2]);
  context.write(entry, value);
}
public MemoryMapContext(final Mapper.Context context) throws IOException, InterruptedException {
  super(
      context.getConfiguration(),
      context.getTaskAttemptID() == null ? new TaskAttemptID() : context.getTaskAttemptID(),
      null,
      null,
      context.getOutputCommitter(),
      null,
      context.getInputSplit());
  this.context = context;
  this.globalConfiguration = context.getConfiguration();
}
@Override
protected void map(LongWritable key, Text value, Mapper.Context context)
    throws IOException, InterruptedException {
  String document = value.toString();
  System.out.println("'" + document + "'");
  try {
    XMLStreamReader reader =
        XMLInputFactory.newInstance()
            .createXMLStreamReader(new ByteArrayInputStream(document.getBytes()));
    String propertyName = "";
    String propertyValue = "";
    String currentElement = "";
    while (reader.hasNext()) {
      int code = reader.next();
      switch (code) {
        case XMLStreamConstants.START_ELEMENT:
          currentElement = reader.getLocalName();
          break;
        case XMLStreamConstants.CHARACTERS:
          if (currentElement.equalsIgnoreCase("uid")) {
            propertyName += reader.getText().trim();
            System.out.println(propertyName);
          } else if (currentElement.equalsIgnoreCase("location")) {
            propertyValue += reader.getText().trim();
            System.out.println(propertyValue);
          } else if (currentElement.equalsIgnoreCase("age")) {
            propertyValue += ("," + reader.getText().trim());
            System.out.println(propertyValue);
          }
          break;
      }
    }
    reader.close();
    context.write(new Text(propertyName.trim()), new Text(propertyValue.trim()));
  } catch (Exception e) {
    throw new IOException(e);
  }
}
protected void setup(org.apache.hadoop.mapreduce.Mapper.Context context)
    throws IOException, InterruptedException {
  sourceColumn = ByteBuffer.wrap(context.getConfiguration().get(CONF_COLUMN_NAME).getBytes());
}
protected void setup(Mapper.Context context) throws IOException, InterruptedException {
  Configuration config = context.getConfiguration();
  this.caseSensitive = config.getBoolean("wordcount.case.sensitive", false);
}
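// The wordcount.case.sensitive flag read in the setup() above has to be placed in the
// job Configuration before submission. A driver-side sketch; the job name is an
// assumption. If the driver runs through ToolRunner, the flag can also be supplied
// on the command line as -Dwordcount.case.sensitive=true.
Configuration conf = new Configuration();
conf.setBoolean("wordcount.case.sensitive", true); // picked up by the mapper's setup()
Job job = Job.getInstance(conf, "word count");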
@Override
protected void setup(Mapper.Context ctx) throws IOException, InterruptedException {
  maxSimilaritiesPerRow = ctx.getConfiguration().getInt(MAX_SIMILARITIES_PER_ROW, 0);
  Preconditions.checkArgument(
      maxSimilaritiesPerRow > 0, "Incorrect maximum number of similarities per row!");
}
/**
 * Execute the blat command and return a list of sequence ids that match.
 *
 * @param seqDatabase the key/value map of sequences that act as reference, keyed by name
 * @param seqQueryFilepath the path to the blast output results
 * @return a list of sequence ids in the reference that match the cazy database
 */
public Set<String> exec(
    Map<String, String> seqDatabase, String seqQueryFilepath, Mapper.Context context)
    throws IOException, InterruptedException {
  /*
   First, take the blatInputFile and find the corresponding sequence in the seqMap.
   Find both the exact sequence id and its matching pair, and write them to a temporary file.
  */
  File seqQueryFile = null;
  log.info("Preparing Blat execution");
  if (context != null) context.setStatus("Preparing Blat execution");
  Map<String, String> l = new HashMap<String, String>();
  int numGroups = 0;
  int numReads = 0;

  /* open the query file and read it a line at a time: the key is the first
     column, the value is all the remaining (tab-separated) columns */
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  for (Path filenamePath : MetaUtils.findAllPaths(new Path(seqQueryFilepath))) {
    if (!fs.exists(filenamePath)) {
      throw new IOException("file not found: " + seqQueryFilepath);
    }
    FSDataInputStream in = fs.open(filenamePath);
    BufferedReader bufRead = new BufferedReader(new InputStreamReader(in));
    String line = bufRead.readLine();
    while (line != null) {
      numGroups++;
      String[] a = line.split("\t", 2);
      l.put(a[0], a[1]);
      numReads += a[1].split("\t").length;
      line = bufRead.readLine();
    }
    bufRead.close();
  }
  if (context != null) context.getCounter("blat.input", "NUMBER_OF_INPUT_READS").increment(numReads);
  if (context != null) context.getCounter("blat.input", "NUMBER_OF_INPUT_GROUPS").increment(numGroups);
  log.info("read " + numReads + " reads in " + numGroups + " gene groups");

  /* now dump the database from the map to a file */
  String seqFilepath = dumpToFile(seqDatabase);
  if (seqFilepath == null) {
    throw new IOException("unable to write " + seqDatabase + " to filesystem");
  }

  Map<String, Set<String>> s = new HashMap<String, Set<String>>();

  /* loop through all the groups read above, write a query file to the temp
     directory, then execute blat against it */
  int numBlats = 0;
  int totalBlats = l.size();
  for (String k : l.keySet()) {
    numBlats++;
    log.info("processing group " + k);
    if (context != null) {
      context.setStatus("Executing Blat " + numBlats + "/" + totalBlats);
    }

    /* create a new file in the temp directory */
    seqQueryFile = new File(tmpDirFile, "blatquery.fa");
    BufferedWriter out = new BufferedWriter(new FileWriter(seqQueryFile.getPath()));

    /* look up all the sequences and write them to the file, including the paired ends */
    int queryCount = 0;
    for (String key : l.get(k).split("\t")) {
      if (paired) {
        /* for paired-end data, look for both pairs */
        String key1 = key + "/1"; // forward
        String key2 = key + "/2"; // backward
        if (seqDatabase.containsKey(key1)) {
          queryCount++;
          out.write(">" + key1 + "\n");
          out.write(seqDatabase.get(key1) + "\n");
        }
        if (seqDatabase.containsKey(key2)) {
          queryCount++;
          out.write(">" + key2 + "\n");
          out.write(seqDatabase.get(key2) + "\n");
        }
      } else {
        /* if data is not paired, just look up the key */
        if (seqDatabase.containsKey(key)) {
          queryCount++;
          out.write(">" + key + "\n");
          out.write(seqDatabase.get(key) + "\n");
        }
      }
    }
    /* close the temporary file */
    out.close();

    if (queryCount == 0) {
      /* none of these queries were in this portion of the database,
         so there is no point executing blat */
      log.info("skipping blat since no query sequences were found in this database");
      continue;
    }

    /* now set up a blat execution */
    List<String> commands = new ArrayList<String>();
    commands.add("/bin/sh");
    commands.add("-c");
    commands.add(commandPath + " " + commandLine + " " + seqFilepath + " "
        + seqQueryFile.getPath() + " " + tmpDirFile.getPath() + "/blat.output");
    log.info("command = " + commands);

    SystemCommandExecutor commandExecutor = new SystemCommandExecutor(commands);
    exitValue = commandExecutor.executeCommand();

    /* stdout and stderr of the command are returned as StringBuilder objects */
    stdout = commandExecutor.getStandardOutputFromCommand().toString();
    stderr = commandExecutor.getStandardErrorFromCommand().toString();
    log.debug("exit = " + exitValue);
    log.debug("stdout = " + stdout);
    log.debug("stderr = " + stderr);

    /* parse the output: column 1 of each tab-separated line is a matching sequence id */
    log.debug("reading outputfile: " + tmpDirFile.getPath() + "/blat.output");
    BufferedReader bufRead2 =
        new BufferedReader(new FileReader(tmpDirFile.getPath() + "/blat.output"));
    String line2 = bufRead2.readLine();
    while (line2 != null) {
      String[] a = line2.split("\t", 3);
      if (!s.containsKey(k)) {
        s.put(k, new HashSet<String>());
      }
      s.get(k).add(a[1]);
      line2 = bufRead2.readLine();
    }
    bufRead2.close();
    log.debug("done reading file");
    /* no explicit cleanup needed: the temporary files are overwritten on the next iteration */
  }

  if (context != null) context.setStatus("Postprocessing Blat output");

  /* post-processing: return one string per group, in the format
     <groupid>, <readid1>, <readid2>, ... */
  log.info("Postprocessing Blat");
  log.info(" numGroups = " + s.keySet().size());
  Set<String> ss = new HashSet<String>();
  for (String k : s.keySet()) {
    StringBuilder stringBuilder = new StringBuilder();
    for (String readId : s.get(k)) {
      stringBuilder.append(", ").append(readId);
    }
    ss.add(k + stringBuilder);
  }
  return ss;
}