@Override
  protected void map(LongWritable key, Text value, Mapper.Context context)
      throws IOException, InterruptedException {
    String line = value.toString();
    StringTokenizer tokenizer = new StringTokenizer(line, "\t");
    if (tokenizer.countTokens() == 4) { // u.data record
      tokenizer.nextToken();
      String itemid = tokenizer.nextToken();
      String rating = tokenizer.nextToken();
      item.set(itemid);
      fields.set(rating);
      context.write(item, fields);
    } else { // u.item record
      tokenizer = new StringTokenizer(line, "|");
      String itemid = tokenizer.nextToken();
      String title = tokenizer.nextToken();
      String release = tokenizer.nextToken();
      // the video-release-date field is usually empty, and StringTokenizer
      // skips empty tokens, so no extra nextToken() call is needed here
      String imdb = tokenizer.nextToken();
      fields.set(title + "\t" + release + "\t" + imdb);
      item.set(itemid);
      context.write(item, fields);
    }

    // TotalRecords counter
    Counter counter = context.getCounter("MyCounter", "TOTALRECORDS");
    counter.increment(1);
  }
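
A minimal driver sketch for wiring the join mapper above; the class names (JoinDriver, JoinMapper, JoinReducer) are assumptions, not from the source:

    Job job = Job.getInstance(new Configuration(), "movielens join");
    job.setJarByClass(JoinDriver.class);
    job.setMapperClass(JoinMapper.class);
    job.setReducerClass(JoinReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    // both u.data and u.item pass through the same mapper, which tells the
    // record types apart by delimiter and token count
    FileInputFormat.addInputPath(job, new Path("u.data"));
    FileInputFormat.addInputPath(job, new Path("u.item"));
    FileOutputFormat.setOutputPath(job, new Path("joined"));
    System.exit(job.waitForCompletion(true) ? 0 : 1);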
 @Override
 protected void setup(Mapper.Context context) throws IOException, InterruptedException {
   super.setup(context);
   logger.info("in setup of " + context.getTaskAttemptID().toString());
   String fileName = ((FileSplit) context.getInputSplit()).getPath().toString();
   System.out.println("in stdout " + context.getTaskAttemptID().toString() + " " + fileName);
   System.err.println("in stderr " + context.getTaskAttemptID().toString());
 }
 private BWAAlnInstance(Mapper.Context context, String bin)
     throws IOException, URISyntaxException {
   super(context, bin);
   taskId = context.getTaskAttemptID().toString();
   taskId = taskId.substring(taskId.indexOf("m_"));
   ref = HalvadeFileUtils.downloadBWAIndex(context, taskId);
   alnCustomArgs = HalvadeConf.getCustomArgs(context.getConfiguration(), "bwa", "aln");
 }
 private void flush(final Mapper.Context context) throws IOException, InterruptedException {
   for (final FaunusVertex vertex : this.map.values()) {
     this.longWritable.set(vertex.getIdAsLong());
     context.write(this.longWritable, vertex);
      context.getCounter(Counters.VERTICES_EMITTED).increment(1L);
   }
   this.map.clear();
   this.counter = 0;
 }
 @Test(expected = IOException.class)
 public final void testMapperForNullKeyValue() throws IOException, InterruptedException {
   Mapper.Context context = mock(Mapper.Context.class);
   Counters counters = new Counters();
   Counter counter = counters.findCounter(MergeRecordCounter.BAD_RECORD);
   when(context.getCounter(MergeRecordCounter.BAD_RECORD)).thenReturn(counter);
   MergeKeyMapper mapper = new MergeKeyMapper();
   Text val = new Text("valueOfKey");
   mapper.map(null, val, context);
 }
Example #6
 @Override
 public void setup(final Mapper.Context context) throws IOException, InterruptedException {
   final FileSystem fs = FileSystem.get(context.getConfiguration());
   try {
     this.engine.eval(
         new InputStreamReader(fs.open(new Path(context.getConfiguration().get(SCRIPT_PATH)))));
     this.engine.put(ARGS, context.getConfiguration().getStrings(SCRIPT_ARGS));
     this.engine.eval(SETUP_ARGS);
   } catch (Exception e) {
     throw new InterruptedException(e.getMessage());
   }
   this.outputs = new SafeMapperOutputs(context);
 }
 @Override
 public void setup(final Mapper.Context context) throws IOException, InterruptedException {
   this.map = new CounterMap<Object>();
   this.property = context.getConfiguration().get(PROPERTY);
   this.isVertex =
       context
           .getConfiguration()
           .getClass(CLASS, Element.class, Element.class)
           .equals(Vertex.class);
   this.handler =
       new WritableHandler(
           context.getConfiguration().getClass(TYPE, Text.class, WritableComparable.class));
   this.outputs = new SafeMapperOutputs(context);
 }
 public void map(LongWritable key, Text value, Mapper.Context context)
     throws IOException, InterruptedException {
   parser.parse(value);
   if (parser.isValidTemperature()) {
     int airTemperature = parser.getAirTemperature();
      // NCDC temperatures are recorded in tenths of a degree, so 1000 means 100°
      if (airTemperature > 1000) {
        System.err.println("Temperature over 100 degrees for input: " + value);
       context.setStatus("Detected possibly corrupt record: see logs.");
       context.getCounter(Temperature.OVER_100).increment(1);
     }
     LOG.info("Map key:" + key);
     if (LOG.isDebugEnabled()) {
       LOG.debug("Map value" + value);
     }
     context.write(new Text(parser.getYear()), new IntWritable(airTemperature));
   }
 }
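
Once the job completes, the driver can read the counter back; a minimal sketch, assuming the Temperature enum is visible to the driver class:

    if (job.waitForCompletion(true)) {
      long over100 = job.getCounters()
          .findCounter(Temperature.OVER_100).getValue();
      System.out.println("Possibly corrupt records: " + over100);
    }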
  @Override
  protected void cleanup(org.apache.hadoop.mapreduce.Mapper.Context context)
      throws IOException, InterruptedException {

    // tailSet(first()) is just the whole set, and first() throws
    // NoSuchElementException on an empty set, so iterate the TreeSet directly
    for (WordCount entry : this.sortValues) {
      context.write(entry.getKey(), entry.getValue());
    }
  }
 @Override
 public void setup(final Mapper.Context context) throws IOException, InterruptedException {
   this.isVertex =
       context
           .getConfiguration()
           .getClass(CLASS, Element.class, Element.class)
           .equals(Vertex.class);
 }
  @Override
  protected void setup(org.apache.hadoop.mapreduce.Mapper.Context context)
      throws IOException, InterruptedException {

    kMaxValues = context.getConfiguration().getInt("map.numberOfKWords", 10);

    this.sortValues = new TreeSet<WordCount>();
  }
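
The matching map() is not shown; a minimal sketch of how it presumably keeps only the top kMaxValues entries (the WordCount constructor and its ordering by count are assumptions):

   @Override
   protected void map(LongWritable key, Text value, Mapper.Context context)
       throws IOException, InterruptedException {
     // input lines assumed to be word<TAB>count pairs from a prior count job
     String[] parts = value.toString().split("\t");
     sortValues.add(new WordCount(new Text(parts[0]),
         new IntWritable(Integer.parseInt(parts[1]))));
     if (sortValues.size() > kMaxValues) {
       sortValues.pollFirst(); // evict the smallest entry, keeping the K largest
     }
   }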
Example #12
 @Override
 protected void map(LongWritable k1, Text v1, org.apache.hadoop.mapreduce.Mapper.Context context)
     throws IOException, InterruptedException {
    // naive split of an Apache access-log line on single spaces:
    // field 0 = client IP, field 3 = "[timestamp", field 6 = request path
    final String[] split = v1.toString().split(" ");
    String ip = split[0];
    String time = split[3].replace("[", "");
    String path = split[6];
    String v2 = "ip=" + ip + "|time=" + time + "|path=" + path;
   context.write(k1, new Text(v2));
 }
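
For example, given the common-log line (a made-up sample)

    127.0.0.1 - - [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326

the mapper emits the value ip=127.0.0.1|time=10/Oct/2000:13:55:36|path=/apache_pb.gif under the original byte-offset key.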
  @Test
  public final void testMapperValidValues() throws IOException, InterruptedException {
    Mapper.Context context = mock(Mapper.Context.class);
    Counters counters = new Counters();
    Counter counter = counters.findCounter(MergeRecordCounter.TOTAL_RECORDS_NEW);
    when(context.getCounter(MergeRecordCounter.TOTAL_RECORDS_NEW)).thenReturn(counter);

    MergeKeyMapper mapper = new MergeKeyMapper();
    Text key = new Text("abc123");
    Text val = new Text("valueOfKey");
    mapper.isOld = false;
    mapper.map(key, val, context);

    HihoValue hihoValue = new HihoValue();
    hihoValue.setVal(val);
    hihoValue.setIsOld(false);
    HihoTuple hihoTuple = new HihoTuple();
    hihoTuple.setKey(key);
    verify(context).write(hihoTuple, hihoValue);
    assertEquals(1, context.getCounter(MergeRecordCounter.TOTAL_RECORDS_NEW).getValue());
  }
  @Override
  protected void map(LongWritable key, Text lines, Mapper.Context context)
      throws IOException, InterruptedException {
    String line = lines.toString();
    String[] tokens = line.split(",");
    // YYYY = tokens[0]
    // MM = tokens[1]
    // count = tokens[2]
    String yearMonth = tokens[0] + "-" + tokens[1];
    int count = Integer.parseInt(tokens[2]);

    entry.setYearMonth(yearMonth);
    entry.setCount(count);
    value.set(tokens[2]);

    context.write(entry, value);
  }
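
Here entry is evidently a custom composite key; a minimal sketch of what such a WritableComparable might look like (the class name and ordering are assumptions, not from the source):

    public class YearMonthCountKey implements WritableComparable<YearMonthCountKey> {
      private String yearMonth; // e.g. "2023-07"
      private int count;

      public void setYearMonth(String yearMonth) { this.yearMonth = yearMonth; }
      public void setCount(int count) { this.count = count; }

      @Override
      public void write(DataOutput out) throws IOException {
        out.writeUTF(yearMonth);
        out.writeInt(count);
      }

      @Override
      public void readFields(DataInput in) throws IOException {
        yearMonth = in.readUTF();
        count = in.readInt();
      }

      @Override
      public int compareTo(YearMonthCountKey other) {
        int cmp = yearMonth.compareTo(other.yearMonth);
        return cmp != 0 ? cmp : Integer.compare(count, other.count); // secondary sort by count
      }
    }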
Example #15
 public MemoryMapContext(final Mapper.Context context) throws IOException, InterruptedException {
   super(
       context.getConfiguration(),
       context.getTaskAttemptID() == null ? new TaskAttemptID() : context.getTaskAttemptID(),
       null,
       null,
       context.getOutputCommitter(),
       null,
       context.getInputSplit());
   this.context = context;
   this.globalConfiguration = context.getConfiguration();
 }
Example #16
    @Override
    protected void map(LongWritable key, Text value, Mapper.Context context)
        throws IOException, InterruptedException {
      String document = value.toString();
      System.out.println("'" + document + "'");
      try {
        // parse with an explicit charset rather than the platform default
        XMLStreamReader reader =
            XMLInputFactory.newInstance()
                .createXMLStreamReader(
                    new ByteArrayInputStream(document.getBytes(StandardCharsets.UTF_8)));
        String propertyName = "";
        String propertyValue = "";
        String currentElement = "";
        while (reader.hasNext()) {
          int code = reader.next();
          switch (code) {
            case XMLStreamConstants.START_ELEMENT:
              currentElement = reader.getLocalName();
              break;
            case XMLStreamConstants.CHARACTERS:
              if (currentElement.equalsIgnoreCase("uid")) {
                propertyName += reader.getText().trim();
                System.out.println(propertyName);
              } else if (currentElement.equalsIgnoreCase("location")) {
                propertyValue += reader.getText().trim();
                System.out.println(propertyValue);

              } else if (currentElement.equalsIgnoreCase("age")) {
                propertyValue += ("," + reader.getText().trim());
                System.out.println(propertyValue);
              }
              break;
          }
        }
        reader.close();
        context.write(new Text(propertyName.trim()), new Text(propertyValue.trim()));

      } catch (Exception e) {
        throw new IOException(e);
      }
    }
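
For instance, given a record such as <user><uid>u42</uid><location>Oslo</location><age>31</age></user> (a made-up sample), this mapper emits the key u42 with the value Oslo,31.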
 protected void setup(org.apache.hadoop.mapreduce.Mapper.Context context)
     throws IOException, InterruptedException {
   // use an explicit charset rather than the platform default
   sourceColumn =
       ByteBuffer.wrap(
           context.getConfiguration().get(CONF_COLUMN_NAME).getBytes(StandardCharsets.UTF_8));
 }
 protected void setup(Mapper.Context context) throws IOException, InterruptedException {
   Configuration config = context.getConfiguration();
   this.caseSensitive = config.getBoolean("wordcount.case.sensitive", false);
 }
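
The flag comes straight from the job configuration, so the driver must set it before submission (or pass -Dwordcount.case.sensitive=true on the command line if the driver uses GenericOptionsParser); a minimal sketch:

    Configuration conf = new Configuration();
    conf.setBoolean("wordcount.case.sensitive", true); // mapper presumably skips lowercasing
    Job job = Job.getInstance(conf, "word count");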
 @Override
 protected void setup(Mapper.Context ctx) throws IOException, InterruptedException {
   maxSimilaritiesPerRow = ctx.getConfiguration().getInt(MAX_SIMILARITIES_PER_ROW, 0);
   Preconditions.checkArgument(
       maxSimilaritiesPerRow > 0, "Incorrect maximum number of similarities per row!");
 }
Example #20
  /**
   * execute the blat command and return a list of sequence ids that match
   *
   * @param seqDatabase is the key/value map of sequences that act as reference keyed by name
   * @param seqQueryFilepath is the path to the blast output results
   * @return a list of sequence ids in the reference that match the cazy database
   */
  public Set<String> exec(
      Map<String, String> seqDatabase, String seqQueryFilepath, Mapper.Context context)
      throws IOException, InterruptedException {

    /*
    first, read the query file (seqQueryFilepath) and find the corresponding
    sequences in seqDatabase: both the exact sequence id and its matching
    pair, which are written to a temporary file.
    */

    File seqQueryFile = null;

    log.info("Preparing Blat execution");
    if (context != null) context.setStatus("Preparing Blat execution");

    Map<String, String> l = new HashMap<String, String>();
    int numGroups = 0;
    int numReads = 0;

    /*
    open query file.
     */

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    for (Path filenamePath : MetaUtils.findAllPaths(new Path(seqQueryFilepath))) {
      if (!fs.exists(filenamePath)) {
        throw new IOException("file not found: " + seqQueryFilepath);
      }

      FSDataInputStream in = fs.open(filenamePath);
      BufferedReader bufRead = new BufferedReader(new InputStreamReader(in));

      /*
      Filter FileReader through a Buffered read to read a line at a time
      */

      String line = bufRead.readLine(); // String that holds current file line

      /*
      read the line into key/value with key being the first column, value is all the
      remaining columns
      */
      while (line != null) {
        numGroups++;
        String[] a = line.split("\t", 2);
        l.put(a[0], a[1]);
        numReads += a[1].split("\t").length;
        line = bufRead.readLine();
      }
      bufRead.close();
    }

    if (context != null)
      context.getCounter("blat.input", "NUMBER_OF_INPUT_READS").increment(numReads);
    if (context != null)
      context.getCounter("blat.input", "NUMBER_OF_INPUT_GROUPS").increment(numGroups);
    log.info("read " + numReads + " Reads in " + numGroups + " gene groups");

    /*
    now dump the database from the map to a file
     */
    String seqFilepath = dumpToFile(seqDatabase);

    if (seqFilepath == null) {
      /*
      return with fail
       */
      throw new IOException("unable to write " + seqDatabase + " to filesystem");
    }

    Map<String, Set<String>> s = new HashMap<String, Set<String>>();

    /*
    now loop through all the lines previously read in, write out a seqfile in temp directory
    then execute blat.
     */
    int numBlats = 0;
    int totalBlats = l.size();

    for (Map.Entry<String, String> groupEntry : l.entrySet()) {
      String k = groupEntry.getKey();
      numBlats++;

      /*
      k is a grouping key
       */

      log.info("processing group " + k);

      if (context != null) {
        context.setStatus("Executing Blat " + numBlats + "/" + totalBlats);
      }
      /*
      create a new file in temp directory
       */
      seqQueryFile = new File(tmpDirFile, "blatquery.fa");
      BufferedWriter out = new BufferedWriter(new FileWriter(seqQueryFile.getPath()));

      /*
      look up all the sequences and write them to the file.  include the paired ends
       */
      int queryCount = 0;
      for (String key : groupEntry.getValue().split("\t")) {

        if (paired) {

          /*
          for paired end data, look for both pairs
           */

          String key1 = key + "/1"; // forward
          String key2 = key + "/2"; // backward

          if (seqDatabase.containsKey(key1)) {
            queryCount++;
            out.write(">" + key1 + "\n");
            out.write(seqDatabase.get(key1) + "\n");
          }
          if (seqDatabase.containsKey(key2)) {
            queryCount++;
            out.write(">" + key2 + "\n");
            out.write(seqDatabase.get(key2) + "\n");
          }
        } else {

          /*
          if data is not paired, just look up key
           */

          if (seqDatabase.containsKey(key)) {
            queryCount++;
            out.write(">" + key + "\n");
            out.write(seqDatabase.get(key) + "\n");
          }
        }
      }
      /*
      close the temporary file
      */
      out.close();

      if (queryCount == 0) {
        /*
        none of these queries were in this portion of the database; no point
        executing blat, so skip this group
         */
        log.info("skipping blat: no query sequences found in this portion of the database");
        continue;
      }

      /*
      now set up a blat execution
       */
      List<String> commands = new ArrayList<String>();
      commands.add("/bin/sh");
      commands.add("-c");
      commands.add(
          commandPath
              + " "
              + commandLine
              + " "
              + seqFilepath
              + " "
              + seqQueryFile.getPath()
              + " "
              + tmpDirFile.getPath()
              + "/blat.output");

      log.info("command = " + commands);

      SystemCommandExecutor commandExecutor = new SystemCommandExecutor(commands);
      exitValue = commandExecutor.executeCommand();

      // stdout and stderr of the command are returned as StringBuilder objects
      stdout = commandExecutor.getStandardOutputFromCommand().toString();
      stderr = commandExecutor.getStandardErrorFromCommand().toString();

      log.debug("exit = " + exitValue);
      log.debug("stdout = " + stdout);
      log.debug("stderr = " + stderr);

      /*
      now parse the output and clean up
      */

      log.debug("reading outputfile: " + tmpDirFile.getPath() + "/blat.output");

      FileReader input = new FileReader(tmpDirFile.getPath() + "/blat.output");

      /*
      Filter FileReader through a Buffered read to read a line at a time
      */
      BufferedReader bufRead2 = new BufferedReader(input);

      String line2 = bufRead2.readLine(); // holds the current file line

      // read the output one line at a time, collecting the matched sequence ids
      while (line2 != null) {
        String[] a = line2.split("\t", 3);
        if (!s.containsKey(k)) {
          s.put(k, new HashSet<String>());
        }
        s.get(k).add(a[1]);
        line2 = bufRead2.readLine();
      }

      bufRead2.close();

      log.debug("done reading file");

      /*
      no explicit cleanup needed here: the temporary files are simply
      overwritten on each iteration
       */

    }

    if (context != null) context.setStatus("Postprocessing Blat output");
    /*
    post-processing: the results are returned in the format
    <groupid> <readid1> <readid2> <readid3> ...
    with one string per group.
     */

    log.info("Postprocessing Blat");
    log.info("  numGroups = " + s.keySet().size());

    Set<String> ss = new HashSet<String>();

    for (Map.Entry<String, Set<String>> group : s.entrySet()) {
      StringBuilder stringBuilder = new StringBuilder(group.getKey());
      for (String id : group.getValue()) {
        stringBuilder.append(", ").append(id);
      }
      ss.add(stringBuilder.toString());
    }

    return ss;
  }