public void map(LongWritable key, Text value, Context context)
     throws IOException, InterruptedException {
   String cur_file =
       ((FileSplit) context.getInputSplit()).getPath().getParent().getParent().getName();
   String train_file = context.getConfiguration().get("train_file");
   if (cur_file.equals(train_file)) {
     StringTokenizer st = new StringTokenizer(value.toString());
     String word = st.nextToken();
     String f_id = st.nextToken();
     myKey.set(word);
     myVal.set(f_id);
     context.write(myKey, myVal);
   } else {
     StringTokenizer st = new StringTokenizer(value.toString());
     String word = st.nextToken();
     String f_id = st.nextToken();
     StringBuilder builder = new StringBuilder(dlt);
     while (st.hasMoreTokens()) {
       String filename = st.nextToken();
       String tf_idf = st.nextToken();
       builder.append(filename);
       builder.append(dlt);
       builder.append(tf_idf);
       builder.append("\t");
     }
     myKey.set(word);
     myVal.set(builder.toString());
     context.write(myKey, myVal);
   }
 }
예제 #2
0
 public void map(LongWritable key, Text value, Context context)
     throws IOException, InterruptedException {
   String line = value.toString();
   StringTokenizer tokenizer = new StringTokenizer(line);
   while (tokenizer.hasMoreTokens()) {
     word.set(tokenizer.nextToken());
     if (word.toString().length() == 7) {
       context.write(word, one);
     }
   }
 }
예제 #3
0
    public void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
      int sum = 0;
      boolean filtered = true;
      String keypair = key.toString();
      String[] keys = keypair.split(" ");
      if (keys.length == 1) {
        filtered = false;
      } else {
        if (keys[0].equals("*") || keys[1].equals("*")) {
          filtered = false;
        }
      }

      if (!filtered) {
        for (IntWritable val : values) {
          sum += val.get();
        }
        context.write(key, new IntWritable(sum));
        return;
      }

      for (IntWritable val : values) {
        if (val.get() == -1) {
          filtered = false;
          continue;
        }
        sum += val.get();
      }
      // filter non-needed events
      if (filtered) return;
      context.write(key, new IntWritable(sum));
    }
  public void map(
      LongWritable key, Text value, OutputCollector<LongWritable, Text> output, Reporter reporter)
      throws IOException {
    String line = value.toString();

    if (validate(line, reporter)) output.collect(key, value);
  }
  @Override
  protected void reduce(Text key, Iterable<Text> vals, Context ctx) {
    String name = null;
    String year = null;

    for (Text xx : vals) {
      String[] parts = xx.toString().split("=");

      if (parts[0].equals("name")) {
        name = parts[1];
      }

      if (parts[0].equals("year")) {
        year = parts[1];
      }
    }

    try {
      if (name != null && year != null) {
        ctx.write(new Text(year), new Text(name));
      }
    } catch (Exception ee) {
      ee.printStackTrace(System.err);
      throw new Error("I give up");
    }
  }
예제 #6
0
    // specify input and out keys
    public void map(
        LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
      String line = value.toString(); // define new variable to be string

      ArrayList<Integer> range = new ArrayList<Integer>();
      for (int i = 2000; i <= 2010; i++) {
        range.add(i);
      }

      // String[] inputs = line.split(",(?=([^\"]*\"[^\"]*\")*[^\"]*$)");
      String[] inputs = line.split(",");

      try {

        int year = Integer.parseInt(inputs[165]);

        if (range.contains(year)) {
          String dur = inputs[3];
          String artist_name = inputs[2];
          String song_title = inputs[1];
          String final_input = artist_name + ',' + dur + ',' + song_title;
          Final_Value.set(final_input);
          output.collect(Final_Value, dummy);
        }
      } catch (NumberFormatException e) {
        // do nothing
      }
    }
예제 #7
0
파일: Fill.java 프로젝트: uzielgl/hadoop
    @Override
    public void reduce(
        Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {

      String line = "";
      String header = "";
      TreeMap<String, String> ciudades = new TreeMap<String, String>();

      // Obtenemos los datos y los metemos a un treemap para que los ordene por ciudad
      while (values.hasNext()) {
        String[] tmp = values.next().toString().split(",");
        String ciudad = tmp[0];
        String mes = tmp[1];
        String temperatura = tmp[2];
        String fecha = tmp[3];

        ciudades.put(ciudad, tmp[1] + "," + tmp[2] + "," + tmp[3]);
      }

      // Recorremos las ciudades y vamos imprimiendo
      for (String ciudad : ciudades.keySet()) {
        header += ciudad + ",,";
        String[] temporal = ciudades.get(ciudad).split(",");
        line += temporal[2] + "," + temporal[1] + ",";
      }

      if (c == 0) { // Imprimimos cabezera
        output.collect(new Text("Año,"), new Text(header));
        c++;
      }

      output.collect(new Text(key.toString() + ","), new Text(line));
    }
  public void map(
      LongWritable key,
      Text value,
      OutputCollector<IntWritable, DoubleWritable> output,
      Reporter reporter)
      throws IOException {
    /*
     * It implements the mapper. It outputs the numbers of weight and updated weights.
     *
     * Note that the format of intermediate output is <IntWritable, DoubleWritable>,
     * because the key is the number of weight (an integer), and the value is the weight's value (double)
     */
    inputData = value.toString();

    // go through the process
    initialize();
    getposphase();
    getnegphase();
    update();

    // output the intermediate data
    // The <key, value> pairs are <weightID, weightUpdate>
    double[][] vishidinc_array = vishidinc.getArray();
    for (int i = 0; i < numdims; i++) {
      for (int j = 0; j < numhid; j++) {
        weightPos.set(i * numhid + j);
        weightValue.set(vishidinc_array[i][j]);
        output.collect(weightPos, weightValue);
      }
    }
  }
예제 #9
0
    @Override
    protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      String line = value.toString();
      StringTokenizer tokenizer = new StringTokenizer(line, " ");
      int dim = tokenizer.countTokens();
      double[] coords = new double[dim];
      for (int i = 0; i < dim; i++) {
        coords[i] = Double.valueOf(tokenizer.nextToken());
      }
      Point point = new Point(coords);

      Centroid nearest = null;
      double nearestDistance = Double.MAX_VALUE;
      for (Centroid c : centers) {
        double dist = point.euclideanDistance(c);
        if (nearest == null) {
          nearest = c;
          nearestDistance = dist;
        } else {
          if (dist < nearestDistance) {
            nearest = c;
            nearestDistance = dist;
          }
        }
      }
      context.write(nearest, point);
    }
예제 #10
0
 // Identity
 public void reduce(Text key, Iterable<Text> values, Context context)
     throws IOException, InterruptedException {
   System.out.println("I'm in Job1 reduce");
   for (Text val : values) {
     System.out.println("job1:redInp:-" + val.toString());
     context.write(new Text(""), val);
   }
 }
예제 #11
0
파일: Sort.java 프로젝트: robbyzhang/hadoop
    public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      String line = value.toString();
      String[] arr = line.split(",");

      word.set(arr[0]);
      context.write(word, one);
    }
예제 #12
0
    public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      System.out.println("HELLO" + key + value);

      String line = value.toString();
      String[] components = line.split("\\s+");
      context.write(new Text(components[0]), new Text(components[1]));
    }
예제 #13
0
 @Test
 public void get() throws Exception {
   // vv MapFileSeekTest
   Text value = new Text();
   reader.get(new IntWritable(496), value);
   assertThat(value.toString(), is("One, two, buckle my shoe"));
   // ^^ MapFileSeekTest
 }
예제 #14
0
 public void map(LongWritable key, Text value, OutputCollector output, Reporter reporter)
     throws IOException {
   String line = value.toString();
   StringTokenizer tokenizer = new StringTokenizer(line);
   while (tokenizer.hasMoreTokens()) {
     word.set(tokenizer.nextToken());
     output.collect(word, one);
   }
 }
예제 #15
0
 public void map(LongWritable key, Text value, Context context)
     throws IOException, InterruptedException {
   Text word = new Text();
   StringTokenizer s = new StringTokenizer(value.toString());
   while (s.hasMoreTokens()) {
     word.set(s.nextToken());
     context.write(word, one);
   }
 }
 public void map(LongWritable key, Text value, Context context)
     throws IOException, InterruptedException {
   String line = value.toString();
   String[] currentUserTuples = line.split("::");
   int currentAgeValue = Integer.parseInt(currentUserTuples[2].trim());
   if (currentUserTuples[1].trim().equalsIgnoreCase("M") && currentAgeValue <= targetAge) {
     context.write(new Text("UserId :: " + currentUserTuples[0].trim()), one);
   }
 }
예제 #17
0
    public void reduce(
        Text key,
        Iterator<CrawlDatum> values,
        OutputCollector<Text, CrawlDatum> output,
        Reporter reporter)
        throws IOException {
      boolean oldSet = false;
      boolean injectedSet = false;
      while (values.hasNext()) {
        CrawlDatum val = values.next();
        if (val.getStatus() == CrawlDatum.STATUS_INJECTED) {
          injected.set(val);
          injected.setStatus(CrawlDatum.STATUS_DB_UNFETCHED);
          injectedSet = true;
        } else {
          old.set(val);
          oldSet = true;
        }
      }
      CrawlDatum res = null;

      /**
       * Whether to overwrite, ignore or update existing records
       *
       * @see https://issues.apache.org/jira/browse/NUTCH-1405
       */

      // Injected record already exists and overwrite but not update
      if (injectedSet && oldSet && overwrite) {
        res = injected;

        if (update) {
          LOG.info(key.toString() + " overwritten with injected record but update was specified.");
        }
      }

      // Injected record already exists and update but not overwrite
      if (injectedSet && oldSet && update && !overwrite) {
        res = old;
        old.putAllMetaData(injected);
        old.setScore(injected.getScore() != scoreInjected ? injected.getScore() : old.getScore());
        old.setFetchInterval(
            injected.getFetchInterval() != interval
                ? injected.getFetchInterval()
                : old.getFetchInterval());
      }

      // Old default behaviour
      if (injectedSet && !oldSet) {
        res = injected;
      } else {
        res = old;
      }

      output.collect(key, res);
    }
 @Override
 protected void reduce(
     Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context)
     throws IOException, InterruptedException {
   BigDecimal sum = new BigDecimal(0);
   for (Text val : values) {
     sum = sum.add(new BigDecimal(val.toString()));
   }
   context.write(key, new Text(sum.toString()));
 }
 public void map(
     IntWritable key, Text value, OutputCollector<IntWritable, Text> output, Reporter reporter)
     throws IOException {
   String dataRow = value.toString();
   StringTokenizer tk = new StringTokenizer(dataRow);
   String label = tk.nextToken();
   String image = tk.nextToken();
   dataString.set(label + "\t" + image);
   output.collect(sameKey, dataString);
 }
예제 #20
0
    public void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      // read in a document (point in 58-dimensional space)

      List<Double> p = GetPoint(value.toString());

      int idxClosestCentoid = IndexOfClosestCentroid(p);

      context.write(new IntWritable(idxClosestCentoid), value);
    }
예제 #21
0
 @Override
 public void map(Object key, Text value, Context context)
     throws IOException, InterruptedException {
   String line = value.toString();
   StringTokenizer tokenizer = new StringTokenizer(line);
   int movieId = Integer.parseInt(tokenizer.nextToken());
   while (tokenizer.hasMoreTokens()) {
     String word = tokenizer.nextToken();
     context.write(new Text("1"), new IntWritable(1));
   }
 }
  /*
   * Finds a full file and sets it as the value.
   */
  public synchronized boolean next(LongWritable key, Text value) throws IOException {
    Text line = new Text();
    boolean retrieved = true;

    String result = "";

    value.clear();

    while (retrieved) {
      retrieved = recordReader.next(key, line);

      if (line.toString().length() > 0) {
        String lineValue = line.toString();
        result += lineValue + "\n";
      }
    }

    value.set(result);
    return true;
  }
    @Override
    public void map(Text key, Text value, Context context)
        throws IOException, InterruptedException {
      Float avg = Float.parseFloat(value.toString());
      String word = key.toString();
      String[] parts = word.split("-");
      String newKey = parts[0] + "-" + parts[1];
      String carrier = parts[2];

      if (!carrierAvgDelay.containsKey(newKey)) {
        TreeSet<Pair<Float, String>> avgDelayMap = new TreeSet<Pair<Float, String>>();
        carrierAvgDelay.put(newKey, avgDelayMap);
      }
      TreeSet<Pair<Float, String>> ts = carrierAvgDelay.get(newKey);
      ts.add(new Pair<Float, String>(avg, carrier));
      if (ts.size() > 10) {
        ts.remove(ts.last());
      }
      carrierAvgDelay.put(newKey, ts);
    }
    public void reduce(Text key, Iterable<Text> values, Context context)
        throws IOException, InterruptedException {
      String input[];
      double result = 0.0;

      /* adds all the values corresponding to a key */
      for (Text value : values) {
        result += Double.parseDouble(value.toString());
      }

      context.write(null, new Text(key + "," + Double.toString(result)));
    }
예제 #25
0
 public void map(
     LongWritable key, Text value, OutputCollector<IntWritable, Text> output, Reporter reporter)
     throws IOException {
   String arr[] = value.toString().split("\\r?\\n");
   for (String row : arr) {
     if (row.startsWith("\"")) {
       continue;
     }
     String parts[] = row.split(",");
     output.collect(new IntWritable(new Integer(parts[1])), new Text(parts[4]));
   }
 }
예제 #26
0
  public void testInputFormat() {

    try {
      JobConf conf = new JobConf();
      String TMP_DIR = System.getProperty("test.build.data", "/tmp");
      Path filename = new Path("file:///" + TMP_DIR + "/tmpSeqFile");
      SequenceFile.Writer sfw =
          SequenceFile.createWriter(
              FileSystem.getLocal(conf),
              conf,
              filename,
              ChukwaArchiveKey.class,
              ChunkImpl.class,
              SequenceFile.CompressionType.NONE,
              Reporter.NULL);

      StringBuilder buf = new StringBuilder();
      int offsets[] = new int[lines.length];
      for (int i = 0; i < lines.length; ++i) {
        buf.append(lines[i]);
        buf.append("\n");
        offsets[i] = buf.length() - 1;
      }
      ChukwaArchiveKey key = new ChukwaArchiveKey(0, "datatype", "sname", 0);
      ChunkImpl val = new ChunkImpl("datatype", "sname", 0, buf.toString().getBytes(), null);
      val.setRecordOffsets(offsets);
      sfw.append(key, val);
      sfw.append(key, val); // write it twice
      sfw.close();

      long len = FileSystem.getLocal(conf).getFileStatus(filename).getLen();
      InputSplit split = new FileSplit(filename, 0, len, (String[]) null);
      ChukwaInputFormat in = new ChukwaInputFormat();
      RecordReader<LongWritable, Text> r = in.getRecordReader(split, conf, Reporter.NULL);

      LongWritable l = r.createKey();
      Text line = r.createValue();
      for (int i = 0; i < lines.length * 2; ++i) {
        boolean succeeded = r.next(l, line);
        assertTrue(succeeded);
        assertEquals(i, l.get());
        assertEquals(lines[i % lines.length], line.toString());
        System.out.println("read line: " + l.get() + " " + line);
      }
      boolean succeeded = r.next(l, line);
      assertFalse(succeeded);

    } catch (IOException e) {
      e.printStackTrace();
      fail("IO exception " + e);
    }
  }
예제 #27
0
 public void map(
     LongWritable key,
     Text value,
     OutputCollector<IntWritable, IntWritable> output,
     Reporter reporter)
     throws IOException {
   String line = value.toString();
   IntWritable clave = new IntWritable();
   IntWritable valor = new IntWritable();
   clave.set(Integer.parseInt(line));
   valor.set(Integer.parseInt(line) + 1);
   output.collect(clave, valor);
 }
예제 #28
0
  public static String format(Object o, String defaultHTML) {
    String s = Text.toString(o, null);

    if (Text.isEmpty(s)) {
      s = defaultHTML;
    } else {
      s = s.trim();
      s = HTML.escape(s);
      s = escapeNotAscii(s);
    }

    return s;
  }
예제 #29
0
 public void map(
     LongWritable key,
     Text value,
     OutputCollector<DoubleWritable, DoubleWritable> output,
     Reporter reporter)
     throws IOException {
   String line = value.toString();
   DoubleWritable clave = new DoubleWritable();
   DoubleWritable valor = new DoubleWritable();
   clave.set(Double.parseDouble(line));
   valor.set(Math.sqrt(Double.parseDouble(line)));
   output.collect(clave, valor);
 }
    public void map(
        Text key,
        Text val,
        org.apache.hadoop.mapreduce.Mapper<Text, Text, Text, Text>.Context context)
        throws IOException, InterruptedException {
      int i = 0, n = 0, j = 0, lj = 0, hj = 0;
      String tem = "";

      initStopWordsMap(); // initialize  the stop list
      String line = val.toString();
      StringTokenizer itr =
          new StringTokenizer(line.toLowerCase(), tokenDelimiter); // set delimiter
      n = itr.countTokens();
      cache = new String[n];
      for (i = 0; i < n; i++) {
        cache[i] = new String(""); // initialize the cache
      }
      i = 0;
      while (itr.hasMoreTokens()) {
        cache[i] = itr.nextToken(); // padding the cache with the words of the content
        i++;
      }
      for (i = 0; i < n; i++) {
        keyWord = cache[i];
        keyWord = keyWord.trim();
        if (!hmStopWord.containsKey(keyWord)) {
          lj = i - 10;
          hj = i + 10;
          if (lj < 0) lj = 0;
          if (hj > n) hj = n;
          tem = " ";
          for (j = lj; j < hj; j++) tem += cache[j] + " ";
          location = new Text();
          location.set(key.toString() + tem);
          context.write(new Text(keyWord), location);
        }
      }
    }