예제 #1
0
파일: Fill.java 프로젝트: uzielgl/hadoop
    @Override
    public void reduce(
        Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {

      String line = "";
      String header = "";
      TreeMap<String, String> ciudades = new TreeMap<String, String>();

      // Obtenemos los datos y los metemos a un treemap para que los ordene por ciudad
      while (values.hasNext()) {
        String[] tmp = values.next().toString().split(",");
        String ciudad = tmp[0];
        String mes = tmp[1];
        String temperatura = tmp[2];
        String fecha = tmp[3];

        ciudades.put(ciudad, tmp[1] + "," + tmp[2] + "," + tmp[3]);
      }

      // Recorremos las ciudades y vamos imprimiendo
      for (String ciudad : ciudades.keySet()) {
        header += ciudad + ",,";
        String[] temporal = ciudades.get(ciudad).split(",");
        line += temporal[2] + "," + temporal[1] + ",";
      }

      if (c == 0) { // Imprimimos cabezera
        output.collect(new Text("Año,"), new Text(header));
        c++;
      }

      output.collect(new Text(key.toString() + ","), new Text(line));
    }
예제 #2
0
    void search(
        Vector<Star> v1,
        Vector<Star> v2,
        BlockIDWritable key,
        OutputCollector<BlockIDWritable, PairWritable> output)
        throws IOException {
      for (int i = 0; i < v1.size(); i++) {
        for (int j = 0; j < v2.size(); j++) {
          Star star1 = v1.get(i);
          Star star2 = v2.get(j);
          // what is this margin about
          if (star1.margin && star2.margin) continue;

          double dist = star1.x * star2.x + star1.y * star2.y + star1.z * star2.z;
          if (dist > costheta) {
            p.set(star1, star2, dist);
            output.collect(key, p);
            p.set(star2, star1, dist);
            output.collect(key, p);
            //		num += 2;

          }
        }
      } // end for i,j
    }
    @SuppressWarnings("unchecked")
    public void reduce(
        Text key,
        Iterator<Text> values,
        OutputCollector<NullWritable, Text> output,
        Reporter reporter)
        throws IOException {

      OutputCollector collector =
          multipleOutputs.getCollector("station", key.toString().replace("-", ""), reporter);
      while (values.hasNext()) {
        collector.collect(NullWritable.get(), values.next());
      }
    }
  public void map(
      LongWritable key, Text value, OutputCollector<LongWritable, Text> output, Reporter reporter)
      throws IOException {
    String line = value.toString();

    if (validate(line, reporter)) output.collect(key, value);
  }
  public void map(
      LongWritable key,
      Text value,
      OutputCollector<IntWritable, DoubleWritable> output,
      Reporter reporter)
      throws IOException {
    /*
     * It implements the mapper. It outputs the numbers of weight and updated weights.
     *
     * Note that the format of intermediate output is <IntWritable, DoubleWritable>,
     * because the key is the number of weight (an integer), and the value is the weight's value (double)
     */
    inputData = value.toString();

    // go through the process
    initialize();
    getposphase();
    getnegphase();
    update();

    // output the intermediate data
    // The <key, value> pairs are <weightID, weightUpdate>
    double[][] vishidinc_array = vishidinc.getArray();
    for (int i = 0; i < numdims; i++) {
      for (int j = 0; j < numhid; j++) {
        weightPos.set(i * numhid + j);
        weightValue.set(vishidinc_array[i][j]);
        output.collect(weightPos, weightValue);
      }
    }
  }
예제 #6
0
    // specify input and out keys
    public void map(
        LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
      String line = value.toString(); // define new variable to be string

      ArrayList<Integer> range = new ArrayList<Integer>();
      for (int i = 2000; i <= 2010; i++) {
        range.add(i);
      }

      // String[] inputs = line.split(",(?=([^\"]*\"[^\"]*\")*[^\"]*$)");
      String[] inputs = line.split(",");

      try {

        int year = Integer.parseInt(inputs[165]);

        if (range.contains(year)) {
          String dur = inputs[3];
          String artist_name = inputs[2];
          String song_title = inputs[1];
          String final_input = artist_name + ',' + dur + ',' + song_title;
          Final_Value.set(final_input);
          output.collect(Final_Value, dummy);
        }
      } catch (NumberFormatException e) {
        // do nothing
      }
    }
    public void map(
        LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {

      parser.parse(value);
      output.collect(new Text(parser.getStationId()), value);
    }
예제 #8
0
 /**
  * Takes in (id, node) pairs and emits them right back out
  *
  * @param key The Node ID
  * @param Node The Node object
  * @param output An Output Collector that collects (id, node) pairs
  * @param reporter Default reporter object
  */
 public void map(
     IntWritable key,
     WikiPage value,
     OutputCollector<IntWritable, WikiPage> output,
     Reporter reporter)
     throws IOException {
   output.collect(key, value);
 }
예제 #9
0
 public void reduce(
     DoubleWritable key,
     Iterator<DoubleWritable> values,
     OutputCollector<DoubleWritable, Text> output,
     Reporter reporter)
     throws IOException {
   output.collect(key, new Text(values.next().toString() + " - "));
 }
    public void reduce(
        IntPair key,
        Iterator<NullWritable> values,
        OutputCollector<IntPair, NullWritable> output,
        Reporter reporter)
        throws IOException {

      /*[*/ output.collect(key, NullWritable.get()); /*]*/
    }
예제 #11
0
 public void map(LongWritable key, Text value, OutputCollector output, Reporter reporter)
     throws IOException {
   String line = value.toString();
   StringTokenizer tokenizer = new StringTokenizer(line);
   while (tokenizer.hasMoreTokens()) {
     word.set(tokenizer.nextToken());
     output.collect(word, one);
   }
 }
예제 #12
0
 @Override
 public void reduce(
     IntWritable key,
     Iterator<DoubleWritable> values,
     OutputCollector<IntWritable, DoubleWritable> output,
     Reporter reporter)
     throws IOException {
   output.collect(key, values.next());
 }
예제 #13
0
    public void reduce(
        Text key,
        Iterator<CrawlDatum> values,
        OutputCollector<Text, CrawlDatum> output,
        Reporter reporter)
        throws IOException {
      boolean oldSet = false;
      boolean injectedSet = false;
      while (values.hasNext()) {
        CrawlDatum val = values.next();
        if (val.getStatus() == CrawlDatum.STATUS_INJECTED) {
          injected.set(val);
          injected.setStatus(CrawlDatum.STATUS_DB_UNFETCHED);
          injectedSet = true;
        } else {
          old.set(val);
          oldSet = true;
        }
      }
      CrawlDatum res = null;

      /**
       * Whether to overwrite, ignore or update existing records
       *
       * @see https://issues.apache.org/jira/browse/NUTCH-1405
       */

      // Injected record already exists and overwrite but not update
      if (injectedSet && oldSet && overwrite) {
        res = injected;

        if (update) {
          LOG.info(key.toString() + " overwritten with injected record but update was specified.");
        }
      }

      // Injected record already exists and update but not overwrite
      if (injectedSet && oldSet && update && !overwrite) {
        res = old;
        old.putAllMetaData(injected);
        old.setScore(injected.getScore() != scoreInjected ? injected.getScore() : old.getScore());
        old.setFetchInterval(
            injected.getFetchInterval() != interval
                ? injected.getFetchInterval()
                : old.getFetchInterval());
      }

      // Old default behaviour
      if (injectedSet && !oldSet) {
        res = injected;
      } else {
        res = old;
      }

      output.collect(key, res);
    }
예제 #14
0
 public void reduce(
     Text key,
     Iterator<IntWritable> values,
     OutputCollector<Text, IntWritable> output,
     Reporter reporter)
     throws IOException {
   int sum = 0;
   while (values.hasNext()) {
     sum += values.next().get();
   }
   output.collect(key, new IntWritable(sum));
 }
예제 #15
0
 public void map(
     LongWritable key, Text value, OutputCollector<IntWritable, Text> output, Reporter reporter)
     throws IOException {
   String arr[] = value.toString().split("\\r?\\n");
   for (String row : arr) {
     if (row.startsWith("\"")) {
       continue;
     }
     String parts[] = row.split(",");
     output.collect(new IntWritable(new Integer(parts[1])), new Text(parts[4]));
   }
 }
예제 #16
0
 public void map(
     LongWritable key,
     Text value,
     OutputCollector<IntWritable, IntWritable> output,
     Reporter reporter)
     throws IOException {
   String line = value.toString();
   IntWritable clave = new IntWritable();
   IntWritable valor = new IntWritable();
   clave.set(Integer.parseInt(line));
   valor.set(Integer.parseInt(line) + 1);
   output.collect(clave, valor);
 }
예제 #17
0
 public void map(
     LongWritable key,
     Text value,
     OutputCollector<DoubleWritable, DoubleWritable> output,
     Reporter reporter)
     throws IOException {
   String line = value.toString();
   DoubleWritable clave = new DoubleWritable();
   DoubleWritable valor = new DoubleWritable();
   clave.set(Double.parseDouble(line));
   valor.set(Math.sqrt(Double.parseDouble(line)));
   output.collect(clave, valor);
 }
예제 #18
0
 // reduce method accepts the Key Value pairs from mappers, do the aggregation based on keys and
 // produce the final out put
 public void reduce(
     Text key,
     Iterator<IntWritable> values,
     OutputCollector<Text, IntWritable> output,
     Reporter reporter)
     throws IOException {
   int sum = 0;
   /*iterates through all the values available with a key and add them together and give the
   final result as the key and sum of its values*/
   while (values.hasNext()) {
     sum += values.next().get();
   }
   output.collect(key, new IntWritable(sum));
 }
    public void map(
        LongWritable key,
        Text value,
        OutputCollector<IntPair, NullWritable> output,
        Reporter reporter)
        throws IOException {

      parser.parse(value);
      if (parser.isValidTemperature()) {
        /*[*/ output.collect(
            new IntPair(parser.getYearInt(), +parser.getAirTemperature()),
            NullWritable.get()); /*]*/
      }
    }
예제 #20
0
    @Override
    public void reduce(
        Text key, /*[*/
        Iterator /*]*/<IntWritable> values,
        /*[*/ OutputCollector<Text, IntWritable> output,
        Reporter reporter /*]*/)
        throws IOException {

      int maxValue = Integer.MIN_VALUE;
      while (
      /*[*/ values.hasNext() /*]*/) {
        maxValue = Math.max(maxValue, /*[*/ values.next().get() /*]*/);
      }
      /*[*/ output.collect /*]*/(key, new IntWritable(maxValue));
    }
예제 #21
0
    public void map(
        LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
        throws IOException {

      String line = value.toString();
      String lastToken = null;
      StringTokenizer s = new StringTokenizer(line, "\t");
      String year = s.nextToken();

      while (s.hasMoreTokens()) {
        lastToken = s.nextToken();
      }

      int averagePrice = Integer.parseInt(lastToken);
      output.collect(new Text(year), new IntWritable(averagePrice));
    }
예제 #22
0
    public void reduce(
        Text key,
        Iterator<IntWritable> value,
        OutputCollector<Text, IntWritable> output,
        Reporter reporter)
        throws IOException {
      // TODO Auto-generated method stub
      int maxAvg = 30;
      int val = Integer.MIN_VALUE;
      while (value.hasNext()) {

        if ((val = value.next().get()) > maxAvg) {
          output.collect(key, new IntWritable(val));
        }
      }
    }
예제 #23
0
파일: Fill.java 프로젝트: uzielgl/hadoop
    @Override
    public void map(
        LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {

      String line = value.toString();
      StringTokenizer tokens = new StringTokenizer(line);

      String[] keys = tokens.nextToken().toString().split("-");
      String date_temp = tokens.nextToken();

      String country = keys[0];
      String year = keys[1];

      // Mandamos año y un iterable de [ciudad,mes,temperatura,fecha]
      output.collect(new Text(year), new Text(country + "," + date_temp));
    }
예제 #24
0
    /** Implements the map-method of the Mapper-interface */
    @Override
    public void map(
        LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
        throws IOException {

      String line = value.toString();
      StringTokenizer tokenizer = new StringTokenizer(line);

      while (tokenizer.hasMoreTokens()) {

        String token = tokenizer.nextToken();

        if (token.length() > 3) {
          word.set(token);
          output.collect(word, one);
        }
      }
    }
예제 #25
0
  public void map(
      LongWritable key, Point value, OutputCollector<LongWritable, Point> output, Reporter reporter)
      throws IOException {
    double min = value.sumOfSquares(centers.get(0));
    int best = 0;

    for (int index = 1; index < numberOfCenters; ++index) {
      double current = value.sumOfSquares(centers.get(index));

      if (current < min) {
        min = current;
        best = index;
      }
    }

    reporter.incrCounter("NUMBER", "NODES", 1);
    reporter.incrCounter("CENTER", "" + best, 1);

    output.collect(new LongWritable(best), value);
  }
예제 #26
0
    public void map(
        LongWritable key,
        Text value,
        OutputCollector<IntWritable, DoubleWritable> output,
        Reporter reporter)
        throws IOException {
      String line = value.toString();
      StringTokenizer tokenizer = new StringTokenizer(line);

      int rowIdx = 0;
      double xValue = 0;

      if (tokenizer.hasMoreTokens()) {
        rowIdx = Integer.parseInt(tokenizer.nextToken());
        xValue = Double.parseDouble(tokenizer.nextToken());
      }

      double xResult = (resVec[rowIdx] - (sumVec[rowIdx] * xValue)) / diaVec[rowIdx];
      output.collect(new IntWritable(rowIdx), new DoubleWritable(xResult));
    }
예제 #27
0
    @Override
    public void map(
        LongWritable key,
        Text value,
        /*[*/ OutputCollector<Text, IntWritable> output,
        Reporter reporter /*]*/)
        throws IOException {

      String line = value.toString();
      String year = line.substring(15, 19);
      int airTemperature;
      if (line.charAt(87) == '+') { // parseInt doesn't like leading plus signs
        airTemperature = Integer.parseInt(line.substring(88, 92));
      } else {
        airTemperature = Integer.parseInt(line.substring(87, 92));
      }
      String quality = line.substring(92, 93);
      if (airTemperature != MISSING && quality.matches("[01459]")) {
        /*[*/ output.collect /*]*/(new Text(year), new IntWritable(airTemperature));
      }
    }
예제 #28
0
    public void reduce(
        IntWritable key,
        Iterator<Text> values,
        OutputCollector<IntWritable, Text> output,
        Reporter reporter)
        throws IOException {
      HashMap<String, Integer> countries_map = new HashMap<String, Integer>();
      ArrayList<Integer> counties = new ArrayList<>();
      String cp = new String();

      while (values.hasNext()) {
        cp = values.next().toString();
        if (countries_map.containsKey(cp)) {
          countries_map.put(cp, countries_map.get(cp) + 1);
        } else {
          countries_map.put(cp, 1);
        }
      }

      for (java.util.Map.Entry<String, Integer> entry : countries_map.entrySet()) {
        counties.add(entry.getValue());
      }
      output.collect(
          key,
          new Text(
              ""
                  + countries_map.entrySet().size()
                  + " "
                  + Collections.min(counties)
                  + " "
                  + median(counties)
                  + " "
                  + Collections.max(counties)
                  + " "
                  + mean(counties)
                  + " "
                  + standard_deviation(counties)));
    }
예제 #29
0
    public void reduce(
        BlockIDWritable key,
        Iterator<PairWritable> values,
        OutputCollector<BlockIDWritable, PairWritable> output,
        Reporter reporter)
        throws IOException {
      // Vector<Star> starV = new Vector<Star>();
      int buketsizeX = 0;
      int buketsizeY = 0;
      double bwidth = maxAlphas[key.zoneNum]; // ra ,x
      double bheight = theta; // dec ,y
      /* add 10 more in each dimension to make sure there is no overflow. */
      Vector<Star>[][] arrstarV =
          new Vector[((int) (zoneHeight / bheight)) + 10]
              [((int) (blockWidth / bwidth)) + 10]; // create bucket vector[Y][X]

      int num = 0;
      while (values.hasNext()) {
        num++;
        Star s = values.next().get(0);

        // participant
        double posx = (s.ra - blockRanges[key.raNum][0]) / bwidth;
        int x = (int) posx + 1; // shit by 1 in case star comes from other block
        double posy = (s.dec - zoneRanges[key.zoneNum][0]) / bheight;
        int y = (int) posy + 1;

        // set bucket size as max
        if (buketsizeX < x) buketsizeX = x;
        if (buketsizeY < y) buketsizeY = y;
        // create according bucket
        if (arrstarV[y][x] == null)
          // TODO avaoid creating vectors here.
          arrstarV[y][x] = new Vector<Star>();
        // put star into bucket
        arrstarV[y][x].add(s);
      }
      // start reducer
      int i, j, row, col;
      // for each bucket
      for (row = 0; row <= buketsizeY; row++) {
        for (col = 0; col <= buketsizeX; col++) {
          //		starV.clear();
          // construct a new vector to do compare
          // TODO we need to avoid searching objects in the border.
          if (arrstarV[row][col] != null) {
            // old method to generate output
            for (i = 0; i < arrstarV[row][col].size(); i++) {
              for (j = i + 1; j < arrstarV[row][col].size(); j++) {
                Star star1 = arrstarV[row][col].get(i);
                Star star2 = arrstarV[row][col].get(j);
                // what is this margin about
                if (star1.margin && star2.margin) continue;

                double dist = star1.x * star2.x + star1.y * star2.y + star1.z * star2.z;
                if (dist > costheta) {
                  p.set(star1, star2, dist);
                  output.collect(key, p);
                  p.set(star2, star1, dist);
                  output.collect(key, p);
                  //		num += 2;

                }
              }
            } // end for i,j

          } // end if
          else {
            continue;
          }
          // 4 more neighbors
          // right upper arrstarV[row-1][col+1] vs arrstarV[row][col]
          if (row != 0 && arrstarV[row - 1][col + 1] != null) {
            search(arrstarV[row][col], arrstarV[row - 1][col + 1], key, output);
          }
          // right arrstarV[row][col+1] vs arrstarV[row][col]
          if (arrstarV[row][col + 1] != null) {
            search(arrstarV[row][col], arrstarV[row][col + 1], key, output);
          }
          // right lower
          if (arrstarV[row + 1][col + 1] != null) {
            search(arrstarV[row][col], arrstarV[row + 1][col + 1], key, output);
          }
          // lower
          if (arrstarV[row + 1][col] != null) {
            search(arrstarV[row][col], arrstarV[row + 1][col], key, output);
          } // end if
        } // end colum
      } // end row
    }
예제 #30
0
    public void map(
        LongWritable key,
        Star value,
        OutputCollector<BlockIDWritable, PairWritable> output,
        Reporter reporter)
        throws IOException {

      loc.set(value.ra, value.dec);
      int zoneNum = loc.zoneNum;
      int raNum = loc.raNum;
      p.set(value, null);

      /*
       * When the block size increases (> theta), only part of a block
       * needs to be copied to its neighbor.
       */
      output.collect(loc, p);

      /*
       * only replicate objects in the border of a block. I expect most of
       * objects don't need to be copied.
       */
      if (value.dec > zoneRanges[zoneNum][0] + theta
          && value.dec < zoneRanges[zoneNum][1] - theta
          && value.ra > blockRanges[raNum][0] + maxAlphas[zoneNum]
          && value.ra < blockRanges[raNum][1] - maxAlphas[zoneNum]) return;

      /*
       * the code below is to copy the star to some neighbors. We only
       * need to copy an object to the bottom, left, left bottom, left top
       * neighbors
       */
      value.margin = true;

      /*
       * we should treat the entire zone 0 as a block, so we only needs to
       * copy some objects at the corner to their neighbors
       */
      if (loc.zoneNum == 0) {
        /* copy the object to the right top neighbor */
        if (value.ra >= blockRanges[raNum][1] - maxAlphas[zoneNum]
            && value.ra <= blockRanges[raNum][1]
            && value.dec >= zoneRanges[zoneNum][1] - theta
            && value.dec <= zoneRanges[zoneNum][1]) {
          //					BlockIDWritable loc1 = new BlockIDWritable();
          /* raNum of objects in zone 0 is always 0,
           * we need to recalculate it. */
          //					loc1.raNum = BlockIDWritable.ra2Num(value.ra) + 1;
          //					if (loc1.raNum == numBlocks) {
          //						loc1.raNum = 0;
          //						value.ra -= 360;
          //					}
          //					loc1.zoneNum = loc.zoneNum + 1;
          ///					output.collect(loc1, p);
        }
        return;
      } else if (loc.zoneNum == numZones - 1) {
        /* copy the object to the bottom neighbor */
        if (value.dec >= zoneRanges[zoneNum][0] && value.dec <= zoneRanges[zoneNum][0] + theta) {
          /* raNum of objects in zone zoneNum - 1 is always 0,
           * we need to recalculate it. */
          loc1.raNum = BlockIDWritable.ra2Num(value.ra);
          loc1.zoneNum = loc.zoneNum - 1;
          output.collect(loc1, p);

          /* copy the object to the right bottom neighbor */
          while (value.ra >= blockRanges[loc1.raNum][1] - maxAlphas[zoneNum]
              && value.ra <= blockRanges[loc1.raNum][1]) {
            loc1.raNum++;
            if (loc1.raNum == numBlocks) {
              loc1.raNum = 0;
              value.ra -= 360;
            }
            loc1.zoneNum = loc.zoneNum - 1;
            output.collect(loc1, p);
          }
        }
        return;
      }

      boolean wrap = false;
      loc1.raNum = loc.raNum;
      /* copy the object to the right neighbor */
      while (value.ra >= blockRanges[loc1.raNum][1] - maxAlphas[zoneNum]
          && value.ra <= blockRanges[loc1.raNum][1]) {
        loc1.raNum++;
        loc1.zoneNum = loc.zoneNum;
        /*
         * when the object is copied to the right neighbor, we need to
         * be careful. we need to convert ra and raNum if ra is close to
         * 360.
         */
        if (loc1.raNum == numBlocks) {
          loc1.raNum = 0;
          value.ra -= 360;
          wrap = true;
        }
        output.collect(loc1, p);
        /* copy the object to the right bottom neighbor */
        if (value.dec >= zoneRanges[zoneNum][0] && value.dec <= zoneRanges[zoneNum][0] + theta) {
          loc1.zoneNum = loc.zoneNum - 1;
          output.collect(loc1, p);
        }
        /* copy the object to the right top neighbor */
        if (value.dec >= zoneRanges[zoneNum][1] - theta && value.dec <= zoneRanges[zoneNum][1]) {
          loc1.zoneNum = loc.zoneNum + 1;
          output.collect(loc1, p);
        }
      }
      if (wrap) {
        value.ra += 360;
      }

      /* copy the object to the bottom neighbor */
      if (value.dec >= zoneRanges[zoneNum][0] && value.dec <= zoneRanges[zoneNum][0] + theta) {
        loc1.raNum = loc.raNum;
        loc1.zoneNum = loc.zoneNum - 1;
        if (loc1.zoneNum == 0) loc1.raNum = 0;
        output.collect(loc1, p);
      }
    }