Exemplo n.º 1
0
    @Override
    public void reduce(
        Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {

      String line = "";
      String header = "";
      TreeMap<String, String> ciudades = new TreeMap<String, String>();

      // Obtenemos los datos y los metemos a un treemap para que los ordene por ciudad
      while (values.hasNext()) {
        String[] tmp = values.next().toString().split(",");
        String ciudad = tmp[0];
        String mes = tmp[1];
        String temperatura = tmp[2];
        String fecha = tmp[3];

        ciudades.put(ciudad, tmp[1] + "," + tmp[2] + "," + tmp[3]);
      }

      // Recorremos las ciudades y vamos imprimiendo
      for (String ciudad : ciudades.keySet()) {
        header += ciudad + ",,";
        String[] temporal = ciudades.get(ciudad).split(",");
        line += temporal[2] + "," + temporal[1] + ",";
      }

      if (c == 0) { // Imprimimos cabezera
        output.collect(new Text("Año,"), new Text(header));
        c++;
      }

      output.collect(new Text(key.toString() + ","), new Text(line));
    }
Exemplo n.º 2
0
    void search(
        Vector<Star> v1,
        Vector<Star> v2,
        BlockIDWritable key,
        OutputCollector<BlockIDWritable, PairWritable> output)
        throws IOException {
      for (int i = 0; i < v1.size(); i++) {
        for (int j = 0; j < v2.size(); j++) {
          Star star1 = v1.get(i);
          Star star2 = v2.get(j);
          // what is this margin about
          if (star1.margin && star2.margin) continue;

          double dist = star1.x * star2.x + star1.y * star2.y + star1.z * star2.z;
          if (dist > costheta) {
            p.set(star1, star2, dist);
            output.collect(key, p);
            p.set(star2, star1, dist);
            output.collect(key, p);
            //		num += 2;

          }
        }
      } // end for i,j
    }
  public void map(
      LongWritable key,
      Text value,
      OutputCollector<IntWritable, DoubleWritable> output,
      Reporter reporter)
      throws IOException {
    /*
     * It implements the mapper. It outputs the numbers of weight and updated weights.
     *
     * Note that the format of intermediate output is <IntWritable, DoubleWritable>,
     * because the key is the number of weight (an integer), and the value is the weight's value (double)
     */
    inputData = value.toString();

    // go through the process
    initialize();
    getposphase();
    getnegphase();
    update();

    // output the intermediate data
    // The <key, value> pairs are <weightID, weightUpdate>
    double[][] vishidinc_array = vishidinc.getArray();
    for (int i = 0; i < numdims; i++) {
      for (int j = 0; j < numhid; j++) {
        weightPos.set(i * numhid + j);
        weightValue.set(vishidinc_array[i][j]);
        output.collect(weightPos, weightValue);
      }
    }
  }
Exemplo n.º 4
0
    // specify input and out keys
    public void map(
        LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
      String line = value.toString(); // define new variable to be string

      ArrayList<Integer> range = new ArrayList<Integer>();
      for (int i = 2000; i <= 2010; i++) {
        range.add(i);
      }

      // String[] inputs = line.split(",(?=([^\"]*\"[^\"]*\")*[^\"]*$)");
      String[] inputs = line.split(",");

      try {

        int year = Integer.parseInt(inputs[165]);

        if (range.contains(year)) {
          String dur = inputs[3];
          String artist_name = inputs[2];
          String song_title = inputs[1];
          String final_input = artist_name + ',' + dur + ',' + song_title;
          Final_Value.set(final_input);
          output.collect(Final_Value, dummy);
        }
      } catch (NumberFormatException e) {
        // do nothing
      }
    }
Exemplo n.º 5
0
 public void reduce(
     DoubleWritable key,
     Iterator<DoubleWritable> values,
     OutputCollector<DoubleWritable, Text> output,
     Reporter reporter)
     throws IOException {
   output.collect(key, new Text(values.next().toString() + " - "));
 }
Exemplo n.º 6
0
 /**
  * Takes in (id, node) pairs and emits them right back out
  *
  * @param key The Node ID
  * @param Node The Node object
  * @param output An Output Collector that collects (id, node) pairs
  * @param reporter Default reporter object
  */
 public void map(
     IntWritable key,
     WikiPage value,
     OutputCollector<IntWritable, WikiPage> output,
     Reporter reporter)
     throws IOException {
   output.collect(key, value);
 }
Exemplo n.º 7
0
 public void map(LongWritable key, Text value, OutputCollector output, Reporter reporter)
     throws IOException {
   String line = value.toString();
   StringTokenizer tokenizer = new StringTokenizer(line);
   while (tokenizer.hasMoreTokens()) {
     word.set(tokenizer.nextToken());
     output.collect(word, one);
   }
 }
Exemplo n.º 8
0
 @Override
 public void reduce(
     IntWritable key,
     Iterator<DoubleWritable> values,
     OutputCollector<IntWritable, DoubleWritable> output,
     Reporter reporter)
     throws IOException {
   output.collect(key, values.next());
 }
Exemplo n.º 9
0
    public void reduce(
        Text key,
        Iterator<CrawlDatum> values,
        OutputCollector<Text, CrawlDatum> output,
        Reporter reporter)
        throws IOException {
      boolean oldSet = false;
      boolean injectedSet = false;
      while (values.hasNext()) {
        CrawlDatum val = values.next();
        if (val.getStatus() == CrawlDatum.STATUS_INJECTED) {
          injected.set(val);
          injected.setStatus(CrawlDatum.STATUS_DB_UNFETCHED);
          injectedSet = true;
        } else {
          old.set(val);
          oldSet = true;
        }
      }
      CrawlDatum res = null;

      /**
       * Whether to overwrite, ignore or update existing records
       *
       * @see https://issues.apache.org/jira/browse/NUTCH-1405
       */

      // Injected record already exists and overwrite but not update
      if (injectedSet && oldSet && overwrite) {
        res = injected;

        if (update) {
          LOG.info(key.toString() + " overwritten with injected record but update was specified.");
        }
      }

      // Injected record already exists and update but not overwrite
      if (injectedSet && oldSet && update && !overwrite) {
        res = old;
        old.putAllMetaData(injected);
        old.setScore(injected.getScore() != scoreInjected ? injected.getScore() : old.getScore());
        old.setFetchInterval(
            injected.getFetchInterval() != interval
                ? injected.getFetchInterval()
                : old.getFetchInterval());
      }

      // Old default behaviour
      if (injectedSet && !oldSet) {
        res = injected;
      } else {
        res = old;
      }

      output.collect(key, res);
    }
Exemplo n.º 10
0
      public void map(
          BytesWritable key,
          BytesWritable value,
          OutputCollector<BytesWritable, IntWritable> output,
          Reporter reporter)
          throws IOException {
        // newKey = (key, value)
        BytesWritable keyValue = new BytesWritable(pair(key, value));

        // output (newKey, value)
        output.collect(keyValue, this.value);
      }
Exemplo n.º 11
0
 public void reduce(
     Text key,
     Iterator<IntWritable> values,
     OutputCollector<Text, IntWritable> output,
     Reporter reporter)
     throws IOException {
   int sum = 0;
   while (values.hasNext()) {
     sum += values.next().get();
   }
   output.collect(key, new IntWritable(sum));
 }
Exemplo n.º 12
0
 public void map(
     LongWritable key, Text value, OutputCollector<IntWritable, Text> output, Reporter reporter)
     throws IOException {
   String arr[] = value.toString().split("\\r?\\n");
   for (String row : arr) {
     if (row.startsWith("\"")) {
       continue;
     }
     String parts[] = row.split(",");
     output.collect(new IntWritable(new Integer(parts[1])), new Text(parts[4]));
   }
 }
Exemplo n.º 13
0
 public void map(
     LongWritable key,
     Text value,
     OutputCollector<DoubleWritable, DoubleWritable> output,
     Reporter reporter)
     throws IOException {
   String line = value.toString();
   DoubleWritable clave = new DoubleWritable();
   DoubleWritable valor = new DoubleWritable();
   clave.set(Double.parseDouble(line));
   valor.set(Math.sqrt(Double.parseDouble(line)));
   output.collect(clave, valor);
 }
Exemplo n.º 14
0
 public void map(
     LongWritable key,
     Text value,
     OutputCollector<IntWritable, IntWritable> output,
     Reporter reporter)
     throws IOException {
   String line = value.toString();
   IntWritable clave = new IntWritable();
   IntWritable valor = new IntWritable();
   clave.set(Integer.parseInt(line));
   valor.set(Integer.parseInt(line) + 1);
   output.collect(clave, valor);
 }
Exemplo n.º 15
0
    public void map(
        LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
        throws IOException {

      String line = value.toString();
      String lastToken = null;
      StringTokenizer s = new StringTokenizer(line, "\t");
      String year = s.nextToken();

      while (s.hasMoreTokens()) {
        lastToken = s.nextToken();
      }

      int averagePrice = Integer.parseInt(lastToken);
      output.collect(new Text(year), new IntWritable(averagePrice));
    }
Exemplo n.º 16
0
    public void reduce(
        Text key,
        Iterator<IntWritable> value,
        OutputCollector<Text, IntWritable> output,
        Reporter reporter)
        throws IOException {
      // TODO Auto-generated method stub
      int maxAvg = 30;
      int val = Integer.MIN_VALUE;
      while (value.hasNext()) {

        if ((val = value.next().get()) > maxAvg) {
          output.collect(key, new IntWritable(val));
        }
      }
    }
Exemplo n.º 17
0
    @Override
    public void map(
        LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {

      String line = value.toString();
      StringTokenizer tokens = new StringTokenizer(line);

      String[] keys = tokens.nextToken().toString().split("-");
      String date_temp = tokens.nextToken();

      String country = keys[0];
      String year = keys[1];

      // Mandamos año y un iterable de [ciudad,mes,temperatura,fecha]
      output.collect(new Text(year), new Text(country + "," + date_temp));
    }
Exemplo n.º 18
0
    /** Implements the map-method of the Mapper-interface */
    @Override
    public void map(
        LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
        throws IOException {

      String line = value.toString();
      StringTokenizer tokenizer = new StringTokenizer(line);

      while (tokenizer.hasMoreTokens()) {

        String token = tokenizer.nextToken();

        if (token.length() > 3) {
          word.set(token);
          output.collect(word, one);
        }
      }
    }
Exemplo n.º 19
0
      public void reduce(
          IntWritable key,
          Iterator<RecordStatsWritable> values,
          OutputCollector<IntWritable, RecordStatsWritable> output,
          Reporter reporter)
          throws IOException {
        long bytes = 0;
        long records = 0;
        int xor = 0;
        while (values.hasNext()) {
          RecordStatsWritable stats = values.next();
          bytes += stats.getBytes();
          records += stats.getRecords();
          xor ^= stats.getChecksum();
        }

        output.collect(key, new RecordStatsWritable(bytes, records, xor));
      }
Exemplo n.º 20
0
  public void map(
      LongWritable key, Point value, OutputCollector<LongWritable, Point> output, Reporter reporter)
      throws IOException {
    double min = value.sumOfSquares(centers.get(0));
    int best = 0;

    for (int index = 1; index < numberOfCenters; ++index) {
      double current = value.sumOfSquares(centers.get(index));

      if (current < min) {
        min = current;
        best = index;
      }
    }

    reporter.incrCounter("NUMBER", "NODES", 1);
    reporter.incrCounter("CENTER", "" + best, 1);

    output.collect(new LongWritable(best), value);
  }
Exemplo n.º 21
0
    public void map(
        LongWritable key,
        Text value,
        OutputCollector<IntWritable, DoubleWritable> output,
        Reporter reporter)
        throws IOException {
      String line = value.toString();
      StringTokenizer tokenizer = new StringTokenizer(line);

      int rowIdx = 0;
      double xValue = 0;

      if (tokenizer.hasMoreTokens()) {
        rowIdx = Integer.parseInt(tokenizer.nextToken());
        xValue = Double.parseDouble(tokenizer.nextToken());
      }

      double xResult = (resVec[rowIdx] - (sumVec[rowIdx] * xValue)) / diaVec[rowIdx];
      output.collect(new IntWritable(rowIdx), new DoubleWritable(xResult));
    }
Exemplo n.º 22
0
    public void reduce(
        IntWritable key,
        Iterator<Text> values,
        OutputCollector<IntWritable, Text> output,
        Reporter reporter)
        throws IOException {
      HashMap<String, Integer> countries_map = new HashMap<String, Integer>();
      ArrayList<Integer> counties = new ArrayList<>();
      String cp = new String();

      while (values.hasNext()) {
        cp = values.next().toString();
        if (countries_map.containsKey(cp)) {
          countries_map.put(cp, countries_map.get(cp) + 1);
        } else {
          countries_map.put(cp, 1);
        }
      }

      for (java.util.Map.Entry<String, Integer> entry : countries_map.entrySet()) {
        counties.add(entry.getValue());
      }
      output.collect(
          key,
          new Text(
              ""
                  + countries_map.entrySet().size()
                  + " "
                  + Collections.min(counties)
                  + " "
                  + median(counties)
                  + " "
                  + Collections.max(counties)
                  + " "
                  + mean(counties)
                  + " "
                  + standard_deviation(counties)));
    }
Exemplo n.º 23
0
      @SuppressWarnings("unchecked")
      public void map(
          WritableComparable key,
          Writable value,
          OutputCollector<IntWritable, RecordStatsWritable> output,
          Reporter reporter)
          throws IOException {
        // Set up rawKey and rawValue on the first call to 'map'
        if (recordId == -1) {
          rawKey = createRaw(key.getClass());
          rawValue = createRaw(value.getClass());
        }
        ++recordId;

        if (this.key == sortOutput) {
          // Check if keys are 'sorted' if this
          // record is from sort's output
          if (prevKey == null) {
            prevKey = key;
            keyClass = prevKey.getClass();
          } else {
            // Sanity check
            if (keyClass != key.getClass()) {
              throw new IOException(
                  "Type mismatch in key: expected "
                      + keyClass.getName()
                      + ", recieved "
                      + key.getClass().getName());
            }

            // Check if they were sorted correctly
            if (prevKey.compareTo(key) > 0) {
              throw new IOException(
                  "The 'map-reduce' framework wrongly"
                      + " classifed ("
                      + prevKey
                      + ") > ("
                      + key
                      + ") "
                      + "for record# "
                      + recordId);
            }
            prevKey = key;
          }

          // Check if the sorted output is 'partitioned' right
          int keyPartition = partitioner.getPartition(key, value, noSortReducers);
          if (partition != keyPartition) {
            throw new IOException(
                "Partitions do not match for record# "
                    + recordId
                    + " ! - '"
                    + partition
                    + "' v/s '"
                    + keyPartition
                    + "'");
          }
        }

        // Construct the record-stats and output (this.key, record-stats)
        byte[] keyBytes = rawKey.getRawBytes(key);
        int keyBytesLen = rawKey.getRawBytesLength(key);
        byte[] valueBytes = rawValue.getRawBytes(value);
        int valueBytesLen = rawValue.getRawBytesLength(value);

        int keyValueChecksum =
            (WritableComparator.hashBytes(keyBytes, keyBytesLen)
                ^ WritableComparator.hashBytes(valueBytes, valueBytesLen));

        output.collect(
            this.key, new RecordStatsWritable((keyBytesLen + valueBytesLen), 1, keyValueChecksum));
      }
Exemplo n.º 24
0
    public void map(
        WritableComparable<?> key,
        Text value,
        OutputCollector<Text, CrawlDatum> output,
        Reporter reporter)
        throws IOException {
      String url = value.toString(); // value is line of text

      if (url != null && url.trim().startsWith("#")) {
        /* Ignore line that start with # */
        return;
      }

      // if tabs : metadata that could be stored
      // must be name=value and separated by \t
      float customScore = -1f;
      int customInterval = interval;
      int fixedInterval = -1;
      Map<String, String> metadata = new TreeMap<String, String>();
      if (url.indexOf("\t") != -1) {
        String[] splits = url.split("\t");
        url = splits[0];
        for (int s = 1; s < splits.length; s++) {
          // find separation between name and value
          int indexEquals = splits[s].indexOf("=");
          if (indexEquals == -1) {
            // skip anything without a =
            continue;
          }
          String metaname = splits[s].substring(0, indexEquals);
          String metavalue = splits[s].substring(indexEquals + 1);
          if (metaname.equals(nutchScoreMDName)) {
            try {
              customScore = Float.parseFloat(metavalue);
            } catch (NumberFormatException nfe) {
            }
          } else if (metaname.equals(nutchFetchIntervalMDName)) {
            try {
              customInterval = Integer.parseInt(metavalue);
            } catch (NumberFormatException nfe) {
            }
          } else if (metaname.equals(nutchFixedFetchIntervalMDName)) {
            try {
              fixedInterval = Integer.parseInt(metavalue);
            } catch (NumberFormatException nfe) {
            }
          } else metadata.put(metaname, metavalue);
        }
      }
      try {
        url = urlNormalizers.normalize(url, URLNormalizers.SCOPE_INJECT);
        url = filters.filter(url); // filter the url
      } catch (Exception e) {
        if (LOG.isWarnEnabled()) {
          LOG.warn("Skipping " + url + ":" + e);
        }
        url = null;
      }
      if (url == null) {
        reporter.getCounter("injector", "urls_filtered").increment(1);
      } else { // if it passes
        value.set(url); // collect it
        CrawlDatum datum = new CrawlDatum();
        datum.setStatus(CrawlDatum.STATUS_INJECTED);

        // Is interval custom? Then set as meta data
        if (fixedInterval > -1) {
          // Set writable using float. Flaot is used by AdaptiveFetchSchedule
          datum
              .getMetaData()
              .put(Nutch.WRITABLE_FIXED_INTERVAL_KEY, new FloatWritable(fixedInterval));
          datum.setFetchInterval(fixedInterval);
        } else {
          datum.setFetchInterval(customInterval);
        }

        datum.setFetchTime(curTime);
        // now add the metadata
        Iterator<String> keysIter = metadata.keySet().iterator();
        while (keysIter.hasNext()) {
          String keymd = keysIter.next();
          String valuemd = metadata.get(keymd);
          datum.getMetaData().put(new Text(keymd), new Text(valuemd));
        }
        if (customScore != -1) datum.setScore(customScore);
        else datum.setScore(scoreInjected);
        try {
          scfilters.injectedScore(value, datum);
        } catch (ScoringFilterException e) {
          if (LOG.isWarnEnabled()) {
            LOG.warn(
                "Cannot filter injected score for url "
                    + url
                    + ", using default ("
                    + e.getMessage()
                    + ")");
          }
        }
        reporter.getCounter("injector", "urls_injected").increment(1);
        output.collect(value, datum);
      }
    }
Exemplo n.º 25
0
    public void reduce(
        BlockIDWritable key,
        Iterator<PairWritable> values,
        OutputCollector<BlockIDWritable, PairWritable> output,
        Reporter reporter)
        throws IOException {
      // Vector<Star> starV = new Vector<Star>();
      int buketsizeX = 0;
      int buketsizeY = 0;
      double bwidth = maxAlphas[key.zoneNum]; // ra ,x
      double bheight = theta; // dec ,y
      /* add 10 more in each dimension to make sure there is no overflow. */
      Vector<Star>[][] arrstarV =
          new Vector[((int) (zoneHeight / bheight)) + 10]
              [((int) (blockWidth / bwidth)) + 10]; // create bucket vector[Y][X]

      int num = 0;
      while (values.hasNext()) {
        num++;
        Star s = values.next().get(0);

        // participant
        double posx = (s.ra - blockRanges[key.raNum][0]) / bwidth;
        int x = (int) posx + 1; // shit by 1 in case star comes from other block
        double posy = (s.dec - zoneRanges[key.zoneNum][0]) / bheight;
        int y = (int) posy + 1;

        // set bucket size as max
        if (buketsizeX < x) buketsizeX = x;
        if (buketsizeY < y) buketsizeY = y;
        // create according bucket
        if (arrstarV[y][x] == null)
          // TODO avaoid creating vectors here.
          arrstarV[y][x] = new Vector<Star>();
        // put star into bucket
        arrstarV[y][x].add(s);
      }
      // start reducer
      int i, j, row, col;
      // for each bucket
      for (row = 0; row <= buketsizeY; row++) {
        for (col = 0; col <= buketsizeX; col++) {
          //		starV.clear();
          // construct a new vector to do compare
          // TODO we need to avoid searching objects in the border.
          if (arrstarV[row][col] != null) {
            // old method to generate output
            for (i = 0; i < arrstarV[row][col].size(); i++) {
              for (j = i + 1; j < arrstarV[row][col].size(); j++) {
                Star star1 = arrstarV[row][col].get(i);
                Star star2 = arrstarV[row][col].get(j);
                // what is this margin about
                if (star1.margin && star2.margin) continue;

                double dist = star1.x * star2.x + star1.y * star2.y + star1.z * star2.z;
                if (dist > costheta) {
                  p.set(star1, star2, dist);
                  output.collect(key, p);
                  p.set(star2, star1, dist);
                  output.collect(key, p);
                  //		num += 2;

                }
              }
            } // end for i,j

          } // end if
          else {
            continue;
          }
          // 4 more neighbors
          // right upper arrstarV[row-1][col+1] vs arrstarV[row][col]
          if (row != 0 && arrstarV[row - 1][col + 1] != null) {
            search(arrstarV[row][col], arrstarV[row - 1][col + 1], key, output);
          }
          // right arrstarV[row][col+1] vs arrstarV[row][col]
          if (arrstarV[row][col + 1] != null) {
            search(arrstarV[row][col], arrstarV[row][col + 1], key, output);
          }
          // right lower
          if (arrstarV[row + 1][col + 1] != null) {
            search(arrstarV[row][col], arrstarV[row + 1][col + 1], key, output);
          }
          // lower
          if (arrstarV[row + 1][col] != null) {
            search(arrstarV[row][col], arrstarV[row + 1][col], key, output);
          } // end if
        } // end colum
      } // end row
    }
Exemplo n.º 26
0
    public void map(
        LongWritable key,
        Star value,
        OutputCollector<BlockIDWritable, PairWritable> output,
        Reporter reporter)
        throws IOException {

      loc.set(value.ra, value.dec);
      int zoneNum = loc.zoneNum;
      int raNum = loc.raNum;
      p.set(value, null);

      /*
       * When the block size increases (> theta), only part of a block
       * needs to be copied to its neighbor.
       */
      output.collect(loc, p);

      /*
       * only replicate objects in the border of a block. I expect most of
       * objects don't need to be copied.
       */
      if (value.dec > zoneRanges[zoneNum][0] + theta
          && value.dec < zoneRanges[zoneNum][1] - theta
          && value.ra > blockRanges[raNum][0] + maxAlphas[zoneNum]
          && value.ra < blockRanges[raNum][1] - maxAlphas[zoneNum]) return;

      /*
       * the code below is to copy the star to some neighbors. We only
       * need to copy an object to the bottom, left, left bottom, left top
       * neighbors
       */
      value.margin = true;

      /*
       * we should treat the entire zone 0 as a block, so we only needs to
       * copy some objects at the corner to their neighbors
       */
      if (loc.zoneNum == 0) {
        /* copy the object to the right top neighbor */
        if (value.ra >= blockRanges[raNum][1] - maxAlphas[zoneNum]
            && value.ra <= blockRanges[raNum][1]
            && value.dec >= zoneRanges[zoneNum][1] - theta
            && value.dec <= zoneRanges[zoneNum][1]) {
          //					BlockIDWritable loc1 = new BlockIDWritable();
          /* raNum of objects in zone 0 is always 0,
           * we need to recalculate it. */
          //					loc1.raNum = BlockIDWritable.ra2Num(value.ra) + 1;
          //					if (loc1.raNum == numBlocks) {
          //						loc1.raNum = 0;
          //						value.ra -= 360;
          //					}
          //					loc1.zoneNum = loc.zoneNum + 1;
          ///					output.collect(loc1, p);
        }
        return;
      } else if (loc.zoneNum == numZones - 1) {
        /* copy the object to the bottom neighbor */
        if (value.dec >= zoneRanges[zoneNum][0] && value.dec <= zoneRanges[zoneNum][0] + theta) {
          /* raNum of objects in zone zoneNum - 1 is always 0,
           * we need to recalculate it. */
          loc1.raNum = BlockIDWritable.ra2Num(value.ra);
          loc1.zoneNum = loc.zoneNum - 1;
          output.collect(loc1, p);

          /* copy the object to the right bottom neighbor */
          while (value.ra >= blockRanges[loc1.raNum][1] - maxAlphas[zoneNum]
              && value.ra <= blockRanges[loc1.raNum][1]) {
            loc1.raNum++;
            if (loc1.raNum == numBlocks) {
              loc1.raNum = 0;
              value.ra -= 360;
            }
            loc1.zoneNum = loc.zoneNum - 1;
            output.collect(loc1, p);
          }
        }
        return;
      }

      boolean wrap = false;
      loc1.raNum = loc.raNum;
      /* copy the object to the right neighbor */
      while (value.ra >= blockRanges[loc1.raNum][1] - maxAlphas[zoneNum]
          && value.ra <= blockRanges[loc1.raNum][1]) {
        loc1.raNum++;
        loc1.zoneNum = loc.zoneNum;
        /*
         * when the object is copied to the right neighbor, we need to
         * be careful. we need to convert ra and raNum if ra is close to
         * 360.
         */
        if (loc1.raNum == numBlocks) {
          loc1.raNum = 0;
          value.ra -= 360;
          wrap = true;
        }
        output.collect(loc1, p);
        /* copy the object to the right bottom neighbor */
        if (value.dec >= zoneRanges[zoneNum][0] && value.dec <= zoneRanges[zoneNum][0] + theta) {
          loc1.zoneNum = loc.zoneNum - 1;
          output.collect(loc1, p);
        }
        /* copy the object to the right top neighbor */
        if (value.dec >= zoneRanges[zoneNum][1] - theta && value.dec <= zoneRanges[zoneNum][1]) {
          loc1.zoneNum = loc.zoneNum + 1;
          output.collect(loc1, p);
        }
      }
      if (wrap) {
        value.ra += 360;
      }

      /* copy the object to the bottom neighbor */
      if (value.dec >= zoneRanges[zoneNum][0] && value.dec <= zoneRanges[zoneNum][0] + theta) {
        loc1.raNum = loc.raNum;
        loc1.zoneNum = loc.zoneNum - 1;
        if (loc1.zoneNum == 0) loc1.raNum = 0;
        output.collect(loc1, p);
      }
    }
  public void reduce(
      Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
      throws IOException {
    List<String[]> _temp = new ArrayList<String[]>();
    int count = 0;
    while (values.hasNext()) {
      Text _out = values.next();
      String[] tokens = StringUtils.splitPreserveAllTokens(_out.toString(), TAB);
      _temp.add(tokens);
      if (count++ > 100000) break;
    }

    if (count > 10000) {
      Set<String> ipSet = new HashSet<String>();
      for (int posI = 0; posI < _temp.size(); posI++) {
        String[] array = _temp.get(posI);
        if (array == null) continue;

        String mid = array[2];
        String ip = array[3];
        ipSet.add(ip);
      }
      output.collect(
          key, Utils.mergeKey(String.valueOf(ipSet.size()), StringUtils.join(ipSet, '|')));
      return;
    }

    /**
     * ·Ö×éËã·¨ FOREACH ALL_DATA IF IN INDEX THEN UPDATE INDEX AND INSERT DATA ELSE FOREACH SUB_DATA
     * MAKE INDEX AND SET FIND'S DATA AS NULL
     */
    // List<List<String[]>> dataList = new ArrayList<List<String[]>>();
    List<StringBuffer> indexList = new ArrayList<StringBuffer>();
    Set<String> ipSet = new HashSet<String>();
    boolean muliHost = false;
    for (int posI = 0; posI < _temp.size(); posI++) {
      String[] array = _temp.get(posI);
      if (array == null) continue;

      String mid = array[2];
      String ip = array[3];
      ipSet.add(ip);
      boolean hasIndex = false;
      for (int i = 0; i < indexList.size(); i++) {
        StringBuffer index = indexList.get(i);
        if (index.indexOf("|" + mid + "|") >= 0 || index.indexOf("|" + ip + "|") >= 0) {
          if (index.indexOf("|" + mid + "|") < 0) {
            index.append('|').append(mid).append('|');
          }

          if (index.indexOf("|" + ip + "|") < 0) {
            index.append('|').append(ip).append('|');
          }
          // dataList.get(i).add(array);
          hasIndex = true;
          break;
        }
      }
      if (!hasIndex) {
        StringBuffer index = new StringBuffer("|" + mid + "|" + ip + "|");
        // List<String[]> _tmp = new ArrayList<String[]>();
        // _tmp.add(array);
        for (int k = posI + 1; k < _temp.size(); k++) {
          String[] _newArray = _temp.get(k);
          if (_newArray == null) {
            continue;
          }
          String _mid = _newArray[2];
          String _ip = _newArray[3];
          if (index.indexOf("|" + _mid + "|") >= 0 || index.indexOf("|" + _ip + "|") >= 0) {
            if (index.indexOf("|" + _mid + "|") < 0) {
              index.append('|').append(_mid).append('|');
            }

            if (index.indexOf("|" + _ip + "|") < 0) {
              index.append('|').append(_ip).append('|');
            }
            // _tmp.add(_newArray);
            _temp.set(k, null);
          }
        }
        indexList.add(index);
        // dataList.add(_tmp);
      }
    }
    //        for(String[] _array : _temp){
    //            output.collect(key,Utils.mergeKey(_array[1],_array[2],_array[3],_array[4]));
    //        }

    StringBuffer allIndex = new StringBuffer();
    for (StringBuffer index : indexList) {
      allIndex.append(index).append(';');
    }
    if (allIndex.length() > 0) {
      allIndex.deleteCharAt(allIndex.length() - 1);
    }
    output.collect(
        key, Utils.mergeKey(String.valueOf(indexList.size()), StringUtils.join(ipSet, '|')));
  }
 public void map(Object key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
     throws IOException {
   String[] _allCols = StringUtils.splitPreserveAllTokens(value.toString(), TAB);
   output.collect(new Text(_allCols[0]), value);
 }
Exemplo n.º 29
0
    @Override
    public void reduce(
        IntWritable key,
        Iterator<ClusterWritable> values,
        OutputCollector<IntWritable, Text> output,
        Reporter reporter)
        throws IOException {

      float sumSimilarity = 0.0f;
      int numMovies = 0;
      float avgSimilarity = 0.0f;
      float similarity = 0.0f;
      int s = 0;
      int count;
      float diff = 0.0f;
      float minDiff = 1.0f;
      int candidate = 0;
      String data = new String("");
      String shortline = new String("");
      ArrayList<String> arrl = new ArrayList<String>();
      ArrayList<Float> simArrl = new ArrayList<Float>();
      String oneElm = new String();
      int indexShort, index2;
      Text val = new Text();

      while (values.hasNext()) {
        ClusterWritable cr = (ClusterWritable) values.next();
        similarity = cr.similarity;
        simArrl.addAll(cr.similarities);
        for (int i = 0; i < cr.movies.size(); i++) {
          oneElm = cr.movies.get(i);
          indexShort =
              oneElm.indexOf(
                  ",",
                  1000); // to avoid memory error caused by long arrays; it will results less
                         // accurate
          if (indexShort == -1) {
            shortline = new String(oneElm);
          } else {
            shortline = new String(oneElm.substring(0, indexShort));
          }
          arrl.add(shortline);
          output.collect(key, new Text(oneElm));
        }
        numMovies += cr.movies.size();
        sumSimilarity += similarity;
      }
      if (numMovies > 0) {
        avgSimilarity = sumSimilarity / (float) numMovies;
      }
      diff = 0.0f;
      minDiff = 1.0f;
      for (s = 0; s < numMovies; s++) {
        diff = (float) Math.abs(avgSimilarity - simArrl.get(s));
        if (diff < minDiff) {
          minDiff = diff;
          candidate = s;
        }
      }
      data = arrl.get(candidate);
      index2 = data.indexOf(":");
      String movieStr = data.substring(0, index2);
      String reviews = data.substring(index2 + 1);
      StringTokenizer token = new StringTokenizer(reviews, ",");
      count = 0;
      while (token.hasMoreTokens()) {
        token.nextToken();
        count++;
      }
      System.out.println(
          "The key = "
              + key.toString()
              + " has members = "
              + numMovies
              + " simil = "
              + simArrl.get(candidate));
      val = new Text(simArrl.get(candidate) + " " + movieStr + " " + count + " " + reviews);
      output.collect(key, val);
      reporter.incrCounter(Counter.VALUES, 1);
    }
Exemplo n.º 30
0
    public void map(
        LongWritable key,
        Text value,
        OutputCollector<IntWritable, ClusterWritable> output,
        Reporter reporter)
        throws IOException {

      String movieIdStr = new String();
      String reviewStr = new String();
      String userIdStr = new String();
      String reviews = new String();
      String line = new String();
      String tok = new String("");
      long movieId;
      int review, userId, p, q, r, rater, rating, movieIndex;
      int clusterId = 0;
      int[] n = new int[maxClusters];
      float[] sq_a = new float[maxClusters];
      float[] sq_b = new float[maxClusters];
      float[] numer = new float[maxClusters];
      float[] denom = new float[maxClusters];
      float max_similarity = 0.0f;
      float similarity = 0.0f;
      Cluster movie = new Cluster();
      ClusterWritable movies_arrl = new ClusterWritable();

      StringBuffer sb = new StringBuffer();

      line = ((Text) value).toString();
      movieIndex = line.indexOf(":");

      for (r = 0; r < maxClusters; r++) {
        numer[r] = 0.0f;
        denom[r] = 0.0f;
        sq_a[r] = 0.0f;
        sq_b[r] = 0.0f;
        n[r] = 0;
      }
      if (movieIndex > 0) {
        movieIdStr = line.substring(0, movieIndex);
        sb.append(movieIdStr).append(":");

        movieId = Long.parseLong(movieIdStr);
        movie.movie_id = movieId;
        reviews = line.substring(movieIndex + 1);
        StringTokenizer token = new StringTokenizer(reviews, ",");

        int attrCnt = 0;
        // while (token.hasMoreTokens()) { Leo
        while (token.hasMoreTokens() && attrCnt < attrNum) {
          tok = token.nextToken();
          int reviewIndex = tok.indexOf("_");
          // userIdStr = tok.substring(0, reviewIndex); //Leo
          userIdStr = String.valueOf(attrCnt);
          reviewStr = tok.substring(reviewIndex + 1);
          if (attrCnt > 0) {
            sb.append(",");
          }
          sb.append(String.valueOf(attrCnt)).append("_").append(reviewStr);
          userId = Integer.parseInt(userIdStr);
          review = Integer.parseInt(reviewStr);
          for (r = 0; r < totalClusters; r++) {
            /*for (q = 0; q < centroids_ref[r].total; q++) {
                rater = centroids_ref[r].reviews.get(q).rater_id;
                rating = (int) centroids_ref[r].reviews.get(q).rating;
                if (userId == rater) {
                    numer[r] += (float) (review * rating);
                    sq_a[r] += (float) (review * review);
                    sq_b[r] += (float) (rating * rating);
                    n[r]++; // counter
                    break; // to avoid multiple ratings by the same reviewer
                }
            }*/
            // Leo
            rating = (int) centroids_ref[r].reviews.get(attrCnt).rating;
            numer[r] += (float) ((review - rating) * (review - rating));

            n[r]++; // counter
          }
          attrCnt++;
        }
        for (p = 0; p < totalClusters; p++) {
          /*denom[p] = (float) ((Math.sqrt((double) sq_a[p])) * (Math
                  .sqrt((double) sq_b[p])));
          if (denom[p] > 0) {
              similarity = numer[p] / denom[p];
              if (similarity > max_similarity) {
                  max_similarity = similarity;
                  clusterId = p;
              }
          }*/
          // Leo
          similarity = 250 - numer[p];
          if (similarity > max_similarity) {
            max_similarity = similarity;
            clusterId = p;
          }
        }

        // movies_arrl.movies.add(line);//Leo
        movies_arrl.movies.add(sb.toString());
        movies_arrl.similarities.add(max_similarity);
        movies_arrl.similarity = max_similarity;
        output.collect(new IntWritable(clusterId), movies_arrl);
        reporter.incrCounter(Counter.WORDS, 1);
      }
    }