Java OutputCollectorの例、org.apache.hadoop.mapred.OutputCollector Javaの例

コード例 #1

1

ファイルを表示

ファイル: ForwardIndexMapper.java プロジェクト: Wolf-xu/KNNMusicRecommendationSystem

  /**
   * Consumes one input line and accumulates a user's song ratings across successive calls.
   *
   * <p>While {@code nRatings} is zero the line is read as a header: a user id followed by the
   * number of rating lines that follow. Any other line is read as a song id followed by a rate.
   * Once the expected number of rating lines has been consumed, the accumulated ratings are
   * emitted under {@code userIdKey}.
   *
   * @param unused input key; not read
   * @param line input line whose tokens are separated by space, '|' or tab
   * @param output receives one record per completed user
   * @param reporter not used
   * @throws IOException if the collector fails
   */
  public void map(
      Object unused,
      Text line,
      OutputCollector<LongWritable, PostingSongArrayWritable> output,
      Reporter reporter)
      throws IOException {

    StringTokenizer tokens = new StringTokenizer(line.toString(), " |\t");

    if (nRatings == 0) {
      // Header line: reset the per-user accumulation state.
      userId = Long.parseLong(tokens.nextToken());
      nRatings = Integer.parseInt(tokens.nextToken());
      songsRatings.clear();
      totalRate = 0;
      return;
    }

    // Rating line: record the song and count down the remaining lines for this user.
    long songId = Long.parseLong(tokens.nextToken());
    int rate = Integer.parseInt(tokens.nextToken());
    songsRatings.add(new PostingSong(songId, rate));
    totalRate += rate;
    nRatings--;

    if (nRatings != 0) {
      return;
    }

    // Last rating for this user: emit. nRatings holds the list size while emitting and is reset
    // to zero afterwards so the next line is read as a header.
    // NOTE(review): userIdKey is not assigned in this method; presumably it is kept in sync with
    // userId elsewhere - confirm.
    nRatings = songsRatings.size();
    songsValue.setArray(songsRatings);
    output.collect(userIdKey, songsValue);
    nRatings = 0;
  }

コード例 #2

0

ファイルを表示

ファイル: ParquetTupleScheme.java プロジェクト: lw-lin/parquet-mr

 /**
  * Writes the outgoing tuple of the given sink call to that call's output collector, using a
  * {@code null} key.
  *
  * @param fp flow process; not read
  * @param sink the sink call supplying both the outgoing entry and the collector
  * @throws IOException if the collector fails
  */
 @Override
 public void sink(FlowProcess<JobConf> fp, SinkCall<Object[], OutputCollector> sink)
     throws IOException {
   final TupleEntry outgoing = sink.getOutgoingEntry();
   final OutputCollector collector = sink.getOutput();
   collector.collect(null, outgoing);
 }

コード例 #3

0

ファイルを表示

ファイル: Hashmin.java プロジェクト: Yuzhen11/IOpregel-exp

    /**
     * Forwards one input line, keyed by its leading vertex id, from either the color table or the
     * edge table.
     *
     * <p>Lines ending with {@code COLOR} are color-table rows; they are forwarded only when their
     * third field parses to 1. Every other line is treated as an edge-table row and is forwarded
     * with the vertex id removed from the value.
     *
     * @param key input key; not read
     * @param value one whitespace-separated input line
     * @param output receives (vertex id, remaining fields)
     * @param reporter not used
     * @throws IOException if the collector fails
     */
    public void map(
        Object key, Text value, OutputCollector<IntWritable, Text> output, Reporter reporter)
        throws IOException {

      // vid neighbors_num n1 n2 ...
      // vid color 1/0 "COLOR"
      String str = value.toString();
      if (str.endsWith(COLOR)) {
        // color table: strip the trailing marker using its real length rather than a literal 5
        String[] tokens = str.substring(0, str.length() - COLOR.length()).split("\\s+");
        int change = Integer.parseInt(tokens[2]);
        if (change == 1) {
          IntWritable sourceId = new IntWritable(Integer.parseInt(tokens[0]));
          output.collect(sourceId, new Text(tokens[1] + " " + tokens[2] + COLOR));
        }
      } else {
        // edge table: emit every field after the vertex id, joined by single spaces
        String[] tokens = str.split("\\s+");
        IntWritable sourceId = new IntWritable(Integer.parseInt(tokens[0]));
        StringBuilder sb = new StringBuilder();
        for (int i = 1; i < tokens.length; i++) {
          if (sb.length() != 0) {
            sb.append(" ");
          }
          sb.append(tokens[i]);
        }
        output.collect(sourceId, new Text(sb.toString()));
      }
    }

コード例 #4

0

ファイルを表示

ファイル: Hashmin.java プロジェクト: Yuzhen11/IOpregel-exp

    /**
     * Passes color-table rows through unchanged and emits the smallest color found among the
     * message rows for the same key.
     *
     * <p>A value ending with {@code COLOR} is a color-table row and is re-emitted as is. Any other
     * value is a whitespace-separated list of integers; the minimum over all such values is
     * emitted once at the end, if at least one value below {@code Integer.MAX_VALUE} was seen.
     *
     * @param key vertex id
     * @param values color-table rows and message rows for this vertex
     * @param output receives the forwarded rows and the minimum color
     * @param reporter not used
     * @throws IOException if the collector fails
     */
    public void reduce(
        IntWritable key,
        Iterator<Text> values,
        OutputCollector<IntWritable, Text> output,
        Reporter reporter)
        throws IOException {
      int newColor = Integer.MAX_VALUE;
      while (values.hasNext()) {
        String str = values.next().toString();
        if (str.endsWith(COLOR)) {
          // color table: strip the trailing marker using its real length rather than a literal 5
          String[] tmp = str.substring(0, str.length() - COLOR.length()).split("\\s+");
          // The parsed color is not used further; parsing is kept so a malformed row still fails.
          Integer.parseInt(tmp[0]);

          output.collect(key, new Text(str));
        } else {
          // messages table
          for (String token : str.split("\\s+")) {
            newColor = Math.min(newColor, Integer.parseInt(token));
          }
        }
      }
      if (newColor < Integer.MAX_VALUE) {
        output.collect(key, new Text(Integer.toString(newColor)));
      }
    }

コード例 #5

0

ファイルを表示

ファイル: IpAddrJoin.java プロジェクト: bibhutibhusan89/cloudera-training

    /**
     * Splits a line on {@code FIELD_SEPARATOR} and emits a join record for the two recognised
     * line shapes.
     *
     * <p>Lines with fewer than {@code MINIMUM_PARTS} fields, lines whose field count matches
     * neither recognised length, and lines whose fields fail integer parsing produce no output.
     *
     * @param key input key; not read
     * @param value one input line
     * @param output receives the join key/value pair
     * @param reporter not used
     * @throws IOException if the collector fails
     */
    public void map(
        LongWritable key,
        Text value,
        OutputCollector<JoinRecordKey, JoinRecordValue> output,
        Reporter reporter)
        throws IOException {

      // Build a one-character string from the field separator; since this is a ^A,
      // it should be a regex for itself.
      final String separatorRegex = new String(new char[] {FIELD_SEPARATOR});
      final String[] parts = value.toString().split(separatorRegex);

      if (parts.length < MINIMUM_PARTS) {
        return;
      }

      try {
        // Integer components of this line.
        final int[] asInts = getInts(parts);

        if (parts.length == IP_ONLY_LENGTH) {
          // Just an IP address.
          output.collect(new JoinRecordKey(asInts), JoinRecordValue.getLogEntryInstance());
        } else if (parts.length == CITY_ID_LENGTH) {
          // An IP address together with the (city, country) ids.
          output.collect(new JoinRecordKey(asInts), new JoinRecordValue(asInts[3], asInts[4]));
        }
      } catch (NumberFormatException nfe) {
        // Unparsable line: deliberately ignored. Every use of asInts was skipped by the throw.
      }
    }

コード例 #6

0

ファイルを表示

ファイル: AccessLogJob.java プロジェクト: kevx/csp

    /**
     * Aggregates the values of one key according to the reduce type encoded in the key.
     *
     * <p>The reduce type is the part of the key before the first {@code $$}. For the MAIN type the
     * output value is the number of values, {@code spliter}, and the sum of the values parsed as
     * longs. For every other type the output value is just the number of values.
     *
     * @param key composite key; emitted unchanged
     * @param values values grouped under the key
     * @param output receives one record per key
     * @param reporter not used
     * @throws IOException if the collector fails
     */
    public void reduce(
        Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
      final String rKey = key.toString();
      final String reduceType = rKey.split("\\$\\$")[0];

      final Text outValue;
      if (AccessReduceTypeEnum.MAIN.toString().equals(reduceType)) {
        // Main reduce: page views plus accumulated use time.
        long useTime = 0L;
        long pv = 0L;
        while (values.hasNext()) {
          pv++;
          useTime += Long.parseLong(values.next().toString());
        }
        outValue = new Text(pv + spliter + useTime);
      } else {
        // Any other type: plain count of values.
        long num = 0L;
        for (; values.hasNext(); num++) {
          values.next();
        }
        outValue = new Text(String.valueOf(num));
      }

      output.collect(new Text(rKey), outValue);
    }

コード例 #7

0

ファイルを表示

ファイル: Fill.java プロジェクト: uzielgl/hadoop

    /**
     * Emits one CSV-style row per key, with one pair of columns per city in sorted city order.
     *
     * <p>Each value is split on commas into four fields (named city, month, temperature and date
     * by the original locals). Values are stored by city in a {@link TreeMap}, so a later value
     * for the same city replaces an earlier one. On the first call, tracked by the field
     * {@code c}, a header row listing the cities is emitted before the data row.
     *
     * @param key row key, emitted with a trailing comma
     * @param values comma-separated records with at least four fields
     * @param output receives the optional header row and the data row
     * @param reporter not used
     * @throws IOException if the collector fails
     */
    @Override
    public void reduce(
        Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {

      StringBuilder line = new StringBuilder();
      StringBuilder header = new StringBuilder();
      TreeMap<String, String> ciudades = new TreeMap<String, String>();

      // Collect the data into a TreeMap so that it is ordered by city.
      while (values.hasNext()) {
        String[] tmp = values.next().toString().split(",");
        ciudades.put(tmp[0], tmp[1] + "," + tmp[2] + "," + tmp[3]);
      }

      // Walk the cities in order, building the header and the data row.
      for (String ciudad : ciudades.keySet()) {
        header.append(ciudad).append(",,");
        String[] temporal = ciudades.get(ciudad).split(",");
        line.append(temporal[2]).append(",").append(temporal[1]).append(",");
      }

      if (c == 0) { // Emit the header only once.
        output.collect(new Text("Año,"), new Text(header.toString()));
        c++;
      }

      output.collect(new Text(key.toString() + ","), new Text(line.toString()));
    }

コード例 #8

0

ファイルを表示

ファイル: PopBubbles.java プロジェクト: julianlau/contrail-emr

    /**
     * Parses a node message, emits the messages for each bubble recorded on the node, and then
     * re-emits the node itself.
     *
     * <p>Each bubble string is split on '|' into six fields. Three messages are emitted per
     * bubble: a kill-link message, a kill message, and an extra-coverage message. The node's
     * bubbles are cleared before the node is re-emitted.
     *
     * @param lineid input key; not read
     * @param nodetxt serialized node message
     * @param output receives the bubble messages and the node record
     * @param reporter receives the "bubblespopped" and "nodes" counters
     * @throws IOException if the collector fails
     */
    public void map(
        LongWritable lineid, Text nodetxt, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
      Node node = new Node();
      node.fromNodeMsg(nodetxt.toString());

      List<String> bubbles = node.getBubbles();
      if (bubbles != null) {
        for (String bubble : bubbles) {
          // Read all six fields up front so a short record fails before anything is emitted.
          String[] fields = bubble.split("\\|");
          String minorId = fields[0];
          String minorDir = fields[1];
          String deadId = fields[2];
          String newDir = fields[3];
          String newId = fields[4];
          String extraCov = fields[5];

          String killLink =
              Node.KILLLINKMSG + "\t" + minorDir + "\t" + deadId + "\t" + newDir + "\t" + newId;
          output.collect(new Text(minorId), new Text(killLink));

          output.collect(new Text(deadId), new Text(Node.KILLMSG));
          output.collect(new Text(newId), new Text(Node.EXTRACOV + "\t" + extraCov));

          reporter.incrCounter("Contrail", "bubblespopped", 1);
        }

        node.clearBubbles();
      }

      output.collect(new Text(node.getNodeId()), new Text(node.toNodeMsg()));
      reporter.incrCounter("Contrail", "nodes", 1);
    }

コード例 #9

0

ファイルを表示

ファイル: Compressible.java プロジェクト: nabsrock786/Hadoop_NGS_Lakshman

    /**
     * Parses a node message, notifies the tail node in each direction, and re-emits the node.
     *
     * <p>For every direction in {@code Node.dirs} the node's can-compress flag is cleared. If the
     * node has a tail in that direction and the tail is not the node itself, a
     * {@code Node.HASUNIQUEP} message carrying this node's id and the direction is sent to the
     * tail.
     *
     * @param lineid input key; not read
     * @param nodetxt serialized node message
     * @param output receives the tail messages and the node record
     * @param reporter receives the "remotemark" and "nodes" counters
     * @throws IOException if the collector fails
     */
    public void map(
        LongWritable lineid, Text nodetxt, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
      Node node = new Node();
      node.fromNodeMsg(nodetxt.toString());

      for (String dir : Node.dirs) {
        node.setCanCompress(dir, false);

        TailInfo tail = node.gettail(dir);

        // No tail in this direction (a check on node.getBlackEdges() == null is disabled here).
        if (tail == null) {
          continue;
        }

        // A tail pointing back at this node is not reported.
        if (tail.id.equals(node.getNodeId())) {
          continue;
        }

        reporter.incrCounter("Brush", "remotemark", 1);

        String message = Node.HASUNIQUEP + "\t" + node.getNodeId() + "\t" + dir;
        output.collect(new Text(tail.id), new Text(message));
      }

      output.collect(new Text(node.getNodeId()), new Text(node.toNodeMsg()));

      reporter.incrCounter("Brush", "nodes", 1);
    }

コード例 #10

0

ファイルを表示

ファイル: NeighborSearch.java プロジェクト: zheng-da/NeighborSearch

    /**
     * Compares every star in {@code v1} with every star in {@code v2} and emits both orderings of
     * each pair whose coordinate dot product exceeds {@code costheta}.
     *
     * <p>Pairs in which both stars have {@code margin} set are skipped. The shared pair object
     * {@code p} is overwritten before each emit.
     *
     * @param v1 first group of stars
     * @param v2 second group of stars
     * @param key key under which every matching pair is emitted
     * @param output receives two records per matching pair
     * @throws IOException if the collector fails
     */
    void search(
        Vector<Star> v1,
        Vector<Star> v2,
        BlockIDWritable key,
        OutputCollector<BlockIDWritable, PairWritable> output)
        throws IOException {
      for (Star first : v1) {
        for (Star second : v2) {
          // NOTE(review): the purpose of the margin flag is not documented - confirm.
          boolean bothMargin = first.margin && second.margin;
          if (bothMargin) {
            continue;
          }

          double dot = first.x * second.x + first.y * second.y + first.z * second.z;
          if (!(dot > costheta)) {
            continue;
          }

          p.set(first, second, dot);
          output.collect(key, p);
          p.set(second, first, dot);
          output.collect(key, p);
        }
      }
    }

コード例 #11

0

ファイルを表示

ファイル: PiEstimator.java プロジェクト: JichengSong/hadoop-20

    /**
     * Generates {@code size} sample points and counts how many fall inside or outside the circle
     * inscribed in the unit square.
     *
     * @param offset samples start from the (offset+1)th sample
     * @param size the number of samples for this map
     * @param out output {true->numInside, false->numOutside}
     * @param reporter receives a status message after every 1000 samples
     * @throws IOException if the collector fails
     */
    public void map(
        LongWritable offset,
        LongWritable size,
        OutputCollector<BooleanWritable, LongWritable> out,
        Reporter reporter)
        throws IOException {

      final HaltonSequence sequence = new HaltonSequence(offset.get());
      long inside = 0L;
      long outside = 0L;

      long generated = 0;
      while (generated < size.get()) {
        // Next point in the unit square, shifted so the circle is centred on the origin.
        final double[] point = sequence.nextPoint();
        final double dx = point[0] - 0.5;
        final double dy = point[1] - 0.5;

        // Radius 0.5, so the squared-distance threshold is 0.25.
        final double squaredDistance = dx * dx + dy * dy;
        if (squaredDistance > 0.25) {
          outside++;
        } else {
          inside++;
        }

        generated++;
        if (generated % 1000 == 0) {
          reporter.setStatus("Generated " + generated + " samples.");
        }
      }

      // Emit the two counts.
      out.collect(new BooleanWritable(true), new LongWritable(inside));
      out.collect(new BooleanWritable(false), new LongWritable(outside));
    }

コード例 #12

0

ファイルを表示

ファイル: ManyTableJob.java プロジェクト: babokim/cloudata

    /**
     * Uploads 50000 rows with random keys into {@code ctable} and emits every generated key.
     *
     * <p>The table name is emitted first. Each row key is a random long formatted with a
     * 20-digit zero-padded pattern, and each row carries one "Col1" cell holding {@code data}.
     * If the field {@code err} is set, it is thrown before any work is done.
     *
     * @param key input key; not read
     * @param value input value; not read
     * @param collector receives the table name and every generated row key, with empty values
     * @param reporter receives a progress ping every 1000 rows
     * @throws IOException if {@code err} is set or the collector fails
     */
    public void map(
        WritableComparable key,
        Writable value,
        OutputCollector<WritableComparable, Writable> collector,
        Reporter reporter)
        throws IOException {
      LOG.info("Start Map");
      if (err != null) {
        throw err;
      }

      final DecimalFormat keyFormat = new DecimalFormat("00000000000000000000");

      collector.collect(new Text(tableName), new Text(""));

      long uploaded = 0;
      while (uploaded < 50000) {
        final String formattedKey = keyFormat.format(rand.nextLong());

        Row row = new Row(new Row.Key(formattedKey));
        row.addCell("Col1", new Cell(Cell.Key.EMPTY_KEY, this.data));
        ctable.put(row);

        if (uploaded % 1000 == 0) {
          reporter.progress();
        }
        if (uploaded % 10000 == 0) {
          LOG.info("uploaded: " + uploaded);
        }

        collector.collect(new Text(formattedKey), new Text(""));
        uploaded++;
      }

      LOG.info("End Map");
    }

コード例 #13

0

ファイルを表示

ファイル: Similarity.java プロジェクト: gdfm/similarity-self-join

 /**
  * Forwards vector records unchanged and sums the similarity of addend records.
  *
  * <p>A key whose secondary component is below {@code Preprocesser.MINIMUM_ID} is a vector: its
  * first value is emitted as is, and an assertion fires if further values exist. Any other key
  * is an addend: the similarities of all its half pairs are summed and emitted together with the
  * id of the last half pair seen.
  *
  * @param key record key
  * @param values values grouped under the key
  * @param output receives one record per key
  * @param reporter pinged for progress while summing
  * @throws IOException if the collector fails
  */
 @Override
 public void reduce(
     GenericKey key,
     Iterator<GenericValue> values,
     OutputCollector<GenericKey, GenericValue> output,
     Reporter reporter)
     throws IOException {
   if (key.getSecondary() < Preprocesser.MINIMUM_ID) { // vector
     output.collect(key, values.next());
     if (values.hasNext()) {
       assert false : "Vectors should not get grouped by combiner: " + key;
     }
     return;
   }

   // addend
   reporter.progress();
   float total = 0;
   HalfPair last = null;
   for (int seen = 0; values.hasNext(); seen++) {
     last = (HalfPair) values.next().get();
     total += last.getSimilarity();
     if (seen % REPORTER_INTERVAL == 0) {
       reporter.progress();
     }
   }

   if (last == null) {
     assert false : "There is nothing to combine!";
     return;
   }

   payload.set(last.getID(), total);
   outValue.set(payload);
   output.collect(key, outValue);
 }

コード例 #14

0

ファイルを表示

ファイル: CreateNeighbourhoodForBFS.java プロジェクト: saigoda/Recommender-System

 /*
  * (non-Javadoc)
  *
  * @see org.apache.hadoop.mapred.Reducer#reduce(java.lang.Object,
  * java.util.Iterator, org.apache.hadoop.mapred.OutputCollector,
  * org.apache.hadoop.mapred.Reporter)
  *
  * The first key that the reducer receives is taken as the source node.
  * The paper says any arbitrary node can be chosen as the source node.
  *
  * There is only one reducer, so all the keys come to the same reducer,
  * and the first key is the source.
  *
  * For the source node the output is
  * <Key>\t<neighbor1>,<neighbor2>,<neighbor3>,<neighbor4>|distance|GRAY|source|1
  * The distance is the distance from the parent node. If the distance is
  * modified, it means the parent of the node changed. The source node is
  * GRAY, indicating that this is the node from which processing should
  * start. The 'source' marker indicates that this is the parent node.
  *
  * For all other nodes the output is
  * <Key>\t<neighbor1>,<neighbor2>,<neighbor3>,<neighbor4>|Integer.MAX_VALUE
  * |WHITE|null|0
  *
  * These nodes are WHITE since they are ready to be processed first or
  * they are in the search frontier. The distance is set to an arbitrary
  * placeholder since the node has not been reached yet.
  *
  * The null indicates that there is no parent for this node.
  */
 public void reduce(
     Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
     throws IOException {
   StringBuilder neighbours = new StringBuilder();
   while (values.hasNext()) {
     neighbours.append(values.next().toString());
   }

   // The very first key seen becomes the source; every later key is a regular node.
   final boolean firstKey = !source;
   final String suffix = firstKey ? "|0|GRAY|source|1" : "|Integer.MAX_VALUE|WHITE|null|0";
   output.collect(key, new Text(neighbours.toString() + suffix));
   if (firstKey) {
     source = true;
   }
 }

コード例 #15

0

ファイルを表示

ファイル: MapReduce.java プロジェクト: uuee6543/Panda-information-retrieval-package

    /**
     * Mapping function: emits a document's current page rank twice, once under the document's
     * own key and once under {@code special_key}.
     *
     * @param key Text which is the document link
     * @param value MapReduceNode which represents a document
     * @param output receives (link, page rank) and (special key, page rank)
     * @param reporter not used
     * @throws IOException if the collector fails
     */
    @Override
    public void map(
        Text key,
        MapReduceNode value,
        OutputCollector<Text, DoubleWritable> output,
        Reporter reporter)
        throws IOException {

      final DoubleWritable rank = value.getCurrentPageRank();

      // Per-document record.
      output.collect(key, rank);
      // The same rank again, grouped under the shared special key.
      output.collect(special_key, rank);
    }

コード例 #16

0

ファイルを表示

ファイル: MapreduceStringFinder.java プロジェクト: computingfacts/open-Technologies

    /**
     * Evolves a population towards the string held in {@code key} and emits the reported result
     * together with the {@code found} flag.
     *
     * @param key the input file
     * @param value population of candidates
     * @param output output to reducer
     * @param reporter for reporting purposes
     * @throws IOException if the collector fails, or wrapping any throwable that is neither an
     *     {@code IOException} nor a {@code RuntimeException}
     */
    @Override
    public void map(
        Text key,
        Population value,
        OutputCollector<Text, BooleanWritable> output,
        Reporter reporter)
        throws IOException {
      try {

        try {

          // Evolve the population of candidates of size (popsize) until there is a match to the
          // key (problem).
          solution =
              Population.evolveSolution(key.toString(), popsize, Population.generatedpopulation);

          // Fetch the result of the best solution found.
          result = EvolutionStalker.populationUpdates(EvolutionStalker.output);

          // Mark success when the solution matches the key; otherwise found keeps its value.
          if (solution.contentEquals(key.toString())) {
            found = true;
          }

          // The result is fetched again before emitting in both the matching and the
          // non-matching case.
          result = EvolutionStalker.populationUpdates(EvolutionStalker.output);
          output.collect(new Text(result), new BooleanWritable(found));
        } catch (NumberFormatException e) {
          log.warning("Unable to parse key," + key + " " + value + "" + e);
        }

      } catch (Throwable e) {
        log.severe(
            "unexpected exception in mapper for key," + "value " + key + "," + value + "" + e);

        // Rethrow the kinds the signature allows as they are; wrap everything else.
        if (e instanceof IOException) {
          throw (IOException) e;
        }
        if (e instanceof RuntimeException) {
          throw (RuntimeException) e;
        }
        throw new IOException("unknown Exception occured", e);
      }
    }

コード例 #17

0

ファイルを表示

ファイル: CSRSorter.java プロジェクト: KGayan/Acacia

    /**
     * Emits two counters per input line: the number of fields on the line under key -1 and the
     * constant 1 under key -2.
     *
     * @param arg0 input key; not read
     * @param value one input line, split on single spaces or commas after trimming
     * @param output receives the two counter records
     * @param arg3 reporter; not used
     * @throws IOException if the collector fails
     */
    public void map(
        LongWritable arg0,
        Text value,
        OutputCollector<LongWritable, LongWritable> output,
        Reporter arg3)
        throws IOException {
      final String[] fields = value.toString().trim().split(" |,");

      // Edge contribution of this line.
      output.collect(new LongWritable(-1), new LongWritable(fields.length));
      // Vertex contribution: each line emits a <-2,1> pair.
      output.collect(new LongWritable(-2), new LongWritable(1));
    }

コード例 #18

0

ファイルを表示

ファイル: PartitionByStationUsingMultipleOutputs.java プロジェクト: tranbahien/hadoop-book

    /**
     * Writes every value of a key to the "station" multi-output named after the key with all
     * '-' characters removed.
     *
     * @param key key whose text, minus '-' characters, names the output
     * @param values values written with a {@code NullWritable} key
     * @param output regular job collector; not used
     * @param reporter passed on when the named collector is obtained
     * @throws IOException if the collector fails
     */
    @SuppressWarnings("unchecked")
    public void reduce(
        Text key,
        Iterator<Text> values,
        OutputCollector<NullWritable, Text> output,
        Reporter reporter)
        throws IOException {

      final String outputName = key.toString().replace("-", "");
      final OutputCollector stationCollector =
          multipleOutputs.getCollector("station", outputName, reporter);

      for (; values.hasNext(); ) {
        stationCollector.collect(NullWritable.get(), values.next());
      }
    }

コード例 #19

0

ファイルを表示

ファイル: AFormatterWG.java プロジェクト: ezubaric/Cloud9

    /**
     * Merges the adjacency lists of all incoming nodes for a key into one output node.
     *
     * <p>The shared {@code adjList} is cleared and then refilled from every value. The output
     * node is given type {@code HITSNode.TYPE_AUTH_COMPLETE}, rank 0, the merged list and the
     * key's id.
     *
     * @param key node id
     * @param values nodes whose adjacency lists are merged
     * @param output receives the merged node
     * @param reporter not used
     * @throws IOException if the collector fails
     */
    public void reduce(
        IntWritable key,
        Iterator<HITSNode> values,
        OutputCollector<IntWritable, HITSNode> output,
        Reporter reporter)
        throws IOException {
      // Reuse the shared list rather than allocating a new one per key.
      adjList.clear();

      for (; values.hasNext(); ) {
        valIn = values.next();
        ArrayListOfIntsWritable incoming = valIn.getAdjacencyList();
        // Trim first so getArray() is sized to the list's contents.
        incoming.trimToSize();
        adjList.addAll(incoming.getArray());
      }

      valOut.setType(HITSNode.TYPE_AUTH_COMPLETE);
      valOut.setHARank(0.0f);
      valOut.setAdjacencyList(adjList);
      valOut.setNodeId(key.get());

      output.collect(key, valOut);
    }

コード例 #20

0

ファイルを表示

ファイル: SimilarityReducer2.java プロジェクト: kriskrss/ItemBasedCF

 /**
  * Computes a correlation coefficient between the two rating columns of a movie pair.
  *
  * <p>Each value is a comma-separated pair of ratings. The result is the sum of products of the
  * mean-centred ratings divided by the product of the square roots of the summed squared
  * deviations. A NaN result is replaced by 0. The intermediate sums are also printed to
  * standard output.
  *
  * @param moviePair key identifying the pair; emitted unchanged
  * @param ratings values of the form "rating1,rating2"
  * @param output receives (moviePair, correlation as text)
  * @param reporter not used
  * @throws IOException if the collector fails
  */
 @Override
 public void reduce(
     Text moviePair, Iterator<Text> ratings, OutputCollector<Text, Text> output, Reporter reporter)
     throws IOException {
   ArrayList<Double> first = new ArrayList<Double>();
   ArrayList<Double> second = new ArrayList<Double>();
   double firstTotal = 0;
   double secondTotal = 0;

   // Pass 1: collect both columns and their totals.
   while (ratings.hasNext()) {
     String[] pair = ratings.next().toString().split(",");
     double r1 = Double.parseDouble(pair[0]);
     double r2 = Double.parseDouble(pair[1]);
     first.add(r1);
     second.add(r2);
     firstTotal += r1;
     secondTotal += r2;
   }

   final int count = first.size();
   final double firstMean = firstTotal / count;
   final double secondMean = secondTotal / count;

   // Pass 2: squared deviations and the cross product.
   double sum1 = 0.0;
   double sum2 = 0.0;
   double sumProduct = 0.0;
   for (int i = 0; i < count; i++) {
     double d1 = first.get(i) - firstMean;
     double d2 = second.get(i) - secondMean;
     sum1 += Math.pow(d1, 2);
     sum2 += Math.pow(d2, 2);
     sumProduct += d1 * d2;
   }

   double corr = sumProduct / (Math.sqrt(sum1) * Math.sqrt(sum2));
   System.out.println(sum1 + "," + sum2 + "," + sumProduct + "," + corr);
   if (Double.isNaN(corr)) {
     // Treated as "no correlation", e.g. when there are no ratings or a column has no variance.
     corr = 0;
   }
   output.collect(moviePair, new Text(String.valueOf(corr)));
 }

コード例 #21

0

ファイルを表示

ファイル: SegmentReader.java プロジェクト: vuquangtin/news-nutch-plugin

  /**
   * Builds a textual dump of every value recorded for a URL and emits it under that URL.
   *
   * <p>The dump starts with a record number (taken from {@code recNo}, which is incremented) and
   * the URL. Each unwrapped value of a recognised type is appended under a type header. Values
   * of any other type are reported through the logger when warnings are enabled.
   *
   * @param key the URL
   * @param values wrapped values recorded for the URL
   * @param output receives (URL, dump text)
   * @param reporter not used
   * @throws IOException if the collector fails
   */
  public void reduce(
      Text key,
      Iterator<NutchWritable> values,
      OutputCollector<Text, Text> output,
      Reporter reporter)
      throws IOException {
    StringBuilder dump = new StringBuilder();

    dump.append("\nRecno:: ").append(recNo++).append("\n");
    dump.append("URL:: ").append(key.toString()).append("\n");

    while (values.hasNext()) {
      Writable value = values.next().get(); // unwrap

      // Pick the section header that matches the value's type, if any.
      String sectionHeader = null;
      if (value instanceof CrawlDatum) {
        sectionHeader = "\nCrawlDatum::\n";
      } else if (value instanceof Content) {
        sectionHeader = "\nContent::\n";
      } else if (value instanceof ParseData) {
        sectionHeader = "\nParseData::\n";
      } else if (value instanceof ParseText) {
        sectionHeader = "\nParseText::\n";
      }

      if (sectionHeader != null) {
        dump.append(sectionHeader).append(value.toString());
      } else if (LOG.isWarnEnabled()) {
        LOG.warn("Unrecognized type: " + value.getClass());
      }
    }

    output.collect(key, new Text(dump.toString()));
  }

コード例 #22

0

ファイルを表示

ファイル: SortByDeptAndAgeUsingComparator.java プロジェクト: kdwcse/try-hadoop-mapreduce-java

    /**
     * Emits all records of one key ordered by employee age, ascending.
     *
     * @param key grouping key, emitted unchanged with every record
     * @param values records for the key
     * @param output receives the records in age order
     * @param reporter not used
     * @throws IOException if the collector fails
     */
    @Override
    public void reduce(
        IntWritable key,
        Iterator<Text> values,
        OutputCollector<IntWritable, Text> output,
        Reporter reporter)
        throws IOException {
      List<Text> records = new ArrayList<Text>();
      for (; values.hasNext(); ) {
        // The iterator reuses the same object reference for every element, so each value
        // must be copied into a new Text.
        records.add(new Text(values.next()));
      }

      // Sort by age.
      Comparator<Text> byAge =
          new Comparator<Text>() {

            private EmployeeRecordParser left = new EmployeeRecordParser();
            private EmployeeRecordParser right = new EmployeeRecordParser();

            @Override
            public int compare(Text o1, Text o2) {
              left.parse(o1);
              right.parse(o2);
              return Integer.compare(left.getEmployeeAge(), right.getEmployeeAge());
            }
          };
      Collections.sort(records, byAge);

      for (Text record : records) {
        output.collect(key, record);
      }
    }

コード例 #23

0

ファイルを表示

ファイル: SortByDeptAndAgeUsingComparator.java プロジェクト: kdwcse/try-hadoop-mapreduce-java

 /**
  * Parses a record with the shared {@code parser} and emits it unchanged under its department
  * id.
  *
  * @param key input key; not read
  * @param value one record
  * @param output receives (department id, record)
  * @param reporter not used
  * @throws IOException if the collector fails
  */
 @Override
 public void map(
     Object key, Text value, OutputCollector<IntWritable, Text> output, Reporter reporter)
     throws IOException {
   parser.parse(value);
   final IntWritable department = new IntWritable(parser.getDepartmentId());
   output.collect(department, value);
 }

コード例 #24

0

ファイルを表示

ファイル: L10.java プロジェクト: kidaak/Hadoop-MapReduce-1

 /**
  * Emits every value of the group with a {@code null} key; the incoming key is not read.
  *
  * @param key grouping key; not read
  * @param iter values of the group
  * @param oc receives each value under a null key
  * @param reporter not used
  * @throws IOException if the collector fails
  */
 public void reduce(
     MyType key, Iterator<Text> iter, OutputCollector<MyType, Text> oc, Reporter reporter)
     throws IOException {
   for (; iter.hasNext(); ) {
     final Text next = iter.next();
     oc.collect(null, next);
   }
 }

コード例 #25

0

ファイルを表示

ファイル: XiangLi1_exercise3.java プロジェクト: xxiang13/hadoop

    /**
     * Emits "artist,duration,title" for every comma-separated row whose year column lies in
     * 2000..2010 inclusive.
     *
     * <p>The year is read from column 165; artist, duration and title come from columns 2, 3 and
     * 1. Rows whose year cannot be parsed, and rows too short to contain the year column, produce
     * no output.
     *
     * @param key input key; not read
     * @param value one comma-separated row
     * @param output receives (artist,duration,title) with the shared {@code dummy} value
     * @param reporter not used
     * @throws IOException if the collector fails
     */
    // specify input and out keys
    public void map(
        LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
      final int yearColumn = 165;

      String line = value.toString();

      // A plain split does not honour quoted commas; a quote-aware alternative would be:
      // String[] inputs = line.split(",(?=([^\"]*\"[^\"]*\")*[^\"]*$)");
      String[] inputs = line.split(",");

      // Skip short rows the same way unparsable years are skipped, instead of failing with an
      // ArrayIndexOutOfBoundsException.
      if (inputs.length <= yearColumn) {
        return;
      }

      try {
        int year = Integer.parseInt(inputs[yearColumn]);

        // Direct range check; avoids building a list of years for every record.
        if (year >= 2000 && year <= 2010) {
          String dur = inputs[3];
          String artist_name = inputs[2];
          String song_title = inputs[1];
          String final_input = artist_name + ',' + dur + ',' + song_title;
          Final_Value.set(final_input);
          output.collect(Final_Value, dummy);
        }
      } catch (NumberFormatException ignored) {
        // Rows with a non-numeric year are intentionally skipped.
      }
    }

コード例 #26

0

ファイルを表示

ファイル: WordCountExtended.java プロジェクト: st3ffwo3/hadoop_samples

    /**
     * {@inheritDoc}
     *
     * <p>Lower-cases the line unless {@code m_caseSensitive} is set, removes every match of the
     * skip patterns, and then emits each whitespace-delimited token with the constant
     * {@code ONE} while incrementing the {@code INPUT_WORDS} counter. The status is updated
     * after every 100 records.
     *
     * @see org.apache.hadoop.mapred.Mapper#map(java.lang.Object, java.lang.Object,
     *     org.apache.hadoop.mapred.OutputCollector, org.apache.hadoop.mapred.Reporter)
     */
    @Override
    public void map(
        LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
        throws IOException {
      String text = value.toString();
      if (!m_caseSensitive) {
        text = text.toLowerCase();
      }

      // Each skip pattern is applied as a regular expression.
      for (String skipPattern : m_patternsToSkip) {
        text = text.replaceAll(skipPattern, "");
      }

      for (StringTokenizer words = new StringTokenizer(text); words.hasMoreTokens(); ) {
        m_word.set(words.nextToken());
        output.collect(m_word, ONE);
        reporter.incrCounter(Counters.INPUT_WORDS, 1);
      }

      ++m_numRecords;
      if ((m_numRecords % 100) == 0) {
        String status =
            "Finished processing "
                + m_numRecords
                + " records "
                + "from the input file: "
                + m_inputFile;
        reporter.setStatus(status);
      }
    }

コード例 #27

0

ファイルを表示

ファイル: DistRaid.java プロジェクト: fire9/hadoop-20

    /**
     * Runs a file operation: raids the file named by {@code key} under the given policy.
     *
     * <p>On success the block and size counters from the statistics object are reported. On an
     * {@code IOException} the failure is counted, logged, and emitted as a text record with a
     * null key instead of being rethrown. The status string is refreshed in either case.
     *
     * @param key path of the file to raid
     * @param policy policy to apply
     * @param out receives a "FAIL: ..." record for a failed file
     * @param reporter receives counters and status; also stored in the reporter field
     * @throws IOException if the collector fails while recording a failure
     */
    public void map(
        Text key,
        PolicyInfo policy,
        OutputCollector<WritableComparable, Text> out,
        Reporter reporter)
        throws IOException {
      this.reporter = reporter;
      try {
        LOG.info("Raiding file=" + key.toString() + " policy=" + policy);
        Path filePath = new Path(key.toString());
        FileStatus fileStatus = filePath.getFileSystem(jobconf).getFileStatus(filePath);
        st.clear();
        RaidNode.doRaid(jobconf, policy, fileStatus, st, reporter);

        ++succeedcount;

        // Publish the statistics gathered for this file.
        reporter.incrCounter(Counter.PROCESSED_BLOCKS, st.numProcessedBlocks);
        reporter.incrCounter(Counter.PROCESSED_SIZE, st.processedSize);
        reporter.incrCounter(Counter.META_BLOCKS, st.numMetaBlocks);
        reporter.incrCounter(Counter.META_SIZE, st.metaSize);

        reporter.incrCounter(Counter.FILES_SUCCEEDED, 1);
      } catch (IOException e) {
        ++failcount;
        reporter.incrCounter(Counter.FILES_FAILED, 1);

        // The failure is recorded as output rather than failing the whole task.
        String failure = "FAIL: " + policy + ", " + key + " " + StringUtils.stringifyException(e);
        out.collect(null, new Text(failure));
        LOG.info(failure);
      } finally {
        reporter.setStatus(getCountString());
      }
    }

コード例 #28

0

ファイルを表示

ファイル: BuildPageRankRecords.java プロジェクト: kensk8er/MapReduceAssignment

    /**
     * Turns one adjacency-list line into a node record keyed by node id.
     *
     * <p>The first whitespace-separated token is the node id and every following token is a
     * neighbour id. Counters are reported for nodes, edges, and nodes that have at least one
     * neighbour.
     *
     * @param key input key; not read
     * @param t one line: node id followed by zero or more neighbour ids
     * @param output receives (node id, node)
     * @param reporter receives the "graph" counters
     * @throws IOException if the collector fails
     */
    public void map(
        LongWritable key,
        Text t,
        OutputCollector<IntWritable, PageRankNode> output,
        Reporter reporter)
        throws IOException {

      final String[] fields = t.toString().trim().split("\\s+");
      final int nodeId = Integer.parseInt(fields[0]);
      final int edgeCount = fields.length - 1;

      nid.set(nodeId);
      node.setNodeId(nodeId);

      if (edgeCount == 0) {
        // No neighbours: attach an empty adjacency list.
        node.setAdjacencyList(new ArrayListOfIntsWritable());
      } else {
        int[] neighbors = new int[edgeCount];
        for (int i = 0; i < edgeCount; i++) {
          neighbors[i] = Integer.parseInt(fields[i + 1]);
        }
        node.setAdjacencyList(new ArrayListOfIntsWritable(neighbors));
      }

      reporter.incrCounter("graph", "numNodes", 1);
      reporter.incrCounter("graph", "numEdges", edgeCount);

      if (fields.length > 1) {
        reporter.incrCounter("graph", "numActiveNodes", 1);
      }

      output.collect(nid, node);
    }

コード例 #29

0

ファイルを表示

ファイル: AFormatterWG.java プロジェクト: ezubaric/Cloud9

    /**
     * Emits one node record for every id that follows the leading id on a line, skipping ids
     * found in {@code stopList}.
     *
     * <p>If the leading id is in the stop list, nothing is emitted for the line. Otherwise the
     * shared output node is given an empty adjacency list, rank 1 and type
     * {@code HITSNode.TYPE_AUTH_COMPLETE}, and is emitted once per remaining id with that id set
     * as both key and node id.
     *
     * @param key input key; not read
     * @param value one line of whitespace-separated integer ids
     * @param output receives (id, node) for each id that is not stopped
     * @param reporter not used
     * @throws IOException if the collector fails
     */
    public void map(
        LongWritable key,
        Text value,
        OutputCollector<IntWritable, HITSNode> output,
        Reporter reporter)
        throws IOException {

      ArrayListOfIntsWritable links = new ArrayListOfIntsWritable();
      StringTokenizer ids = new StringTokenizer(value.toString());

      // Empty line: nothing to do.
      if (!ids.hasMoreTokens()) {
        return;
      }

      int leadingId = Integer.parseInt(ids.nextToken());
      if (stopList.contains(leadingId)) {
        return;
      }
      valOut.setAdjacencyList(links);
      valOut.setHARank(1.0f);
      valOut.setType(HITSNode.TYPE_AUTH_COMPLETE);

      while (ids.hasMoreTokens()) {
        int targetId = Integer.parseInt(ids.nextToken());
        keyOut.set(targetId);
        valOut.setNodeId(targetId);
        if (stopList.contains(targetId)) {
          continue;
        }
        output.collect(keyOut, valOut);
      }
      // emit mentioned mentioner -> mentioned (mentioners) in links
      // emit mentioner mentioned -> mentioner (mentions) outlinks
      // emit mentioned a
      // emit mentioner 1
    }

コード例 #30

0

ファイルを表示

ファイル: SecondarySortProjectionReducer.java プロジェクト: yimuniao/data-algorithms-book

  /**
   * Flattens all (timestamp, amount) pairs of a key into one comma-separated line.
   *
   * <p>The line starts with the key's string form, followed by a date string and an amount for
   * every value in iteration order. The line is emitted with a {@code null} key.
   *
   * @param key composite key whose string form starts the line
   * @param values (timestamp, amount) pairs for the key
   * @param output receives the single flattened line
   * @param reporter not used
   * @throws IOException if the collector fails
   */
  public void reduce(
      CompositeKey key,
      Iterator<PairOfLongInt> values,
      OutputCollector<Text, Text> output,
      Reporter reporter)
      throws IOException {

    // Values arrive already sorted (via MapReduce secondary sort), so the line reads:
    //    CustomerID,Date1,Amount1,Date2,Amount2,...,DateN,AmountN
    // where Date1 <= Date2 <= ... <= DateN
    StringBuilder flattened = new StringBuilder(key.toString());
    while (values.hasNext()) {
      flattened.append(",");
      PairOfLongInt entry = values.next();
      // Left element: date as milliseconds. Right element: amount.
      String date = DateUtil.getDateAsString(entry.getLeftElement());
      flattened.append(date).append(",").append(entry.getRightElement());
    }

    output.collect(null, new Text(flattened.toString()));
  } // reduce