Java OutputCollector示例，OutputCollector Java示例

示例#1

0

显示文件

文件： Fill.java 项目： uzielgl/hadoop

    /**
     * Builds one comma-separated output row for a key from its per-city records.
     *
     * <p>Each incoming value is split on commas into {@code city,month,temperature,date}. Records
     * are stored in a sorted map keyed by city, so a later record for the same city replaces an
     * earlier one and the columns are emitted in city order. For every city the row receives the
     * date followed by the temperature. While the enclosing counter {@code c} is still 0, a header
     * row listing the cities is emitted first and the counter is incremented.
     *
     * @param key the row key, written (followed by a comma) as the output key of the data row
     * @param values comma-separated records for this key; each must have at least four fields
     * @param output collector receiving the optional header row and the data row
     * @param reporter not used
     * @throws IOException if the collector fails
     */
    @Override
    public void reduce(
        Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {

      // City -> "date,temperature," cells; the TreeMap keeps the cities sorted by name.
      TreeMap<String, String> cellsByCity = new TreeMap<String, String>();
      while (values.hasNext()) {
        String[] fields = values.next().toString().split(",");
        cellsByCity.put(fields[0], fields[3] + "," + fields[2] + ",");
      }

      if (c == 0) { // Only the first call prints the header row.
        StringBuilder header = new StringBuilder();
        for (String city : cellsByCity.keySet()) {
          // Each city spans two columns (date and temperature), hence the double comma.
          header.append(city).append(",,");
        }
        output.collect(new Text("Año,"), new Text(header.toString()));
        c++;
      }

      StringBuilder line = new StringBuilder();
      for (String cells : cellsByCity.values()) {
        line.append(cells);
      }
      output.collect(new Text(key.toString() + ","), new Text(line.toString()));
    }

示例#2

0

显示文件

文件： NeighborSearch.java 项目： zheng-da/NeighborSearch

    /**
     * Emits every pair made of one star from each list whose coordinate dot product exceeds
     * {@code costheta}.
     *
     * <p>Pairs in which both stars have their {@code margin} flag set are skipped. Each matching
     * pair is emitted twice, once in each order, reusing the shared {@code p} instance.
     *
     * @param v1 stars forming the first element of each candidate pair
     * @param v2 stars forming the second element of each candidate pair
     * @param key the key under which every match is collected
     * @param output collector receiving the matching pairs
     * @throws IOException if the collector fails
     */
    void search(
        Vector<Star> v1,
        Vector<Star> v2,
        BlockIDWritable key,
        OutputCollector<BlockIDWritable, PairWritable> output)
        throws IOException {
      for (Star first : v1) {
        for (Star second : v2) {
          // NOTE(review): the purpose of the margin flag is not documented here; pairs where both
          // stars carry it are ignored.
          boolean bothOnMargin = first.margin && second.margin;
          if (bothOnMargin) {
            continue;
          }

          // Dot product of the two (x, y, z) vectors.
          double dot = first.x * second.x + first.y * second.y + first.z * second.z;
          if (dot > costheta) {
            // Report the match in both directions.
            p.set(first, second, dot);
            output.collect(key, p);
            p.set(second, first, dot);
            output.collect(key, p);
          }
        }
      }
    }

示例#3

0

显示文件

文件： RunMap.java 项目： EnamiBatte/15-440

 /**
  * Executes the map task once: reads the key/value pairs, applies the job's map function to every
  * value, writes the collected results to the task's first output file, registers that file with
  * the data node and reports the outcome to the coordinator.
  *
  * <p>The task status is set to -1 when no input could be read or when writing fails, and to 1 on
  * success. A message of type {@code 'f'} is sent in every case.
  */
 public void run() {
   run = true;
   System.out.println("Running a map");

   Map<String, List<String>> input = reader.getKeyValuePairs();
   if (input == null) {
     sendStatus(-1);
     return;
   }

   OutputCollector<String, String> collect = new OutputCollector<String, String>();
   for (Map.Entry<String, List<String>> entry : input.entrySet()) {
     for (String value : entry.getValue()) {
       map.getJob().map(entry.getKey(), value, collect);
     }
   }
   List<Pair> results = collect.getResults();

   try {
     System.out.println("Trying to Write to File");
     FileOutputStream out = new FileOutputStream(new File(map.getOutput().get(0)));
     try {
       BufferedWriter dw = new BufferedWriter(new OutputStreamWriter(out));
       for (Pair p : results) {
         dw.append(p.toString());
         dw.newLine();
       }
       dw.newLine();
       dw.flush();
       dw.close();
     } finally {
       // Release the file handle even when writing fails part-way; closing twice is harmless.
       out.close();
     }
     coord.dataNode.addFileToDFS(map.getOutput().get(0), coord.conn.port, false);
   } catch (Exception e) {
     e.printStackTrace();
     sendStatus(-1);
     return;
   }

   sendStatus(1);
 }

 /** Stores {@code status} on the map task and sends it to the coordinator in an 'f' message. */
 private void sendStatus(int status) {
   map.setStatus(status);
   Message msg = new Message();
   msg.setTask(map);
   msg.setType('f');
   coord.conn.sendMessage(msg);
 }

示例#4

0

显示文件

文件： PartitionByStationUsingMultipleOutputs.java 项目： tranbahien/hadoop-book

    /**
     * Routes all values of a key to the "station" named output whose part name is the key with
     * every dash removed.
     *
     * @param key the grouping key; its text (minus dashes) names the output part
     * @param values the records to write
     * @param output the regular job output, not used because records go to the named output
     * @param reporter passed on to {@code multipleOutputs} when obtaining the collector
     * @throws IOException if obtaining the collector or writing a record fails
     */
    @SuppressWarnings("unchecked")
    public void reduce(
        Text key,
        Iterator<Text> values,
        OutputCollector<NullWritable, Text> output,
        Reporter reporter)
        throws IOException {

      String partName = key.toString().replace("-", "");
      OutputCollector stationOutput = multipleOutputs.getCollector("station", partName, reporter);
      while (values.hasNext()) {
        Text record = values.next();
        stationOutput.collect(NullWritable.get(), record);
      }
    }

示例#5

0

显示文件

文件： UFOCountingRecordValidationMapper.java 项目： EricDoug/Hadoop-Beginner-s-Guide-Code

  /**
   * Passes a record through unchanged when {@code validate} accepts its text, and drops it
   * otherwise.
   *
   * @param key the input key, re-emitted as is
   * @param value the input record, re-emitted as is
   * @param output collector receiving the accepted records
   * @param reporter handed to {@code validate}
   * @throws IOException if the collector fails
   */
  public void map(
      LongWritable key, Text value, OutputCollector<LongWritable, Text> output, Reporter reporter)
      throws IOException {
    boolean accepted = validate(value.toString(), reporter);
    if (!accepted) {
      return;
    }
    output.collect(key, value);
  }

示例#6

0

显示文件

文件： XiangLi1_exercise3.java 项目： xxiang13/hadoop

    /**
     * Emits {@code artist,duration,title} for every record whose year column lies between 2000 and
     * 2010 inclusive.
     *
     * <p>The line is split on every comma, so quoted fields that themselves contain commas are not
     * handled. The year is read from column 165, the title from column 1, the artist from column 2
     * and the duration from column 3. Records with too few columns or a non-numeric year are
     * skipped.
     *
     * @param key the input key, not used
     * @param value one comma-separated input line
     * @param output collector receiving {@code Final_Value} as key and {@code dummy} as value
     * @param reporter not used
     * @throws IOException if the collector fails
     */
    public void map(
        LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
      final int firstYear = 2000;
      final int lastYear = 2010;
      final int yearColumn = 165;

      String[] inputs = value.toString().split(",");
      if (inputs.length <= yearColumn) {
        // Truncated line: there is no year column to inspect.
        return;
      }

      int year;
      try {
        year = Integer.parseInt(inputs[yearColumn]);
      } catch (NumberFormatException ignored) {
        // A non-numeric year (e.g. a header row) cannot match the range, so skip the record.
        return;
      }
      if (year < firstYear || year > lastYear) {
        return;
      }

      String songTitle = inputs[1];
      String artistName = inputs[2];
      String duration = inputs[3];
      Final_Value.set(artistName + ',' + duration + ',' + songTitle);
      output.collect(Final_Value, dummy);
    }

示例#7

0

显示文件

文件： RBMMapper.java 项目： dery-hit/MapReduce-Based-Deep-Learning

  /**
   * Implements the mapper: runs one training pass on the input record and outputs the weight
   * numbers together with their updates.
   *
   * <p>The intermediate output format is {@code <IntWritable, DoubleWritable>} because the key is
   * the number of the weight (an integer) and the value is that weight's update (a double). The
   * weight at row {@code r}, column {@code c} of the update matrix gets number
   * {@code r * numhid + c}.
   *
   * @param key the input key, not used
   * @param value the input record; its text is stored in {@code inputData}
   * @param output collector receiving the {@code <weightID, weightUpdate>} pairs
   * @param reporter not used
   * @throws IOException if the collector fails
   */
  public void map(
      LongWritable key,
      Text value,
      OutputCollector<IntWritable, DoubleWritable> output,
      Reporter reporter)
      throws IOException {
    inputData = value.toString();

    // The four phases must run in exactly this order.
    initialize();
    getposphase();
    getnegphase();
    update();

    // Flatten the update matrix row by row into <weightID, weightUpdate> pairs.
    double[][] updates = vishidinc.getArray();
    for (int row = 0; row < numdims; row++) {
      for (int col = 0; col < numhid; col++) {
        int weightId = row * numhid + col;
        weightPos.set(weightId);
        weightValue.set(updates[row][col]);
        output.collect(weightPos, weightValue);
      }
    }
  }

示例#8

0

显示文件

文件： PartitionByStationUsingMultipleOutputs.java 项目： tranbahien/hadoop-book

    /**
     * Parses the record and re-emits it keyed by the station id reported by the parser.
     *
     * @param key the input key, not used
     * @param value the raw record, emitted unchanged as the output value
     * @param output collector receiving the (station id, record) pair
     * @param reporter not used
     * @throws IOException if the collector fails
     */
    public void map(
        LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {

      parser.parse(value);
      Text stationId = new Text(parser.getStationId());
      output.collect(stationId, value);
    }

示例#9

0

显示文件

文件： GenericMRLoadGenerator.java 项目： FloodDragon/hadoop

 /**
  * Counts one more seen record and forwards it to {@code out} for as long as the ratio of kept to
  * seen records stays below {@code keep}.
  *
  * <p>The same pair may therefore be collected zero, one or several times.
  *
  * @param key the key to forward
  * @param val the value to forward
  * @param out collector receiving the forwarded pairs
  * @throws IOException if the collector fails
  */
 protected void emit(K key, V val, OutputCollector<K, V> out) throws IOException {
   total += 1;
   while (true) {
     // Stop as soon as the kept/total ratio is no longer below the target.
     if (!((float) kept / total < keep)) {
       break;
     }
     kept += 1;
     out.collect(key, val);
   }
 }

示例#10

0

显示文件

文件： Sqrt2.java 项目： Totemika/repoHadoop

 /**
  * Emits the key together with the text of its first value followed by {@code " - "}.
  *
  * <p>Only the first value is consumed; any further values are ignored.
  *
  * @param key the key, emitted unchanged
  * @param values the values for the key; the first one must exist
  * @param output collector receiving the single result
  * @param reporter not used
  * @throws IOException if the collector fails
  */
 public void reduce(
     DoubleWritable key,
     Iterator<DoubleWritable> values,
     OutputCollector<DoubleWritable, Text> output,
     Reporter reporter)
     throws IOException {
   DoubleWritable first = values.next();
   String label = first.toString() + " - ";
   output.collect(key, new Text(label));
 }

示例#11

0

显示文件

文件： PageLinkMapper.java 项目： ntietz/data-processing

 /**
  * Identity mapper: forwards each (id, page) pair to the collector unchanged.
  *
  * @param key the node id
  * @param value the page stored under that id
  * @param output collector receiving the same (id, page) pair
  * @param reporter not used
  * @throws IOException if the collector fails
  */
 public void map(
     IntWritable key,
     WikiPage value,
     OutputCollector<IntWritable, WikiPage> output,
     Reporter reporter)
     throws IOException {
   final IntWritable id = key;
   final WikiPage page = value;
   output.collect(id, page);
 }

示例#12

0

显示文件

文件： MongoDBScheme.java 项目： brush51/PredictionIO

  /**
   * Converts the outgoing tuple into a {@link BasicDBObject} and hands it to the output collector.
   *
   * <p>The record key is the tuple value of the field that {@code fieldMappings} associates with
   * {@code keyColumnName}; when no such mapping exists a new {@link ObjectId} is used instead.
   * Every other column that has a mapping and a non-null tuple value is copied into the object.
   * Fields that cannot be resolved are logged and skipped.
   *
   * @param flowProcess the current flow process, not used
   * @param sinkCall supplies the outgoing tuple entry and the output collector
   * @throws IOException if the collector fails
   */
  @Override
  public void sink(
      FlowProcess<JobConf> flowProcess, SinkCall<BSONWritable[], OutputCollector> sinkCall)
      throws IOException {
    TupleEntry tupleEntry = sinkCall.getOutgoingEntry();
    OutputCollector outputCollector = sinkCall.getOutput();

    String keyFieldName = this.fieldMappings.get(this.keyColumnName);
    Object key;

    // If fieldMappings has no entry for keyColumnName ("_id"), use a new ObjectId() as key.
    if (keyFieldName == null) {
      key = new ObjectId();
    } else {
      key = tupleEntry.selectTuple(new Fields(keyFieldName)).get(0);
    }

    BasicDBObject dbObject = new BasicDBObject();

    for (String columnFieldName : columnFieldNames) {
      String columnFieldMapping = fieldMappings.get(columnFieldName);
      Object tupleEntryValue = null;

      try {
        if (columnFieldMapping != null) {
          // columnFieldMapping is null when no corresponding field name is defined in the
          // mappings; only mapped fields are written back.
          tupleEntryValue = tupleEntry.get(columnFieldMapping);
        }
      } catch (FieldsResolverException e) {
        logger.error("Couldn't resolve field: {}", columnFieldName);
      }

      // Compare by content: two equal column names are not guaranteed to be the same String
      // instance, so a reference comparison could let the key column slip into the object.
      boolean isKeyColumn =
          columnFieldName == keyColumnName
              || (columnFieldName != null && columnFieldName.equals(keyColumnName));

      if (tupleEntryValue != null && !isKeyColumn) {
        dbObject.put(columnFieldName, tupleEntryValue);
      }
    }
    logger.info("Putting key for output: {} {}", key, dbObject);
    outputCollector.collect(key, dbObject);
  }

示例#13

0

显示文件

文件： MaxTemperatureUsingSecondarySort.java 项目： BoydYang/hadoop-book

    /**
     * Emits the group's key once, paired with the {@code NullWritable} singleton; the grouped
     * values themselves are not read.
     *
     * @param key the key of the group, emitted unchanged
     * @param values the values of the group, not used
     * @param output collector receiving the key
     * @param reporter not used
     * @throws IOException if the collector fails
     */
    public void reduce(
        IntPair key,
        Iterator<NullWritable> values,
        OutputCollector<IntPair, NullWritable> output,
        Reporter reporter)
        throws IOException {

      final NullWritable noValue = NullWritable.get();
      /*[*/ output.collect(key, noValue); /*]*/
    }

示例#14

0

显示文件

文件： WordCount.java 项目： brent8149/datascience-fall14

 /**
  * Splits the line into whitespace-delimited tokens and emits each token with the shared
  * {@code one} value.
  *
  * <p>The reusable {@code word} field is overwritten for every token before it is collected.
  *
  * @param key the input key, not used
  * @param value the line of text to tokenize
  * @param output collector receiving one (word, one) pair per token
  * @param reporter not used
  * @throws IOException if the collector fails
  */
 public void map(LongWritable key, Text value, OutputCollector output, Reporter reporter)
     throws IOException {
   StringTokenizer tokens = new StringTokenizer(value.toString());
   while (tokens.hasMoreTokens()) {
     String token = tokens.nextToken();
     word.set(token);
     output.collect(word, one);
   }
 }

示例#15

0

显示文件

文件： Jacobi.java 项目： Quetzaloid/Parallel-programming

 /**
  * Emits the key with the first of its values; any remaining values are ignored.
  *
  * @param key the key, emitted unchanged
  * @param values the values for the key; the first one must exist
  * @param output collector receiving the single (key, first value) pair
  * @param reporter not used
  * @throws IOException if the collector fails
  */
 @Override
 public void reduce(
     IntWritable key,
     Iterator<DoubleWritable> values,
     OutputCollector<IntWritable, DoubleWritable> output,
     Reporter reporter)
     throws IOException {
   DoubleWritable first = values.next();
   output.collect(key, first);
 }

示例#16

0

显示文件

文件： Injector.java 项目： soolr/nutch-1.7

    /**
     * Merges the existing and the newly injected datum of a URL into the single record that is
     * written out.
     *
     * <p>Values with status {@code STATUS_INJECTED} are copied into {@code injected} (with the
     * status changed to {@code STATUS_DB_UNFETCHED}); all other values are copied into
     * {@code old}. The result is chosen as follows:
     *
     * <ul>
     *   <li>only an injected record exists: the injected record;
     *   <li>both exist and {@code overwrite} is set: the injected record;
     *   <li>both exist, {@code update} is set and {@code overwrite} is not: the old record with
     *       the injected metadata merged in, and with score and fetch interval taken from the
     *       injected record when they differ from the injection defaults;
     *   <li>otherwise: the old record.
     * </ul>
     *
     * @param key the URL
     * @param values the old and/or injected records for the URL
     * @param output collector receiving the resulting record
     * @param reporter not used
     * @throws IOException if the collector fails
     * @see <a href="https://issues.apache.org/jira/browse/NUTCH-1405">NUTCH-1405</a>
     */
    public void reduce(
        Text key,
        Iterator<CrawlDatum> values,
        OutputCollector<Text, CrawlDatum> output,
        Reporter reporter)
        throws IOException {
      boolean oldSet = false;
      boolean injectedSet = false;
      while (values.hasNext()) {
        CrawlDatum val = values.next();
        if (val.getStatus() == CrawlDatum.STATUS_INJECTED) {
          injected.set(val);
          injected.setStatus(CrawlDatum.STATUS_DB_UNFETCHED);
          injectedSet = true;
        } else {
          old.set(val);
          oldSet = true;
        }
      }

      // Default behaviour: a URL seen for the first time takes the injected record, anything else
      // keeps the old one. This must be decided BEFORE the overwrite/update handling below;
      // deciding it afterwards would unconditionally replace the result and silently disable
      // the overwrite option.
      CrawlDatum res = (injectedSet && !oldSet) ? injected : old;

      if (injectedSet && oldSet) {
        if (overwrite) {
          // The injected record replaces the existing one.
          res = injected;

          if (update) {
            LOG.info(
                key.toString() + " overwritten with injected record but update was specified.");
          }
        } else if (update) {
          // Keep the existing record (res is already old) but refresh it from the injected one.
          old.putAllMetaData(injected);
          old.setScore(injected.getScore() != scoreInjected ? injected.getScore() : old.getScore());
          old.setFetchInterval(
              injected.getFetchInterval() != interval
                  ? injected.getFetchInterval()
                  : old.getFetchInterval());
        }
      }

      output.collect(key, res);
    }

示例#17

0

显示文件

文件： IpCount1.java 项目： SunGuo/HadoopLesson

 /**
  * Adds up the numeric text values of a key and emits the total as text.
  *
  * @param key the key, emitted unchanged
  * @param iterator the values for the key; each must parse as a {@code long}
  * @param output collector receiving the (key, total) pair
  * @param reporter not used
  * @throws IOException if the collector fails
  */
 @Override
 public void reduce(
     Text key, Iterator<Text> iterator, OutputCollector<Text, Text> output, Reporter reporter)
     throws IOException {
   long total = 0;
   while (iterator.hasNext()) {
     Text count = iterator.next();
     total += Long.parseLong(count.toString());
   }
   output.collect(key, new Text(Long.toString(total)));
 }

示例#18

0

显示文件

文件： IpCount1.java 项目： SunGuo/HadoopLesson

 /**
  * Emits the text of the input record as key with the constant value {@code "1"}.
  *
  * @param longWritable the input key, not used
  * @param value the input record, whose text becomes the output key
  * @param output collector receiving the (record text, "1") pair
  * @param reporter not used
  * @throws IOException if the collector fails
  */
 @Override
 public void map(
     LongWritable longWritable,
     Text value,
     OutputCollector<Text, Text> output,
     Reporter reporter)
     throws IOException {
   Text ipKey = new Text(value.toString());
   Text single = new Text("1");
   output.collect(ipKey, single);
 }

示例#19

0

显示文件

文件： FileTest.java 项目： RyanFu/shopJob

    /**
     * Emits the first column of a record as key and an indexed dump of all its columns as value.
     *
     * <p>The record is split on {@code splitChar}, keeping empty tokens. The value has the form
     * {@code 0=<col0>\t1=<col1>\t...}, with a tab after every column. Records that yield no
     * columns at all are skipped, because they have no first column to use as key.
     *
     * @param key the input key, not used
     * @param value the record to split
     * @param output collector receiving the (first column, dump) pair
     * @param reporter not used
     * @throws IOException if the collector fails
     */
    public void map(Object key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
      String[] _allCols = StringUtils.splitPreserveAllTokens(value.toString(), splitChar);
      if (_allCols == null || _allCols.length == 0) {
        // Nothing to index, e.g. an empty input line.
        return;
      }

      // The builder never leaves this method, so the unsynchronized StringBuilder suffices.
      StringBuilder dump = new StringBuilder();
      for (int i = 0; i < _allCols.length; i++) {
        dump.append(i).append('=').append(_allCols[i]).append("\t");
      }
      output.collect(new Text(_allCols[0]), new Text(dump.toString()));
    }

示例#20

0

显示文件

文件： SortValidator.java 项目： hramasamy/evosys

      /**
       * Combines key and value into a new key and emits it with the mapper's own {@code value}
       * field.
       *
       * <p>Note that the emitted value is {@code this.value}, not the {@code value} parameter,
       * which only contributes to the new key.
       *
       * @param key the input key, first argument to {@code pair}
       * @param value the input value, second argument to {@code pair}
       * @param output collector receiving the (combined key, {@code this.value}) pair
       * @param reporter not used
       * @throws IOException if the collector fails
       */
      public void map(
          BytesWritable key,
          BytesWritable value,
          OutputCollector<BytesWritable, IntWritable> output,
          Reporter reporter)
          throws IOException {
        // newKey = (key, value)
        byte[] combined = pair(key, value);
        BytesWritable newKey = new BytesWritable(combined);

        output.collect(newKey, this.value);
      }

示例#21

0

显示文件

文件： ValueHistogram.java 项目： feldsherov/TS_hadoop

 /**
  * Emits, for every data row in the input, the integer in column 1 as key and the text in column
  * 4 as value.
  *
  * <p>The input is split into rows on line breaks and each row on commas. Rows that start with a
  * double quote are skipped, as are rows with fewer than five columns (for example blank lines).
  *
  * @param key the input key, not used
  * @param value the text containing one or more comma-separated rows
  * @param output collector receiving one pair per accepted row
  * @param reporter not used
  * @throws IOException if the collector fails
  * @throws NumberFormatException if column 1 of an accepted row is not an integer
  */
 public void map(
     LongWritable key, Text value, OutputCollector<IntWritable, Text> output, Reporter reporter)
     throws IOException {
   String[] rows = value.toString().split("\\r?\\n");
   for (String row : rows) {
     if (row.startsWith("\"")) {
       continue;
     }
     String[] parts = row.split(",");
     if (parts.length < 5) {
       // Columns 1 and 4 are both required; a blank or truncated row has neither.
       continue;
     }
     // parseInt avoids the deprecated, needlessly boxing Integer(String) constructor.
     output.collect(new IntWritable(Integer.parseInt(parts[1])), new Text(parts[4]));
   }
 }

示例#22

0

显示文件

文件： WordCount.java 项目： brent8149/datascience-fall14

 /**
  * Sums the counts collected for a word and emits the word with its total.
  *
  * @param key the word, emitted unchanged
  * @param values the partial counts for the word
  * @param output collector receiving the (word, total) pair
  * @param reporter not used
  * @throws IOException if the collector fails
  */
 public void reduce(
     Text key,
     Iterator<IntWritable> values,
     OutputCollector<Text, IntWritable> output,
     Reporter reporter)
     throws IOException {
   int total = 0;
   for (Iterator<IntWritable> counts = values; counts.hasNext(); ) {
     total += counts.next().get();
   }
   IntWritable result = new IntWritable(total);
   output.collect(key, result);
 }

示例#23

0

显示文件

文件： Sqrt2.java 项目： Totemika/repoHadoop

 /**
  * Parses the line as a double and emits the number together with its square root.
  *
  * @param key the input key, not used
  * @param value a line containing a single decimal number
  * @param output collector receiving the (number, square root) pair
  * @param reporter not used
  * @throws IOException if the collector fails
  * @throws NumberFormatException if the line is not a parsable double
  */
 public void map(
     LongWritable key,
     Text value,
     OutputCollector<DoubleWritable, DoubleWritable> output,
     Reporter reporter)
     throws IOException {
   double number = Double.parseDouble(value.toString());
   DoubleWritable outKey = new DoubleWritable(number);
   DoubleWritable outValue = new DoubleWritable(Math.sqrt(number));
   output.collect(outKey, outValue);
 }

示例#24

0

显示文件

文件： Add1.java 项目： Totemika/repoHadoop

 /**
  * Parses the line as an integer and emits the number together with its successor.
  *
  * @param key the input key, not used
  * @param value a line containing a single integer
  * @param output collector receiving the (number, number + 1) pair
  * @param reporter not used
  * @throws IOException if the collector fails
  * @throws NumberFormatException if the line is not a parsable integer
  */
 public void map(
     LongWritable key,
     Text value,
     OutputCollector<IntWritable, IntWritable> output,
     Reporter reporter)
     throws IOException {
   int number = Integer.parseInt(value.toString());
   IntWritable outKey = new IntWritable(number);
   IntWritable outValue = new IntWritable(number + 1);
   output.collect(outKey, outValue);
 }

示例#25

0

显示文件

文件： Problem5bReducer.java 项目： ntroyer/HadoopWordCount

 /**
  * Aggregates the key/value pairs produced by the mappers: adds together all values that share a
  * key and emits the key with that sum as the final output.
  *
  * @param key the key, emitted unchanged
  * @param values the values recorded for the key
  * @param output collector receiving the (key, sum) pair
  * @param reporter not used
  * @throws IOException if the collector fails
  */
 public void reduce(
     Text key,
     Iterator<IntWritable> values,
     OutputCollector<Text, IntWritable> output,
     Reporter reporter)
     throws IOException {
   int runningSum = 0;
   // Walk through every value available for the key and accumulate it.
   while (values.hasNext()) {
     IntWritable current = values.next();
     runningSum = runningSum + current.get();
   }
   output.collect(key, new IntWritable(runningSum));
 }

示例#26

0

显示文件

文件： DFSCIOTest.java 项目： RiseOfApes/hadoop

    /**
     * Logs the measurements of one task and emits them as five typed statistics.
     *
     * <p>The I/O rate is computed as {@code size * 1000 / (execTime * MEGA)}. The emitted keys are
     * "tasks" (always 1), "size" and "time" with the long type prefix, and "rate" and "sqrate"
     * (the rate and its square, each multiplied by 1000) with the float type prefix.
     *
     * @param output collector receiving the statistics
     * @param name not used
     * @param execTime the execution time used in the rate computation
     * @param objSize the number of bytes processed; must not be null
     * @throws IOException if the collector fails
     */
    void collectStats(OutputCollector<Text, Text> output, String name, long execTime, Long objSize)
        throws IOException {
      final long totalSize = objSize.longValue();
      final float ioRateMbSec = (float) totalSize * 1000 / (execTime * MEGA);
      LOG.info("Number of bytes processed = " + totalSize);
      LOG.info("Exec time = " + execTime);
      LOG.info("IO rate = " + ioRateMbSec);

      final String longPrefix = AccumulatingReducer.VALUE_TYPE_LONG;
      final String floatPrefix = AccumulatingReducer.VALUE_TYPE_FLOAT;

      Text tasksKey = new Text(longPrefix + "tasks");
      output.collect(tasksKey, new Text(String.valueOf(1)));

      Text sizeKey = new Text(longPrefix + "size");
      output.collect(sizeKey, new Text(String.valueOf(totalSize)));

      Text timeKey = new Text(longPrefix + "time");
      output.collect(timeKey, new Text(String.valueOf(execTime)));

      Text rateKey = new Text(floatPrefix + "rate");
      output.collect(rateKey, new Text(String.valueOf(ioRateMbSec * 1000)));

      Text squareRateKey = new Text(floatPrefix + "sqrate");
      output.collect(squareRateKey, new Text(String.valueOf(ioRateMbSec * ioRateMbSec * 1000)));
    }

示例#27

0

显示文件

文件： CassandraBulkLoader.java 项目： devdattakulkarni/Cassandra-KVAC

    /**
     * Collects all values of a key into one column family, sends it to the endpoints returned for
     * that key and waits for their acknowledgements.
     *
     * <p>Each value is split on the {@code \1} character; fields 1, 2 and 3 are used as super
     * column name, column name and column value. All columns are written with timestamp 0 into
     * the hard-coded keyspace "Keyspace1" and column family "Super1".
     *
     * @param key the row key
     * @param values the {@code \1}-delimited records for the row; each needs at least four fields
     * @param output collector receiving a confirmation entry for the key
     * @param reporter not used
     * @throws IOException if the collector fails
     * @throws RuntimeException if an endpoint does not acknowledge within the RPC timeout
     */
    public void reduce(
        Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
      String keyspace = "Keyspace1";
      String cfName = "Super1";
      int timestamp = 0;

      /* Create a column family */
      ColumnFamily columnFamily = ColumnFamily.create(keyspace, cfName);
      while (values.hasNext()) {
        // Split the value (line based on your own delimiter)
        String line = values.next().toString();
        String[] fields = line.split("\1");
        String superColumnName = fields[1];
        String columnName = fields[2];
        String columnValue = fields[3];
        columnFamily.addColumn(
            new QueryPath(
                cfName, ByteBufferUtil.bytes(superColumnName), ByteBufferUtil.bytes(columnName)),
            ByteBufferUtil.bytes(columnValue),
            timestamp);
      }

      List<ColumnFamily> columnFamilies = new LinkedList<ColumnFamily>();
      columnFamilies.add(columnFamily);

      // Text.getBytes() exposes the backing array, which is only valid up to getLength() and may
      // carry stale bytes from a previous, longer key; copy exactly the valid prefix.
      byte[] keyBytes = java.util.Arrays.copyOf(key.getBytes(), key.getLength());

      /* Get serialized message to send to cluster */
      Message message = createMessage(keyspace, keyBytes, cfName, columnFamilies);
      List<IAsyncResult> results = new ArrayList<IAsyncResult>();
      for (InetAddress endpoint :
          StorageService.instance.getNaturalEndpoints(keyspace, ByteBufferUtil.bytes(key))) {
        /* Send message to end point */
        results.add(MessagingService.instance().sendRR(message, endpoint));
      }
      /* wait for acks */
      for (IAsyncResult result : results) {
        try {
          result.get(DatabaseDescriptor.getRpcTimeout(), TimeUnit.MILLISECONDS);
        } catch (TimeoutException e) {
          // you should probably add retry logic here
          throw new RuntimeException(e);
        }
      }

      output.collect(key, new Text(" inserted into Cassandra node(s)"));
    }

示例#28

0

显示文件

文件： MaxTemperatureUsingSecondarySort.java 项目： BoydYang/hadoop-book

    /**
     * Parses the record and, when the parser reports a valid temperature, emits a (year,
     * temperature) pair as key with a {@code NullWritable} value.
     *
     * @param key the input key, not used
     * @param value the raw record handed to the parser
     * @param output collector receiving the composite key
     * @param reporter not used
     * @throws IOException if the collector fails
     */
    public void map(
        LongWritable key,
        Text value,
        OutputCollector<IntPair, NullWritable> output,
        Reporter reporter)
        throws IOException {

      parser.parse(value);
      if (!parser.isValidTemperature()) {
        return;
      }
      IntPair yearAndTemperature = new IntPair(parser.getYearInt(), +parser.getAirTemperature());
      /*[*/ output.collect(yearAndTemperature, NullWritable.get()); /*]*/
    }

示例#29

0

显示文件

文件： OldMaxTemperature.java 项目： JuneShi0315/hadoop-book

    /**
     * Emits the key with the largest of its values.
     *
     * <p>If there are no values, {@code Integer.MIN_VALUE} is emitted.
     *
     * @param key the key, emitted unchanged
     * @param values the readings recorded for the key
     * @param output collector receiving the (key, maximum) pair
     * @param reporter not used
     * @throws IOException if the collector fails
     */
    @Override
    public void reduce(
        Text key, /*[*/
        Iterator /*]*/<IntWritable> values,
        /*[*/ OutputCollector<Text, IntWritable> output,
        Reporter reporter /*]*/)
        throws IOException {

      int highest = Integer.MIN_VALUE;
      while (
      /*[*/ values.hasNext() /*]*/) {
        int reading = /*[*/ values.next().get() /*]*/;
        if (reading > highest) {
          highest = reading;
        }
      }
      /*[*/ output.collect /*]*/(key, new IntWritable(highest));
    }

示例#30

0

显示文件

文件： StatePrinter.java 项目： hhu-stups/tlatools

  /**
   * Prints a state and appends it to the collected trace.
   *
   * <p>When a previous state is given and {@code TLCGlobals.printDiffsOnly} is set, the state is
   * rendered relative to that previous state; otherwise it is rendered in full.
   *
   * @param currentStateInfo the state to print, together with its info
   * @param lastState the previously printed state, or {@code null} if there is none
   * @param num the number passed along with the printed state
   */
  public static void printState(TLCStateInfo currentStateInfo, TLCState lastState, int num) {
    /* Diff-only rendering was added by rjoshi. */
    final boolean diffOnly = lastState != null && TLCGlobals.printDiffsOnly;
    final String stateString =
        diffOnly
            ? currentStateInfo.state.toString(lastState)
            : currentStateInfo.state.toString();

    final String[] parameters = {
      String.valueOf(num), currentStateInfo.info.toString(), stateString
    };
    MP.printState(EC.TLC_STATE_PRINT2, parameters, currentStateInfo, num);
    OutputCollector.addStateToTrace(currentStateInfo);
  }