/**
 * Consumes one input line of a stream in which a header line is followed by a run of rating
 * lines, and emits the collected ratings once the run is complete.
 *
 * <p>Tokens are separated by a space, {@code '|'} or a tab. While {@code nRatings} is zero the
 * line is read as a header (first token: user id, second token: number of rating lines that
 * follow). Otherwise the line is read as a rating (first token: song id, second token: rate).
 *
 * @param unused input key, ignored
 * @param line the current input line
 * @param output receives {@code (userIdKey, songsValue)} after the last rating of a run
 * @param reporter not used
 * @throws IOException if the collector fails
 */
public void map(
    Object unused,
    Text line,
    OutputCollector<LongWritable, PostingSongArrayWritable> output,
    Reporter reporter)
    throws IOException {
  StringTokenizer fields = new StringTokenizer(line.toString(), " |\t");

  if (nRatings == 0) {
    // Header line: start a fresh run for the next user.
    userId = Long.parseLong(fields.nextToken());
    nRatings = Integer.parseInt(fields.nextToken());
    songsRatings.clear();
    totalRate = 0;
    return;
  }

  // Rating line: accumulate it and count down the remaining lines of this run.
  long songId = Long.parseLong(fields.nextToken());
  int rate = Integer.parseInt(fields.nextToken());
  songsRatings.add(new PostingSong(songId, rate));
  totalRate += rate;
  nRatings--;

  if (nRatings != 0) {
    return;
  }

  // Last rating of the run: publish everything collected for this user.
  // NOTE(review): userIdKey is not assigned from userId anywhere in this method - confirm it
  // is kept in sync elsewhere.
  nRatings = songsRatings.size();
  songsValue.setArray(songsRatings);
  output.collect(userIdKey, songsValue);
  // Back to "expecting a header" state.
  nRatings = 0;
}
/**
 * Hands the outgoing tuple entry of the sink call to the call's output collector, using a
 * {@code null} key.
 *
 * @param fp the flow process, not used
 * @param sink the sink call providing both the outgoing entry and the collector
 * @throws IOException if the collector fails
 */
@Override
public void sink(FlowProcess<JobConf> fp, SinkCall<Object[], OutputCollector> sink)
    throws IOException {
  final TupleEntry outgoing = sink.getOutgoingEntry();
  sink.getOutput().collect(null, outgoing);
}
public void map( Object key, Text value, OutputCollector<IntWritable, Text> output, Reporter reporter) throws IOException { // vid neighbors_num n1 n2 ... // vid color 1/0 "COLOR" String str = value.toString(); if (str.endsWith(COLOR)) { // color table String[] tokens = str.substring(0, str.length() - 5).split("\\s+"); int change = Integer.parseInt(tokens[2]); if (change == 1) { IntWritable SourceId = new IntWritable(Integer.parseInt(tokens[0])); StringBuilder sb = new StringBuilder(); sb.append(tokens[1]); sb.append(" "); sb.append(tokens[2]); sb.append(COLOR); output.collect(SourceId, new Text(sb.toString())); } } else { // edge table String[] tokens = value.toString().split("\\s+"); IntWritable SourceId = new IntWritable(Integer.parseInt(tokens[0])); StringBuilder sb = new StringBuilder(); for (int i = 1; i < tokens.length; i++) { if (sb.length() != 0) sb.append(" "); sb.append(tokens[i]); } output.collect(SourceId, new Text(sb.toString())); } }
public void reduce( IntWritable key, Iterator<Text> values, OutputCollector<IntWritable, Text> output, Reporter reporter) throws IOException { int color = -1; int newColor = Integer.MAX_VALUE; int change = 0; while (values.hasNext()) { String str = values.next().toString(); if (str.endsWith(COLOR)) { // color table String[] tmp = str.substring(0, str.length() - 5).split("\\s+"); color = Integer.parseInt(tmp[0]); output.collect(key, new Text(str)); } else { // messages table String[] tmp = str.split("\\s+"); for (String i : tmp) { int tmpColor = Integer.parseInt(i); if (tmpColor < newColor) newColor = tmpColor; } } } if (newColor < Integer.MAX_VALUE) { output.collect(key, new Text(Integer.toString(newColor))); } }
public void map( LongWritable key, Text value, OutputCollector<JoinRecordKey, JoinRecordValue> output, Reporter reporter) throws IOException { // create a string from the 1-character field separator; // since this is a ^A, it should be a regex for itself. char[] fieldSepChars = new char[1]; fieldSepChars[0] = FIELD_SEPARATOR; String[] parts = value.toString().split(new String(fieldSepChars)); if (parts.length >= MINIMUM_PARTS) { try { // get the int components on this line. int[] asInts = getInts(parts); if (parts.length == IP_ONLY_LENGTH) { // it's just an IP address output.collect(new JoinRecordKey(asInts), JoinRecordValue.getLogEntryInstance()); } else if (parts.length == CITY_ID_LENGTH) { // it's an ip address and also the (city, country) ids output.collect(new JoinRecordKey(asInts), new JoinRecordValue(asInts[3], asInts[4])); } } catch (NumberFormatException nfe) { // unparsible line. do nothing, since we skipped // the uses of asInts by throwing to here. } } }
public void reduce( Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter) throws IOException { String rKey = key.toString(); String[] keyTokens = rKey.split("\\$\\$"); String reduceType = keyTokens[0]; // Main reduce if (AccessReduceTypeEnum.MAIN.toString().equals(reduceType)) { long useTime = 0l; long pv = 0l; while (values.hasNext()) { pv++; Text rValue = values.next(); useTime += Long.parseLong(rValue.toString()); } Text outKey = new Text(rKey); Text outValue = new Text(pv + spliter + useTime); output.collect(outKey, outValue); } else { long num = 0l; while (values.hasNext()) { values.next(); num++; } Text outKey = new Text(rKey); Text outValue = new Text(String.valueOf(num)); output.collect(outKey, outValue); } }
@Override public void reduce( Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter) throws IOException { String line = ""; String header = ""; TreeMap<String, String> ciudades = new TreeMap<String, String>(); // Obtenemos los datos y los metemos a un treemap para que los ordene por ciudad while (values.hasNext()) { String[] tmp = values.next().toString().split(","); String ciudad = tmp[0]; String mes = tmp[1]; String temperatura = tmp[2]; String fecha = tmp[3]; ciudades.put(ciudad, tmp[1] + "," + tmp[2] + "," + tmp[3]); } // Recorremos las ciudades y vamos imprimiendo for (String ciudad : ciudades.keySet()) { header += ciudad + ",,"; String[] temporal = ciudades.get(ciudad).split(","); line += temporal[2] + "," + temporal[1] + ","; } if (c == 0) { // Imprimimos cabezera output.collect(new Text("Año,"), new Text(header)); c++; } output.collect(new Text(key.toString() + ","), new Text(line)); }
/**
 * Parses a node message, turns each of the node's bubble records into three messages, clears
 * the bubbles, and re-emits the node itself.
 *
 * <p>A bubble record is {@code '|'}-separated with six fields, named here after the original
 * locals: minor, minord, dead, newd, newid, extracov. For each bubble the mapper emits
 *
 * <ul>
 *   <li>to {@code minor}: {@code KILLLINKMSG}, minord, dead, newd, newid (tab-separated),
 *   <li>to {@code dead}: {@code KILLMSG},
 *   <li>to {@code newid}: {@code EXTRACOV} and extracov (tab-separated).
 * </ul>
 *
 * @param lineid input key, ignored
 * @param nodetxt the serialized node
 * @param output receives the bubble messages and the node message
 * @param reporter receives the "bubblespopped" and "nodes" counters of group "Contrail"
 * @throws IOException if parsing or the collector fails
 */
public void map(
    LongWritable lineid, Text nodetxt, OutputCollector<Text, Text> output, Reporter reporter)
    throws IOException {
  Node node = new Node();
  node.fromNodeMsg(nodetxt.toString());

  List<String> bubbles = node.getBubbles();
  if (bubbles != null) {
    for (String bubble : bubbles) {
      String[] fields = bubble.split("\\|");

      // Read all six fields up front so a short record fails before anything is emitted.
      String minor = fields[0];
      String minord = fields[1];
      String dead = fields[2];
      String newd = fields[3];
      String newid = fields[4];
      String extracov = fields[5];

      String killLink =
          Node.KILLLINKMSG + "\t" + minord + "\t" + dead + "\t" + newd + "\t" + newid;
      output.collect(new Text(minor), new Text(killLink));
      output.collect(new Text(dead), new Text(Node.KILLMSG));
      output.collect(new Text(newid), new Text(Node.EXTRACOV + "\t" + extracov));

      reporter.incrCounter("Contrail", "bubblespopped", 1);
    }
    // The bubbles have been converted to messages; drop them before re-serializing the node.
    node.clearBubbles();
  }

  output.collect(new Text(node.getNodeId()), new Text(node.toNodeMsg()));
  reporter.incrCounter("Contrail", "nodes", 1);
}
/**
 * Parses a node message, resets its can-compress flag for every direction, notifies the tail
 * node in each direction, and re-emits the node.
 *
 * <p>For every direction in {@code Node.dirs} that has a tail whose id differs from this
 * node's id, a {@code HASUNIQUEP} message carrying this node's id and the direction is sent to
 * the tail node.
 *
 * @param lineid input key, ignored
 * @param nodetxt the serialized node
 * @param output receives the tail notifications and the node message
 * @param reporter receives the "remotemark" and "nodes" counters of group "Brush"
 * @throws IOException if parsing or the collector fails
 */
public void map(
    LongWritable lineid, Text nodetxt, OutputCollector<Text, Text> output, Reporter reporter)
    throws IOException {
  Node node = new Node();
  node.fromNodeMsg(nodetxt.toString());

  for (String adj : Node.dirs) {
    node.setCanCompress(adj, false);

    TailInfo next = node.gettail(adj);
    // Skip directions without a tail and tails that point back at this node.
    boolean hasRemoteTail = next != null && !next.id.equals(node.getNodeId());
    if (hasRemoteTail) {
      reporter.incrCounter("Brush", "remotemark", 1);
      String mark = Node.HASUNIQUEP + "\t" + node.getNodeId() + "\t" + adj;
      output.collect(new Text(next.id), new Text(mark));
    }
  }

  output.collect(new Text(node.getNodeId()), new Text(node.toNodeMsg()));
  reporter.incrCounter("Brush", "nodes", 1);
}
void search( Vector<Star> v1, Vector<Star> v2, BlockIDWritable key, OutputCollector<BlockIDWritable, PairWritable> output) throws IOException { for (int i = 0; i < v1.size(); i++) { for (int j = 0; j < v2.size(); j++) { Star star1 = v1.get(i); Star star2 = v2.get(j); // what is this margin about if (star1.margin && star2.margin) continue; double dist = star1.x * star2.x + star1.y * star2.y + star1.z * star2.z; if (dist > costheta) { p.set(star1, star2, dist); output.collect(key, p); p.set(star2, star1, dist); output.collect(key, p); // num += 2; } } } // end for i,j }
/** * Map method. * * @param offset samples starting from the (offset+1)th sample. * @param size the number of samples for this map * @param out output {ture->numInside, false->numOutside} * @param reporter */ public void map( LongWritable offset, LongWritable size, OutputCollector<BooleanWritable, LongWritable> out, Reporter reporter) throws IOException { final HaltonSequence haltonsequence = new HaltonSequence(offset.get()); long numInside = 0L; long numOutside = 0L; for (long i = 0; i < size.get(); ) { // generate points in a unit square final double[] point = haltonsequence.nextPoint(); // count points inside/outside of the inscribed circle of the square final double x = point[0] - 0.5; final double y = point[1] - 0.5; if (x * x + y * y > 0.25) { numOutside++; } else { numInside++; } // report status i++; if (i % 1000 == 0) { reporter.setStatus("Generated " + i + " samples."); } } // output map results out.collect(new BooleanWritable(true), new LongWritable(numInside)); out.collect(new BooleanWritable(false), new LongWritable(numOutside)); }
/**
 * Writes 50000 rows with random keys into {@code ctable} and emits every generated key.
 *
 * <p>If the {@code err} field is set it is thrown before any work is done. The table name is
 * emitted first, then one record per row; all emitted values are empty. Row keys are random
 * longs formatted with a 20-digit zero-padded pattern.
 *
 * @param key input key, ignored
 * @param value input value, ignored
 * @param collector receives the table name followed by every row key
 * @param reporter pinged every 1000 rows
 * @throws IOException if {@code err} is set, or the table write or the collector fails
 */
public void map(
    WritableComparable key,
    Writable value,
    OutputCollector<WritableComparable, Writable> collector,
    Reporter reporter)
    throws IOException {
  LOG.info("Start Map");
  if (err != null) {
    throw err;
  }

  DecimalFormat keyFormat = new DecimalFormat("00000000000000000000");
  collector.collect(new Text(tableName), new Text(""));

  for (long i = 0; i < 50000; i++) {
    long randNum = rand.nextLong();
    // Formatted once and used for both the row key and the emitted record.
    String formattedKey = keyFormat.format(randNum);

    Row row = new Row(new Row.Key(formattedKey));
    row.addCell("Col1", new Cell(Cell.Key.EMPTY_KEY, this.data));
    ctable.put(row);

    if (i % 1000 == 0) {
      reporter.progress();
    }
    if (i % 10000 == 0) {
      LOG.info("uploaded: " + i);
    }

    collector.collect(new Text(formattedKey), new Text(""));
  }

  LOG.info("End Map");
}
@Override public void reduce( GenericKey key, Iterator<GenericValue> values, OutputCollector<GenericKey, GenericValue> output, Reporter reporter) throws IOException { if (key.getSecondary() < Preprocesser.MINIMUM_ID) { // vector output.collect(key, values.next()); if (values.hasNext()) assert false : "Vectors should not get grouped by combiner: " + key; } else { // addend reporter.progress(); int counter = 0; float sim = 0; HalfPair hp = null; while (values.hasNext()) { hp = (HalfPair) values.next().get(); sim += hp.getSimilarity(); if (counter++ % REPORTER_INTERVAL == 0) reporter.progress(); } if (hp != null) { payload.set(hp.getID(), sim); outValue.set(payload); output.collect(key, outValue); } else { assert false : "There is nothing to combine!"; } } }
/* * (non-Javadoc) * * @see org.apache.hadoop.mapred.Reducer#reduce(java.lang.Object, * java.util.Iterator, org.apache.hadoop.mapred.OutputCollector, * org.apache.hadoop.mapred.Reporter) * * So the first key that the reducer receives is taken as the source * node.The paper says any arbitrary node can be chosen as the source * node. * * There is only one reducer so all the keys come to the same reducer. * The first key is the source. * * For source node the output is * <Key>\t<neighbor1>,<neighbor2>,<neighbor3>,<neighbor4>|distance|GRAY|source|1 * The distance is the distance from * the parent node. If the distance is modified then it means the parent * of the node changedThey source node is GRAY indicating that this is * the node from which the processing should startThe 'source' indicates * that this is the parent node. * * For other nodes the output is * * <Key>\t<neighbor1>,<neighbor2>,<neighbor3>,<neighbor4>|Integer.MAX_VALUE * |WHITE|null|0 * * The color of these nodes is white since they are ready to be * processed first or they are in the search frontier.The distance is * set to some arbitrary number since the node is not yet reached. * * The null indicates that there is no parent for this node. */ public void reduce( Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter) throws IOException { // TODO Auto-generated method stub StringBuffer b = new StringBuffer(); while (values.hasNext()) b.append(values.next().toString()); if (!source) { output.collect(key, new Text(b.toString() + "|0|GRAY|source|1")); source = true; } else output.collect(key, new Text(b.toString() + "|Integer.MAX_VALUE|WHITE|null|0")); }
/**
 * MAPPING FUNCTION. Emits the node's current page rank twice: once under the node's own key
 * and once under {@code special_key}.
 *
 * @param key Text which is the document link.
 * @param value MapReduceNode which represents a document
 * @param output Map from each link URL or a special key (Text) and final page rank.
 *     (DoubleWritable)
 * @param reporter not used
 * @throws IOException if the collector fails
 */
@Override
public void map(
    Text key, MapReduceNode value, OutputCollector<Text, DoubleWritable> output, Reporter reporter)
    throws IOException {
  final DoubleWritable rank = value.getCurrentPageRank();

  // Per-document record.
  output.collect(key, rank);
  // Same rank again under the shared key that every document writes to.
  output.collect(special_key, rank);
}
/** * @param key the input file * @param value population of candiates * @param output output to reducer * @param reporter for reporting purposes * @throws IOException */ @Override public void map( Text key, Population value, OutputCollector<Text, BooleanWritable> output, Reporter reporter) throws IOException { try { try { /** * evolve the population of candidates of size (popsize) untill there is a match to the * key (problem) */ solution = Population.evolveSolution(key.toString(), popsize, Population.generatedpopulation); /** return the result of the best solution found */ result = EvolutionStalker.populationUpdates(EvolutionStalker.output); /** if solution found, output the best solution to reducer */ if (solution.contentEquals(key.toString())) { found = true; // set found to true result = EvolutionStalker.populationUpdates(EvolutionStalker.output); output.collect( new Text(result), new BooleanWritable(found)); // emit best solution to reducer } else { result = EvolutionStalker.populationUpdates(EvolutionStalker.output); output.collect(new Text(result), new BooleanWritable(found)); } } catch (NumberFormatException e) { log.warning("Unable to parse key," + key + " " + value + "" + e); // return; } } catch (Throwable e) { log.severe( "unexpected exception in mapper for key," + "value " + key + "," + value + "" + e); if (e instanceof IOException) { throw (IOException) e; } if (e instanceof RuntimeException) { throw (RuntimeException) e; } throw new IOException("unknown Exception occured", e); } }
public void map( LongWritable arg0, Text value, OutputCollector<LongWritable, LongWritable> output, Reporter arg3) throws IOException { String v = value.toString(); int edgeCount = v.trim().split(" |,").length; // This is corresponding to the edges output.collect(new LongWritable(-1), new LongWritable(edgeCount)); // This is corresponding to the vertices. FOr each vertex we emit <-2,1> KV pair output.collect(new LongWritable(-2), new LongWritable(1)); }
/**
 * Writes every value of the group to the "station" named output, using the key with all
 * {@code '-'} characters removed as the multi-output name and a null-writable key.
 *
 * @param key the group key, used only to derive the output name
 * @param values the records to write
 * @param output the regular collector, not used
 * @param reporter passed on to the multiple-outputs collector
 * @throws IOException if the collector cannot be obtained or fails
 */
@SuppressWarnings("unchecked")
public void reduce(
    Text key,
    Iterator<Text> values,
    OutputCollector<NullWritable, Text> output,
    Reporter reporter)
    throws IOException {
  String multiName = key.toString().replace("-", "");
  OutputCollector collector = multipleOutputs.getCollector("station", multiName, reporter);

  while (values.hasNext()) {
    Text record = values.next();
    collector.collect(NullWritable.get(), record);
  }
}
public void reduce( IntWritable key, Iterator<HITSNode> values, OutputCollector<IntWritable, HITSNode> output, Reporter reporter) throws IOException { // ArrayListOfIntsWritable adjList = new ArrayListOfIntsWritable(); adjList.clear(); // System.out.println(key.toString()); // System.out.println(adjList.toString()); while (values.hasNext()) { valIn = values.next(); ArrayListOfIntsWritable adjListIn = valIn.getAdjacencyList(); adjListIn.trimToSize(); adjList.addAll(adjListIn.getArray()); // System.out.println(adjList.toString()); } valOut.setType(HITSNode.TYPE_AUTH_COMPLETE); valOut.setHARank((float) 0.0); valOut.setAdjacencyList(adjList); valOut.setNodeId(key.get()); output.collect(key, valOut); }
/**
 * Computes the correlation between the two rating columns of a movie pair.
 *
 * <p>Each value is {@code rating1,rating2}. The result is the sum of the products of the
 * deviations from the column means, divided by the product of the square roots of the summed
 * squared deviations. A {@code NaN} result (for example when a column has no variance or there
 * are no values) is replaced by {@code 0}. The intermediate sums and the raw result are also
 * printed to standard output.
 *
 * @param moviePair the pair of movies, re-emitted as the key
 * @param ratings the co-ratings of the pair
 * @param output receives {@code (moviePair, correlation)}
 * @param reporter not used
 * @throws IOException if the collector fails
 */
@Override
public void reduce(
    Text moviePair,
    Iterator<Text> ratings,
    OutputCollector<Text, Text> output,
    Reporter reporter)
    throws IOException {
  ArrayList<Double> first = new ArrayList<Double>();
  ArrayList<Double> second = new ArrayList<Double>();
  double total1 = 0;
  double total2 = 0;

  // First pass: collect both columns and their totals.
  while (ratings.hasNext()) {
    String[] pair = ratings.next().toString().split(",");
    double r1 = Double.parseDouble(pair[0]);
    double r2 = Double.parseDouble(pair[1]);
    first.add(r1);
    second.add(r2);
    total1 += r1;
    total2 += r2;
  }

  int n = first.size();
  double count = n;
  double mean1 = total1 / count;
  double mean2 = total2 / count;

  // Second pass: sums of squared deviations and of cross products.
  double sum1 = 0.0;
  double sum2 = 0.0;
  double sumProduct = 0.0;
  for (int i = 0; i < n; i++) {
    double dev1 = first.get(i) - mean1;
    double dev2 = second.get(i) - mean2;
    sum1 += Math.pow(dev1, 2);
    sum2 += Math.pow(dev2, 2);
    sumProduct += dev1 * dev2;
  }

  double corr = sumProduct / (Math.sqrt(sum1) * Math.sqrt(sum2));
  System.out.println(sum1 + "," + sum2 + "," + sumProduct + "," + corr);

  if (Double.isNaN(corr)) {
    corr = 0;
  }
  output.collect(moviePair, new Text(Double.toString(corr)));
}
public void reduce( Text key, Iterator<NutchWritable> values, OutputCollector<Text, Text> output, Reporter reporter) throws IOException { StringBuffer dump = new StringBuffer(); dump.append("\nRecno:: ").append(recNo++).append("\n"); dump.append("URL:: " + key.toString() + "\n"); while (values.hasNext()) { Writable value = values.next().get(); // unwrap if (value instanceof CrawlDatum) { dump.append("\nCrawlDatum::\n").append(((CrawlDatum) value).toString()); } else if (value instanceof Content) { dump.append("\nContent::\n").append(((Content) value).toString()); } else if (value instanceof ParseData) { dump.append("\nParseData::\n").append(((ParseData) value).toString()); } else if (value instanceof ParseText) { dump.append("\nParseText::\n").append(((ParseText) value).toString()); } else if (LOG.isWarnEnabled()) { LOG.warn("Unrecognized type: " + value.getClass()); } } output.collect(key, new Text(dump.toString())); }
@Override public void reduce( IntWritable key, Iterator<Text> values, OutputCollector<IntWritable, Text> output, Reporter reporter) throws IOException { List<Text> list = new ArrayList<Text>(); while (values.hasNext()) { // Iteratorの要素は参照が再利用される実装であるため、別途Textをnewする必要がある。 list.add(new Text(values.next())); } // ageでソート Collections.sort( list, new Comparator<Text>() { private EmployeeRecordParser parser1 = new EmployeeRecordParser(); private EmployeeRecordParser parser2 = new EmployeeRecordParser(); @Override public int compare(Text o1, Text o2) { parser1.parse(o1); parser2.parse(o2); return Integer.valueOf(parser1.getEmployeeAge()).compareTo(parser2.getEmployeeAge()); } }); for (Text value : list) { output.collect(key, value); } }
/**
 * Parses the record with the shared {@code parser} and emits it unchanged, keyed by the
 * department id the parser reports.
 *
 * @param key input key, ignored
 * @param value the record to parse and forward
 * @param output receives {@code (departmentId, value)}
 * @param reporter not used
 * @throws IOException if the collector fails
 */
@Override
public void map(
    Object key, Text value, OutputCollector<IntWritable, Text> output, Reporter reporter)
    throws IOException {
  parser.parse(value);
  IntWritable department = new IntWritable(parser.getDepartmentId());
  output.collect(department, value);
}
/**
 * Emits every value of the group with a {@code null} key; the incoming key is not used.
 *
 * @param key the grouping key, ignored
 * @param iter the values of the group
 * @param oc receives {@code (null, value)} for each value
 * @param reporter not used
 * @throws IOException if the collector fails
 */
public void reduce(
    MyType key, Iterator<Text> iter, OutputCollector<MyType, Text> oc, Reporter reporter)
    throws IOException {
  while (iter.hasNext()) {
    Text record = iter.next();
    oc.collect(null, record);
  }
}
// specify input and out keys public void map( LongWritable key, Text value, OutputCollector<Text, Text> output, Reporter reporter) throws IOException { String line = value.toString(); // define new variable to be string ArrayList<Integer> range = new ArrayList<Integer>(); for (int i = 2000; i <= 2010; i++) { range.add(i); } // String[] inputs = line.split(",(?=([^\"]*\"[^\"]*\")*[^\"]*$)"); String[] inputs = line.split(","); try { int year = Integer.parseInt(inputs[165]); if (range.contains(year)) { String dur = inputs[3]; String artist_name = inputs[2]; String song_title = inputs[1]; String final_input = artist_name + ',' + dur + ',' + song_title; Final_Value.set(final_input); output.collect(Final_Value, dummy); } } catch (NumberFormatException e) { // do nothing } }
/**
 * {@inheritDoc}
 *
 * <p>Emits {@code (word, ONE)} for every whitespace-separated token of the line, after
 * optionally lower-casing the line and deleting every match of the skip patterns. The
 * {@code INPUT_WORDS} counter is incremented per token and the task status is refreshed every
 * 100 records.
 *
 * @see org.apache.hadoop.mapred.Mapper#map(java.lang.Object, java.lang.Object,
 *     org.apache.hadoop.mapred.OutputCollector, org.apache.hadoop.mapred.Reporter)
 */
@Override
public void map(
    LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter)
    throws IOException {
  String line = value.toString();
  if (!m_caseSensitive) {
    line = line.toLowerCase();
  }

  // Each skip pattern is applied as a regular expression.
  for (String pattern : m_patternsToSkip) {
    line = line.replaceAll(pattern, "");
  }

  for (StringTokenizer tokens = new StringTokenizer(line); tokens.hasMoreTokens(); ) {
    m_word.set(tokens.nextToken());
    output.collect(m_word, ONE);
    reporter.incrCounter(Counters.INPUT_WORDS, 1);
  }

  m_numRecords++;
  if (m_numRecords % 100 == 0) {
    reporter.setStatus(
        "Finished processing "
            + m_numRecords
            + " records "
            + "from the input file: "
            + m_inputFile);
  }
}
/**
 * Run a FileOperation: raids the file named by the key under the given policy.
 *
 * <p>On success the processed/meta block and size counters and the succeeded-files counter
 * are updated. An {@link IOException} does not fail the task; it is counted, emitted as a
 * "FAIL: ..." record with a null key, and logged. The task status is refreshed in every case.
 *
 * @param key path of the file to raid
 * @param policy the policy to apply
 * @param out receives a failure description when raiding fails
 * @param reporter receives the counters and status; also stored in {@code this.reporter}
 * @throws IOException if emitting the failure record fails
 */
public void map(
    Text key, PolicyInfo policy, OutputCollector<WritableComparable, Text> out, Reporter reporter)
    throws IOException {
  this.reporter = reporter;
  try {
    LOG.info("Raiding file=" + key.toString() + " policy=" + policy);

    Path path = new Path(key.toString());
    FileStatus status = path.getFileSystem(jobconf).getFileStatus(path);

    // The statistics object is shared between calls; reset it before each file.
    st.clear();
    RaidNode.doRaid(jobconf, policy, status, st, reporter);

    ++succeedcount;
    reporter.incrCounter(Counter.PROCESSED_BLOCKS, st.numProcessedBlocks);
    reporter.incrCounter(Counter.PROCESSED_SIZE, st.processedSize);
    reporter.incrCounter(Counter.META_BLOCKS, st.numMetaBlocks);
    reporter.incrCounter(Counter.META_SIZE, st.metaSize);
    reporter.incrCounter(Counter.FILES_SUCCEEDED, 1);
  } catch (IOException e) {
    ++failcount;
    reporter.incrCounter(Counter.FILES_FAILED, 1);

    String failure =
        "FAIL: " + policy + ", " + key + " " + StringUtils.stringifyException(e);
    out.collect(null, new Text(failure));
    LOG.info(failure);
  } finally {
    reporter.setStatus(getCountString());
  }
}
/**
 * Parses an adjacency-list line ({@code nodeId neighbor1 neighbor2 ...}) into the shared
 * {@code node} and emits it keyed by node id.
 *
 * <p>Counters in group "graph": "numNodes" is incremented per line, "numEdges" by the number
 * of neighbors, and "numActiveNodes" for lines with at least one neighbor.
 *
 * @param key input key, ignored
 * @param t one whitespace-separated adjacency line
 * @param output receives {@code (nodeId, node)}
 * @param reporter receives the graph counters
 * @throws IOException if the collector fails
 */
public void map(
    LongWritable key,
    Text t,
    OutputCollector<IntWritable, PageRankNode> output,
    Reporter reporter)
    throws IOException {
  String[] arr = t.toString().trim().split("\\s+");
  int neighborCount = arr.length - 1;

  int nodeId = Integer.parseInt(arr[0]);
  nid.set(nodeId);
  node.setNodeId(nodeId);

  if (neighborCount == 0) {
    // Node without outgoing links.
    node.setAdjacencyList(new ArrayListOfIntsWritable());
  } else {
    int[] neighbors = new int[neighborCount];
    for (int i = 0; i < neighborCount; i++) {
      neighbors[i] = Integer.parseInt(arr[i + 1]);
    }
    node.setAdjacencyList(new ArrayListOfIntsWritable(neighbors));
  }

  reporter.incrCounter("graph", "numNodes", 1);
  reporter.incrCounter("graph", "numEdges", neighborCount);
  if (neighborCount > 0) {
    reporter.incrCounter("graph", "numActiveNodes", 1);
  }

  output.collect(nid, node);
}
public void map( LongWritable key, Text value, OutputCollector<IntWritable, HITSNode> output, Reporter reporter) throws IOException { ArrayListOfIntsWritable links = new ArrayListOfIntsWritable(); String line = ((Text) value).toString(); StringTokenizer itr = new StringTokenizer(line); if (itr.hasMoreTokens()) { int curr = Integer.parseInt(itr.nextToken()); if (stopList.contains(curr)) { return; } valOut.setAdjacencyList(links); valOut.setHARank((float) 1.0); valOut.setType(HITSNode.TYPE_AUTH_COMPLETE); } while (itr.hasMoreTokens()) { keyOut.set(Integer.parseInt(itr.nextToken())); valOut.setNodeId(keyOut.get()); // System.out.println(keyOut.toString() + ", " + // valOut.toString()); if (!(stopList.contains(keyOut.get()))) { output.collect(keyOut, valOut); } } // emit mentioned mentioner -> mentioned (mentioners) in links // emit mentioner mentioned -> mentioner (mentions) outlinks // emit mentioned a // emit mentioner 1 }
public void reduce( CompositeKey key, Iterator<PairOfLongInt> values, OutputCollector<Text, Text> output, Reporter reporter) throws IOException { // note that values are sorted (by using MR's secondary sort) // below, builder will generate: // CustoerID,Date1,Amount1,Date2,Amount2,...,DateN,AmountN // where Date1 <= Date2 <= ... <= DateN StringBuilder builder = new StringBuilder(); builder.append(key.toString()); while (values.hasNext()) { builder.append(","); PairOfLongInt pair = values.next(); long timestamp = pair.getLeftElement(); // date as milliseconds String date = DateUtil.getDateAsString(timestamp); builder.append(date); // date as String builder.append(","); builder.append(pair.getRightElement()); // amount } output.collect(null, new Text(builder.toString())); } // reduce