public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { String keyS = key.toString(); if (keyS.startsWith("O") || keyS.startsWith("P") || keyS.startsWith("S")) { String sum = new String(); for (Text val : values) { sum += (" " + val.toString()); } // String subKey = keyS.substring(0,keyS.length()-1); // Text t = new Text(); // t.set(subKey); result.set(sum); context.write(key, result); } if (keyS.startsWith("L")) { // String [] keyIdS = keyS.substring(1).split("[+]"); result.set(" "); context.write(key, result); // String KeyIdS1 = keyIdS[1]; // result.set(KeyIdS1); // context.write(key, result); // String KeyIdS2 = keyIdS[2]; // result.set(KeyIdS2); // context.write(key, result); } }
/**
 * Maps one whitespace-delimited line, keyed by its first token.
 *
 * <p>The name of the input split's grandparent directory is compared with the "train_file"
 * configuration value. On a match the value is the line's second token. Otherwise the second
 * token is consumed but not emitted, and the remaining tokens are read in pairs and serialised
 * as {@code dlt + first + dlt + second + "\t" ...}.
 *
 * @param key input key (unused)
 * @param value the input line
 * @param context mapper context providing the split, configuration and output
 * @throws IOException if writing to the context fails
 * @throws InterruptedException if the task is interrupted while writing
 */
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  String inputDirName =
      ((FileSplit) context.getInputSplit()).getPath().getParent().getParent().getName();
  String trainDirName = context.getConfiguration().get("train_file");
  boolean isTrainingInput = inputDirName.equals(trainDirName);

  // Both input kinds start with the same two tokens.
  StringTokenizer tokens = new StringTokenizer(value.toString());
  String term = tokens.nextToken();
  String secondToken = tokens.nextToken();

  String payload;
  if (isTrainingInput) {
    payload = secondToken;
  } else {
    StringBuilder pairs = new StringBuilder(dlt);
    while (tokens.hasMoreTokens()) {
      String fileName = tokens.nextToken();
      String weight = tokens.nextToken();
      pairs.append(fileName).append(dlt).append(weight).append("\t");
    }
    payload = pairs.toString();
  }

  myKey.set(term);
  myVal.set(payload);
  context.write(myKey, myVal);
}
@Override public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String line = value.toString(); // skip the header if (line.startsWith(OTPConsts.HEADER_START)) return; // two possible input format: 1. prediction input, 2. query input String k; String v; String[] splits = line.split(","); if (line.startsWith("P")) { // handle prediction input k = extractKey(splits, 1, 5); v = splits[6] + OTPConsts.COMMA + splits[7] + OTPConsts.COMMA + splits[8] + OTPConsts.COMMA + splits[9]; context.write(new Text(k), new Text(v)); } else { k = extractKey(splits, 0, -1); v = "Q"; context.write(new Text(k), new Text(v)); } }
@Override public void reduce(IntWritable nid, Iterable<PageRankNode> values, Context context) throws IOException, InterruptedException { int massMessages = 0; // Remember, PageRank mass is stored as a log prob. float mass = Float.NEGATIVE_INFINITY; for (PageRankNode n : values) { if (n.getType() == PageRankNode.Type.Structure) { // Simply pass along node structure. context.write(nid, n); } else { // Accumulate PageRank mass contributions. mass = sumLogProbs(mass, n.getPageRank()); massMessages++; } } // Emit aggregated results. if (massMessages > 0) { intermediateMass.setNodeId(nid.get()); intermediateMass.setType(PageRankNode.Type.Mass); intermediateMass.setPageRank(mass); context.write(nid, intermediateMass); } }
@Override public void map(Object key, Text value, Context context) throws IOException, InterruptedException { // Parse the input string into a nice map Map<String, String> parsed = MRDPUtils.transformXmlToMap(value.toString()); String userId = parsed.get("UserId"); if (userId == null) { return; } String userInformation = userIdToInfo.get(userId); // If the user information is not null, then output if (userInformation != null) { outvalue.set(userInformation); context.write(value, outvalue); } else if (joinType.equalsIgnoreCase("leftouter")) { // If we are doing a left outer join, output the record with an // empty value context.write(value, new Text("")); } }
@Override public void reduce(IntWritable nid, Iterable<PersonalizedPageRankNode> values, Context context) throws IOException, InterruptedException { int massMessages = 0; // Remember, PageRank mass is stored as a log prob. float[] mass = new float[sources.size()]; for (int i = 0; i < sources.size(); i++) { mass[i] = Float.NEGATIVE_INFINITY; } for (PersonalizedPageRankNode n : values) { if (n.getType() == PersonalizedPageRankNode.Type.Structure) { // Simply pass along node structure. context.write(nid, n); } else { // Accumulate PageRank mass contributions. for (int j = 0; j < sources.size(); j++) { mass[j] = sumLogProbs(mass[j], n.getPageRank(j)); } massMessages++; } } // Emit aggregated results. if (massMessages > 0) { intermediateMass.setNodeId(nid.get()); intermediateMass.setType(PersonalizedPageRankNode.Type.Mass); for (int i = 0; i < sources.size(); i++) { intermediateMass.setPageRank(i, mass[i]); } context.write(nid, intermediateMass); } }
public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { for (Text val : values) { treeset.add(val.toString()); } int treesize = treeset.size(); int limit = treesize - 10; Iterator itr = treeset.iterator(); while (itr.hasNext()) { String[] va; c++; if (c < 11) { va = itr.next().toString().split("!"); Put pa = new Put(va[1].getBytes()); // System.out.println(va[0]+" and "+va[1]); pa.add(Bytes.toBytes("stock"), Bytes.toBytes("volatility"), Bytes.toBytes(va[0])); context.write(new ImmutableBytesWritable(va[1].getBytes()), pa); } else if (c > limit) { va = itr.next().toString().split("!"); // System.out.println(va[0]+" and "+va[1]); Put pa = new Put(va[1].getBytes()); pa.add(Bytes.toBytes("stock"), Bytes.toBytes("volatility"), Bytes.toBytes(va[0])); context.write(new ImmutableBytesWritable(va[1].getBytes()), pa); } else { itr.next(); } } }
/**
 * Merges the partial results and emits the final covariance matrix and mean vector.
 *
 * <p>Every incoming value is deserialised as a {@code PartialResult} and added to a
 * {@code DistributedStep2Master} configured with {@code Method.fastCSR}. After computing and
 * finalising, the covariance table is written under key 0 and the mean table under key 1.
 *
 * @param key reduce key (unused)
 * @param values serialised partial results
 * @param context reducer context that receives the output
 * @throws IOException if writing to the context fails
 * @throws InterruptedException if the task is interrupted while writing
 */
@Override
public void reduce(IntWritable key, Iterable<WriteableData> values, Context context)
    throws IOException, InterruptedException {
  DaalContext daalContext = new DaalContext();
  try {
    /* Create an algorithm to compute a sparse variance-covariance matrix on the master node */
    DistributedStep2Master covarianceSparseMaster =
        new DistributedStep2Master(daalContext, Double.class, Method.fastCSR);
    for (WriteableData value : values) {
      PartialResult pr = (PartialResult) value.getObject(daalContext);
      covarianceSparseMaster.input.add(DistributedStep2MasterInputId.partialResults, pr);
    }

    /* Compute a sparse variance-covariance matrix on the master node */
    covarianceSparseMaster.compute();

    /* Finalize computations and retrieve the results */
    Result result = covarianceSparseMaster.finalizeCompute();
    HomogenNumericTable covariance = (HomogenNumericTable) result.get(ResultId.covariance);
    HomogenNumericTable mean = (HomogenNumericTable) result.get(ResultId.mean);

    context.write(new IntWritable(0), new WriteableData(covariance));
    context.write(new IntWritable(1), new WriteableData(mean));
  } finally {
    // Dispose of the context even when deserialisation, compute or write throws.
    daalContext.dispose();
  }
}
public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException { int sum = 0; boolean filtered = true; String keypair = key.toString(); String[] keys = keypair.split(" "); if (keys.length == 1) { filtered = false; } else { if (keys[0].equals("*") || keys[1].equals("*")) { filtered = false; } } if (!filtered) { for (IntWritable val : values) { sum += val.get(); } context.write(key, new IntWritable(sum)); return; } for (IntWritable val : values) { if (val.get() == -1) { filtered = false; continue; } sum += val.get(); } // filter non-needed events if (filtered) return; context.write(key, new IntWritable(sum)); }
/** * Map method. * * @param offset samples starting from the (offset+1)th sample. * @param size the number of samples for this map * @param context output {ture->numInside, false->numOutside} */ public void map(LongWritable offset, LongWritable size, Context context) throws IOException, InterruptedException { final HaltonSequence haltonsequence = new HaltonSequence(offset.get()); long numInside = 0L; long numOutside = 0L; for (long i = 0; i < size.get(); ) { // generate points in a unit square final double[] point = haltonsequence.nextPoint(); // count points inside/outside of the inscribed circle of the square final double x = point[0] - 0.5; final double y = point[1] - 0.5; if (x * x + y * y > 0.25) { numOutside++; } else { numInside++; } // report status i++; if (i % 1000 == 0) { context.setStatus("Generated " + i + " samples."); } } // output map results context.write(new BooleanWritable(true), new LongWritable(numInside)); context.write(new BooleanWritable(false), new LongWritable(numOutside)); }
/**
 * Parses one record into a {@code Spinn3rDocument}, applies the filter and writes the document
 * when it matches.
 *
 * <p>Records longer than {@code MAX_DOC_SIZE_IN_BYTES} are counted under
 * {@code ProcessingTime.SKIPPED} and dropped. Elapsed nanoseconds for parsing and filtering are
 * added to the {@code PARSING} and {@code FILTERING} counters. The scratch values {@code t1},
 * {@code t2} and {@code t} are fields declared outside this method.
 *
 * @param key input key (unused)
 * @param value the raw document
 * @param context mapper context providing counters and output
 * @throws IOException if writing to the context fails
 * @throws InterruptedException if the task is interrupted while writing
 */
@Override
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  // Oversized documents are skipped because of memory limits and regex handling.
  if (value.getLength() > MAX_DOC_SIZE_IN_BYTES) {
    context.getCounter(ProcessingTime.SKIPPED).increment(1);
    return;
  }

  // Timed parse.
  t1 = System.nanoTime();
  Spinn3rDocument d = new Spinn3rDocument(value.toString());
  t2 = System.nanoTime();
  context.getCounter(ProcessingTime.PARSING).increment(t2 - t1);

  // Timed filter.
  t1 = System.nanoTime();
  t = filter.documentSatisfies(d);
  t2 = System.nanoTime();
  context.getCounter(ProcessingTime.FILTERING).increment(t2 - t1);

  if (!t) {
    return;
  }
  Text rendered;
  if (cmdMap.hasOption("formatF5")) {
    rendered = new Text(d.toStringF5());
  } else {
    rendered = new Text(d.toString());
  }
  context.write(rendered, NullWritable.get());
}
@Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { // 获取输入文件的全路径和名称 String pathName = ((FileSplit) context.getInputSplit()).getPath().toString(); if (pathName.contains("data.txt")) { String values[] = value.toString().split("\t"); if (values.length < 3) { // data数据格式不规范,字段小于3,抛弃数据 return; } else { // 数据格式规范,区分标识为1 TextPair tp = new TextPair(new Text(values[1]), new Text("1")); context.write(tp, new Text(values[0] + "\t" + values[2])); } } if (pathName.contains("info.txt")) { String values[] = value.toString().split("\t"); if (values.length < 2) { // data数据格式不规范,字段小于2,抛弃数据 return; } else { // 数据格式规范,区分标识为0 TextPair tp = new TextPair(new Text(values[0]), new Text("0")); context.write(tp, new Text(values[1])); } } }
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { // System.out.println("in mapper, input "+ key + " " + value + ";"); // userRow = null; userRow = value.toString().split("\\s"); if (userRow.length == 1) { userRow = null; return; } // friendList = null; friendList = userRow[1].split(","); for (i = 0; i < friendList.length; i++) { keyUser.set(new Text(friendList[i])); for (j = 0; j < friendList.length; j++) { if (j == i) { continue; } suggTuple.set(friendList[j] + ",1"); context.write(keyUser, suggTuple); // System.out.println(keyUser + ",(" + suggTuple + ")"); } existingFriend.set(userRow[0] + ",-1"); context.write(keyUser, existingFriend); // System.out.println(keyUser + ",(" + existingFriend + ")"); } /*DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss"); Date date = new Date(); System.out.println("Mapper done at: " + dateFormat.format(date)); //2014/08/06 15:59:48*/ }
public void map(Object key, Text value, Context context) throws IOException, InterruptedException { NodeWritable n = new NodeWritable(value.toString().trim()); // Emit node to carry forward the Model. NodeWritable p = new NodeWritable(value.toString().trim()); p.setIsNode(new Text("YES")); p.setIsInList(new Text("***")); context.write(new Text(p.getNid().toString()), p); // For Each OutLinks Emit This Node for (NodeWritable x : n.getOuts()) { if (!x.getNid().toString().equals(n.getNid().toString())) { n.setIsInList(new Text("YES")); n.setIsNode(new Text("NO")); context.write(new Text(x.getNid().toString()), n); } } // For Each Inlinks Emit This Node for (NodeWritable x : n.getIns()) { if (!x.getNid().toString().equals(n.getNid().toString())) { n.setIsInList(new Text("NO")); n.setIsNode(new Text("NO")); context.write(new Text(x.getNid().toString()), n); } } }
@Override protected void map(IntWritable row, VectorWritable similaritiesWritable, Context ctx) throws IOException, InterruptedException { Vector similarities = similaritiesWritable.get(); // For performance, the creation of transposedPartial is moved out of the while loop and it is // reused inside Vector transposedPartial = new RandomAccessSparseVector(similarities.size(), 1); TopElementsQueue topKQueue = new TopElementsQueue(maxSimilaritiesPerRow); Iterator<Vector.Element> nonZeroElements = similarities.iterateNonZero(); while (nonZeroElements.hasNext()) { Vector.Element nonZeroElement = nonZeroElements.next(); MutableElement top = topKQueue.top(); double candidateValue = nonZeroElement.get(); if (candidateValue > top.get()) { top.setIndex(nonZeroElement.index()); top.set(candidateValue); topKQueue.updateTop(); } transposedPartial.setQuick(row.get(), candidateValue); ctx.write(new IntWritable(nonZeroElement.index()), new VectorWritable(transposedPartial)); transposedPartial.setQuick(row.get(), 0.0); } Vector topKSimilarities = new RandomAccessSparseVector(similarities.size(), maxSimilaritiesPerRow); for (Vector.Element topKSimilarity : topKQueue.getTopElements()) { topKSimilarities.setQuick(topKSimilarity.index(), topKSimilarity.get()); } ctx.write(row, new VectorWritable(topKSimilarities)); }
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String val = value.toString(); String label = ""; String input_label = ""; if (val.contains("Processed")) context.getCounter(Driver.Progress.Completion).increment(1); if (val.contains("Training")) { if (!val.split(" ")[1].contains(":")) { label = val.split(" ")[1]; // FIRST LABEL for (int i = 2; i < val.split(" ").length; i++) { input_label = input_label + " " + val.split(" ")[i]; } input_label = val.split(" ")[0] + "#" + label + " " + input_label; context.write( new Text(val.split(" ")[0].substring(val.split(" ")[0].lastIndexOf("g") + 1) + label), new Text(input_label)); } } else { if (!val.split(" ")[2].contains(":")) { label = val.split(" ")[2]; for (int i = 1; i < val.split(" ").length; i++) { input_label = input_label + val.split(" ")[i]; } input_label = val.split(" ")[0] + "#" + label + " " + input_label; context.write( new Text(val.split(" ")[0].substring(val.split(" ")[0].lastIndexOf("t") + 1) + label), new Text(input_label)); } } } // map ends
@Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String[] valueArr = value.toString().split(MRConstants.SEPERATOR_IN); int loginTime = StringUtil.convertInt(valueArr[Index_LoginTime], 0); int onlineTime = StringUtil.convertInt(valueArr[Index_OnlineTime], 0); Calendar calendar = Calendar.getInstance(); Date date = ConfigManager.getInitialDate(context.getConfiguration()); if (date != null) { calendar.setTime(date); } calendar.add(Calendar.HOUR_OF_DAY, -1); // 默认取调度初始化时间的前一个小时 calendar.set(Calendar.MINUTE, 0); calendar.set(Calendar.SECOND, 0); int startPoint = (int) (calendar.getTimeInMillis() / 1000); // 找出起始时间点 if (loginTime > 0 && onlineTime > 0) { for (int i = 0; i < 12; i++) { int point = startPoint + i * 5 * 60; // 5分钟为步长 if (loginTime <= point && onlineTime > (point - loginTime)) { // 登陆时间在point点之前且在线时长大于point点-登陆时间记为一次在线 /* * ACU/PCU 计算调整为不分渠道统计,只按区服统计 * 在分区服统计的同时,另加一个不分区服的全量统计 String[] keyFields = new String[] { valueArr[Index_Appid], valueArr[Index_Platform], valueArr[Index_Channel], valueArr[Index_gameServer], point + "" };*/ // 分区服的统计 String[] keyFields = new String[] { valueArr[Index_Appid], valueArr[Index_Platform], valueArr[Index_gameServer], point + "" }; mapKeyObj.setOutFields(keyFields); context.write(mapKeyObj, one); // 不分区服的统计,gameServer 以 '-' 代替 String[] keyFieldsAll = new String[] { valueArr[Index_Appid], valueArr[Index_Platform], MRConstants.INVALID_PLACE_HOLDER_CHAR, point + "" }; mapKeyObj.setOutFields(keyFieldsAll); context.write(mapKeyObj, one); } } } }
/**
 * Flushes the accumulated loss and the non-zero gradient components.
 *
 * <p>The loss is written under key -1; each non-zero gradient entry is written under its index.
 *
 * @param context task context that receives the output
 * @throws IOException if writing to the context fails
 * @throws InterruptedException if the task is interrupted while writing
 */
@Override
public void cleanup(Context context) throws IOException, InterruptedException {
  IntWritable lossKey = new IntWritable(-1);
  context.write(lossKey, new FloatWritable((float) loss));

  for (int index = 0; index < gradient.length; index++) {
    if (gradient[index] == (float) 0.0) {
      // Zero components are omitted from the output.
      continue;
    }
    context.write(new IntWritable(index), new FloatWritable((float) gradient[index]));
  }
}
@Override protected void cleanup(Context ctx) throws IOException, InterruptedException { super.cleanup(ctx); // dirty trick ctx.write(new IntWritable(NORM_VECTOR_MARKER), new VectorWritable(norms)); ctx.write( new IntWritable(NUM_NON_ZERO_ENTRIES_VECTOR_MARKER), new VectorWritable(nonZeroEntries)); ctx.write(new IntWritable(MAXVALUE_VECTOR_MARKER), new VectorWritable(maxValues)); }
/**
 * Routes one tab-separated line either as a play record or as a dimension record, keyed by
 * infohash / media id.
 *
 * <p>When {@code filePath} contains "play" (case-insensitive) and the line has more fields than
 * {@code PlayFormatEnum.MEDIA_TYPE_ID}'s ordinal, the line is normalised through
 * {@code getPlayFormatStr}. The key is the upper-cased INFOHASH_ID field when the media type is
 * "1" or the URL contains "subject/play", otherwise the MEDIA_ID field. The record is written
 * only when the normalised line has exactly {@code SEIDCOUNT.ordinal() + 1} fields.
 *
 * <p>Otherwise the line is treated as dimension data. "infohash" files pass the trimmed line
 * through; "mediainfo" files are prefixed with {@code DEFAULT_INFOHASH} and
 * {@code DEFAULT_SERIAL_ID}. In both cases the key is the trimmed, upper-cased IH field, and
 * nothing is written when that field is missing or empty.
 *
 * <p>Any exception is reported to the "_error/part" named output together with the raw line.
 *
 * @param key input key (unused)
 * @param value the input line
 * @param context mapper context that receives the output
 * @throws IOException if writing fails
 * @throws InterruptedException if the task is interrupted while writing
 */
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  try {
    String line = value.toString();
    String[] fields = line.split(FIELD_TAB_SEPARATOR, -1);
    String lowerPath = filePath.toLowerCase();

    boolean isPlayRecord =
        lowerPath.contains("play") && fields.length > PlayFormatEnum.MEDIA_TYPE_ID.ordinal();
    if (isPlayRecord) {
      String playETLStr = getPlayFormatStr(line);
      String[] playField = playETLStr.split(FIELD_TAB_SEPARATOR, -1);

      boolean keyedByInfohash =
          playField[PlayFormatEnum.MEDIA_TYPE_ID.ordinal()].trim().equals("1")
              || playField[PlayFormatEnum.URL.ordinal()].contains("subject/play");
      String joinKey =
          keyedByInfohash
              ? playField[PlayFormatEnum.INFOHASH_ID.ordinal()].toUpperCase()
              : playField[PlayFormatEnum.MEDIA_ID.ordinal()];

      if (null != joinKey && playField.length == PlayFormatEnum.SEIDCOUNT.ordinal() + 1) {
        keyText.set(joinKey.trim());
        valueText.set(playETLStr);
        context.write(keyText, valueText);
      }
      return;
    }

    String dimLine = "";
    String dimInfo = null;
    if (lowerPath.contains("infohash")) {
      if (fields.length > DMInfoHashEnum.MEDIA_ID.ordinal()) {
        dimLine = line.trim();
        dimInfo = fields[DMInfoHashEnum.IH.ordinal()];
      }
    } else if (lowerPath.contains("mediainfo")) {
      // Media-info rows get default infohash and serial-id columns prepended.
      dimLine =
          DEFAULT_INFOHASH
              + FIELD_TAB_SEPARATOR
              + DEFAULT_SERIAL_ID
              + FIELD_TAB_SEPARATOR
              + line.trim();
      dimInfo = fields[DMInfoHashEnum.IH.ordinal()];
    }

    if (null != dimInfo && !dimInfo.isEmpty()) {
      keyText.set(dimInfo.trim().toUpperCase());
      valueText.set(dimLine);
      context.write(keyText, valueText);
    }
  } catch (Exception e) {
    String reason = null == e.getMessage() ? ("error:" + filePath) : e.getMessage();
    multipleOutputs.write(new Text(reason), new Text(value.toString()), "_error/part");
    e.printStackTrace();
  }
}
/**
 * Emits one outbound and one inbound record per flight.
 *
 * <p>Writes {@code (origin, "Out,<destination>")} followed by
 * {@code (destination, "In,<origin>")}.
 *
 * @param key input key (unused)
 * @param value raw flight record, parsed by {@code FlightDetails}
 * @param context mapper context that receives the output
 * @throws IOException if writing to the context fails
 * @throws InterruptedException if the task is interrupted while writing
 */
public void map(Object key, Text value, Context context)
    throws IOException, InterruptedException {
  FlightDetails details = new FlightDetails(value.toString());
  Text from = details.getOrigin();
  Text to = details.getDest();

  Text outbound = new Text("Out," + to.toString());
  context.write(from, outbound);

  Text inbound = new Text("In," + from.toString());
  context.write(to, inbound);
}
/**
 * Emits the third comma-separated field twice: once under {@code "u," + field 0} and once under
 * {@code "v," + field 1}.
 *
 * @param key input key (unused)
 * @param value comma-separated record with at least three fields
 * @param context mapper context that receives the output
 * @throws IOException if writing to the context fails
 * @throws InterruptedException if the task is interrupted while writing
 */
@Override
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  String[] items = value.toString().split(",");
  String[] prefixes = {"u,", "v,"};
  // prefixes[side] pairs with items[side]: "u," with field 0, "v," with field 1.
  for (int side = 0; side < prefixes.length; side++) {
    outKey.set(prefixes[side] + items[side]);
    outText.set(items[2]);
    context.write(outKey, outText);
  }
}
public void reduce(GFKey key, Iterable<PEIWritable> values, Context context) throws IOException, InterruptedException { // For a particular key ... process all records and output what we would have expected in this // concKnownKeys test // Note that we either // 1. do a single create // 2. create + update // 3. create + destroy // look at all ops ... and output either // 1. create // 2. create (with value from update) // 3. do nothing (overall result is destroy, so do not create the entry in the gemfire // validation region String keyStr = (String) key.getKey(); ValueHolder updateValue = null; ValueHolder createValue = null; boolean destroyed = false; System.out.println("KnownKeysMRv2.reduce() invoked with " + keyStr); for (PEIWritable value : values) { PersistedEventImpl event = value.getEvent(); Operation op = event.getOperation(); ValueHolder vh = null; if (op.isDestroy()) { destroyed = true; } else { try { vh = (ValueHolder) event.getDeserializedValue(); } catch (ClassNotFoundException e) { System.out.println( "KnownKeysMRv2.map() caught " + e + " : " + TestHelper.getStackTrace(e)); } if (op.isUpdate()) { updateValue = vh; } else { createValue = vh; } } System.out.println( "KnownKeysMRv2.reduce() record: " + op.toString() + ": key = " + keyStr + " and op " + op.toString()); } if (!destroyed) { if (updateValue != null) { context.write(key.getKey(), updateValue); } else { context.write(key.getKey(), createValue); } } }
/**
 * Orients an undirected edge from its lower-degree endpoint to its higher-degree endpoint.
 *
 * <p>When the first vertex's degree is strictly lower, {@code (first, second)} is written;
 * otherwise (including ties) {@code (second, first)} is written.
 *
 * @param edge the edge together with both endpoint degrees
 * @param value input value (unused)
 * @param ctx mapper context that receives the output
 * @throws IOException if writing to the context fails
 * @throws InterruptedException if the task is interrupted while writing
 */
@Override
protected void map(UndirectedEdgeWithDegrees edge, Object value, Context ctx)
    throws IOException, InterruptedException {
  VertexWithDegree endpointA = edge.getFirstVertexWithDegree();
  VertexWithDegree endpointB = edge.getSecondVertexWithDegree();

  boolean firstHasLowerDegree = endpointA.getDegree() < endpointB.getDegree();
  if (!firstHasLowerDegree) {
    ctx.write(endpointB.getVertex(), endpointA.getVertex());
    return;
  }
  ctx.write(endpointA.getVertex(), endpointB.getVertex());
}
/**
 * Tags tab-separated records from the two inputs so they can be joined downstream.
 *
 * <p>When {@code path} contains "hs_log", field 3 is the key and the value is "1", a tab, and
 * field 0. When it contains "1_history_mac", field 0 is the key and the value is "2". Records
 * from any other path are ignored.
 *
 * @param key input key (unused)
 * @param value the tab-separated record
 * @param context mapper context that receives the output
 * @throws IOException if writing to the context fails
 * @throws InterruptedException if the task is interrupted while writing
 */
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  String[] columns = value.toString().split("\t");
  String source = path.toString();

  if (source.contains("hs_log")) {
    context.write(new Text(columns[3]), new Text("1" + "\t" + columns[0]));
    return;
  }
  if (source.contains("1_history_mac")) {
    context.write(new Text(columns[0]), new Text("2"));
  }
}
/**
 * Extracts a page title and its distinct outgoing links from one wiki page record.
 *
 * <p>The title is the text between {@code <title>} and {@code </title>} with spaces replaced by
 * underscores; records without a title are skipped. Links are matched in the body of the
 * {@code <text ...>} element with {@code patterLinks}, normalised by {@code linksCatcher},
 * trimmed, de-duplicated in first-seen order, stripped of self-links, and joined with tabs.
 * When the text element is missing or unterminated, the title is written with an empty link
 * list.
 *
 * @param key input key (unused)
 * @param value one page record
 * @param context mapper context that receives the output
 * @throws IOException if decoding or writing fails
 * @throws InterruptedException if the task is interrupted while writing
 */
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  int start = value.find("<title>");
  // Check before using start as a search offset; it was previously passed to find() while it
  // could still be -1.
  if (start == -1) {
    return;
  }
  int end = value.find("</title>", start);
  if (end == -1) {
    return;
  }
  start += 7; // length of "<title>"
  String title = Text.decode(value.getBytes(), start, end - start).replace(' ', '_');
  Text titleKey = new Text(title);

  // Locate the body: after the '>' closing the <text ...> tag, up to the next </text>.
  // Searching for </text> from the tag onwards keeps the decoded length non-negative.
  int textTag = value.find("<text");
  int tagClose = (textTag == -1) ? -1 : value.find(">", textTag);
  int bodyEnd = (tagClose == -1) ? -1 : value.find("</text>", tagClose);
  if (bodyEnd == -1) {
    context.write(titleKey, new Text(""));
    return;
  }
  int bodyStart = tagClose + 1;
  String text = Text.decode(value.getBytes(), bodyStart, bodyEnd - bodyStart);

  // A LinkedHashSet de-duplicates while preserving first-seen order.
  LinkedHashSet<String> uniqueLinks = new LinkedHashSet<String>();
  Matcher wikiLinksMatcher = patterLinks.matcher(text);
  while (wikiLinksMatcher.find()) {
    String outLinkPage = linksCatcher(wikiLinksMatcher.group());
    if (outLinkPage != null && !outLinkPage.isEmpty()) {
      uniqueLinks.add(outLinkPage.trim());
    }
  }

  StringBuilder outLinks = new StringBuilder();
  boolean first = true;
  for (String link : uniqueLinks) {
    if (link.equals(title)) {
      continue; // drop self-links
    }
    if (!first) {
      outLinks.append("\t");
    }
    outLinks.append(link);
    first = false;
  }
  context.write(titleKey, new Text(outLinks.toString()));
}
@Override public void reduce(NullWritable key, Iterable<TextArrayWritable> values, Context context) throws IOException, InterruptedException { Integer sum, mean, max, min, var; // TODO for (TextArrayWritable val : values) { Text[] pair = (Text[]) val.toArray(); String title = pair[0].toString(); Integer count = Integer.parseInt(pair[1].toString()); countToTitleMap.add(new Pair<Integer, String>(count, title)); if (countToTitleMap.size() > this.N) { countToTitleMap.remove(countToTitleMap.first()); } } // calculate the statistics Integer[] counts = new Integer[this.N]; int j = 0; for (Pair<Integer, String> pair : countToTitleMap) { counts[j++] = pair.first; } sum = 0; min = Integer.MAX_VALUE; max = Integer.MIN_VALUE; for (int i = 0; i < this.N; i++) { sum += counts[i]; min = Math.min(min, counts[i]); max = Math.max(max, counts[i]); } mean = sum / this.N; var = 0; for (int i = 0; i < this.N; i++) { var += ((counts[i] - mean) * (counts[i] - mean)); } var /= this.N; context.write(new Text("Mean"), new IntWritable(mean)); context.write(new Text("Sum"), new IntWritable(sum)); context.write(new Text("Min"), new IntWritable(min)); context.write(new Text("Max"), new IntWritable(max)); context.write(new Text("Var"), new IntWritable(var)); }
@Override protected void reduce(K row, Iterable<Put> vals, Context context) throws IOException, InterruptedException { // Using HeapSize to create an upper bound on the memory size of // the puts and flush some portion of the content while looping. This // flush could result in multiple Puts for a single rowkey. That is // acceptable because Combiner is run as an optimization and it's not // critical that all Puts are grouped perfectly. long threshold = context.getConfiguration().getLong("putcombiner.row.threshold", 1L * (1 << 30)); int cnt = 0; long curSize = 0; Put put = null; Map<byte[], List<Cell>> familyMap = null; for (Put p : vals) { cnt++; if (put == null) { put = p; familyMap = put.getFamilyCellMap(); } else { for (Entry<byte[], List<Cell>> entry : p.getFamilyCellMap().entrySet()) { List<Cell> cells = familyMap.get(entry.getKey()); List<Cell> kvs = (cells != null) ? (List<Cell>) cells : null; for (Cell cell : entry.getValue()) { KeyValue kv = KeyValueUtil.ensureKeyValue(cell); curSize += kv.heapSize(); if (kvs != null) { kvs.add(kv); } } if (cells == null) { familyMap.put(entry.getKey(), entry.getValue()); } } if (cnt % 10 == 0) context.setStatus("Combine " + cnt); if (curSize > threshold) { LOG.info(String.format("Combined %d Put(s) into %d.", cnt, 1)); context.write(row, put); put = null; cnt = 0; } } } if (put != null) { LOG.info(String.format("Combined %d Put(s) into %d.", cnt, 1)); context.write(row, put); } }
/** * Collect all column values for the same Row. RowKey may be different if indexes are involved, so * it writes a separate record for each unique RowKey * * @param context Current mapper context * @param tableName Table index in tableNames list * @param lkv List of KV values that will be combined in a single ImmutableBytesWritable * @throws IOException * @throws InterruptedException */ private void writeAggregatedRow(Context context, String tableName, List<KeyValue> lkv) throws IOException, InterruptedException { ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); DataOutputStream outputStream = new DataOutputStream(bos); ImmutableBytesWritable outputKey = null; if (!lkv.isEmpty()) { for (KeyValue cell : lkv) { if (outputKey == null || Bytes.compareTo( outputKey.get(), outputKey.getOffset(), outputKey.getLength(), cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()) != 0) { // This a the first RowKey or a different from previous if (outputKey != null) { // It's a different RowKey, so we need to write it ImmutableBytesWritable aggregatedArray = new ImmutableBytesWritable(bos.toByteArray()); outputStream.close(); context.write(new TableRowkeyPair(tableName, outputKey), aggregatedArray); } outputKey = new ImmutableBytesWritable( cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()); bos = new ByteArrayOutputStream(1024); outputStream = new DataOutputStream(bos); } /* The order of aggregation: type, index of column, length of value, value itself */ int i = findIndex(cell); if (i == -1) { // That may happen when we load only local indexes. 
Since KV pairs for both // table and local index are going to the same physical table at that point // we skip those KVs that are not belongs to loca index continue; } outputStream.writeByte(cell.getTypeByte()); WritableUtils.writeVInt(outputStream, i); WritableUtils.writeVInt(outputStream, cell.getValueLength()); outputStream.write(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()); } ImmutableBytesWritable aggregatedArray = new ImmutableBytesWritable(bos.toByteArray()); outputStream.close(); context.write(new TableRowkeyPair(tableName, outputKey), aggregatedArray); } }
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String strId = ""; String strBody = ""; // Parse the xml and read data (page id and article body) // Using XOM library Builder builder = new Builder(); try { Document doc = builder.build(value.toString(), null); Nodes nodeId = doc.query("//eecs485_article_id"); strId = nodeId.get(0).getChild(0).getValue(); Nodes nodeBody = doc.query("//eecs485_article_body"); strBody = nodeBody.get(0).getChild(0).getValue(); } catch (ParsingException ex) { System.out.println("Not well-formed."); System.out.println(ex.getMessage()); } catch (IOException ex) { System.out.println("io exception"); } // Tokenize document body Pattern pattern = Pattern.compile("\\w+"); Matcher matcher = pattern.matcher(strBody); while (matcher.find()) { // Write the parsed token // key = term, docid value = 1 context.write(new Text(matcher.group() + "," + strId), one); } }