@Override
protected void map(IntWritable column, VectorWritable occurrenceVector, Context ctx)
    throws IOException, InterruptedException {
  Vector.Element[] occurrences = Vectors.toArray(occurrenceVector);
  Arrays.sort(occurrences, BY_INDEX);

  int cooccurrences = 0;
  int prunedCooccurrences = 0;
  for (int n = 0; n < occurrences.length; n++) {
    Vector.Element occurrenceA = occurrences[n];
    Vector dots = new RandomAccessSparseVector(Integer.MAX_VALUE);
    for (int m = n; m < occurrences.length; m++) {
      Vector.Element occurrenceB = occurrences[m];
      if (threshold == NO_THRESHOLD || consider(occurrenceA, occurrenceB)) {
        dots.setQuick(occurrenceB.index(),
            similarity.aggregate(occurrenceA.get(), occurrenceB.get()));
        cooccurrences++;
      } else {
        prunedCooccurrences++;
      }
    }
    ctx.write(new IntWritable(occurrenceA.index()), new VectorWritable(dots));
  }
  ctx.getCounter(Counters.COOCCURRENCES).increment(cooccurrences);
  ctx.getCounter(Counters.PRUNED_COOCCURRENCES).increment(prunedCooccurrences);
}
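The BY_INDEX comparator and the consider(...) pruning test are referenced but not shown above. A minimal sketch of what they could look like, assuming a pair is kept only when its aggregated contribution reaches the configured threshold; the real Mahout RowSimilarityJob derives a tighter bound from precomputed norms and per-row maxima, so this is illustrative only:

// Illustrative only -- not the actual Mahout implementation.
private static final Comparator<Vector.Element> BY_INDEX =
    new Comparator<Vector.Element>() {
      @Override
      public int compare(Vector.Element a, Vector.Element b) {
        return Integer.compare(a.index(), b.index());
      }
    };

// Hypothetical pruning test: keep the pair only if its aggregated value
// reaches the threshold (assumes the `similarity` and `threshold` fields above).
private boolean consider(Vector.Element occurrenceA, Vector.Element occurrenceB) {
  return similarity.aggregate(occurrenceA.get(), occurrenceB.get()) >= threshold;
}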
/** Called for every record in the data. */
@Override
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  // Skip enormous documents: they cause memory problems and the regex cannot handle them.
  if (value.getLength() > MAX_DOC_SIZE_IN_BYTES) {
    context.getCounter(ProcessingTime.SKIPPED).increment(1);
    return;
  }

  // Parse the document and measure the time taken.
  t1 = System.nanoTime();
  Spinn3rDocument d = new Spinn3rDocument(value.toString());
  t2 = System.nanoTime();
  context.getCounter(ProcessingTime.PARSING).increment(t2 - t1);

  // Keep only documents that satisfy the search conditions.
  t1 = System.nanoTime();
  t = filter.documentSatisfies(d);
  t2 = System.nanoTime();
  context.getCounter(ProcessingTime.FILTERING).increment(t2 - t1);

  // Output the document if it passed the filter.
  if (t) {
    if (cmdMap.hasOption("formatF5")) {
      context.write(new Text(d.toStringF5()), NullWritable.get());
    } else {
      context.write(new Text(d.toString()), NullWritable.get());
    }
  }
}
/** Fixes a stripe. */
@Override
public void map(LongWritable key, Text fileText, Context context)
    throws IOException, InterruptedException {
  BlockFixerHelper helper = new BlockFixerHelper(context.getConfiguration());
  String fileStr = fileText.toString();
  LOG.info("fixing " + fileStr);
  Path file = new Path(fileStr);
  try {
    boolean fixed = helper.fixFile(file, context);
    if (fixed) {
      context.getCounter(Counter.FILES_SUCCEEDED).increment(1L);
    } else {
      context.getCounter(Counter.FILES_NOACTION).increment(1L);
    }
  } catch (Exception e) {
    LOG.error(StringUtils.stringifyException(e));
    // Report the file as failed.
    context.getCounter(Counter.FILES_FAILED).increment(1L);
    String outkey = fileStr;
    String outval = "failed";
    context.write(new Text(outkey), new Text(outval));
  }
  context.progress();
}
@SuppressWarnings("deprecation") @Override protected void cleanup(Context context) throws IOException, InterruptedException { super.cleanup(context); List<Cluster> newKMeansClusters = new ArrayList<Cluster>(); List<Cluster> newCanopyClusters = new ArrayList<Cluster>(); for (Cluster kMeansCluster : _clusters.keySet()) { Cluster canopyCluster = _kMeansToCanopyMap.get(kMeansCluster); // Set a new Cluster center Vector center = new Vector(); center.setElements(new double[kMeansCluster.getCenterVector().getElements().length]); List<Vector> vectors = new ArrayList<Vector>(); for (Vector currentVector : _clusters.get(kMeansCluster)) { vectors.add(new Vector(currentVector)); // Sums the vectors to a new vector in order to find the one that is the closest to all // others, it will be our new cluster center. for (int i = 0; i < currentVector.getElements().length; i++) center.getElements()[i] += currentVector.getElements()[i]; } // Divides the vector's elements in order to find its real location (it will be a fictive // vector) for (int i = 0; i < center.getElements().length; i++) center.getElements()[i] = center.getElements()[i] / vectors.size(); Cluster newKMeansCluster = new Cluster(center); canopyCluster.setIsCovered(newKMeansCluster.isConvergedWithOtherCluster(kMeansCluster)); newKMeansClusters.add(newKMeansCluster); newCanopyClusters.add(canopyCluster); // Adding the vectors to the new cluster center for (Vector vector : vectors) { context.write(newKMeansCluster, vector); } } Configuration conf = context.getConfiguration(); Path outPath = new Path(conf.get("centers.path")); FileSystem fs = FileSystem.get(conf); if (fs.exists(outPath)) fs.delete(outPath, true); SequenceFile.Writer writer = SequenceFile.createWriter( fs, context.getConfiguration(), outPath, Cluster.class, Cluster.class); context.getCounter(Counter.CONVERGED).setValue(0); for (int i = 0; i < newKMeansClusters.size(); i++) { writer.append(newCanopyClusters.get(i), newKMeansClusters.get(i)); if (newCanopyClusters.get(i).getIsCovered()) context.getCounter(Counter.CONVERGED).increment(1); } writer.close(); }
@Override
protected void map(NullWritable key, PhoenixIndexDBWritable record, Context context)
    throws IOException, InterruptedException {
  context.getCounter(PhoenixJobCounters.INPUT_RECORDS).increment(1);
  try {
    final List<Object> values = record.getValues();
    indxWritable.setValues(values);
    indxWritable.write(this.pStatement);
    this.pStatement.execute();

    final PhoenixConnection pconn = connection.unwrap(PhoenixConnection.class);
    MutationState currentMutationState = pconn.getMutationState();
    if (mutationState == null) {
      mutationState = currentMutationState;
      return;
    }
    // Keep accumulating mutations until the batch size is reached.
    mutationState.join(currentMutationState);
    // Write the mutation batch.
    if (context.getCounter(PhoenixJobCounters.INPUT_RECORDS).getValue() % batchSize == 0) {
      writeBatch(mutationState, context);
      mutationState = null;
    }
    // Make sure progress is reported to the Application Master.
    context.progress();
  } catch (SQLException e) {
    LOG.error("Error {} while reading/writing a record ", e.getMessage());
    context.getCounter(PhoenixJobCounters.FAILED_RECORDS).increment(1);
    throw new RuntimeException(e);
  }
}
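writeBatch(...) is not part of the snippet. A minimal sketch of what such a flush could do, assuming it commits the accumulated MutationState and bumps an output counter; the actual Phoenix tool writes the mutations through a RecordWriter and may differ in detail:

// Hypothetical sketch of the batch flush assumed above -- not the actual Phoenix code.
private void writeBatch(MutationState mutationState, Context context)
    throws IOException, SQLException, InterruptedException {
  // Commit the mutations accumulated so far through the connection.
  connection.commit();
  context.getCounter(PhoenixJobCounters.OUTPUT_RECORDS)
      .increment(mutationState.getUpdateCount());
}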
private boolean line(String record, Context context)
    throws IOException, InterruptedException {
  context.getCounter("higo", "totalrecord").increment(1);
  String[] values = record.split(split, -1);
  if (!this.validate(values, record, context)) {
    return false;
  }
  HashMap<String, String> res = new HashMap<String, String>(fields.length);
  for (int i = 0; i < fields.length; i++) {
    String fieldName = fields[i];
    String string = (i < values.length) ? values[i] : null;
    String val = parseDefault(string);
    if (this.isDate[i]) {
      res.put(fieldName, TdateFormat.ensureTdate(val, fieldName));
    } else if (val != null) {
      res.put(fieldName, val);
    } else if (this.isString[i]) {
      res.put(fieldName, "_");
    }
  }
  if (usedthedate) {
    if (thedate != null) {
      res.put("thedate", thedate); // Taken from the file's path.
    }
    res.put("thedate",
        String.valueOf(res.get("thedate")).replaceAll("-", "").replaceAll("_", ""));
    if (res.get("thedate").length() != 8) {
      if (debuglines < 100) {
        debuglines++;
        System.out.println("missing thedate values: " + record.replaceAll(split, "#"));
      }
      context.getCounter("higo", "skiprecords").increment(1);
    }
    context.getCounter("higo", "dayrecord_" + String.valueOf(res.get("thedate"))).increment(1);
    CRC32 crc32 = new CRC32();
    crc32.update(java.util.UUID.randomUUID().toString().getBytes());
    res.put("higo_uuid", Long.toString(crc32.getValue()));
  }
  if (printlines < 10) {
    printlines++;
    System.out.println("res: " + res.toString() + " arrays," + Arrays.toString(values));
  }
  this.doclist.add(res);
  if (this.doclist.size() >= PER) {
    context.write(new LongWritable(this.Index++), new DocumentList(this.doclist));
    this.doclist.clear();
  }
  return true;
}
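parseDefault is referenced but not defined here. A plausible sketch, assuming its job is to normalize empty or placeholder field values to null (the real project's defaulting rules may differ):

// Hypothetical normalization helper assumed by line() above.
private static String parseDefault(String s) {
  if (s == null) {
    return null;
  }
  String trimmed = s.trim();
  // Treat empty strings and common null placeholders as missing values.
  return (trimmed.isEmpty() || "null".equalsIgnoreCase(trimmed) || "\\N".equals(trimmed))
      ? null
      : trimmed;
}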
public void reduce(KeyWritable key, Iterable<BytesWritable> values, Context ctx)
    throws IOException, InterruptedException {
  for (BytesWritable value : values) {
    long startTime = System.currentTimeMillis();
    ctx.write(key, value);
    long elapsedTime = System.currentTimeMillis() - startTime;
    ctx.getCounter(Counters.ELAPSED_TIME_MS).increment(elapsedTime);
    ctx.getCounter(Counters.NUM_CELLS).increment(1);
  }
}
@SuppressWarnings("deprecation") @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { if (conn == null) { throw new RuntimeException("Connection not initialized."); } try { RECORD record = null; try { record = getLineParser().parse(value.toString()); } catch (IOException e) { context.getCounter(COUNTER_GROUP_NAME, "Parser errors").increment(1L); return; } if (record == null) { context.getCounter(COUNTER_GROUP_NAME, "Empty records").increment(1L); return; } upsertExecutor.execute(ImmutableList.<RECORD>of(record)); Map<Integer, List<KeyValue>> map = new HashMap<>(); Iterator<Pair<byte[], List<KeyValue>>> uncommittedDataIterator = PhoenixRuntime.getUncommittedDataIterator(conn, true); while (uncommittedDataIterator.hasNext()) { Pair<byte[], List<KeyValue>> kvPair = uncommittedDataIterator.next(); List<KeyValue> keyValueList = kvPair.getSecond(); keyValueList = preUpdateProcessor.preUpsert(kvPair.getFirst(), keyValueList); byte[] first = kvPair.getFirst(); // Create a list of KV for each table for (int i = 0; i < tableNames.size(); i++) { if (Bytes.compareTo(Bytes.toBytes(tableNames.get(i)), first) == 0) { if (!map.containsKey(i)) { map.put(i, new ArrayList<KeyValue>()); } List<KeyValue> list = map.get(i); for (KeyValue kv : keyValueList) { list.add(kv); } break; } } } for (Map.Entry<Integer, List<KeyValue>> rowEntry : map.entrySet()) { int tableIndex = rowEntry.getKey(); List<KeyValue> lkv = rowEntry.getValue(); // All KV values combines to a single byte array writeAggregatedRow(context, tableNames.get(tableIndex), lkv); } conn.rollback(); } catch (Exception e) { throw new RuntimeException(e); } }
/**
 * Maps the data.
 *
 * @param row The current table row key.
 * @param values The columns.
 * @param context The current context.
 * @throws IOException When something is broken with the data.
 * @see org.apache.hadoop.mapreduce.Mapper#map(KEYIN, VALUEIN,
 *     org.apache.hadoop.mapreduce.Mapper.Context)
 */
@Override
public void map(ImmutableBytesWritable row, Result values, Context context) throws IOException {
  String currentFamilyName = null;
  String currentQualifierName = null;
  String currentRowKey = null;
  Configuration config = context.getConfiguration();
  String separator = config.get("ReportSeparator", ":");
  try {
    if (values != null) {
      context.getCounter(Counters.ROWS).increment(1);
      context.write(new Text("Total ROWS"), new IntWritable(1));
    }
    for (KeyValue value : values.list()) {
      currentRowKey = Bytes.toStringBinary(value.getRow());
      String thisRowFamilyName = Bytes.toStringBinary(value.getFamily());
      if (thisRowFamilyName != null && !thisRowFamilyName.equals(currentFamilyName)) {
        currentFamilyName = thisRowFamilyName;
        context.getCounter("CF", thisRowFamilyName).increment(1);
        context.write(new Text("Total Families Across all Rows"), new IntWritable(1));
        context.write(new Text(thisRowFamilyName), new IntWritable(1));
      }
      String thisRowQualifierName =
          thisRowFamilyName + separator + Bytes.toStringBinary(value.getQualifier());
      if (!thisRowQualifierName.equals(currentQualifierName)) {
        currentQualifierName = thisRowQualifierName;
        context.getCounter("CFQL", thisRowQualifierName).increment(1);
        context.write(new Text("Total Qualifiers across all Rows"), new IntWritable(1));
        context.write(new Text(thisRowQualifierName), new IntWritable(1));
        // Initialize versions.
        context
            .getCounter("QL_VERSIONS", currentRowKey + separator + thisRowQualifierName)
            .increment(1);
        context.write(
            new Text(currentRowKey + separator + thisRowQualifierName + "_Versions"),
            new IntWritable(1));
      } else {
        // Increment versions.
        currentQualifierName = thisRowQualifierName;
        context
            .getCounter("QL_VERSIONS", currentRowKey + separator + thisRowQualifierName)
            .increment(1);
        context.write(
            new Text(currentRowKey + separator + thisRowQualifierName + "_Versions"),
            new IntWritable(1));
      }
    }
  } catch (InterruptedException e) {
    e.printStackTrace();
  }
}
protected void baseQualityScoreRecalibration(
    Context context,
    String region,
    ChromosomeRange r,
    PreprocessingTools tools,
    GATKTools gatk,
    String input,
    String output)
    throws InterruptedException, IOException, URISyntaxException {
  String table = tmpFileBase + ".table";

  // Get the SNP database(s).
  String[] snpslocal = HalvadeFileUtils.downloadSites(context, taskId);
  String[] newKnownSites = new String[snpslocal.length];
  for (int i = 0; i < snpslocal.length; i++) {
    if (filterDBsnp) {
      newKnownSites[i] = tools.filterDBSnps(
          ref.replaceAll("fasta", "dict"), snpslocal[i], r, tmpFileBase, threads);
    } else {
      newKnownSites[i] = snpslocal[i];
    }
    if (newKnownSites[i].endsWith(".gz")) {
      newKnownSites[i] = HalvadeFileUtils.Unzip(newKnownSites[i]);
    }
  }

  // The BAM index should be created automatically by GATK v3.0 or higher:
  // Logger.DEBUG("build bam index");
  // context.setStatus("build bam index");
  // tools.runBuildBamIndex(tmpFile1);

  Logger.DEBUG("run baseRecalibrator");
  context.setStatus("run baseRecalibrator");
  context.getCounter(HalvadeCounters.TOOLS_GATK).increment(1);
  gatk.runBaseRecalibrator(input, table, ref, newKnownSites, region);

  Logger.DEBUG("run printReads");
  context.setStatus("run printReads");
  context.getCounter(HalvadeCounters.TOOLS_GATK).increment(1);
  gatk.runPrintReads(input, output, ref, table, region);

  HalvadeFileUtils.removeLocalFile(keep, input, context, HalvadeCounters.FOUT_GATK_TMP);
  // Escape the dot so that replaceAll only rewrites the ".bam" extension.
  HalvadeFileUtils.removeLocalFile(keep, input.replaceAll("\\.bam$", ".bai"));
  HalvadeFileUtils.removeLocalFile(keep, table, context, HalvadeCounters.FOUT_GATK_TMP);
  for (int i = 0; i < newKnownSites.length; i++) {
    if (filterDBsnp) {
      HalvadeFileUtils.removeLocalFile(
          keep, newKnownSites[i], context, HalvadeCounters.FOUT_GATK_TMP);
    }
  }
}
protected void RnaVariantCalling(
    Context context, String region, GATKTools gatk, String input, String output)
    throws InterruptedException {
  // Choose between UnifiedGenotyper and HaplotypeCaller.
  Logger.DEBUG("run variantCaller");
  context.setStatus("run variantCaller");
  context.getCounter(HalvadeCounters.TOOLS_GATK).increment(1);
  gatk.runHaplotypeCaller(input, output, true, scc, sec, ref, null, region);
  context.setStatus("cleanup");
  context.getCounter(HalvadeCounters.OUT_VCF_FILES).increment(1);
  HalvadeFileUtils.removeLocalFile(keep, input, context, HalvadeCounters.FOUT_GATK_TMP);
  // Escape the dot so that replaceAll only rewrites the ".bam" extension.
  HalvadeFileUtils.removeLocalFile(keep, input.replaceAll("\\.bam$", ".bai"));
}
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  String val = value.toString();
  String label = "";
  String input_label = "";
  if (val.contains("Processed")) {
    context.getCounter(Driver.Progress.Completion).increment(1);
  }
  // Split once instead of re-splitting the line on every access.
  String[] tokens = val.split(" ");
  if (val.contains("Training")) {
    if (!tokens[1].contains(":")) {
      label = tokens[1]; // The first token after the id is the label.
      for (int i = 2; i < tokens.length; i++) {
        input_label = input_label + " " + tokens[i];
      }
      input_label = tokens[0] + "#" + label + " " + input_label;
      context.write(
          new Text(tokens[0].substring(tokens[0].lastIndexOf("g") + 1) + label),
          new Text(input_label));
    }
  } else {
    if (!tokens[2].contains(":")) {
      label = tokens[2];
      for (int i = 1; i < tokens.length; i++) {
        input_label = input_label + tokens[i];
      }
      input_label = tokens[0] + "#" + label + " " + input_label;
      context.write(
          new Text(tokens[0].substring(tokens[0].lastIndexOf("t") + 1) + label),
          new Text(input_label));
    }
  }
} // end of map
@Override
protected void map(Object key, Object value, Context context)
    throws IOException, InterruptedException {
  try {
    final InputRow inputRow;
    try {
      inputRow = parseInputRow(value, parser);
    } catch (Exception e) {
      if (config.isIgnoreInvalidRows()) {
        log.debug(e, "Ignoring invalid row [%s] due to parsing error", value.toString());
        context
            .getCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER)
            .increment(1);
        return; // We're ignoring this invalid row.
      } else {
        throw e;
      }
    }
    if (!granularitySpec.bucketIntervals().isPresent()
        || granularitySpec
            .bucketInterval(new DateTime(inputRow.getTimestampFromEpoch()))
            .isPresent()) {
      innerMap(inputRow, value, context);
    }
  } catch (RuntimeException e) {
    throw new RE(e, "Failure on row[%s]", value);
  }
}
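parseInputRow(...) is assumed here; a minimal sketch in which it simply delegates to the configured parser on the value's string form. Druid's actual mapper dispatches on the concrete value type, so treat this as a simplification:

// Hypothetical simplification of the parsing hook used above; assumes the
// configured parser is a StringInputRowParser and the value has a useful toString().
private static InputRow parseInputRow(Object value, InputRowParser parser) {
  // Real code would check the runtime types of `value` and `parser` first.
  return ((StringInputRowParser) parser).parse(value.toString());
}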
public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  HashMap<String, Integer> wordCounts = new HashMap<String, Integer>();
  String article = value.toString();
  String articleName = article.substring(0, article.indexOf("\t"));
  String articleContent = article.substring(article.indexOf("\t") + 1);

  Iterable<String> words = parseWords(context, articleContent);
  int wordCount = 0;
  for (String word : words) {
    if (!topWords.contains(word)) {
      addCount(wordCounts, word);
    }
    wordCount++;
  }

  for (java.util.Map.Entry<String, Integer> wordAndCount : wordCounts.entrySet()) {
    double frequency = ((double) wordAndCount.getValue()) / wordCount;
    String word = wordAndCount.getKey();
    textWritable.set(word);
    articleNameAndFrequency.set(articleName, frequency);
    context.write(textWritable, articleNameAndFrequency);
  }
  context.getCounter(Counters.ARTICLE_COUNTER).increment(1);
}
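addCount is not shown; a minimal sketch under the natural assumption that it increments a word's tally in the map:

// Hypothetical counting helper assumed by the mapper above.
private static void addCount(HashMap<String, Integer> wordCounts, String word) {
  Integer current = wordCounts.get(word);
  wordCounts.put(word, current == null ? 1 : current + 1);
}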
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
  try {
    // Write the last, final mutation batch.
    if (mutationState != null) {
      writeBatch(mutationState, context);
    }
    // Write a dummy key-value pair as map output so that exactly one
    // record is committed to the reducer.
    context.write(
        new ImmutableBytesWritable(UUID.randomUUID().toString().getBytes()), new IntWritable(0));
    super.cleanup(context);
  } catch (SQLException e) {
    LOG.error("Error {} while reading/writing a record ", e.getMessage());
    context.getCounter(PhoenixJobCounters.FAILED_RECORDS).increment(1);
    throw new RuntimeException(e);
  } finally {
    if (connection != null) {
      try {
        connection.close();
      } catch (SQLException e) {
        LOG.error(
            "Error {} while closing connection in the PhoenixIndexMapper class ",
            e.getMessage());
      }
    }
    if (writer != null) {
      writer.close();
    }
  }
}
@Override
public void map(LongWritable key, Aquaint2Document doc, Context context)
    throws IOException, InterruptedException {
  context.getCounter(Count.DOCS).increment(1);
  docid.set(doc.getDocid());
  context.write(docid, one);
}
@Override
protected void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  String valueStr = value.toString();
  String[] arr = valueStr.split("\t", -1);
  if (arr.length == 2) {
    uid = arr[0];
    keyword = arr[1];
    // Clean the data: drop lines whose keyword contains an invalid character.
    for (String ic : invalid) {
      if (keyword.contains(ic)) {
        System.out.println(ic + "||" + keyword);
        return;
      }
    }
    byte[] rowkey = uid.getBytes();
    Put put = new Put(rowkey);
    put.add(family, qualifier, Bytes.toBytes(keyword));
    context.write(new ImmutableBytesWritable(rowkey), put);
    context.getCounter("hbase-import", "keyword-line").increment(1);
  }
}
@Override
protected void map(IntWritable row, VectorWritable vectorWritable, Context ctx)
    throws IOException, InterruptedException {
  Vector rowVector = similarity.normalize(vectorWritable.get());

  int numNonZeroEntries = 0;
  // Note: Double.MIN_VALUE is the smallest positive double, so this running
  // maximum is only correct for the non-negative entries this job works with.
  double maxValue = Double.MIN_VALUE;

  Iterator<Vector.Element> nonZeroElements = rowVector.iterateNonZero();
  while (nonZeroElements.hasNext()) {
    Vector.Element element = nonZeroElements.next();
    RandomAccessSparseVector partialColumnVector =
        new RandomAccessSparseVector(Integer.MAX_VALUE);
    partialColumnVector.setQuick(row.get(), element.get());
    // Emitting (columnIndex, partialColumnVector) effectively transposes the matrix.
    ctx.write(new IntWritable(element.index()), new VectorWritable(partialColumnVector));

    numNonZeroEntries++;
    if (maxValue < element.get()) {
      maxValue = element.get();
    }
  }

  if (threshold != NO_THRESHOLD) {
    nonZeroEntries.setQuick(row.get(), numNonZeroEntries);
    maxValues.setQuick(row.get(), maxValue);
  }
  norms.setQuick(row.get(), similarity.norm(rowVector));

  ctx.getCounter(Counters.ROWS).increment(1);
}
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  long start = ((FileSplit) context.getInputSplit()).getStart();
  // The first message was missing its {} placeholder, so the split was never logged.
  logger.info("Input Split : {}", context.getInputSplit().toString());
  logger.info("Input Split Start : {}", start);
  counter = context.getCounter(getClass().getName(), String.valueOf(start));
}
@Override
protected void map(WritableComparable<?> key, Text point, Context context)
    throws IOException, InterruptedException {
  Cluster nearestCluster = null;
  double nearestDistance = Double.MAX_VALUE;
  Vector pointv = parse.parseVector(point.toString());
  if (pointv == null) {
    return;
  }
  pointv.setNumPoints(1);
  for (Cluster cluster : clusters) {
    Vector clusterCenter = cluster.getCenter();
    boolean isDeny = pointv.Deny(clusterCenter);
    if (isDeny) {
      continue;
    }
    double distance = clusterCenter.distance(pointv);
    context.getCounter("Clustering", "similar").increment(1);
    if (distance <= nearestDistance || nearestCluster == null) {
      nearestCluster = cluster;
      nearestDistance = distance;
    }
  }
  if (nearestCluster != null) {
    context.write(new Text(String.valueOf(nearestCluster.getId())), pointv);
  }
}
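The distance(...) call above (spelled "distiance" in the original) goes through the project's own Vector API; for reference, an illustrative Euclidean implementation on plain arrays:

// Illustrative Euclidean distance between two equal-length points.
static double euclideanDistance(double[] a, double[] b) {
  double sum = 0.0;
  for (int i = 0; i < a.length; i++) {
    double d = a[i] - b[i];
    sum += d * d;
  }
  return Math.sqrt(sum);
}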
@Override
public void reduce(Text key, Iterable<IntWritable> values, Context context)
    throws IOException, InterruptedException {
  int ratingsCount = 0;
  // For each movieId, sum up the rating counts.
  for (IntWritable value : values) {
    ratingsCount += value.get();
  }
  // Reducer output is the userID and the number of ratings.
  context.getCounter(UserIdCounter.NUM_USERS).increment(1);
  rValue.set(ratingsCount);
  context.write(key, rValue);
}
/** Called once at the very beginning. */
@Override
public void setup(Context context) {
  t1 = System.nanoTime();
  cmdMap = ParseCLI.parse(context.getConfiguration().getStrings("args"));
  filter = new DocumentFilter(cmdMap);
  t2 = System.nanoTime();
  context.getCounter(ProcessingTime.SETUP).increment(t2 - t1);
}
@Override
protected void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  String[] fields = null;
  String id = null;
  String symbol = null;
  String date = null;
  String cap = null;
  String price = null;
  String rowKey = null;
  String rowVal = null;
  try {
    fields = value.toString().split(",");
  } catch (Exception ex) {
    context.getCounter("HBaseKVMapper", "PARSE_ERRORS").increment(1);
    return;
  }
  if (fields.length > 0 && fields[0] != null && !fields[0].isEmpty()) {
    id = fields[0];
  }
  if (fields.length > 1 && fields[1] != null && !fields[1].isEmpty()) {
    date = fields[1];
  }
  if (fields.length > 2 && fields[2] != null && !fields[2].isEmpty()) {
    symbol = fields[2];
  }
  if (fields.length > 3 && fields[3] != null && !fields[3].isEmpty()) {
    price = fields[3];
  }
  if (fields.length > 4 && fields[4] != null && !fields[4].isEmpty()) {
    cap = fields[4];
  }
  if (id != null && symbol != null) {
    rowKey = id + "_" + symbol;
    rowVal = date + "_" + price + "_" + cap;
    context.write(new Text(rowKey), new Text(rowVal));
  }
  context.getCounter("HBaseKVMapper", "NUM_MSGS").increment(1);
}
/**
 * @param row The row key; it cannot be converted to a string directly via toString.
 * @param value After retrieval this holds e.g.
 *     jack/course:english/1436449989754/Put/vlen=2/seqid=0; the parts cannot be taken
 *     out individually.
 * @param context The current context.
 */
@Override
public void map(ImmutableBytesWritable row, Result value, Context context)
    throws UnsupportedEncodingException {
  context.getCounter(Counters.ROWS).increment(1);
  System.out.println(context.getCounter(Counters.ROWS).getValue());
  String s = new String(row.copyBytes(), "GB2312");
  System.out.println(s);
  List<Cell> valuelist = value.getColumnCells("course".getBytes(), "english".getBytes());
  // The result is valuelist[0] = "jack/course:english/1436449989754/Put/vlen=2/seqid=0".
  for (Cell cell : valuelist) {
    System.out.println(cell.toString());
  }
  // System.out.println(value.getColumnCells("course".getBytes(), "english".getBytes()) + "\n");
}
/**
 * Maps the data.
 *
 * @param row The current table row key.
 * @param values The columns.
 * @param context The current context.
 * @throws IOException When something is broken with the data.
 * @see org.apache.hadoop.mapreduce.Mapper#map(KEYIN, VALUEIN,
 *     org.apache.hadoop.mapreduce.Mapper.Context)
 */
@Override
public void map(ImmutableBytesWritable row, Result values, Context context) throws IOException {
  for (KeyValue value : values.list()) {
    if (value.getValue().length > 0) {
      context.getCounter(Counters.ROWS).increment(1);
      break;
    }
  }
}
@Override
protected void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
  int partitionId = context.getConfiguration().getInt("mapreduce.task.partition", 0);
  System.out.println("partitionId = " + partitionId + " " + this);
  context.write(new IntWritable(partitionId), new Text(counter + "_-_" + value));
  // setValue overwrites the counter, so it ends up holding the last record index seen.
  context.getCounter("LN", "partition_" + partitionId).setValue(counter);
  counter++;
}
@Override
public void map(IntWritable nid, PersonalizedPageRankNode node, Context context)
    throws IOException, InterruptedException {
  // Pass along the node structure.
  intermediateStructure.setNodeId(node.getNodeId());
  intermediateStructure.setType(PersonalizedPageRankNode.Type.Structure);
  intermediateStructure.setAdjacencyList(node.getAdjacenyList());
  context.write(nid, intermediateStructure);

  int massMessages = 0;

  // Distribute PageRank mass to neighbors (along outgoing edges).
  if (node.getAdjacenyList().size() > 0) {
    // Each neighbor gets an equal share of PageRank mass (ranks are kept in log space,
    // so dividing by the out-degree becomes a subtraction of log(size)).
    ArrayListOfIntsWritable list = node.getAdjacenyList();
    float[] mass = new float[sources.size()];
    for (int i = 0; i < sources.size(); i++) {
      mass[i] = node.getPageRank(i) - (float) StrictMath.log(list.size());
    }
    context.getCounter(PageRank.edges).increment(list.size());

    // Iterate over neighbors.
    for (int i = 0; i < list.size(); i++) {
      neighbor.set(list.get(i));
      intermediateMass.setNodeId(list.get(i));
      intermediateMass.setType(PersonalizedPageRankNode.Type.Mass);
      for (int j = 0; j < sources.size(); j++) {
        intermediateMass.setPageRank(j, mass[j]);
      }
      // Emit messages with PageRank mass to neighbors.
      context.write(neighbor, intermediateMass);
      massMessages++;
    }
  }

  // Bookkeeping.
  context.getCounter(PageRank.nodes).increment(1);
  context.getCounter(PageRank.massMessages).increment(massMessages);
}
private void safeMap(LongWritable filePosition, Text line, Context context)
    throws IOException, InterruptedException {
  String lineString = line.toString();
  int colonOffset = lineString.indexOf(':');
  if (colonOffset < 1) {
    return;
  }
  long userId = Long.parseLong(lineString.substring(0, colonOffset));
  context.setStatus("User: " + userId);
  // The source is garbled at this point ("******"): the lost statements created the user
  // vertex and an audit row named `audit` (written under `key`), apparently from the
  // arguments "Friendster MR", "", user, visibility. Only the vertex creation can be
  // restored from later usage; the audit construction is left as in the source.
  Vertex userVertex = createUserVertex(userId);
  context.write(key, AccumuloSession.createMutationFromRow(audit));

  String friends = lineString.substring(colonOffset + 1).trim();
  if ("notfound".equals(friends) || "private".equals(friends)) {
    // do nothing?
  } else {
    String[] friendsArray = friends.split(",");
    for (String friend : friendsArray) {
      friend = friend.trim();
      if (friend.length() == 0) {
        continue;
      }
      long friendId = Long.parseLong(friend);
      Vertex friendVertex = createUserVertex(friendId);
      addEdge(
          ImportMR.getFriendEdgeId(userVertex, friendVertex),
          userVertex,
          friendVertex,
          FriendsterOntology.EDGE_LABEL_FRIEND,
          visibility,
          authorizations);
      context.getCounter(FriendsterImportCounters.FRIEND_EDGES_CREATED).increment(1);
    }
  }
  context.getCounter(FriendsterImportCounters.USERS_PROCESSED).increment(1);
}
@Override
protected void reduce(final Key key, final Iterable<Value> values, final Context context)
    throws IOException, InterruptedException {
  final Iterator<Value> iter = values.iterator();
  final Value firstValue = iter.next();
  final boolean isMulti = iter.hasNext();
  context.write(key, reduceValue(key, isMulti, iter, firstValue));
  context.getCounter("Bulk import", getCounterId(isMulti)).increment(1L);
}
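getCounterId(...) is assumed above; a minimal sketch under the assumption that it simply names one counter for keys that arrived with a single value and another for keys whose values had to be merged:

// Hypothetical counter-naming helper assumed by the reducer above.
private static String getCounterId(boolean isMulti) {
  return isMulti ? "Values combined" : "Values unchanged";
}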
@Override
public void reduce(Text key, Iterable<IntWritable> values, Context context)
    throws IOException, InterruptedException {
  int sum = 0;
  for (IntWritable value : values) {
    sum += value.get();
  }
  context.write(key, new Text(sum + ""));
  context.getCounter(EMRDriver.STATE_COUNTER_GROUP, EMRDriver.TOTAL_PROFILE_COUNT).increment(1);
}