/* decrypt, then compute */ @Override public void map(BytesWritable key, BytesWritable val, Context context) throws IOException, InterruptedException { byte[] iv = key.copyBytes(); byte[] ct = val.copyBytes(); // String[] ss = new String(crypto.decrypt_word_rnd(ct, iv)).split("\\s+"); String text = new String(crypto.decrypt_word_rnd(ct, iv)); Matcher matcher = pattern.matcher(text); while (matcher.find()) { context.write(new Text(matcher.group(0)), new LongWritable(1)); } }
/**
 * Parses a serialized document, logs its first title and forwards a configurable share of the
 * records unchanged, keyed by the document's row id.
 *
 * <p>The share is read from the {@code percentOfWritten} configuration entry (default 100). The
 * record counter {@code i} is taken modulo 100 and a record is written when that position is at
 * least {@code 100 - percentOfWritten}, so exactly {@code percentOfWritten} out of every 100
 * records are written. The earlier {@code (i % 101) > 100 - percentOfWritten} form wrote only
 * {@code percentOfWritten} out of every 101 records, which dropped every 101st record even at
 * the default of 100.
 *
 * @param key ignored
 * @param value serialized {@code DocumentWrapper}
 * @param context supplies the configuration and receives {@code (rowId, original bytes)}
 * @throws IOException if the value cannot be parsed or the write fails
 * @throws InterruptedException if the write is interrupted
 */
@Override
protected void map(Writable key, BytesWritable value, Context context)
    throws IOException, InterruptedException {
  int percentOfWritten = context.getConfiguration().getInt("percentOfWritten", 100);

  // Copy the payload once; it is only read afterwards, so the same array can serve both the
  // parse and the output value.
  byte[] payload = value.copyBytes();
  DocumentWrapper docWrapper = DocumentProtos.DocumentWrapper.parseFrom(payload);

  // A document without any title must not fail the task just because of a log statement.
  if (docWrapper.getDocumentMetadata().getBasicMetadata().getTitleCount() > 0) {
    log.info(
        "work title = "
            + docWrapper.getDocumentMetadata().getBasicMetadata().getTitle(0).getText());
  } else {
    log.info("work title = <none>");
  }

  if ((i % 100) >= 100 - percentOfWritten) {
    log.info("writing...");
    context.write(new Text(docWrapper.getRowId()), new BytesWritable(payload));
  }
  i++;
}
/**
 * Aggregates all time series received for one rollup key and writes the aggregate only when
 * every configured metric reaches its threshold.
 *
 * <p>For each metric the values of the aggregated series are summed over all of its time
 * windows. The key passes when, for every metric, that total is not below
 * {@code (metricThreshold / 100) * metricSum}, i.e. the threshold is applied as a percentage of
 * the corresponding entry in {@code metricSums}.
 *
 * @param topkRollupKey serialized {@code TopKRollupPhaseOneMapOutputKey}
 * @param timeSeriesIterable serialized {@code MetricTimeSeries} values for this key
 * @param context receives {@code (topkRollupKey, aggregated series)} when the threshold is met
 * @throws IOException if deserialization or the write fails
 * @throws InterruptedException if the write is interrupted
 */
@Override
public void reduce(
    BytesWritable topkRollupKey, Iterable<BytesWritable> timeSeriesIterable, Context context)
    throws IOException, InterruptedException {
  // copyBytes() is trimmed to getLength(); getBytes() would expose the whole backing array,
  // which may carry stale bytes past the valid length.
  TopKRollupPhaseOneMapOutputKey wrapper =
      TopKRollupPhaseOneMapOutputKey.fromBytes(topkRollupKey.copyBytes());
  LOGGER.info(
      "DimensionName {} DimensionValue {}",
      wrapper.getDimensionName(),
      wrapper.getDimensionValue());

  // Merge every incoming series into one aggregate.
  MetricTimeSeries aggregateSeries = new MetricTimeSeries(metricSchema);
  for (BytesWritable writable : timeSeriesIterable) {
    MetricTimeSeries series = MetricTimeSeries.fromBytes(writable.copyBytes(), metricSchema);
    aggregateSeries.aggregate(series);
  }

  // Total each metric across all time windows, starting every metric at zero.
  Map<String, Long> metricValues = new HashMap<String, Long>();
  for (MetricSpec metricSpec : starTreeConfig.getMetrics()) {
    metricValues.put(metricSpec.getName(), 0L);
  }
  for (Long time : aggregateSeries.getTimeWindowSet()) {
    for (MetricSpec metricSpec : starTreeConfig.getMetrics()) {
      String metricName = metricSpec.getName();
      long metricValue = aggregateSeries.get(time, metricName).longValue();
      metricValues.put(metricName, metricValues.get(metricName) + metricValue);
    }
  }

  // The first metric that falls short of its threshold rejects the key.
  boolean aboveThreshold = true;
  for (MetricSpec metricSpec : starTreeConfig.getMetrics()) {
    String metricName = metricSpec.getName();
    long metricValue = metricValues.get(metricName);
    long metricSum = metricSums.get(metricName);
    double metricThreshold = metricThresholds.get(metricName);
    LOGGER.info("metricValue : {} metricSum : {}", metricValue, metricSum);
    if (metricValue < (metricThreshold / 100) * metricSum) {
      aboveThreshold = false;
      break;
    }
  }

  if (aboveThreshold) {
    LOGGER.info("Passed threshold");
    // Serialize once and reuse the array for both the data and its length.
    byte[] serialized = aggregateSeries.toBytes();
    valWritable.set(serialized, 0, serialized.length);
    context.write(topkRollupKey, valWritable);
  }
}
/**
 * Parses a serialized document, derives a key from its metadata and, when that key is not
 * empty, emits the key together with the serialized result of
 * {@code DocumentWrapperUtils.cloneDocumentMetadata}.
 *
 * @param key ignored
 * @param value serialized {@code DocumentWrapper}
 * @param context receives {@code (generated key, serialized cloned wrapper)}
 * @throws IOException if the value cannot be parsed or the write fails
 * @throws InterruptedException if the write is interrupted
 */
@Override
public void map(
    Writable key,
    BytesWritable value,
    Mapper<Writable, BytesWritable, Text, BytesWritable>.Context context)
    throws IOException, InterruptedException {
  DocumentWrapper parsedDocument = DocumentProtos.DocumentWrapper.parseFrom(value.copyBytes());
  String generatedKey = keyGen.generateKey(parsedDocument.getDocumentMetadata(), 0);

  // Documents for which no key could be generated are dropped.
  if (generatedKey.isEmpty()) {
    return;
  }

  DocumentWrapper clonedWrapper = DocumentWrapperUtils.cloneDocumentMetadata(parsedDocument);
  Text outputKey = new Text(generatedKey);
  BytesWritable outputValue = new BytesWritable(clonedWrapper.toByteArray());
  context.write(outputKey, outputValue);
}
@Override protected void map(Text key, BytesWritable value, Context context) throws IOException, InterruptedException { StatisticsProtos.InputEntry inputEntry = StatisticsProtos.InputEntry.parseFrom(value.copyBytes()); Set<SortedMapWritableComparable> outputKeyMaps = new HashSet<SortedMapWritableComparable>(); outputKeyMaps.add(new SortedMapWritableComparable()); Map<String, Partitioner> partitioners = statGenConfiguration.getPartitioners(); for (StatisticsProtos.KeyValue field : inputEntry.getFieldList()) { String fieldKey = field.getKey(); if (partitioners.containsKey(fieldKey)) { String[] partitions = partitioners.get(fieldKey).partition(field.getValue()); // the number of output values should be multiplied by number of partitions // (partitions.lenght). // if 0, then return nothing... if (partitions.length == 0) { return; } for (SortedMapWritableComparable outputKeyMap : outputKeyMaps) { outputKeyMap.put(new Text(fieldKey), new Text(partitions[0])); } // if more than 1, then copy output values for (int i = 1; i < partitions.length; i++) { Text mapKey = new Text(fieldKey); Text mapValue = new Text(partitions[i]); Set<SortedMapWritableComparable> newMaps = new HashSet<SortedMapWritableComparable>(); for (SortedMapWritableComparable outputKeyMap : outputKeyMaps) { SortedMapWritableComparable newMap = new SortedMapWritableComparable(outputKeyMap); newMap.put(mapKey, mapValue); newMaps.add(newMap); } outputKeyMaps.addAll(newMaps); } } } for (SortedMapWritableComparable outputKeyMap : outputKeyMaps) { context.write(outputKeyMap, value); } }
/**
 * Reads every value from the sequence files directly under {@code outputPath}, deleting each
 * file after it has been read and finally the directory itself.
 *
 * <p>A file named {@code _SUCCESS} is deleted without being read. Each reader is opened in a
 * try-with-resources block so it is closed even when reading fails part-way through a file.
 *
 * @param outputPath directory whose files are read (non-recursively) and then removed
 * @param config configuration used to open the sequence files
 * @param fs file system used for listing and deleting
 * @return a copy of the bytes of every value, in the order they were read
 * @throws IOException if listing, reading or deleting fails
 */
private List<byte[]> readResults(Path outputPath, Configuration config, FileSystem fs)
    throws IOException {
  List<byte[]> ret = new ArrayList<>();
  for (RemoteIterator<LocatedFileStatus> it = fs.listFiles(outputPath, false); it.hasNext(); ) {
    Path p = it.next().getPath();
    if (p.getName().equals("_SUCCESS")) {
      fs.delete(p, false);
      continue;
    }
    try (SequenceFile.Reader reader =
        new SequenceFile.Reader(config, SequenceFile.Reader.file(p))) {
      LongWritable key = new LongWritable();
      BytesWritable value = new BytesWritable();
      // The writable is reused between records, so each value is copied before it is stored.
      while (reader.next(key, value)) {
        ret.add(value.copyBytes());
      }
    }
    fs.delete(p, false);
  }
  fs.delete(outputPath, false);
  if (LOG.isDebugEnabled()) {
    LOG.debug(outputPath + ": Returning " + ret.size());
  }
  return ret;
}
@Override protected void reduce(Writable key, Iterable<BytesWritable> values, Context context) throws IOException, InterruptedException { ArrayList<String> docIds = new ArrayList<String>(); HashSet<String> issns = new HashSet<String>(); HashSet<String> isbns = new HashSet<String>(); HashSet<String> titles = new HashSet<String>(); for (BytesWritable value : values) { DocumentProtos.DocumentWrapper docWrapper = DocumentProtos.DocumentWrapper.parseFrom(value.copyBytes()); docIds.add(docWrapper.getRowId()); String issn = null; String isbn = null; if (docWrapper.hasDocumentMetadata() && docWrapper.getDocumentMetadata().hasBasicMetadata()) { DocumentProtos.BasicMetadataOrBuilder bm = docWrapper.getDocumentMetadata().getBasicMetadataOrBuilder(); if (bm.hasIssn()) { issn = bm.getIssn(); } if (bm.hasIsbn()) { isbn = bm.getIsbn(); } if (StringUtils.isNotBlank(isbn)) { isbn = isbn.trim().toUpperCase(); isbns.add(isbn); } if (StringUtils.isNotBlank(issn)) { issn = issn.trim().toUpperCase(); issns.add(issn); } if (bm.hasJournal()) { String title = bm.getJournal().trim(); if (StringUtils.isNotBlank(title)) { titles.add(title); } } } } String issn = null; String isbn = null; ArrayList<String> titleA = new ArrayList<String>(titles); Collections.sort( titleA, new Comparator<String>() { @Override public int compare(String o1, String o2) { // najdłuższe naprzód return o2.length() - o1.length(); } }); if (issns.size() > 0) { issn = issns.iterator().next(); } if (isbns.size() > 0) { isbn = isbns.iterator().next(); } String id = "http://comac.ceon.pl/source-"; if (issn != null) { id += ("issn-" + issn); if (isbn != null) { id += "-"; } } if (isbn != null) { id += ("isbn-" + isbn); } for (String docId : docIds) { ParentModelProtos.ParentDisambiguationOut.Builder parent = ParentModelProtos.ParentDisambiguationOut.newBuilder(); parent.setDocId(docId); parent.setParentId(id); parent.addAllParentName(titles); if (isbn != null) { parent.setType(DocumentProtos.BasicMetadata.ParentType.BOOK); } 
else { parent.setType(DocumentProtos.BasicMetadata.ParentType.JOURNAL); } context.write(new Text(docId), new BytesWritable(parent.build().toByteArray())); } }
/**
 * Wraps the value's bytes in a {@code Text} and emits that same object as both key and value.
 *
 * @param key ignored
 * @param value bytes to wrap
 * @param context receives {@code (text, text)}
 * @throws IOException if the write fails
 * @throws InterruptedException if the write is interrupted
 */
@Override
protected void map(LongWritable key, BytesWritable value, Context context)
    throws IOException, InterruptedException {
  byte[] payload = value.copyBytes();
  Text record = new Text(payload);
  context.write(record, record);
}
/**
 * Converts the value's bytes with {@code PcapHelper.toPacketInfo} and streams the resulting
 * packets that pass the {@code filter} field.
 *
 * @param value holder of the bytes to convert
 * @return stream of the packet infos accepted by {@code filter}
 * @throws IOException declared for callers; presumably raised by the conversion - confirm
 */
private Stream<PacketInfo> filteredPacketInfo(BytesWritable value) throws IOException {
  byte[] payload = value.copyBytes();
  return PcapHelper.toPacketInfo(payload).stream().filter(filter);
}