Beispiel #1
0
  /**
   * Decrypts one record and emits a count of 1 for every pattern match found in the plaintext.
   *
   * <p>The key bytes are used as the IV and the value bytes as the ciphertext; both are copied
   * out of their writables before being handed to {@code crypto.decrypt_word_rnd}.
   *
   * @param key writable whose bytes are passed to decryption as the IV
   * @param val writable whose bytes are passed to decryption as the ciphertext
   * @param context sink that receives one {@code (match, 1)} pair per regex match
   * @throws IOException if writing to the context fails
   * @throws InterruptedException if writing to the context is interrupted
   */
  @Override
  public void map(BytesWritable key, BytesWritable val, Context context)
      throws IOException, InterruptedException {
    byte[] iv = key.copyBytes();
    byte[] ct = val.copyBytes();

    // NOTE(review): the plaintext is decoded with the platform default charset; confirm this
    // matches the charset used when the data was encrypted.
    String text = new String(crypto.decrypt_word_rnd(ct, iv));
    Matcher matcher = pattern.matcher(text);

    // The emitted count is always 1 and is never mutated here, so one instance serves all matches.
    LongWritable one = new LongWritable(1);
    while (matcher.find()) {
      context.write(new Text(matcher.group(0)), one);
    }
  }
  /**
   * Parses each serialized document, logs its first title, and writes a sample of the documents
   * back out keyed by row id.
   *
   * <p>The sample size is driven by the {@code percentOfWritten} configuration value (default
   * 100) together with the running record counter {@code i}, which is incremented once per call.
   *
   * @param key input key; not used
   * @param value serialized {@code DocumentWrapper}
   * @param context supplies the configuration and receives the selected records
   * @throws IOException if parsing the document or writing to the context fails
   * @throws InterruptedException if writing to the context is interrupted
   */
  @Override
  protected void map(Writable key, BytesWritable value, Context context)
      throws IOException, InterruptedException {

    int percentOfWritten = context.getConfiguration().getInt("percentOfWritten", 100);

    // Copy the payload once; the same array is parsed and, if selected, written back out.
    byte[] payload = value.copyBytes();
    DocumentWrapper docWrapper = DocumentProtos.DocumentWrapper.parseFrom(payload);

    // Guard the lookup so a document without any title cannot fail the task over a log line.
    if (docWrapper.getDocumentMetadata().getBasicMetadata().getTitleCount() > 0) {
      log.info(
          "work title = "
              + docWrapper.getDocumentMetadata().getBasicMetadata().getTitle(0).getText());
    }

    // NOTE(review): i % 101 ranges over 0..100, so even with percentOfWritten = 100 the records
    // where i % 101 == 0 are skipped — confirm this is the intended sampling.
    if ((i % 101) > 100 - percentOfWritten) {
      log.info("writing...");
      context.write(new Text(docWrapper.getRowId()), new BytesWritable(payload));
    }

    i++;
  }
    /**
     * Aggregates all time series received for one key and forwards the aggregate only if every
     * metric passes its threshold.
     *
     * <p>For each metric in {@code starTreeConfig}, the values of the aggregated series are summed
     * over all of its time windows. The key passes when, for every metric, that total is at least
     * {@code metricThresholds[metric] / 100} times {@code metricSums[metric]}.
     *
     * @param topkRollupKey serialized {@code TopKRollupPhaseOneMapOutputKey}
     * @param timeSeriesIterable serialized {@code MetricTimeSeries} values for this key
     * @param context receives {@code (topkRollupKey, aggregated series)} when the threshold passes
     * @throws IOException if deserialization or writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    public void reduce(
        BytesWritable topkRollupKey, Iterable<BytesWritable> timeSeriesIterable, Context context)
        throws IOException, InterruptedException {

      // copyBytes() returns exactly the valid bytes; getBytes() exposes the backing array, which
      // may be longer than the data and carry stale trailing content.
      TopKRollupPhaseOneMapOutputKey wrapper =
          TopKRollupPhaseOneMapOutputKey.fromBytes(topkRollupKey.copyBytes());
      LOGGER.info(
          "DimensionName {} DimensionValue {}",
          wrapper.getDimensionName(),
          wrapper.getDimensionValue());

      // Fold every incoming series into a single aggregate.
      MetricTimeSeries aggregateSeries = new MetricTimeSeries(metricSchema);
      for (BytesWritable writable : timeSeriesIterable) {
        MetricTimeSeries series = MetricTimeSeries.fromBytes(writable.copyBytes(), metricSchema);
        aggregateSeries.aggregate(series);
      }

      // Per-metric totals across all time windows, seeded with zero so every metric has an entry.
      Map<String, Long> metricValues = new HashMap<String, Long>();
      for (MetricSpec metricSpec : starTreeConfig.getMetrics()) {
        metricValues.put(metricSpec.getName(), 0L);
      }
      for (Long time : aggregateSeries.getTimeWindowSet()) {
        for (MetricSpec metricSpec : starTreeConfig.getMetrics()) {
          String metricName = metricSpec.getName();
          long metricValue = aggregateSeries.get(time, metricName).longValue();
          metricValues.put(metricName, metricValues.get(metricName) + metricValue);
        }
      }

      // A single metric below its threshold rejects the whole key.
      boolean aboveThreshold = true;
      for (MetricSpec metricSpec : starTreeConfig.getMetrics()) {
        String metricName = metricSpec.getName();

        long metricValue = metricValues.get(metricName);
        long metricSum = metricSums.get(metricName);
        double metricThreshold = metricThresholds.get(metricName);

        LOGGER.info("metricValue : {} metricSum : {}", metricValue, metricSum);
        if (metricValue < (metricThreshold / 100) * metricSum) {
          aboveThreshold = false;
          break;
        }
      }

      if (aboveThreshold) {
        LOGGER.info("Passed threshold");
        // Serialize once and reuse the array for both the payload and its length.
        byte[] aggregateBytes = aggregateSeries.toBytes();
        valWritable.set(aggregateBytes, 0, aggregateBytes.length);
        context.write(topkRollupKey, valWritable);
      }
    }
  /**
   * Re-keys each document by the key that {@code keyGen} generates from its metadata.
   *
   * <p>The emitted value is the serialized wrapper returned by
   * {@code DocumentWrapperUtils.cloneDocumentMetadata}. Documents whose generated key is empty
   * produce no output.
   *
   * @param key input key; not used
   * @param value serialized {@code DocumentWrapper}
   * @param context receives {@code (generated key, serialized cloned wrapper)}
   * @throws IOException if parsing the document or writing to the context fails
   * @throws InterruptedException if writing to the context is interrupted
   */
  @Override
  public void map(
      Writable key,
      BytesWritable value,
      Mapper<Writable, BytesWritable, Text, BytesWritable>.Context context)
      throws IOException, InterruptedException {

    byte[] serializedDocument = value.copyBytes();
    DocumentWrapper fullDocument = DocumentProtos.DocumentWrapper.parseFrom(serializedDocument);

    String generatedKey = keyGen.generateKey(fullDocument.getDocumentMetadata(), 0);
    if (generatedKey.isEmpty()) {
      // Nothing to key the document by, so it is dropped.
      return;
    }

    DocumentWrapper clonedWrapper = DocumentWrapperUtils.cloneDocumentMetadata(fullDocument);
    byte[] clonedBytes = clonedWrapper.toByteArray();
    context.write(new Text(generatedKey), new BytesWritable(clonedBytes));
  }
    /**
     * Emits the input entry once per combination of partition values of its partitioned fields.
     *
     * <p>Starting from a single empty key map, each field that has a configured partitioner
     * extends every existing key map with its first partition and adds copies of the key maps for
     * each further partition. If any partitioned field yields no partitions, nothing is emitted.
     *
     * @param key input key; not used
     * @param value serialized {@code StatisticsProtos.InputEntry}, written unchanged as the output
     *     value
     * @param context receives one {@code (key map, value)} pair per resulting key map
     * @throws IOException if parsing the entry or writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void map(Text key, BytesWritable value, Context context)
        throws IOException, InterruptedException {

      byte[] serializedEntry = value.copyBytes();
      StatisticsProtos.InputEntry entry = StatisticsProtos.InputEntry.parseFrom(serializedEntry);

      Set<SortedMapWritableComparable> keyMaps = new HashSet<SortedMapWritableComparable>();
      keyMaps.add(new SortedMapWritableComparable());

      Map<String, Partitioner> partitionerByField = statGenConfiguration.getPartitioners();

      for (StatisticsProtos.KeyValue field : entry.getFieldList()) {
        String fieldName = field.getKey();
        if (!partitionerByField.containsKey(fieldName)) {
          // Fields without a partitioner do not contribute to the output key.
          continue;
        }

        String[] partitions = partitionerByField.get(fieldName).partition(field.getValue());
        // The number of output values is multiplied by the number of partitions
        // (partitions.length); with zero partitions there is nothing to emit at all.
        if (partitions.length == 0) {
          return;
        }

        // The first partition extends the key maps that already exist, in place.
        for (SortedMapWritableComparable keyMap : keyMaps) {
          keyMap.put(new Text(fieldName), new Text(partitions[0]));
        }

        // Every further partition adds a copy of each current key map carrying that partition.
        for (int p = 1; p < partitions.length; p++) {
          Text extraKey = new Text(fieldName);
          Text extraValue = new Text(partitions[p]);
          Set<SortedMapWritableComparable> copies = new HashSet<SortedMapWritableComparable>();
          for (SortedMapWritableComparable existing : keyMaps) {
            SortedMapWritableComparable copy = new SortedMapWritableComparable(existing);
            copy.put(extraKey, extraValue);
            copies.add(copy);
          }
          keyMaps.addAll(copies);
        }
      }

      for (SortedMapWritableComparable keyMap : keyMaps) {
        context.write(keyMap, value);
      }
    }
 /**
  * Reads every value from the sequence files directly under {@code outputPath}, deleting each
  * file once it has been read and finally the directory itself.
  *
  * <p>A file named {@code _SUCCESS} is deleted without being read. All other files are opened as
  * sequence files with {@code LongWritable} keys and {@code BytesWritable} values; the keys are
  * discarded.
  *
  * @param outputPath directory whose files are read and then removed
  * @param config configuration used to open the sequence files
  * @param fs file system used for listing and deleting
  * @return a copy of the bytes of every value read, in the order encountered
  * @throws IOException if listing, reading, or deleting fails
  */
 private List<byte[]> readResults(Path outputPath, Configuration config, FileSystem fs)
     throws IOException {
   List<byte[]> ret = new ArrayList<>();
   for (RemoteIterator<LocatedFileStatus> it = fs.listFiles(outputPath, false); it.hasNext(); ) {
     Path p = it.next().getPath();
     if (p.getName().equals("_SUCCESS")) {
       fs.delete(p, false);
       continue;
     }
     LongWritable key = new LongWritable();
     BytesWritable value = new BytesWritable();
     // try-with-resources closes the reader even when next() throws part-way through the file.
     try (SequenceFile.Reader reader =
         new SequenceFile.Reader(config, SequenceFile.Reader.file(p))) {
       while (reader.next(key, value)) {
         // The writable is reused between records, so each value must be copied out.
         ret.add(value.copyBytes());
       }
     }
     fs.delete(p, false);
   }
   fs.delete(outputPath, false);
   if (LOG.isDebugEnabled()) {
     LOG.debug(outputPath + ": Returning " + ret.size());
   }
   return ret;
 }
  /**
   * Assigns one shared parent (source) record to every document grouped under the same key.
   *
   * <p>The ISSNs, ISBNs and journal titles of all documents in the group are collected. The
   * parent id is built from one collected ISSN and/or one collected ISBN, the parent names are
   * the collected titles ordered longest first, and the parent type is {@code BOOK} when an ISBN
   * was found and {@code JOURNAL} otherwise. One record is written per document id.
   *
   * @param key grouping key; not used
   * @param values serialized {@code DocumentWrapper} messages of the group
   * @param context receives {@code (docId, serialized ParentDisambiguationOut)} pairs
   * @throws IOException if parsing a document or writing to the context fails
   * @throws InterruptedException if writing to the context is interrupted
   */
  @Override
  protected void reduce(Writable key, Iterable<BytesWritable> values, Context context)
      throws IOException, InterruptedException {
    ArrayList<String> docIds = new ArrayList<String>();
    HashSet<String> issns = new HashSet<String>();
    HashSet<String> isbns = new HashSet<String>();
    HashSet<String> titles = new HashSet<String>();
    for (BytesWritable value : values) {

      DocumentProtos.DocumentWrapper docWrapper =
          DocumentProtos.DocumentWrapper.parseFrom(value.copyBytes());
      docIds.add(docWrapper.getRowId());
      String issn = null;
      String isbn = null;
      if (docWrapper.hasDocumentMetadata() && docWrapper.getDocumentMetadata().hasBasicMetadata()) {
        DocumentProtos.BasicMetadataOrBuilder bm =
            docWrapper.getDocumentMetadata().getBasicMetadataOrBuilder();
        if (bm.hasIssn()) {
          issn = bm.getIssn();
        }
        if (bm.hasIsbn()) {
          isbn = bm.getIsbn();
        }
        // Identifiers are normalized with a fixed locale so the result does not depend on the
        // JVM's default locale.
        if (StringUtils.isNotBlank(isbn)) {
          isbn = isbn.trim().toUpperCase(java.util.Locale.ROOT);
          isbns.add(isbn);
        }
        if (StringUtils.isNotBlank(issn)) {
          issn = issn.trim().toUpperCase(java.util.Locale.ROOT);
          issns.add(issn);
        }
        if (bm.hasJournal()) {
          String title = bm.getJournal().trim();
          if (StringUtils.isNotBlank(title)) {
            titles.add(title);
          }
        }
      }
    }
    String issn = null;
    String isbn = null;
    ArrayList<String> titleA = new ArrayList<String>(titles);
    Collections.sort(
        titleA,
        new Comparator<String>() {
          @Override
          public int compare(String o1, String o2) {
            // Longest first; equal lengths fall back to lexicographic order so the result does
            // not depend on hash-set iteration order.
            int byLength = Integer.compare(o2.length(), o1.length());
            return byLength != 0 ? byLength : o1.compareTo(o2);
          }
        });

    // NOTE(review): when several distinct ISSNs/ISBNs were collected, the one used is whichever
    // the hash set iterates first.
    if (issns.size() > 0) {
      issn = issns.iterator().next();
    }
    if (isbns.size() > 0) {
      isbn = isbns.iterator().next();
    }
    String id = "http://comac.ceon.pl/source-";
    if (issn != null) {
      id += ("issn-" + issn);
      if (isbn != null) {
        id += "-";
      }
    }
    if (isbn != null) {
      id += ("isbn-" + isbn);
    }
    for (String docId : docIds) {
      ParentModelProtos.ParentDisambiguationOut.Builder parent =
          ParentModelProtos.ParentDisambiguationOut.newBuilder();
      parent.setDocId(docId);
      parent.setParentId(id);
      // Emit the sorted titles; previously the sorted list was built but the unsorted set was
      // written instead.
      parent.addAllParentName(titleA);
      if (isbn != null) {
        parent.setType(DocumentProtos.BasicMetadata.ParentType.BOOK);
      } else {
        parent.setType(DocumentProtos.BasicMetadata.ParentType.JOURNAL);
      }
      context.write(new Text(docId), new BytesWritable(parent.build().toByteArray()));
    }
  }
 /**
  * Writes the bytes of each input value as a {@code Text}, used as both output key and value.
  *
  * @param key input key; not used
  * @param value payload whose bytes become the emitted text
  * @param context receives the pair {@code (text, text)}
  * @throws IOException if writing to the context fails
  * @throws InterruptedException if writing to the context is interrupted
  */
 @Override
 protected void map(LongWritable key, BytesWritable value, Context context)
     throws IOException, InterruptedException {
   byte[] payload = value.copyBytes();
   Text echoed = new Text(payload);
   // The same instance is deliberately passed as key and as value.
   context.write(echoed, echoed);
 }
 /**
  * Converts the bytes of {@code value} to packet infos via {@code PcapHelper.toPacketInfo} and
  * keeps only those accepted by {@code filter}.
  *
  * @param value writable holding the raw bytes to convert
  * @return a stream of the packet infos that pass {@code filter}
  * @throws IOException declared by this method; presumably raised by the conversion
  */
 private Stream<PacketInfo> filteredPacketInfo(BytesWritable value) throws IOException {
   byte[] rawBytes = value.copyBytes();
   return PcapHelper.toPacketInfo(rawBytes).stream().filter(filter);
 }