@Override
  public DataBag exec(Tuple input) throws IOException {

    if (input == null || input.size() == 0) {
      return null;
    }

    try {
      DataByteArray dba = null;
      try {
        dba = (DataByteArray) input.get(0);
      } catch (ExecException e) {
        logger.error("Error in reading field:", e);
        throw e;
      }

      DocumentWrapper dm = null;
      try {
        dm = DocumentWrapper.parseFrom(dba.get());
      } catch (Exception e) {
        logger.error("Error in reading ByteArray to DocumentMetadata:", e);
        throw e;
      }

      DataBag ret = new DefaultDataBag();
      DataByteArray metadata = new DataByteArray(dm.getDocumentMetadata().toByteArray());

      List<Author> authors = dm.getDocumentMetadata().getBasicMetadata().getAuthorList();

      for (int i = 0; i < authors.size(); i++) {
        String sname = authors.get(i).getSurname();
        Object[] to = new Object[] {sname, metadata, i};
        Tuple t = TupleFactory.getInstance().newTuple(Arrays.asList(to));
        ret.add(t);
      }

      return ret;

    } catch (Exception e) {
      logger.error("Error in processing input row:", e);
      throw new IOException(
          "Caught exception processing input row:\n" + StackTraceExtractor.getStackTrace(e));
    }
  }
  @Override
  public void map(
      Writable key,
      BytesWritable value,
      Mapper<Writable, BytesWritable, Text, BytesWritable>.Context context)
      throws IOException, InterruptedException {

    DocumentWrapper docWrapper = DocumentProtos.DocumentWrapper.parseFrom(value.copyBytes());

    String docKey = keyGen.generateKey(docWrapper.getDocumentMetadata(), 0);

    if (!docKey.isEmpty()) {
      DocumentWrapper thinDocWrapper = DocumentWrapperUtils.cloneDocumentMetadata(docWrapper);
      context.write(new Text(docKey), new BytesWritable(thinDocWrapper.toByteArray()));
    }
  }
Ejemplo n.º 3
0
  @Override
  protected void map(Writable key, BytesWritable value, Context context)
      throws IOException, InterruptedException {

    int percentOfWritten = context.getConfiguration().getInt("percentOfWritten", 100);

    DocumentWrapper docWrapper = DocumentProtos.DocumentWrapper.parseFrom(value.copyBytes());

    log.info(
        "work title = "
            + docWrapper.getDocumentMetadata().getBasicMetadata().getTitle(0).getText());

    if ((i % 101) > 100 - percentOfWritten) {
      log.info("writing...");
      context.write(new Text(docWrapper.getRowId()), new BytesWritable(value.copyBytes()));
    }

    i++;
  }