Beispiel #1
0
  /**
   * Normalizes a textual timestamp held in the first field of {@code input} into a 14-digit number.
   *
   * <p>The characters {@code - + . ^ : ,} and spaces are stripped from the value. The remainder is
   * cut to its first 14 characters, or right-padded with {@code '0'} up to 14 characters, and then
   * parsed as a {@code long}. (Presumably the 14 digits are meant as {@code yyyyMMddHHmmss};
   * nothing in this method enforces that - TODO confirm.)
   *
   * @param input tuple whose first field holds the chararray to convert
   * @return the parsed 14-digit value, or {@code null} when {@code input} is {@code null}, has no
   *     fields, or its first field is {@code null}
   * @throws IOException an {@code ExecException} raised while reading the tuple is rethrown as is;
   *     any other failure (first field is not a chararray, remainder is not numeric) is wrapped in
   *     an {@code ExecException} with error code 2107
   */
  @Override
  public Long exec(Tuple input) throws IOException {
    try {
      if (input == null || input.size() == 0) {
        return null;
      }

      Object field = input.get(0);
      if (field == null) {
        // A null field is missing data rather than a type error: yield null instead of failing.
        return null;
      }
      if (input.getType(0) != DataType.CHARARRAY) {
        throw new RuntimeException(
            "Input type expected to be chararray but got: " + input.getType(0));
      }

      String digits = ((String) field).replaceAll("[-+.^:, ]", "");
      if (digits.length() > 14) {
        digits = digits.substring(0, 14);
      } else if (digits.length() < 14) {
        // Left-justify in 14 columns, then turn the padding spaces into zeros.
        digits = String.format("%-14s", digits).replace(' ', '0');
      }
      return Long.parseLong(digits);

    } catch (ExecException exp) {
      throw exp;
    } catch (Exception e) {
      int errCode = 2107;
      String msg = "Error while computing date_format in " + this.getClass().getSimpleName();
      throw new ExecException(msg, errCode, PigException.BUG, e);
    }
  }
  /**
   * Runs every nested task with the tuple stored in field {@code fieldNumber} of the current tuple
   * installed as the tuple context, and resets that context once the tasks have finished or failed.
   *
   * @throws BuildException if there is no current tuple, if the selected field is not a tuple, or
   *     if inspecting the field raises an {@code ExecException}
   */
  @Override
  protected void doHadoopWork() throws BuildException {
    final Tuple current = ContextManager.getCurrentTuple();
    if (current == null) {
      final String complaint =
          this.getTaskName()
              + " should be put inside task container which provides tuple to execution context";
      throw new BuildException(complaint);
    }

    try {
      // Both the declared type and the runtime class have to agree that the field is a tuple.
      final boolean holdsTuple =
          current.getType(fieldNumber) == DataType.TUPLE
              && current.get(fieldNumber) instanceof Tuple;
      if (!holdsTuple) {
        throw new BuildException("Tuple field " + fieldNumber + " doesn't represent a Tuple");
      }

      ContextManager.setCurrentTupleContext((Tuple) current.get(fieldNumber));
      try {
        for (Task nested : tasks) {
          nested.perform();
        }
      } finally {
        // Always undo the context switch, even when a nested task fails.
        ContextManager.resetCurrentTupleContext();
      }
    } catch (ExecException cause) {
      throw new BuildException("Failed to check type of tuple field " + fieldNumber, cause);
    }
  }
  /**
   * Collects the classification code values of a serialized {@code DocumentMetadata} and emits
   * them only when there are more of them than a given limit.
   *
   * <p>Field 1 of {@code input} is read as a {@code DataByteArray} holding the protobuf message and
   * field 2 as the {@code Integer} limit. Field 0 is not read by this method.
   *
   * @param input tuple carrying the serialized metadata (field 1) and the limit (field 2)
   * @return a tuple {@code (key, bag of code values, number of values)} when the number of values
   *     is greater than the limit; {@code null} when it is not, when {@code input} is {@code null}
   *     or has no fields, or when field 1 or field 2 is {@code null}
   * @throws IOException if a field cannot be read or the bytes cannot be parsed as a
   *     {@code DocumentMetadata}
   * @throws ClassCastException if field 1 is not a {@code DataByteArray} or field 2 is not an
   *     {@code Integer}
   */
  @Override
  public Tuple exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0) {
      return null;
    }

    // Read the raw value first; casting here would bypass the logged cast handling below.
    Object obj;
    try {
      obj = input.get(1);
    } catch (ExecException e) {
      logger.error("Error in reading field proto:", e);
      throw e;
    }

    Integer limnum;
    try {
      limnum = (Integer) input.get(2);
    } catch (ExecException e) {
      logger.error("Error in reading baglimit:", e);
      throw e;
    }

    DataByteArray dba;
    try {
      dba = (DataByteArray) obj;
    } catch (ClassCastException e) {
      logger.error("Error in casting Object (" + input.getType(1) + ") to DataByteArray:", e);
      throw e;
    }

    if (dba == null || limnum == null) {
      // Nothing to parse or nothing to compare against: no result rather than a
      // NullPointerException.
      return null;
    }

    DocumentMetadata dm;
    try {
      dm = DocumentMetadata.parseFrom(dba.get());
    } catch (InvalidProtocolBufferException e) {
      logger.error("Error in reading ByteArray to DocumentMetadata:", e);
      throw e;
    }

    String key = dm.getKey();
    TupleFactory tupleFactory = TupleFactory.getInstance();
    DataBag db = new DefaultDataBag();
    int bagsize = 0;
    for (ClassifCode code : dm.getBasicMetadata().getClassifCodeList()) {
      for (String codeValue : code.getValueList()) {
        bagsize++;
        db.add(tupleFactory.newTuple(codeValue));
      }
    }

    if (bagsize <= limnum) {
      return null;
    }
    Object[] to = new Object[] {key, db, bagsize};
    return tupleFactory.newTuple(Arrays.asList(to));
  }
 /**
  * Reports the type of a field by asking the wrapped tuple {@code t}.
  *
  * @param fieldNum index of the field to inspect
  * @return the type code that {@code t} reports for that field
  * @throws ExecException if {@code t} fails to determine the type
  */
 @Override
 public byte getType(int fieldNum) throws ExecException {
   final byte delegatedType = t.getType(fieldNum);
   return delegatedType;
 }