コード例 #1
0
    @Override
    @SuppressWarnings("unchecked")
    public void initialize(InputSplit split, TaskAttemptContext ctx)
        throws IOException, InterruptedException {
      // set up columns that needs to read from the RCFile.

      tDesc = TStructDescriptor.getInstance(typeRef.getRawClass());
      thriftWritable = ThriftWritable.newInstance((Class<TBase<?, ?>>) typeRef.getRawClass());
      final List<Field> tFields = tDesc.getFields();

      FileSplit fsplit = (FileSplit) split;
      Path file = fsplit.getPath();

      LOG.info(
          String.format(
              "reading %s from %s:%d:%d",
              typeRef.getRawClass().getName(),
              file.toString(),
              fsplit.getStart(),
              fsplit.getStart() + fsplit.getLength()));

      ColumnarMetadata storedInfo = RCFileUtil.readMetadata(ctx.getConfiguration(), file);

      // list of field numbers
      List<Integer> tFieldIds =
          Lists.transform(
              tFields,
              new Function<Field, Integer>() {
                public Integer apply(Field fd) {
                  return Integer.valueOf(fd.getFieldId());
                }
              });

      columnsBeingRead =
          RCFileUtil.findColumnsToRead(ctx.getConfiguration(), tFieldIds, storedInfo);

      for (int idx : columnsBeingRead) {
        int fid = storedInfo.getFieldId(idx);
        if (fid >= 0) {
          knownRequiredFields.add(tFields.get(tFieldIds.indexOf(fid)));
        } else {
          readUnknownsColumn = true;
        }
      }

      ColumnProjectionUtils.setReadColumnIDs(ctx.getConfiguration(), columnsBeingRead);

      // finally!
      super.initialize(split, ctx);
    }
コード例 #2
0
    /**
     * Builds Thrift object from the raw bytes returned by RCFile reader.
     *
     * @throws TException
     */
    @SuppressWarnings({"unchecked", "rawtypes"})
    public TBase<?, ?> getCurrentThriftValue()
        throws IOException, InterruptedException, TException {

      BytesRefArrayWritable byteRefs = getCurrentBytesRefArrayWritable();

      if (byteRefs == null) {
        return null;
      }

      TBase tObj = tDesc.newThriftObject();

      for (int i = 0; i < knownRequiredFields.size(); i++) {
        BytesRefWritable buf = byteRefs.get(columnsBeingRead.get(i));
        if (buf.getLength() > 0) {
          memTransport.reset(buf.getData(), buf.getStart(), buf.getLength());

          Field field = knownRequiredFields.get(i);
          tObj.setFieldValue(field.getFieldIdEnum(), ThriftUtils.readFieldNoTag(tProto, field));
        }
        // else no need to set default value since any default value
        // would have been serialized when this record was written.
      }

      // parse unknowns column if required
      if (readUnknownsColumn) {
        int last = columnsBeingRead.get(columnsBeingRead.size() - 1);
        BytesRefWritable buf = byteRefs.get(last);
        if (buf.getLength() > 0) {
          memTransport.reset(buf.getData(), buf.getStart(), buf.getLength());
          tObj.read(tProto);
        }
      }

      return tObj;
    }