Example #1
  public static RecordReader<?, ?> createRecordReader(
      Configuration configuration,
      Path path,
      long start,
      long length,
      Properties schema,
      List<HiveColumnHandle> columns) {
    // determine which hive columns we will read
    List<HiveColumnHandle> readColumns =
        ImmutableList.copyOf(filter(columns, column -> column.getColumnType() == REGULAR));
    List<Integer> readHiveColumnIndexes =
        ImmutableList.copyOf(transform(readColumns, HiveColumnHandle::getHiveColumnIndex));

    // Tell Hive which columns we would like to read; this lets Hive optimize
    // reading of column-oriented files.
    setReadColumns(configuration, readHiveColumnIndexes);

    InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, true);
    JobConf jobConf = new JobConf(configuration);
    FileSplit fileSplit = new FileSplit(path, start, length, (String[]) null);

    // propagate serialization configuration to getRecordReader
    schema
        .stringPropertyNames()
        .stream()
        .filter(name -> name.startsWith("serialization."))
        .forEach(name -> jobConf.set(name, schema.getProperty(name)));

    try {
      return retry()
          .stopOnIllegalExceptions()
          .run(
              "createRecordReader",
              () -> inputFormat.getRecordReader(fileSplit, jobConf, Reporter.NULL));
    } catch (Exception e) {
      throw new PrestoException(
          HIVE_CANNOT_OPEN_SPLIT,
          format(
              "Error opening Hive split %s (offset=%s, length=%s) using %s: %s",
              path, start, length, getInputFormatName(schema), e.getMessage()),
          e);
    }
  }
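The returned reader follows the classic org.apache.hadoop.mapred.RecordReader contract: createKey()/createValue() allocate reusable holders and next() fills them until it returns false. Below is a minimal consumption sketch under that contract; the countRows helper is hypothetical and not part of the example above.

  // Hypothetical helper: drains a reader returned by createRecordReader(...)
  // using the old mapred RecordReader API.
  public static <K, V> long countRows(RecordReader<K, V> reader) throws IOException {
    K key = reader.createKey();     // reusable key holder
    V value = reader.createValue(); // reusable value holder
    long rows = 0;
    try {
      while (reader.next(key, value)) { // next() rewrites the holders in place
        rows++;
      }
    } finally {
      reader.close();
    }
    return rows;
  }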
Example #2
    @Override
    public void initialize(InputSplit split, final TaskAttemptContext context)
        throws IOException, InterruptedException {

      org.apache.hadoop.mapred.InputSplit oldSplit;

      if (split.getClass() == FileSplit.class) {
        oldSplit =
            new org.apache.hadoop.mapred.FileSplit(
                ((FileSplit) split).getPath(),
                ((FileSplit) split).getStart(),
                ((FileSplit) split).getLength(),
                split.getLocations());
      } else {
        oldSplit = ((InputSplitWrapper) split).realSplit;
      }

      @SuppressWarnings("unchecked")
      Reporter reporter = new Reporter() { // adapt the new-API context to the old Reporter interface

            final TaskInputOutputContext ioCtx =
                context instanceof TaskInputOutputContext ? (TaskInputOutputContext) context : null;

            public void progress() {
              HadoopCompat.progress(context);
            }

            // @Override omitted for cross-version compatibility: getProgress()
            // is not declared on Reporter in all Hadoop versions.
            public float getProgress() {
              return (ioCtx != null) ? ioCtx.getProgress() : 0;
            }

            public void setStatus(String status) {
              if (ioCtx != null) HadoopCompat.setStatus(ioCtx, status);
            }

            public void incrCounter(String group, String counter, long amount) {
              if (ioCtx != null)
                HadoopCompat.incrementCounter(ioCtx.getCounter(group, counter), amount);
            }

            @SuppressWarnings("unchecked")
            public void incrCounter(Enum<?> key, long amount) {
              if (ioCtx != null) HadoopCompat.incrementCounter(ioCtx.getCounter(key), amount);
            }

            public org.apache.hadoop.mapred.InputSplit getInputSplit()
                throws UnsupportedOperationException {
              throw new UnsupportedOperationException();
            }

            public Counter getCounter(String group, String name) {
              return ioCtx != null ? (Counter) HadoopCompat.getCounter(ioCtx, group, name) : null;
            }

            @SuppressWarnings("unchecked")
            public Counter getCounter(Enum<?> name) {
              return ioCtx != null ? (Counter) ioCtx.getCounter(name) : null;
            }
          };

      realReader =
          realInputFormat.getRecordReader(
              oldSplit, (JobConf) HadoopCompat.getConfiguration(context), reporter);

      keyObj = realReader.createKey();
      valueObj = realReader.createValue();
    }
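initialize() leaves the wrapped old-API reader and its reusable key/value holders in realReader, keyObj, and valueObj; the rest of a wrapper like this serves them back through the new mapreduce RecordReader methods. The following is a hedged sketch of that forwarding, assuming the wrapper class is generic in K and V; the eof field is an assumption, not code from the excerpt above.

      // Assumed field: tracks whether the wrapped reader has been exhausted.
      private boolean eof = false;

      @Override
      public boolean nextKeyValue() throws IOException {
        if (eof) {
          return false;
        }
        // The old-API next() fills the pre-created holders in place.
        eof = !realReader.next(keyObj, valueObj);
        return !eof;
      }

      @Override
      public K getCurrentKey() {
        return keyObj;
      }

      @Override
      public V getCurrentValue() {
        return valueObj;
      }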
Example #3
  @Override
  public RecordReader<MapWritable, MapWritable> getRecordReader(
      InputSplit split, JobConf job, Reporter reporter) throws IOException {
    return baseInputFormat.getRecordReader(split, job, reporter);
  }
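A hypothetical way this pass-through format could be exercised directly: WrapperInputFormat is an assumed name for the class declaring getRecordReader(...) above, and it is assumed to delegate getSplits(...) to baseInputFormat as well.

  // Hypothetical usage: list the splits and read every MapWritable record.
  public static void readAll(Path input) throws IOException {
    JobConf job = new JobConf();
    FileInputFormat.addInputPath(job, input);
    WrapperInputFormat format = new WrapperInputFormat(); // assumed class/constructor
    for (InputSplit split : format.getSplits(job, 1)) {
      RecordReader<MapWritable, MapWritable> reader =
          format.getRecordReader(split, job, Reporter.NULL);
      MapWritable key = reader.createKey();
      MapWritable value = reader.createValue();
      while (reader.next(key, value)) {
        // each record arrives as a pair of MapWritable field maps
      }
      reader.close();
    }
  }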