Java InputFormat примеры использования

Язык программирования: Java

Пространство имен/Пакет: org.apache.hadoop.mapred

Класс/Тип: InputFormat

Примеров на hotexamples.com: 8

Java InputFormat — найдено 8 примеров. Это лучшие примеры Java-кода для org.apache.hadoop.mapred.InputFormat, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

getSplits(4)

getRecordReader(3)

getClass(1)

Пример #1

Показать файл

Файл: HiveUtil.java Проект: albertocsm/presto

  /**
   * Reports whether the file at {@code path} may be split when read through {@code inputFormat}.
   *
   * <p>A format whose simple class name is {@code OrcInputFormat} is always reported as
   * splittable. For any other format, the class hierarchy is searched from the concrete class
   * upwards for a declared {@code isSplitable(FileSystem, Path)} method, which is then invoked
   * reflectively.
   *
   * @param inputFormat the input format to inspect
   * @param fileSystem the file system passed through to {@code isSplitable}
   * @param path the path passed through to {@code isSplitable}
   * @return {@code true} for ORC, the result of {@code isSplitable} when such a method exists,
   *     and {@code false} when no class in the hierarchy declares it
   */
  static boolean isSplittable(InputFormat<?, ?> inputFormat, FileSystem fileSystem, Path path) {
    Class<?> formatClass = inputFormat.getClass();

    // ORC uses a custom InputFormat but is always splittable
    if ("OrcInputFormat".equals(formatClass.getSimpleName())) {
      return true;
    }

    // Walk up the hierarchy until some class declares isSplitable(FileSystem, Path).
    Method splitableMethod = null;
    Class<?> candidate = formatClass;
    while (splitableMethod == null && candidate != null) {
      try {
        splitableMethod = candidate.getDeclaredMethod("isSplitable", FileSystem.class, Path.class);
      } catch (NoSuchMethodException ignored) {
        // Not declared at this level; continue with the superclass.
        candidate = candidate.getSuperclass();
      }
    }

    if (splitableMethod == null) {
      return false;
    }

    try {
      // The method may not be public, so lift the access check before invoking it.
      splitableMethod.setAccessible(true);
      Object result = splitableMethod.invoke(inputFormat, fileSystem, path);
      return (boolean) result;
    } catch (InvocationTargetException | IllegalAccessException e) {
      throw Throwables.propagate(e);
    }
  }

Пример #2

Показать файл

Файл: MultiInputFormat.java Проект: haykinson/cascading

  /**
   * Computes, for each input format, the summed length of the splits it produces.
   *
   * <p>Entry {@code i} of the result is obtained by asking {@code inputFormats[i]} for its splits
   * using {@code jobConfs[i]} and a requested split count of 1, then adding up
   * {@code getLength()} of every returned split.
   *
   * @param inputFormats the input formats to query
   * @param jobConfs the job configurations, indexed in parallel with {@code inputFormats}
   * @return an array of total split lengths, one entry per input format
   * @throws IOException if computing the splits or reading a split length fails
   */
  private long[] getInputSizes(InputFormat[] inputFormats, JobConf[] jobConfs) throws IOException {
    final int formatCount = inputFormats.length;
    long[] totals = new long[formatCount];

    for (int index = 0; index < formatCount; index++) {
      long total = 0L;

      for (InputSplit split : inputFormats[index].getSplits(jobConfs[index], 1)) {
        total += split.getLength();
      }

      totals[index] = total;
    }

    return totals;
  }

Пример #3

Показать файл

Файл: MultiInputFormat.java Проект: Galuvian/cascading

  /**
   * Computes, for each input format, how many splits it produces.
   *
   * <p>Despite the name, entry {@code i} of the result is the <em>number</em> of splits returned
   * by {@code inputFormats[i].getSplits(jobConfs[i], numSplits)}, not their byte length.
   *
   * @param inputFormats the input formats to query
   * @param jobConfs the job configurations, indexed in parallel with {@code inputFormats}
   * @param numSplits the split count passed to every {@code getSplits} call
   * @return an array of split counts, one entry per input format
   * @throws IOException if computing the splits fails
   */
  private long[] getInputSplitSizes(InputFormat[] inputFormats, JobConf[] jobConfs, int numSplits)
      throws IOException {
    long[] splitCounts = new long[inputFormats.length];

    int index = 0;
    for (InputFormat format : inputFormats) {
      InputSplit[] produced = format.getSplits(jobConfs[index], numSplits);
      splitCounts[index] = produced.length;
      index++;
    }

    return splitCounts;
  }

Пример #4

Показать файл

Файл: MapReduceInputFormatWrapper.java Проект: HimanshuBhardwaj/elephant-bird

  /**
   * Obtains splits from the wrapped {@code mapred} input format and converts them to
   * {@code mapreduce} splits.
   *
   * <p>A split whose class is exactly {@code org.apache.hadoop.mapred.FileSplit} is converted to
   * a {@code FileSplit} with the same path, start, length and locations. Every other split is
   * wrapped in an {@code InputSplitWrapper}.
   *
   * @param context the job context from which the {@code JobConf} is taken
   * @return the converted splits, or {@code null} if the wrapped format returned {@code null}
   * @throws IOException if the wrapped format fails to compute splits or locations
   * @throws InterruptedException declared by the overridden method
   */
  @Override
  public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    JobConf conf = (JobConf) HadoopCompat.getConfiguration(context);
    initInputFormat(conf);

    org.apache.hadoop.mapred.InputSplit[] oldSplits =
        realInputFormat.getSplits(conf, conf.getNumMapTasks());
    if (oldSplits == null) {
      return null;
    }

    List<InputSplit> converted = new ArrayList<InputSplit>(oldSplits.length);
    for (int i = 0; i < oldSplits.length; i++) {
      org.apache.hadoop.mapred.InputSplit oldSplit = oldSplits[i];

      // Only exact FileSplit instances are converted; subclasses are wrapped as-is.
      if (oldSplit.getClass() != org.apache.hadoop.mapred.FileSplit.class) {
        converted.add(new InputSplitWrapper(oldSplit));
        continue;
      }

      org.apache.hadoop.mapred.FileSplit oldFileSplit =
          (org.apache.hadoop.mapred.FileSplit) oldSplit;
      FileSplit newFileSplit =
          new FileSplit(
              oldFileSplit.getPath(),
              oldFileSplit.getStart(),
              oldFileSplit.getLength(),
              oldFileSplit.getLocations());
      converted.add(newFileSplit);
    }

    return converted;
  }

Пример #5

Показать файл

Файл: HiveUtil.java Проект: albertocsm/presto

  /**
   * Creates a Hive {@code RecordReader} for one section of a file.
   *
   * <p>The indexes of the {@code REGULAR} columns are registered on {@code configuration} via
   * {@code setReadColumns}, every schema property whose name starts with
   * {@code "serialization."} is copied into the job configuration, and the reader is then opened
   * through the retry helper.
   *
   * @param configuration the configuration used to resolve the input format and build the job conf
   * @param path the file to read
   * @param start the offset of the split within the file
   * @param length the length of the split
   * @param schema the table or partition properties
   * @param columns the column handles; only those of type {@code REGULAR} are read
   * @return the record reader returned by the input format
   * @throws PrestoException with {@code HIVE_CANNOT_OPEN_SPLIT} if opening the reader fails
   */
  public static RecordReader<?, ?> createRecordReader(
      Configuration configuration,
      Path path,
      long start,
      long length,
      Properties schema,
      List<HiveColumnHandle> columns) {
    // Work out which hive columns are going to be read.
    List<HiveColumnHandle> regularColumns =
        ImmutableList.copyOf(filter(columns, handle -> handle.getColumnType() == REGULAR));
    List<Integer> regularColumnIndexes =
        ImmutableList.copyOf(transform(regularColumns, HiveColumnHandle::getHiveColumnIndex));

    // Announce the wanted columns to hive so it can optimize reads of column-oriented files.
    setReadColumns(configuration, regularColumnIndexes);

    InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, true);
    JobConf jobConf = new JobConf(configuration);
    FileSplit fileSplit = new FileSplit(path, start, length, (String[]) null);

    // Carry the serialization settings from the schema over to getRecordReader.
    for (String propertyName : schema.stringPropertyNames()) {
      if (propertyName.startsWith("serialization.")) {
        jobConf.set(propertyName, schema.getProperty(propertyName));
      }
    }

    try {
      return retry()
          .stopOnIllegalExceptions()
          .run(
              "createRecordReader",
              () -> inputFormat.getRecordReader(fileSplit, jobConf, Reporter.NULL));
    } catch (Exception e) {
      String message =
          format(
              "Error opening Hive split %s (offset=%s, length=%s) using %s: %s",
              path, start, length, getInputFormatName(schema), e.getMessage());
      throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
  }

Пример #6

Показать файл

Файл: MapReduceInputFormatWrapper.java Проект: HimanshuBhardwaj/elephant-bird

    /**
     * Prepares the wrapped {@code mapred} record reader for the given split.
     *
     * <p>The incoming split is turned back into a {@code mapred} split: an exact
     * {@code FileSplit} is rebuilt as {@code org.apache.hadoop.mapred.FileSplit}, and anything
     * else is treated as an {@code InputSplitWrapper} and unwrapped. A {@code Reporter} backed by
     * the task context is then handed to the wrapped input format, and the reusable key and value
     * objects are created from the resulting reader.
     *
     * @param split the split to read
     * @param context the task attempt context supplying configuration, progress and counters
     * @throws IOException if locations cannot be read or the wrapped reader cannot be opened
     * @throws InterruptedException declared by the overridden method
     */
    @Override
    public void initialize(InputSplit split, final TaskAttemptContext context)
        throws IOException, InterruptedException {

      final org.apache.hadoop.mapred.InputSplit mapredSplit;

      // Only exact FileSplit instances are rebuilt; everything else must be a wrapper.
      if (split.getClass() != FileSplit.class) {
        mapredSplit = ((InputSplitWrapper) split).realSplit;
      } else {
        FileSplit fileSplit = (FileSplit) split;
        mapredSplit =
            new org.apache.hadoop.mapred.FileSplit(
                fileSplit.getPath(),
                fileSplit.getStart(),
                fileSplit.getLength(),
                split.getLocations());
      }

      @SuppressWarnings("unchecked")
      Reporter contextReporter = new Reporter() { // Reporter interface over ctx

            // Null when the context offers no counter/status/progress access.
            final TaskInputOutputContext ioContext =
                context instanceof TaskInputOutputContext ? (TaskInputOutputContext) context : null;

            public void progress() {
              HadoopCompat.progress(context);
            }

            // @Override intentionally left commented out, as in the original source
            // (presumably because not every supported Hadoop version declares this method).
            public float getProgress() {
              if (ioContext == null) {
                return 0;
              }
              return ioContext.getProgress();
            }

            public void setStatus(String status) {
              if (ioContext == null) {
                return;
              }
              HadoopCompat.setStatus(ioContext, status);
            }

            public void incrCounter(String group, String counter, long amount) {
              if (ioContext == null) {
                return;
              }
              HadoopCompat.incrementCounter(ioContext.getCounter(group, counter), amount);
            }

            @SuppressWarnings("unchecked")
            public void incrCounter(Enum<?> key, long amount) {
              if (ioContext == null) {
                return;
              }
              HadoopCompat.incrementCounter(ioContext.getCounter(key), amount);
            }

            // The split is not exposed through this reporter.
            public org.apache.hadoop.mapred.InputSplit getInputSplit()
                throws UnsupportedOperationException {
              throw new UnsupportedOperationException();
            }

            public Counter getCounter(String group, String name) {
              if (ioContext == null) {
                return null;
              }
              return (Counter) HadoopCompat.getCounter(ioContext, group, name);
            }

            @SuppressWarnings("unchecked")
            public Counter getCounter(Enum<?> name) {
              if (ioContext == null) {
                return null;
              }
              return (Counter) ioContext.getCounter(name);
            }
          };

      JobConf jobConf = (JobConf) HadoopCompat.getConfiguration(context);
      realReader = realInputFormat.getRecordReader(mapredSplit, jobConf, contextReporter);

      // Key and value holders are created once here from the wrapped reader.
      keyObj = realReader.createKey();
      valueObj = realReader.createValue();
    }

Пример #7

Показать файл

Файл: SSTableInputFormat.java Проект: richardalow/cassowary

 /**
  * Returns the record reader produced by {@code baseInputFormat} for the given split.
  *
  * @param split the split to read
  * @param job the job configuration
  * @param reporter the reporter passed through to the underlying format
  * @return the reader created by {@code baseInputFormat}
  * @throws IOException if the underlying format fails to create the reader
  */
 @Override
 public RecordReader<MapWritable, MapWritable> getRecordReader(
     InputSplit split, JobConf job, Reporter reporter) throws IOException {
   RecordReader<MapWritable, MapWritable> delegateReader =
       baseInputFormat.getRecordReader(split, job, reporter);
   return delegateReader;
 }

Пример #8

Показать файл

Файл: SSTableInputFormat.java Проект: richardalow/cassowary

 /**
  * Returns the splits computed by {@code baseInputFormat}.
  *
  * @param job the job configuration
  * @param numSplits the split count passed through to the underlying format
  * @return the splits produced by {@code baseInputFormat}
  * @throws IOException if the underlying format fails to compute splits
  */
 @Override
 public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
   InputSplit[] delegateSplits = baseInputFormat.getSplits(job, numSplits);
   return delegateSplits;
 }