/**
   * Instantiate a RecordWriter as required. This creates a RecordWriter from the internal
   * AccumuloOutputFormat.
   */
  @Override
  public RecordWriter getRecordWriter(TaskAttemptContext context)
      throws IOException, InterruptedException {

    if (zoomLevel == -1) {
      zoomLevel =
          Integer.parseInt(
              context.getConfiguration().get(MrGeoAccumuloConstants.MRGEO_ACC_KEY_ZOOMLEVEL));
    }

    if (_innerFormat == null) {
      initialize(context);
    }

    if (_innerRecordWriter == null) {
      _innerRecordWriter = _innerFormat.getRecordWriter(context);
    }
    String pl = context.getConfiguration().get(MrGeoAccumuloConstants.MRGEO_ACC_KEY_VIZ);
    if (colViz == null) {
      colViz = new ColumnVisibility(pl);
    }
    AccumuloMrGeoRecordWriter outRW =
        new AccumuloMrGeoRecordWriter(
            zoomLevel, table, _innerRecordWriter, new String(colViz.getExpression()));

    return outRW;
  } // end getRecordWriter
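For context, a minimal driver-side sketch of where the configuration values read above might come from. Only the MrGeoAccumuloConstants keys are taken from the snippet; the property values, class name, and job name are illustrative assumptions.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

// Plus the project's MrGeoAccumuloConstants import; its package path is omitted here.
public final class AccumuloWriterSetupSketch {
  public static Job configure(Configuration conf) throws Exception {
    // Placeholder values: getRecordWriter above parses the zoom level as an int
    // and wraps the visibility expression in a ColumnVisibility.
    conf.set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_ZOOMLEVEL, "10");
    conf.set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_VIZ, "public");
    return Job.getInstance(conf, "mrgeo-accumulo-write");
  }
}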
    private TaskAttemptContext getContext(String namedOutput, TaskAttemptContext baseContext)
        throws IOException {
      Job job = getJob(baseContext.getJobID(), namedOutput, baseContext.getConfiguration());
      configureJob(namedOutput, job, outputs.get(namedOutput));

      return getTaskContext(baseContext, job);
    }
  @Override
  public void initialize(InputSplit split, TaskAttemptContext context)
      throws IOException, InterruptedException {
    ObjectMapper jsonMapper = DruidInitialization.getInstance().getObjectMapper();
    SegmentLoadSpec spec = readSegmentJobSpec(context.getConfiguration(), jsonMapper);

    final List<String> dimensions = spec.getDimensions();
    final List<String> metrics = spec.getMetrics();
    final DimFilter filter = spec.getFilter();
    final Interval interval =
        new Interval(context.getConfiguration().get(DruidInputFormat.CONF_DRUID_INTERVAL));

    String hdfsPath = ((DruidInputSplit) split).getPath();
    logger.info("Reading segment from " + hdfsPath);

    segmentDir = Files.createTempDir();
    logger.info("segment dir: " + segmentDir);

    FileSystem fs = FileSystem.get(context.getConfiguration());
    getSegmentFiles(hdfsPath, segmentDir, fs);
    logger.info("finished getting segment files");

    QueryableIndex index = IndexIO.loadIndex(segmentDir);
    StorageAdapter adapter = new QueryableIndexStorageAdapter(index);
    List<StorageAdapter> adapters = Lists.newArrayList(adapter);
    rowYielder =
        new IngestSegmentFirehose(
            adapters, dimensions, metrics, filter, interval, QueryGranularity.NONE);
  }
  @Override
  public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {

    // Obtain path to input list of input images and open input stream
    FileSplit fileSplit = (FileSplit) split;
    Path path = fileSplit.getPath();
    FileSystem fileSystem = path.getFileSystem(context.getConfiguration());
    FSDataInputStream fileIn = fileSystem.open(path);

    // Note the start and length fields in the FileSplit object are being used to
    // convey a range of lines in the input list of image URLs
    startLine = fileSplit.getStart();
    numLines = fileSplit.getLength();
    linesRead = 0; // total lines read by this particular record reader instance
    linesPerRecord = 100; // can be modified to change key/value pair size (may improve efficiency)

    // If it exists, get the relevant compression codec for the FileSplit
    CompressionCodecFactory codecFactory = new CompressionCodecFactory(context.getConfiguration());
    CompressionCodec codec = codecFactory.getCodec(path);

    // If the codec was found, use it to create a decompressed input stream.
    // Otherwise, assume input stream is already decompressed
    if (codec != null) {
      reader = new BufferedReader(new InputStreamReader(codec.createInputStream(fileIn)));
    } else {
      reader = new BufferedReader(new InputStreamReader(fileIn));
    }
  }
    public RecordReader<IntWritable, IntWritable> createRecordReader(
        InputSplit ignored, TaskAttemptContext taskContext) throws IOException {
      Configuration conf = taskContext.getConfiguration();

      final int count = conf.getInt(MAP_SLEEP_COUNT, 1);
      if (count < 0) {
        throw new IOException("Invalid map count: " + count);
      }

      int totalIReduces = conf.getInt(IREDUCE_STAGES_COUNT, 1);

      int reduceTasks =
          totalIReduces == 0
              ? taskContext.getNumReduceTasks()
              : conf.getInt(IREDUCE_TASKS_COUNT, 1);
      int sleepCount =
          totalIReduces == 0
              ? conf.getInt(REDUCE_SLEEP_COUNT, 1)
              : conf.getInt(IREDUCE_SLEEP_COUNT, 1);
      final int emitPerMapTask = sleepCount * reduceTasks;

      return new RecordReader<IntWritable, IntWritable>() {
        private int records = 0;
        private int emitCount = 0;
        private IntWritable key = null;
        private IntWritable value = null;

        public void initialize(InputSplit split, TaskAttemptContext context) {}

        public boolean nextKeyValue() throws IOException {
          if (count == 0) {
            return false;
          }
          key = new IntWritable();
          key.set(emitCount);
          int emit = emitPerMapTask / count;
          if ((emitPerMapTask) % count > records) {
            ++emit;
          }
          emitCount += emit;
          value = new IntWritable();
          value.set(emit);
          return records++ < count;
        }

        public IntWritable getCurrentKey() {
          return key;
        }

        public IntWritable getCurrentValue() {
          return value;
        }

        public void close() throws IOException {}

        public float getProgress() throws IOException {
          return count == 0 ? 1.0f : records / ((float) count);
        }
      };
    }
 public static TaskAttemptContext createTaskAttemptContext(
     org.apache.hadoop.mapreduce.TaskAttemptContext context) {
   return createTaskAttemptContext(
       new JobConf(context.getConfiguration()),
       org.apache.hadoop.mapred.TaskAttemptID.forName(context.getTaskAttemptID().toString()),
       Reporter.NULL);
 }
    private org.apache.hadoop.mapreduce.OutputCommitter createOutputCommitter(
        boolean newApiCommitter, JobID jobId, Configuration conf) throws Exception {
      org.apache.hadoop.mapreduce.OutputCommitter committer = null;

      LOG.info("OutputCommitter set in config " + conf.get("mapred.output.committer.class"));

      if (newApiCommitter) {
        org.apache.hadoop.mapreduce.TaskID taskId =
            new org.apache.hadoop.mapreduce.TaskID(jobId, true, 0);
        org.apache.hadoop.mapreduce.TaskAttemptID taskAttemptID =
            new org.apache.hadoop.mapreduce.TaskAttemptID(taskId, 0);
        org.apache.hadoop.mapreduce.TaskAttemptContext taskContext =
            new TaskAttemptContextImpl(conf, taskAttemptID);
        OutputFormat outputFormat =
            ReflectionUtils.newInstance(taskContext.getOutputFormatClass(), conf);
        committer = outputFormat.getOutputCommitter(taskContext);
      } else {
        committer =
            ReflectionUtils.newInstance(
                conf.getClass(
                    "mapred.output.committer.class",
                    FileOutputCommitter.class,
                    org.apache.hadoop.mapred.OutputCommitter.class),
                conf);
      }
      LOG.info("OutputCommitter is " + committer.getClass().getName());
      return committer;
    }
  @Override
  public RecordWriter<IEtlKey, CamusWrapper> getDataRecordWriter(
      TaskAttemptContext context, String fileName, CamusWrapper data, FileOutputCommitter committer)
      throws IOException, InterruptedException {

    // If recordDelimiter hasn't been initialized, do so now
    if (recordDelimiter == null) {
      recordDelimiter =
          context.getConfiguration().get(ETL_OUTPUT_RECORD_DELIMITER, DEFAULT_RECORD_DELIMITER);
    }

    // Get the filename for this RecordWriter.
    Path path =
        new Path(
            committer.getWorkPath(),
            EtlMultiOutputFormat.getUniqueFile(context, fileName, getFilenameExtension()));

    FileSystem fs = path.getFileSystem(context.getConfiguration());
    if (isCompressed) {
      return new ByteRecordWriter(
          new DataOutputStream(codec.createOutputStream(fs.create(path, false))), recordDelimiter);
    } else {
      return new ByteRecordWriter(fs.create(path, false), recordDelimiter);
    }
  }
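As a rough illustration of what the writer returned here does, a hedged sketch of a record writer that appends the configured delimiter after each payload. This is not Camus's actual ByteRecordWriter, which consumes CamusWrapper values; the field names and the assumption that the value is already available as a byte array are mine.

import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Sketch only: assumes the incoming value exposes its payload as a byte[].
public class ByteRecordWriterSketch extends RecordWriter<Object, byte[]> {
  private final DataOutputStream out;
  private final String recordDelimiter;

  public ByteRecordWriterSketch(DataOutputStream out, String recordDelimiter) {
    this.out = out;
    this.recordDelimiter = recordDelimiter;
  }

  @Override
  public void write(Object ignoredKey, byte[] value) throws IOException {
    out.write(value);                      // raw record bytes
    out.write(recordDelimiter.getBytes()); // delimiter configured via ETL_OUTPUT_RECORD_DELIMITER
  }

  @Override
  public void close(TaskAttemptContext context) throws IOException {
    out.close();
  }
}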
  @Test
  public void testDeleteMissing() {
    TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
    JobContext jobContext =
        new JobContextImpl(
            taskAttemptContext.getConfiguration(),
            taskAttemptContext.getTaskAttemptID().getJobID());
    Configuration conf = jobContext.getConfiguration();

    String sourceBase;
    String targetBase;
    FileSystem fs = null;
    try {
      OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
      fs = FileSystem.get(conf);
      sourceBase = TestDistCpUtils.createTestSetup(fs, FsPermission.getDefault());
      targetBase = TestDistCpUtils.createTestSetup(fs, FsPermission.getDefault());
      String targetBaseAdd = TestDistCpUtils.createTestSetup(fs, FsPermission.getDefault());
      fs.rename(new Path(targetBaseAdd), new Path(targetBase));

      DistCpOptions options =
          new DistCpOptions(Arrays.asList(new Path(sourceBase)), new Path("/out"));
      options.setSyncFolder(true);
      options.setDeleteMissing(true);
      options.appendToConf(conf);

      CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS);
      Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong()));
      listing.buildListing(listingFile, options);

      conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase);
      conf.set(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH, targetBase);

      committer.commitJob(jobContext);
      if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase)) {
        Assert.fail("Source and target folders are not in sync");
      }
      if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, sourceBase, targetBase)) {
        Assert.fail("Source and target folders are not in sync");
      }

      // Test for idempotent commit
      committer.commitJob(jobContext);
      if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, targetBase, sourceBase)) {
        Assert.fail("Source and target folders are not in sync");
      }
      if (!TestDistCpUtils.checkIfFoldersAreInSync(fs, sourceBase, targetBase)) {
        Assert.fail("Source and target folders are not in sync");
      }
    } catch (Throwable e) {
      LOG.error("Exception encountered while testing for delete missing", e);
      Assert.fail("Delete missing failure");
    } finally {
      TestDistCpUtils.delete(fs, "/tmp1");
      conf.set(DistCpConstants.CONF_LABEL_DELETE_MISSING, "false");
    }
  }
    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
      // Initialization method
      // System.out.println("initialize");
      // Logger.getLogger("KirchhoffMigration").log(Level.INFO,
      // "enter initialize()");
      FileSplit inputsplit = (FileSplit) split;
      Configuration conf = context.getConfiguration();
      LENGTH = conf.getLong("FileSplitLength", 0);
      SPILL = LENGTH / conf.getInt("SplitPerMap", 1);
      // LENGTH = 8;
      // SPILL = 8;
      assert (LENGTH >= SPILL);
      // System.out.println("length:" + LENGTH);
      // System.out.println("spill:" + SPILL);
      String filename = inputsplit.getPath().toString();
      // System.out.println("filename:" + filename);
      //            String buf = filename.substring(filename.lastIndexOf("fcxy") + 4,
      //                    filename.lastIndexOf("."));
      //            int count = Integer.parseInt(buf);
      // System.out.println(filename);
      start = inputsplit.getStart(); // start position of this split
      shotNum += start * 8 / Float.SIZE;
      long offset = LENGTH >= inputsplit.getLength() ? inputsplit.getLength() : LENGTH;
      end = start + offset; // end position of this split
      // System.out.println("inputSplitLength:" + split.getLength());
      // System.out.println("end:" + end);
      // start = inputsplit.getStart(); // start position of this split
      // end = start + inputsplit.getLength(); // end position of this split
      // System.out.println("start:" + start + " ,end:" + end);
      final Path file = inputsplit.getPath();
      // System.out.println(file.toString());
      // Open the file
      FileSystem fs = file.getFileSystem(context.getConfiguration());
      fileIn = fs.open(inputsplit.getPath());

      // Key step 2:
      // seek to the start of this split; a freshly opened stream points at the beginning of the file
      fileIn.seek(start);

      // in = new LineReader(fileIn, context.getConfiguration());

      // if (start != 0) {
      // System.out.println("not the first split");
      // // Key fix 1:
      // // If this is not the first split (say the first split covers 0-4, position 4 has already
      // // been read), skip ahead; otherwise data already consumed by the previous split would be
      // // read again.
      // start += (end - pos + 1);
      // }
      pos = start;
    }
  @Override
  public void initialize(InputSplit input, TaskAttemptContext tac)
      throws IOException, InterruptedException {
    super.initialize(input, tac);
    skipNonArticles = tac.getConfiguration().getBoolean(SKIP_NON_ARTICLES, false);
    skipRedirect = tac.getConfiguration().getBoolean(SKIP_REDIRECT, false);

    LOG.info(
        "Splitting option: [skip non-article: "
            + skipNonArticles
            + ", skip redirect: "
            + skipRedirect
            + "]");
  }
    @Override
    @SuppressWarnings("unchecked")
    public void initialize(InputSplit split, TaskAttemptContext ctx)
        throws IOException, InterruptedException {
      // set up columns that needs to read from the RCFile.

      tDesc = TStructDescriptor.getInstance(typeRef.getRawClass());
      thriftWritable = ThriftWritable.newInstance((Class<TBase<?, ?>>) typeRef.getRawClass());
      final List<Field> tFields = tDesc.getFields();

      FileSplit fsplit = (FileSplit) split;
      Path file = fsplit.getPath();

      LOG.info(
          String.format(
              "reading %s from %s:%d:%d",
              typeRef.getRawClass().getName(),
              file.toString(),
              fsplit.getStart(),
              fsplit.getStart() + fsplit.getLength()));

      ColumnarMetadata storedInfo = RCFileUtil.readMetadata(ctx.getConfiguration(), file);

      // list of field numbers
      List<Integer> tFieldIds =
          Lists.transform(
              tFields,
              new Function<Field, Integer>() {
                public Integer apply(Field fd) {
                  return Integer.valueOf(fd.getFieldId());
                }
              });

      columnsBeingRead =
          RCFileUtil.findColumnsToRead(ctx.getConfiguration(), tFieldIds, storedInfo);

      for (int idx : columnsBeingRead) {
        int fid = storedInfo.getFieldId(idx);
        if (fid >= 0) {
          knownRequiredFields.add(tFields.get(tFieldIds.indexOf(fid)));
        } else {
          readUnknownsColumn = true;
        }
      }

      ColumnProjectionUtils.setReadColumnIDs(ctx.getConfiguration(), columnsBeingRead);

      // finally!
      super.initialize(split, ctx);
    }
  @Override
  public RecordReader<Text, PairOfByteBuffers> createRecordReader(
      final InputSplit split, final TaskAttemptContext context) {
    String delimiter = context.getConfiguration().get("textinputformat.record.delimiter");
    byte[] recordDelimiterBytes = null;
    if (null != delimiter) {
      recordDelimiterBytes = delimiter.getBytes(Charsets.UTF_8);
    }
    LOG.info("recordDelimiterBytes = " + recordDelimiterBytes);

    int recordSize = context.getConfiguration().getInt(CONF_KEY_RECORD_SIZE, DEFAULT_RECORD_SIZE);
    LOG.info("recordSize = " + recordSize);
    return new ARFFManyLineRecordReader(recordDelimiterBytes, recordSize);
  }
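A hedged sketch of the driver-side settings this factory method reads. The "textinputformat.record.delimiter" key is the standard Hadoop property used above; the enclosing input format class name and the concrete values are assumptions.

import org.apache.hadoop.mapreduce.Job;

public final class ArffInputSetupSketch {
  public static void configure(Job job) {
    // Custom record delimiter consumed by createRecordReader above.
    job.getConfiguration().set("textinputformat.record.delimiter", "\n");
    // CONF_KEY_RECORD_SIZE comes from the snippet; the class holding it is assumed.
    job.getConfiguration().setInt(ARFFManyLineInputFormat.CONF_KEY_RECORD_SIZE, 128);
  }
}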
 public static OutputCommitter getOutputCommitter(TaskAttemptContext tac)
     throws IOException, InterruptedException {
   Map<String, OutputConfig> outputs = getNamedOutputs(tac.getConfiguration());
   Map<String, OutputCommitter> committers = Maps.newHashMap();
   for (Map.Entry<String, OutputConfig> e : outputs.entrySet()) {
     String namedOutput = e.getKey();
     Job job = getJob(tac.getJobID(), e.getKey(), tac.getConfiguration());
     OutputFormat fmt = getOutputFormat(namedOutput, job, e.getValue());
     TaskAttemptContext taskContext = getTaskContext(tac, job);
     OutputCommitter oc = fmt.getOutputCommitter(taskContext);
     committers.put(namedOutput, oc);
   }
   return new CompositeOutputCommitter(outputs, committers);
 }
  @Test
  public void testPreserveStatus() {
    TaskAttemptContext taskAttemptContext = getTaskAttemptContext(config);
    JobContext jobContext =
        new JobContextImpl(
            taskAttemptContext.getConfiguration(),
            taskAttemptContext.getTaskAttemptID().getJobID());
    Configuration conf = jobContext.getConfiguration();

    String sourceBase;
    String targetBase;
    FileSystem fs = null;
    try {
      OutputCommitter committer = new CopyCommitter(null, taskAttemptContext);
      fs = FileSystem.get(conf);
      FsPermission sourcePerm = new FsPermission((short) 511);
      FsPermission initialPerm = new FsPermission((short) 448);
      sourceBase = TestDistCpUtils.createTestSetup(fs, sourcePerm);
      targetBase = TestDistCpUtils.createTestSetup(fs, initialPerm);

      DistCpOptions options =
          new DistCpOptions(Arrays.asList(new Path(sourceBase)), new Path("/out"));
      options.preserve(FileAttribute.PERMISSION);
      options.appendToConf(conf);

      CopyListing listing = new GlobbedCopyListing(conf, CREDENTIALS);
      Path listingFile = new Path("/tmp1/" + String.valueOf(rand.nextLong()));
      listing.buildListing(listingFile, options);

      conf.set(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH, targetBase);

      committer.commitJob(jobContext);
      if (!checkDirectoryPermissions(fs, targetBase, sourcePerm)) {
        Assert.fail("Permission don't match");
      }

      // Test for idempotent commit
      committer.commitJob(jobContext);
      if (!checkDirectoryPermissions(fs, targetBase, sourcePerm)) {
        Assert.fail("Permission don't match");
      }

    } catch (IOException e) {
      LOG.error("Exception encountered while testing for preserve status", e);
      Assert.fail("Preserve status failure");
    } finally {
      TestDistCpUtils.delete(fs, "/tmp1");
    }
  }
  @Override
  public void initialize(InputSplit split, TaskAttemptContext context)
      throws IOException, InterruptedException {
    FileSplit fileSplit = (FileSplit) split;
    LOG.info("Initializing TFileRecordReader : " + fileSplit.getPath().toString());
    start = fileSplit.getStart();
    end = start + fileSplit.getLength();

    FileSystem fs = fileSplit.getPath().getFileSystem(context.getConfiguration());
    splitPath = fileSplit.getPath();
    fin = fs.open(splitPath);
    reader =
        new TFile.Reader(fin, fs.getFileStatus(splitPath).getLen(), context.getConfiguration());
    scanner = reader.createScannerByByteRange(start, fileSplit.getLength());
  }
 @Override
 public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext job)
     throws IOException, InterruptedException {
   return new RedisHashRecordWriter(
       job.getConfiguration().get(REDIS_HASH_KEY_CONF),
       job.getConfiguration().get(REDIS_HOSTS_CONF));
 }
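For completeness, a hedged sketch of how a job might wire up the two configuration keys this method reads. The enclosing class name RedisHashOutputFormat, the host list, and the key/value types are assumptions; only the two conf constants come from the snippet.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

public final class RedisOutputDriverSketch {
  public static Job configure(Configuration conf) throws Exception {
    conf.set(RedisHashOutputFormat.REDIS_HOSTS_CONF, "redis-1:6379,redis-2:6379"); // read in getRecordWriter
    conf.set(RedisHashOutputFormat.REDIS_HASH_KEY_CONF, "user:lastSeen");          // Redis hash to write into
    Job job = Job.getInstance(conf, "redis-hash-export");
    job.setOutputFormatClass(RedisHashOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    return job;
  }
}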
  @Override
  public RecordWriter<IntWritable, Double2DArrayWritable> getRecordWriter(
      TaskAttemptContext context) throws IOException, InterruptedException {

    // setup variables for image generation
    FileSystem fs = FileSystem.get(context.getConfiguration());
    Path picTempPath = FileOutputFormat.getOutputPath(context);
    fs.mkdirs(picTempPath);
    int k = context.getConfiguration().getInt("k", -1);

    Path imgPath = picTempPath.suffix("/points.png");

    if (k == -1) throw new RuntimeException("k is -1");

    return new PicRecordWriter(imgPath, k, context.getConfiguration());
  }
  @SuppressWarnings("unchecked")
  @Override
  public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context)
      throws IOException, InterruptedException {

    Configuration conf = context.getConfiguration();
    String keyValueSeparator = conf.get(MAPRED_SEPARATOR, "\t");
    String extension = "";

    // Create the output streams
    Path files[] = getDefaultWorkFiles(context, extension);
    DataOutputStream[] outStreams = null;

    if (files != null && files.length != 0) {
      outStreams = new DataOutputStream[files.length];
      for (int i = 0; i < files.length; ++i) {
        outStreams[i] = files[i].getFileSystem(conf).create(files[i], false);
      }
    }

    // Create the record writer selector
    Class<? extends MultiTextRecordWriterSelector> selectorClass =
        getRecordWriterSelectorClass(context);
    MultiTextRecordWriterSelector<K, V> selector =
        (MultiTextRecordWriterSelector<K, V>) ReflectionUtils.newInstance(selectorClass, conf);

    return new MultiTextRecordWriter<K, V>(outStreams, keyValueSeparator, selector);
  }
    public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
      FileSplit split = (FileSplit) genericSplit;
      Configuration job = context.getConfiguration();
      m_Sb.setLength(0);
      m_Start = split.getStart();
      m_End = m_Start + split.getLength();
      final Path file = split.getPath();
      compressionCodecs = new CompressionCodecFactory(job);
      final CompressionCodec codec = compressionCodecs.getCodec(file);

      // open the file and seek to the m_Start of the split
      FileSystem fs = file.getFileSystem(job);
      long length = fs.getFileStatus(file).getLen();
      FSDataInputStream fileIn = fs.open(split.getPath());
      if (m_Start > 0) fileIn.seek(m_Start);
      if (codec != null) {
        CompressionInputStream inputStream = codec.createInputStream(fileIn);
        m_Input = new BufferedReader(new InputStreamReader(inputStream));
        m_End = length;
      } else {
        m_Input = new BufferedReader(new InputStreamReader(fileIn));
      }
      m_Current = m_Start;
      m_Key = split.getPath().getName();
    }
 @Override
 public void open(TaskAttemptContext job) throws IOException {
   Configuration conf = job.getConfiguration();
   solr = SolrUtils.getCommonsHttpSolrServer(conf);
   commitSize = conf.getInt(SolrConstants.COMMIT_SIZE, 1000);
   solrMapping = SolrMappingReader.getInstance(conf);
 }
  /**
   * Write random values to the writer assuming a table created using {@link #FAMILIES} as column
   * family descriptors
   */
  private void writeRandomKeyValues(
      RecordWriter<ImmutableBytesWritable, Cell> writer,
      TaskAttemptContext context,
      Set<byte[]> families,
      int numRows)
      throws IOException, InterruptedException {
    byte keyBytes[] = new byte[Bytes.SIZEOF_INT];
    int valLength = 10;
    byte valBytes[] = new byte[valLength];

    int taskId = context.getTaskAttemptID().getTaskID().getId();
    assert taskId < Byte.MAX_VALUE : "Unit tests don't support > 127 tasks!";
    final byte[] qualifier = Bytes.toBytes("data");
    Random random = new Random();
    for (int i = 0; i < numRows; i++) {

      Bytes.putInt(keyBytes, 0, i);
      random.nextBytes(valBytes);
      ImmutableBytesWritable key = new ImmutableBytesWritable(keyBytes);

      for (byte[] family : families) {
        Cell kv = new KeyValue(keyBytes, family, qualifier, valBytes);
        writer.write(key, kv);
      }
    }
  }
  private DataOutputStream createRawOutputStream(TaskAttemptContext ctx) throws IOException {
    boolean isCompressed = getCompressOutput(ctx);

    if (!isCompressed) {
      Path file = getDefaultWorkFile(ctx, ".nt");
      FileSystem fs = file.getFileSystem(ctx.getConfiguration());
      return fs.create(file, false);
    } else {
      Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(ctx, GzipCodec.class);

      CompressionCodec codec = ReflectionUtils.newInstance(codecClass, ctx.getConfiguration());
      Path file = getDefaultWorkFile(ctx, ".nt" + codec.getDefaultExtension());
      FileSystem fs = file.getFileSystem(ctx.getConfiguration());
      FSDataOutputStream fileOut = fs.create(file, false);
      return new DataOutputStream(codec.createOutputStream(fileOut));
    }
  }
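The two branches above are driven by the standard FileOutputFormat compression settings; below is a small sketch of how a driver could switch them on. The helper class name is an assumption, while the FileOutputFormat calls are standard Hadoop API.

import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public final class CompressionSetupSketch {
  public static void enableGzipOutput(Job job) {
    FileOutputFormat.setCompressOutput(job, true);                   // makes getCompressOutput(ctx) return true
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class); // matches the GzipCodec default above
  }
}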
 public RecordReader<Text, SequencedFragment> createRecordReader(
     InputSplit genericSplit, TaskAttemptContext context)
     throws IOException, InterruptedException {
   context.setStatus(genericSplit.toString());
   return new QseqRecordReader(
       ContextUtil.getConfiguration(context),
       (FileSplit) genericSplit); // cast as per example in TextInputFormat
 }
 @Override
 public void initialize(InputSplit genericSplit, TaskAttemptContext ctxt)
     throws IOException, InterruptedException {
   final GridmixSplit split = (GridmixSplit) genericSplit;
   final Configuration conf = ctxt.getConfiguration();
   factory =
       new ReadRecordFactory(
           split.getLength(), split.getInputRecords(), new FileQueue(split, conf), conf);
 }
 public WikipediaRecordReader(FileSplit split, TaskAttemptContext context) throws IOException {
   // open the file and seek to the start of the split
   start = split.getStart();
   end = start + split.getLength();
   Path file = split.getPath();
   FileSystem fs = file.getFileSystem(context.getConfiguration());
   fsin = fs.open(file);
   fsin.seek(start);
 }
 /* (non-Javadoc)
  * @see org.apache.hadoop.mapreduce.lib.output.FileOutputFormat#getRecordWriter(org.apache.hadoop.mapreduce.TaskAttemptContext)
  */
 @Override
 public RecordWriter<WritableComparable, Tuple> getRecordWriter(TaskAttemptContext job)
     throws IOException, InterruptedException {
   Configuration conf = job.getConfiguration();
   Path file = getDefaultWorkFile(job, "");
   FileSystem fs = file.getFileSystem(conf);
   FSDataOutputStream fileOut = fs.create(file, false);
   return new InterRecordWriter(fileOut);
 }
 @Override
 public RecordReader<LongWritable, Writable> createRecordReader(
     InputSplit split, TaskAttemptContext taskAttempt) throws IOException, InterruptedException {
   if (typeRef == null) {
     typeRef =
         ThriftUtils.getTypeRef(taskAttempt.getConfiguration(), RCFileThriftInputFormat.class);
   }
   return new ThriftReader(createUnwrappedRecordReader(split, taskAttempt));
 }
 public void initialize(InputSplit split, TaskAttemptContext context)
     throws IOException, InterruptedException {
   super.initialize(split, context);
   assert schemas.size() == 2;
   Configuration conf = context.getConfiguration();
   conf.set(props.getProperty("AVRO_INPUT"), Submitter.AvroIO.KV.name());
   conf.set(props.getProperty("AVRO_KEY_INPUT_SCHEMA"), schemas.get(0).toString());
   conf.set(props.getProperty("AVRO_VALUE_INPUT_SCHEMA"), schemas.get(1).toString());
 }
  /** {@inheritDoc} */
  @Override
  public RecordWriter<FixedByteRecord, NullWritable> getRecordWriter(final TaskAttemptContext arg0)
      throws IOException, InterruptedException {

    final Configuration conf = arg0.getConfiguration();
    final String outputPath = conf.get(DataGenerator.OUTPUT_PATH);

    return new GeneratorRecordWriter(new Path(outputPath), conf);
  }