Example #1
  private void verifyCompressedFile(Path f, int expectedNumLines) throws IOException {
    Configuration conf = new Configuration();
    conf.set("fs.default.name", "file:///");
    FileSystem fs = FileSystem.get(conf);
    InputStream is = fs.open(f);
    CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
    CompressionCodec codec = ccf.getCodec(f);
    LOG.info("gzip check codec is " + codec);
    Decompressor decompressor = CodecPool.getDecompressor(codec);
    if (null == decompressor) {
      LOG.info("Verifying gzip sanity with null decompressor");
    } else {
      LOG.info("Verifying gzip sanity with decompressor: " + decompressor.toString());
    }
    is = codec.createInputStream(is, decompressor);
    BufferedReader r = new BufferedReader(new InputStreamReader(is));
    int numLines = 0;
    while (true) {
      String ln = r.readLine();
      if (ln == null) {
        break;
      }
      numLines++;
    }

    r.close();
    if (decompressor != null) {
      // Hand the decompressor back so the pool can reuse it.
      CodecPool.returnDecompressor(decompressor);
    }
    assertEquals("Did not read back correct number of lines", expectedNumLines, numLines);
    LOG.info("gzip sanity check returned " + numLines + " lines; ok.");
  }
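
A hedged companion sketch (not from the original listing): the writer side that produces the gzip file verifyCompressedFile() checks. The method name and the line contents are illustrative assumptions; the CodecPool and codec calls are the standard Hadoop API.

  private void writeCompressedFile(Path f, int numLines) throws IOException {
    Configuration conf = new Configuration();
    conf.set("fs.default.name", "file:///");
    FileSystem fs = FileSystem.get(conf);
    // GzipCodec needs a Configuration, so build it via ReflectionUtils.
    CompressionCodec codec = ReflectionUtils.newInstance(GzipCodec.class, conf);
    Compressor compressor = CodecPool.getCompressor(codec);
    BufferedWriter w = null;
    try {
      w = new BufferedWriter(new OutputStreamWriter(
          codec.createOutputStream(fs.create(f), compressor)));
      for (int i = 0; i < numLines; i++) {
        w.write("line " + i);
        w.newLine();
      }
    } finally {
      if (w != null) {
        w.close();
      }
      // Always hand the compressor back so the pool can reuse it.
      CodecPool.returnCompressor(compressor);
    }
  }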
Example #2
 public void returnDecompressor(Decompressor decompressor) {
   if (decompressor != null) {
     CodecPool.returnDecompressor(decompressor);
     // Decompressors annotated @DoNotPool are not cached by the pool, so
     // end() must be called here to release their underlying resources.
     if (decompressor.getClass().isAnnotationPresent(DoNotPool.class)) {
       decompressor.end();
     }
   }
 }
Example #3
 public void returnDecompressor(Decompressor decompressor) {
   if (decompressor != null) {
     if (LOG.isTraceEnabled()) LOG.trace("Returning decompressor " + decompressor + " to pool.");
     CodecPool.returnDecompressor(decompressor);
     if (decompressor.getClass().isAnnotationPresent(DoNotPool.class)) {
       if (LOG.isTraceEnabled()) LOG.trace("Ending decompressor " + decompressor);
       decompressor.end();
     }
   }
 }
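
Both variants matter because decompressors annotated with @DoNotPool (for example, Hadoop's built-in gzip decompressor) are never cached by CodecPool, so end() is the only point at which their underlying resources get released. A tiny illustrative helper (an assumption, not from the source):

  // Would CodecPool actually cache this decompressor on return?
  public static boolean isPoolable(Decompressor decompressor) {
    return !decompressor.getClass().isAnnotationPresent(DoNotPool.class);
  }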
Example #4
 public synchronized void close() throws IOException {
   try {
     if (in != null) {
       in.close();
     }
   } finally {
     if (decompressor != null) {
       // Return the decompressor even if in.close() threw; nulling the field
       // prevents a second close() from returning it to the pool twice.
       CodecPool.returnDecompressor(decompressor);
       decompressor = null;
     }
   }
 }
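
For context, a minimal open() sketch that would populate the in and decompressor fields this close() releases. The field names mirror the snippet above; everything else is an assumption:

  private InputStream in;
  private Decompressor decompressor;

  public void open(FileSystem fs, Path file, Configuration conf) throws IOException {
    InputStream raw = fs.open(file);
    CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(file);
    if (codec != null) {
      decompressor = CodecPool.getDecompressor(codec); // may be null for some codecs
      in = codec.createInputStream(raw, decompressor);
    } else {
      in = raw; // uncompressed file: read it directly
    }
  }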
Example #5
 public Compressor getCompressor() {
   CompressionCodec codec = getCodec(conf);
   if (codec != null) {
     Compressor compressor = CodecPool.getCompressor(codec);
     if (LOG.isTraceEnabled()) LOG.trace("Retrieved compressor " + compressor + " from pool.");
     if (compressor != null) {
       if (compressor.finished()) {
          // Someone returned this compressor to the CodecPool while still using it.
         LOG.warn("Compressor obtained from CodecPool is already finished()");
       }
       compressor.reset();
     }
     return compressor;
   }
   return null;
 }
Example #6
 public Compressor getCompressor() {
   CompressionCodec codec = getCodec(conf);
   if (codec != null) {
     Compressor compressor = CodecPool.getCompressor(codec);
     if (compressor != null) {
       if (compressor.finished()) {
        // Someone returned this compressor to the CodecPool while still using it.
         LOG.warn("Compressor obtained from CodecPool is already finished()");
         // throw new AssertionError(
         // "Compressor obtained from CodecPool is already finished()");
       }
       compressor.reset();
     }
     return compressor;
   }
   return null;
 }
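
A hedged call-site sketch for the two getCompressor() variants above (the method below is an assumption, not from the source): create the stream with the pooled compressor, then guarantee its return in a finally block, which is exactly the invariant the finished() check guards.

  public void writeCompressed(FileSystem fs, Path path, CompressionCodec codec, byte[] data)
      throws IOException {
    Compressor compressor = CodecPool.getCompressor(codec);
    OutputStream out = null;
    try {
      out = codec.createOutputStream(fs.create(path), compressor);
      out.write(data);
    } finally {
      if (out != null) {
        out.close();
      }
      if (compressor != null) {
        // Hand the compressor back once the stream is closed.
        CodecPool.returnCompressor(compressor);
      }
    }
  }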
Example #7
  public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);

    CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
    if (null != codec) {
      isCompressedInput = true;
      decompressor = CodecPool.getDecompressor(codec);
      if (codec instanceof SplittableCompressionCodec) {
        final SplitCompressionInputStream cIn =
            ((SplittableCompressionCodec) codec)
                .createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
        in = new CompressedSplitLineReader(cIn, job, this.recordDelimiterBytes);
        start = cIn.getAdjustedStart();
        end = cIn.getAdjustedEnd();
        filePosition = cIn;
      } else {
        in =
            new SplitLineReader(
                codec.createInputStream(fileIn, decompressor), job, this.recordDelimiterBytes);
        filePosition = fileIn;
      }
    } else {
      fileIn.seek(start);
      in = new SplitLineReader(fileIn, job, this.recordDelimiterBytes);
      filePosition = fileIn;
    }
    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    if (start != 0) {
      start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
  }
Example #8
  public Fetcher(
      Configuration job,
      ShuffleScheduler scheduler,
      MergeManager merger,
      ShuffleClientMetrics metrics,
      Shuffle shuffle,
      SecretKey jobTokenSecret,
      TezInputContext inputContext)
      throws IOException {
    this.job = job;
    this.scheduler = scheduler;
    this.merger = merger;
    this.metrics = metrics;
    this.shuffle = shuffle;
    this.id = ++nextId;
    this.jobTokenSecret = jobTokenSecret;
    ioErrs =
        inputContext
            .getCounters()
            .findCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.IO_ERROR.toString());
    wrongLengthErrs =
        inputContext
            .getCounters()
            .findCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.WRONG_LENGTH.toString());
    badIdErrs =
        inputContext
            .getCounters()
            .findCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.BAD_ID.toString());
    wrongMapErrs =
        inputContext
            .getCounters()
            .findCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.WRONG_MAP.toString());
    connectionErrs =
        inputContext
            .getCounters()
            .findCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.CONNECTION.toString());
    wrongReduceErrs =
        inputContext
            .getCounters()
            .findCounter(SHUFFLE_ERR_GRP_NAME, ShuffleErrors.WRONG_REDUCE.toString());

    if (ConfigUtils.isIntermediateInputCompressed(job)) {
      Class<? extends CompressionCodec> codecClass =
          ConfigUtils.getIntermediateInputCompressorClass(job, DefaultCodec.class);
      codec = ReflectionUtils.newInstance(codecClass, job);
      decompressor = CodecPool.getDecompressor(codec);
    } else {
      codec = null;
      decompressor = null;
    }

    // Note: the fallback default here is the stalled-copy timeout constant,
    // not a connect-specific one.
    this.connectionTimeout =
        job.getInt(
            TezJobConfig.TEZ_RUNTIME_SHUFFLE_CONNECT_TIMEOUT,
            TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_STALLED_COPY_TIMEOUT);
    this.readTimeout =
        job.getInt(
            TezJobConfig.TEZ_RUNTIME_SHUFFLE_READ_TIMEOUT,
            TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_READ_TIMEOUT);

    setName("fetcher#" + id);
    setDaemon(true);

    synchronized (Fetcher.class) {
      sslShuffle =
          job.getBoolean(
              TezJobConfig.TEZ_RUNTIME_SHUFFLE_ENABLE_SSL,
              TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_ENABLE_SSL);
      if (sslShuffle && sslFactory == null) {
        sslFactory = new SSLFactory(SSLFactory.Mode.CLIENT, job);
        try {
          sslFactory.init();
        } catch (Exception ex) {
          sslFactory.destroy();
          throw new RuntimeException(ex);
        }
      }
    }
  }
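
The constructor above takes a Decompressor from CodecPool, but the excerpt never returns it. A hypothetical teardown (the method name is an assumption) would mirror the acquire:

  public void shutDown() {
    if (decompressor != null) {
      CodecPool.returnDecompressor(decompressor);
      decompressor = null; // guard against returning it to the pool twice
    }
  }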
Example #9
 public void returnCompressor(Compressor compressor) {
   if (compressor != null) {
     if (LOG.isTraceEnabled()) LOG.trace("Returning compressor " + compressor + " to pool.");
     CodecPool.returnCompressor(compressor);
   }
 }
Example #10
 public void returnDecompressor(Decompressor decompressor) {
   if (decompressor != null) {
     CodecPool.returnDecompressor(decompressor);
   }
 }
Example #11
 public void returnCompressor(Compressor compressor) {
   if (compressor != null) {
     CodecPool.returnCompressor(compressor);
   }
 }
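
These null-safe wrappers are typically invoked from a finally block. A small sketch of an assumed call site (not from the source) using the returnDecompressor() helper from Example #10:

  public long countDecompressedBytes(CompressionCodec codec, InputStream rawStream)
      throws IOException {
    Decompressor decompressor = CodecPool.getDecompressor(codec);
    try {
      InputStream in = codec.createInputStream(rawStream, decompressor);
      long n = 0;
      while (in.read() != -1) {
        n++;
      }
      return n;
    } finally {
      returnDecompressor(decompressor); // null-safe helper from Example #10
    }
  }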
Example #12
  @Test
  public void testHadoop20JHParser() throws Exception {
    // Disabled
    if (true) return;

    final Configuration conf = new Configuration();
    final FileSystem lfs = FileSystem.getLocal(conf);

    boolean success = false;

    final Path rootInputDir =
        new Path(System.getProperty("test.tools.input.dir", "")).makeQualified(lfs);
    final Path rootTempDir =
        new Path(System.getProperty("test.build.data", "/tmp")).makeQualified(lfs);

    final Path rootInputPath = new Path(rootInputDir, "rumen/small-trace-test");
    final Path tempDir = new Path(rootTempDir, "TestHadoop20JHParser");
    lfs.delete(tempDir, true);

    final Path inputPath = new Path(rootInputPath, "v20-single-input-log.gz");
    final Path goldPath = new Path(rootInputPath, "v20-single-input-log-event-classes.text.gz");

    InputStream inputLogStream = new PossiblyDecompressedInputStream(inputPath, conf);

    InputStream inputGoldStream = new PossiblyDecompressedInputStream(goldPath, conf);

    BufferedInputStream bis = new BufferedInputStream(inputLogStream);
    bis.mark(10000);
    Hadoop20JHParser parser = new Hadoop20JHParser(bis);

    final Path resultPath = new Path(tempDir, "result.text");

    System.out.println("testHadoop20JHParser sent its output to " + resultPath);

    Compressor compressor = null; // initialized so the finally block can check it

    FileSystem fs = resultPath.getFileSystem(conf);
    CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(resultPath);
    OutputStream output;
    if (codec != null) {
      compressor = CodecPool.getCompressor(codec);
      output = codec.createOutputStream(fs.create(resultPath), compressor);
    } else {
      output = fs.create(resultPath);
    }

    PrintStream printStream = new PrintStream(output);

    try {
      assertEquals(
          "Hadoop20JHParser can't parse the test file",
          true,
          Hadoop20JHParser.canParse(inputLogStream));

      bis.reset();

      HistoryEvent event = parser.nextEvent();

      while (event != null) {
        printStream.println(event.getClass().getCanonicalName());
        event = parser.nextEvent();
      }

      printStream.close();

      LineReader goldLines = new LineReader(inputGoldStream);
      LineReader resultLines =
          new LineReader(new PossiblyDecompressedInputStream(resultPath, conf));

      int lineNumber = 1;

      try {
        Text goldLine = new Text();
        Text resultLine = new Text();

        int goldRead = goldLines.readLine(goldLine);
        int resultRead = resultLines.readLine(resultLine);

        // readLine() returns 0 at EOF, so the product is 0 once either file ends.
        while (goldRead * resultRead != 0) {
          if (!goldLine.equals(resultLine)) {
            assertEquals("Type mismatch detected", goldLine, resultLine);
            break;
          }

          goldRead = goldLines.readLine(goldLine);
          resultRead = resultLines.readLine(resultLine);

          ++lineNumber;
        }

        if (goldRead != resultRead) {
          assertEquals(
              "the "
                  + (goldRead > resultRead ? "gold" : "result")
                  + " file contains more text at line "
                  + lineNumber,
              goldRead,
              resultRead);
        }

        success = true;
      } finally {
        goldLines.close();
        resultLines.close();

        if (success) {
          lfs.delete(resultPath, false);
        }
      }

    } finally {
      if (parser == null) {
        inputLogStream.close();
      } else {
        parser.close();
      }

      if (inputGoldStream != null) {
        inputGoldStream.close();
      }

      if (compressor != null) {
        // Return the pooled compressor; the test otherwise leaks it.
        CodecPool.returnCompressor(compressor);
      }

      // it's okay to do this twice [if we get an error on input]
      printStream.close();
    }
  }
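
The test relies on PossiblyDecompressedInputStream to transparently handle .gz inputs. A hedged mini-version of that idea (the class name and structure below are assumptions, not Rumen's actual source) ties together the pool discipline shown in the earlier examples:

  public static class MaybeDecompressedInput extends InputStream {
    private final InputStream in;
    private final Decompressor decompressor;

    public MaybeDecompressedInput(Path file, Configuration conf) throws IOException {
      FileSystem fs = file.getFileSystem(conf);
      CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(file);
      InputStream raw = fs.open(file);
      if (codec != null) {
        decompressor = CodecPool.getDecompressor(codec);
        in = codec.createInputStream(raw, decompressor);
      } else {
        decompressor = null;
        in = raw;
      }
    }

    @Override
    public int read() throws IOException {
      return in.read();
    }

    @Override
    public void close() throws IOException {
      try {
        in.close();
      } finally {
        if (decompressor != null) {
          CodecPool.returnDecompressor(decompressor);
        }
      }
    }
  }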