Пример #1
0
 /**
  * Lazily creates {@code orcReader} for the current split; a no-op when the reader already exists.
  *
  * <p>The reader is opened on the path returned by {@code HdfsUtils.getFileIdPath} for the split's
  * path and {@code fileId}. The interval spanning the reader creation is recorded against the
  * {@code HDFS_TIME_US} counter.
  *
  * @throws IOException propagated from resolving the path or creating the reader
  */
 private void ensureOrcReader() throws IOException {
   if (orcReader == null) {
     final Path readerPath = HdfsUtils.getFileIdPath(fs, split.getPath(), fileId);
     if (DebugUtils.isTraceOrcEnabled()) {
       LOG.info("Creating reader for " + readerPath + " (" + split.getPath() + ")");
     }

     // Time only the reader construction itself.
     final long timerStart = counters.startTimeCounter();
     ReaderOptions readerOptions =
         OrcFile.readerOptions(conf)
             .filesystem(fs)
             .fileMetadata(fileMetadata);
     orcReader = EncodedOrcFile.createReader(readerPath, readerOptions);
     counters.incrTimeCounter(Counter.HDFS_TIME_US, timerStart);
   }
 }
  /**
   * Appends the ORC stripe described by the given key/value pair to the merged output file.
   *
   * <p>Files whose key is flagged incompatible, or which fail {@code checkCompatibility}, are
   * recorded in {@code incompatFileSet} and skipped. The first key that is not flagged
   * incompatible supplies the writer configuration (compression, compression buffer size,
   * version, row index stride) used to create {@code outWriter}. The stripe is read from the
   * input file as raw bytes and handed to {@code outWriter.appendStripe}.
   *
   * <p>On any failure the {@code exception} flag is set, {@code closeOp(true)} is invoked, and the
   * stripe input stream is closed.
   *
   * @param key an {@code OrcFileKeyWrapper}, either directly or wrapped in a {@code CombineHiveKey}
   * @param value the {@code OrcFileValueWrapper} carrying the stripe information, stripe
   *     statistics and user metadata
   * @throws HiveException wrapping any {@code Throwable} raised while merging, or if the stripe
   *     input stream cannot be closed
   */
  private void processKeyValuePairs(Object key, Object value) throws HiveException {
    String filePath = "";
    try {
      OrcFileValueWrapper v;
      OrcFileKeyWrapper k;
      if (key instanceof CombineHiveKey) {
        k = (OrcFileKeyWrapper) ((CombineHiveKey) key).getKey();
      } else {
        k = (OrcFileKeyWrapper) key;
      }

      // skip incompatible file, files that are missing stripe statistics are set to incompatible
      if (k.isIncompatFile()) {
        LOG.warn("Incompatible ORC file merge! Stripe statistics is missing. " + k.getInputPath());
        incompatFileSet.add(k.getInputPath());
        return;
      }

      filePath = k.getInputPath().toUri().getPath();

      fixTmpPath(k.getInputPath().getParent());

      v = (OrcFileValueWrapper) value;

      if (prevPath == null) {
        prevPath = k.getInputPath();
        reader = OrcFile.createReader(fs, k.getInputPath());
        if (isLogInfoEnabled) {
          LOG.info("ORC merge file input path: " + k.getInputPath());
        }
      }

      // store the orc configuration from the first file. All other files should
      // match this configuration before merging else will not be merged
      if (outWriter == null) {
        compression = k.getCompression();
        compressBuffSize = k.getCompressBufferSize();
        version = k.getVersion();
        columnCount = k.getTypes().get(0).getSubtypesCount();
        rowIndexStride = k.getRowIndexStride();

        OrcFile.WriterOptions options =
            OrcFile.writerOptions(jc)
                .compress(compression)
                .version(version)
                .rowIndexStride(rowIndexStride)
                .inspector(reader.getObjectInspector());
        // compression buffer size should only be set if compression is enabled
        if (compression != CompressionKind.NONE) {
          // enforce is required to retain the buffer sizes of old files instead of orc writer
          // inferring the optimal buffer size
          options.bufferSize(compressBuffSize).enforceBufferSize();
        }

        outWriter = OrcFile.createWriter(outPath, options);
        if (isLogDebugEnabled) {
          // Log at the level the guard checks for (was LOG.info behind a debug guard).
          LOG.debug("ORC merge file output path: " + outPath);
        }
      }

      if (!checkCompatibility(k)) {
        incompatFileSet.add(k.getInputPath());
        return;
      }

      // next file in the path
      // NOTE(review): prevPath is not advanced here, so this branch re-creates the reader for
      // every stripe of any file other than the first one. Confirm whether prevPath should be
      // updated alongside the reader.
      if (!k.getInputPath().equals(prevPath)) {
        reader = OrcFile.createReader(fs, k.getInputPath());
      }

      final long stripeOffset = v.getStripeInformation().getOffset();
      final long stripeLength = v.getStripeInformation().getLength();
      // The whole stripe is buffered in one byte[]; refuse lengths that would be silently
      // truncated (or turn negative) when narrowed to int.
      if (stripeLength < 0 || stripeLength > Integer.MAX_VALUE) {
        throw new IOException(
            "Stripe length "
                + stripeLength
                + " at offset "
                + stripeOffset
                + " in "
                + k.getInputPath()
                + " does not fit in a single byte array");
      }

      // initialize buffer to read the entire stripe
      byte[] buffer = new byte[(int) stripeLength];
      fdis = fs.open(k.getInputPath());
      fdis.readFully(stripeOffset, buffer, 0, buffer.length);

      // append the stripe buffer to the new ORC file
      outWriter.appendStripe(
          buffer, 0, buffer.length, v.getStripeInformation(), v.getStripeStatistics());

      if (isLogInfoEnabled) {
        LOG.info(
            "Merged stripe from file "
                + k.getInputPath()
                + " [ offset : "
                + stripeOffset
                + " length: "
                + stripeLength
                + " row: "
                + v.getStripeStatistics().getColStats(0).getNumberOfValues()
                + " ]");
      }

      // add user metadata to footer in case of any
      if (v.isLastStripeInFile()) {
        outWriter.appendUserMetadata(v.getUserMetadata());
      }
    } catch (Throwable e) {
      this.exception = true;
      LOG.error("Closing operator..Exception: " + ExceptionUtils.getStackTrace(e));
      throw new HiveException(e);
    } finally {
      try {
        if (exception) {
          closeOp(true);
        }
      } finally {
        // Nested finally: the stripe stream must be released even when closeOp itself throws.
        if (fdis != null) {
          try {
            fdis.close();
          } catch (IOException e) {
            throw new HiveException(String.format("Unable to close file %s", filePath), e);
          } finally {
            fdis = null;
          }
        }
      }
    }
  }