Example #1
0
 private int getRgCount(StripeInformation stripe, int rowIndexStride) {
   return (int) Math.ceil((double) stripe.getNumberOfRows() / rowIndexStride);
 }
Example #2
0
  /** Determine which stripes to read for a split. Populates stripeIxFrom and readState. */
  public void determineStripesToRead() {
    // The unit of caching for ORC is (rg x column) (see OrcBatchKey).
    List<StripeInformation> stripes = fileMetadata.getStripes();
    long offset = split.getStart(), maxOffset = offset + split.getLength();
    stripeIxFrom = -1;
    int stripeIxTo = -1;
    if (LlapIoImpl.LOGL.isDebugEnabled()) {
      String tmp = "FileSplit {" + split.getStart() + ", " + split.getLength() + "}; stripes ";
      for (StripeInformation stripe : stripes) {
        tmp += "{" + stripe.getOffset() + ", " + stripe.getLength() + "}, ";
      }
      LlapIoImpl.LOG.debug(tmp);
    }

    int stripeIx = 0;
    for (StripeInformation stripe : stripes) {
      long stripeStart = stripe.getOffset();
      if (offset > stripeStart) {
        // We assume splits will never start in the middle of the stripe.
        ++stripeIx;
        continue;
      }
      if (stripeIxFrom == -1) {
        if (DebugUtils.isTraceOrcEnabled()) {
          LlapIoImpl.LOG.info(
              "Including stripes from " + stripeIx + " (" + stripeStart + " >= " + offset + ")");
        }
        stripeIxFrom = stripeIx;
      }
      if (stripeStart >= maxOffset) {
        stripeIxTo = stripeIx;
        if (DebugUtils.isTraceOrcEnabled()) {
          LlapIoImpl.LOG.info(
              "Including stripes until "
                  + stripeIxTo
                  + " ("
                  + stripeStart
                  + " >= "
                  + maxOffset
                  + "); "
                  + (stripeIxTo - stripeIxFrom)
                  + " stripes");
        }
        break;
      }
      ++stripeIx;
    }
    if (stripeIxFrom == -1) {
      if (LlapIoImpl.LOG.isInfoEnabled()) {
        LlapIoImpl.LOG.info("Not including any stripes - empty split");
      }
    }
    if (stripeIxTo == -1 && stripeIxFrom != -1) {
      stripeIxTo = stripeIx;
      if (DebugUtils.isTraceOrcEnabled()) {
        LlapIoImpl.LOG.info(
            "Including stripes until "
                + stripeIx
                + " (end of file); "
                + (stripeIxTo - stripeIxFrom)
                + " stripes");
      }
    }
    readState = new boolean[stripeIxTo - stripeIxFrom][][];
  }
Example #3
0
  protected Void performDataRead() throws IOException {
    long startTime = counters.startTimeCounter();
    if (LlapIoImpl.LOGL.isInfoEnabled()) {
      LlapIoImpl.LOG.info("Processing data for " + split.getPath());
    }
    if (processStop()) {
      recordReaderTime(startTime);
      return null;
    }
    counters.setDesc(QueryFragmentCounters.Desc.TABLE, getDbAndTableName(split.getPath()));
    orcReader = null;
    // 1. Get file metadata from cache, or create the reader and read it.
    // Don't cache the filesystem object for now; Tez closes it and FS cache will fix all that
    fs = split.getPath().getFileSystem(conf);
    fileId = determineFileId(fs, split);
    counters.setDesc(QueryFragmentCounters.Desc.FILE, fileId);

    try {
      fileMetadata = getOrReadFileMetadata();
      consumer.setFileMetadata(fileMetadata);
      validateFileMetadata();
      if (columnIds == null) {
        columnIds = createColumnIds(fileMetadata);
      }

      // 2. Determine which stripes to read based on the split.
      determineStripesToRead();
    } catch (Throwable t) {
      recordReaderTime(startTime);
      consumer.setError(t);
      return null;
    }

    if (readState.length == 0) {
      consumer.setDone();
      recordReaderTime(startTime);
      return null; // No data to read.
    }
    counters.setDesc(QueryFragmentCounters.Desc.STRIPES, stripeIxFrom + "," + readState.length);

    // 3. Apply SARG if needed, and otherwise determine what RGs to read.
    int stride = fileMetadata.getRowIndexStride();
    ArrayList<OrcStripeMetadata> stripeMetadatas = null;
    boolean[] globalIncludes = null;
    boolean[] sargColumns = null;
    try {
      globalIncludes = OrcInputFormat.genIncludedColumns(fileMetadata.getTypes(), columnIds, true);
      if (sarg != null && stride != 0) {
        // TODO: move this to a common method
        int[] filterColumns =
            RecordReaderImpl.mapSargColumnsToOrcInternalColIdx(sarg.getLeaves(), columnNames, 0);
        // included will not be null, row options will fill the array with trues if null
        sargColumns = new boolean[globalIncludes.length];
        for (int i : filterColumns) {
          // filter columns may have -1 as index which could be partition column in SARG.
          if (i > 0) {
            sargColumns[i] = true;
          }
        }

        // If SARG is present, get relevant stripe metadata from cache or readers.
        stripeMetadatas = readStripesMetadata(globalIncludes, sargColumns);
      }

      // Now, apply SARG if any; w/o sarg, this will just initialize readState.
      boolean hasData = determineRgsToRead(globalIncludes, stride, stripeMetadatas);
      if (!hasData) {
        consumer.setDone();
        recordReaderTime(startTime);
        return null; // No data to read.
      }
    } catch (Throwable t) {
      cleanupReaders();
      consumer.setError(t);
      recordReaderTime(startTime);
      return null;
    }

    if (processStop()) {
      cleanupReaders();
      recordReaderTime(startTime);
      return null;
    }

    // 4. Get data from high-level cache.
    //    If some cols are fully in cache, this will also give us the modified list of columns to
    //    read for every stripe (null means read all of them - the usual path). In any case,
    //    readState will be modified for column x rgs that were fetched from high-level cache.
    List<Integer>[] stripeColsToRead = null;
    if (cache != null) {
      try {
        stripeColsToRead = produceDataFromCache(stride);
      } catch (Throwable t) {
        // produceDataFromCache handles its own cleanup.
        consumer.setError(t);
        cleanupReaders();
        recordReaderTime(startTime);
        return null;
      }
    }

    // 5. Create encoded data reader.
    // In case if we have high-level cache, we will intercept the data and add it there;
    // otherwise just pass the data directly to the consumer.
    Consumer<OrcEncodedColumnBatch> dataConsumer = (cache == null) ? this.consumer : this;
    try {
      ensureOrcReader();
      // Reader creating updates HDFS counters, don't do it here.
      DataWrapperForOrc dw = new DataWrapperForOrc();
      stripeReader = orcReader.encodedReader(fileId, dw, dw, POOL_FACTORY);
      stripeReader.setDebugTracing(DebugUtils.isTraceOrcEnabled());
    } catch (Throwable t) {
      consumer.setError(t);
      recordReaderTime(startTime);
      cleanupReaders();
      return null;
    }

    // 6. Read data.
    // TODO: I/O threadpool could be here - one thread per stripe; for now, linear.
    OrcBatchKey stripeKey = new OrcBatchKey(fileId, -1, 0);
    for (int stripeIxMod = 0; stripeIxMod < readState.length; ++stripeIxMod) {
      if (processStop()) {
        cleanupReaders();
        recordReaderTime(startTime);
        return null;
      }
      int stripeIx = stripeIxFrom + stripeIxMod;
      boolean[][] colRgs = null;
      boolean[] stripeIncludes = null;
      OrcStripeMetadata stripeMetadata = null;
      StripeInformation stripe;
      try {
        List<Integer> cols = stripeColsToRead == null ? null : stripeColsToRead[stripeIxMod];
        if (cols != null && cols.isEmpty()) continue; // No need to read this stripe.
        stripe = fileMetadata.getStripes().get(stripeIx);

        if (DebugUtils.isTraceOrcEnabled()) {
          LlapIoImpl.LOG.info(
              "Reading stripe " + stripeIx + ": " + stripe.getOffset() + ", " + stripe.getLength());
        }
        colRgs = readState[stripeIxMod];
        // We assume that NO_RGS value is only set from SARG filter and for all columns;
        // intermediate changes for individual columns will unset values in the array.
        // Skip this case for 0-column read. We could probably special-case it just like we do
        // in EncodedReaderImpl, but for now it's not that important.
        if (colRgs.length > 0 && colRgs[0] == SargApplier.READ_NO_RGS) continue;

        // 6.1. Determine the columns to read (usually the same as requested).
        if (cache == null || cols == null || cols.size() == colRgs.length) {
          cols = columnIds;
          stripeIncludes = globalIncludes;
        } else {
          // We are reading subset of the original columns, remove unnecessary bitmasks/etc.
          // This will never happen w/o high-level cache.
          stripeIncludes = OrcInputFormat.genIncludedColumns(fileMetadata.getTypes(), cols, true);
          colRgs = genStripeColRgs(cols, colRgs);
        }

        // 6.2. Ensure we have stripe metadata. We might have read it before for RG filtering.
        boolean isFoundInCache = false;
        if (stripeMetadatas != null) {
          stripeMetadata = stripeMetadatas.get(stripeIxMod);
        } else {
          stripeKey.stripeIx = stripeIx;
          stripeMetadata = metadataCache.getStripeMetadata(stripeKey);
          isFoundInCache = (stripeMetadata != null);
          if (!isFoundInCache) {
            counters.incrCounter(Counter.METADATA_CACHE_MISS);
            ensureMetadataReader();
            long startTimeHdfs = counters.startTimeCounter();
            stripeMetadata =
                new OrcStripeMetadata(
                    stripeKey, metadataReader, stripe, stripeIncludes, sargColumns);
            counters.incrTimeCounter(Counter.HDFS_TIME_US, startTimeHdfs);
            stripeMetadata = metadataCache.putStripeMetadata(stripeMetadata);
            if (DebugUtils.isTraceOrcEnabled()) {
              LlapIoImpl.LOG.info(
                  "Caching stripe "
                      + stripeKey.stripeIx
                      + " metadata with includes: "
                      + DebugUtils.toString(stripeIncludes));
            }
            stripeKey = new OrcBatchKey(fileId, -1, 0);
          }
          consumer.setStripeMetadata(stripeMetadata);
        }
        if (!stripeMetadata.hasAllIndexes(stripeIncludes)) {
          if (DebugUtils.isTraceOrcEnabled()) {
            LlapIoImpl.LOG.info(
                "Updating indexes in stripe "
                    + stripeKey.stripeIx
                    + " metadata for includes: "
                    + DebugUtils.toString(stripeIncludes));
          }
          assert isFoundInCache;
          counters.incrCounter(Counter.METADATA_CACHE_MISS);
          ensureMetadataReader();
          updateLoadedIndexes(stripeMetadata, stripe, stripeIncludes, sargColumns);
        } else if (isFoundInCache) {
          counters.incrCounter(Counter.METADATA_CACHE_HIT);
        }
      } catch (Throwable t) {
        consumer.setError(t);
        cleanupReaders();
        recordReaderTime(startTime);
        return null;
      }
      if (processStop()) {
        cleanupReaders();
        recordReaderTime(startTime);
        return null;
      }

      // 6.3. Finally, hand off to the stripe reader to produce the data.
      //      This is a sync call that will feed data to the consumer.
      try {
        // TODO: readEncodedColumns is not supposed to throw; errors should be propagated thru
        // consumer. It is potentially holding locked buffers, and must perform its own cleanup.
        // Also, currently readEncodedColumns is not stoppable. The consumer will discard the
        // data it receives for one stripe. We could probably interrupt it, if it checked that.
        stripeReader.readEncodedColumns(
            stripeIx,
            stripe,
            stripeMetadata.getRowIndexes(),
            stripeMetadata.getEncodings(),
            stripeMetadata.getStreams(),
            stripeIncludes,
            colRgs,
            dataConsumer);
      } catch (Throwable t) {
        consumer.setError(t);
        cleanupReaders();
        recordReaderTime(startTime);
        return null;
      }
    }

    // Done with all the things.
    recordReaderTime(startTime);
    dataConsumer.setDone();
    if (DebugUtils.isTraceMttEnabled()) {
      LlapIoImpl.LOG.info("done processing " + split);
    }

    // Close the stripe reader, we are done reading.
    cleanupReaders();
    return null;
  }