Ejemplo n.º 1
0
  /**
   * Produces the next merged event, if any.
   *
   * <p>Each pass takes the pending record of the current primary reader, copies that
   * reader's key into {@code recordIdentifier}, advances the reader, and then decides
   * whether the reader stays primary or is swapped for the head of {@code readers}.
   * Events whose transaction id is rejected by {@code validTxnList} are skipped. When
   * {@code collapse} is set, the loop keeps consuming events as long as they compare
   * equal to {@code prevKey} by row.
   *
   * @param recordIdentifier receives the key of each event consumed; it is cast to
   *     {@code ReaderKey} to read the transaction id
   * @param prev the output struct; its fields are linked to the accepted record
   * @return {@code true} if the loop stopped on an accepted event that starts a new row
   *     (or on any accepted event when not collapsing); {@code false} if the readers ran
   *     out first
   * @throws IOException propagated from advancing the primary reader
   */
  @Override
  public boolean next(RecordIdentifier recordIdentifier, OrcStruct prev) throws IOException {
    boolean sameRow = true;
    while (sameRow && primary != null) {

      // Capture the pending record and its key before the reader moves on.
      final OrcStruct row = primary.nextRecord;
      recordIdentifier.set(primary.key);

      // Advance the reader, handing it the spare struct, and keep the record we just
      // took as the spare for the following call so that allocations stay minimal.
      primary.next(extraValue);
      extraValue = row;

      // The reader has moved: either it is drained, or its new key may now sort after
      // the best key among the other readers. In both cases pick a new primary.
      final boolean drained = primary.nextRecord == null;
      if (drained || primary.key.compareTo(secondaryKey) > 0) {

        // A reader that still has data goes back into the sorted map.
        if (!drained) {
          readers.put(primary.key, primary);
        }

        // Promote the smallest remaining reader and refresh the runner-up key.
        final Map.Entry<ReaderKey, ReaderPair> head = readers.pollFirstEntry();
        if (head == null) {
          primary = null;
        } else {
          primary = head.getValue();
          secondaryKey = readers.isEmpty() ? null : readers.firstKey();
        }
      }

      // Only events from valid transactions are surfaced; anything else is dropped
      // and the loop simply moves on to the next event.
      if (validTxnList.isTxnValid(((ReaderKey) recordIdentifier).getCurrentTransactionId())) {

        // When collapsing, an event only ends the loop if it belongs to a new row.
        sameRow = collapse && prevKey.compareRow(recordIdentifier) == 0;
        if (collapse && !sameRow) {
          prevKey.set(recordIdentifier);
        }

        // Hand the record out by re-pointing the fields rather than copying them.
        prev.linkFields(row);
      }
    }
    return !sameRow;
  }
Ejemplo n.º 2
0
  /**
   * Builds a merger that merge-sorts the ACID events of a base file and its deltas.
   *
   * <p>A reader pair is created for the base (when one is supplied) and for every delta
   * bucket file that exists and has a usable flush length. Pairs that have at least one
   * record are registered in {@code readers}; the smallest one becomes {@code primary}.
   *
   * @param conf the configuration
   * @param collapseEvents whether events on the same row should be collapsed
   * @param reader the reader for the base file; may be {@code null}, in which case no
   *     base reader is set up
   * @param isOriginal whether the base file is a pre-ACID file
   * @param bucket the bucket being read
   * @param validTxnList the transaction list stored on this merger
   * @param options the options to read with
   * @param deltaDirectory the delta directories to include; may be {@code null}
   * @throws IOException propagated from the reader and file system calls made here
   */
  OrcRawRecordMerger(
      Configuration conf,
      boolean collapseEvents,
      Reader reader,
      boolean isOriginal,
      int bucket,
      ValidTxnList validTxnList,
      Reader.Options options,
      Path[] deltaDirectory)
      throws IOException {
    this.conf = conf;
    this.collapse = collapseEvents;
    this.offset = options.getOffset();
    this.length = options.getLength();
    this.validTxnList = validTxnList;

    // Derive options that describe the event rather than the base row.
    final Reader.Options eventOptions = createEventOptions(options);

    if (reader != null) {
      // Work out the min/max keys implied by the requested offset and length.
      if (isOriginal) {
        discoverOriginalKeyBounds(reader, bucket, options);
      } else {
        discoverKeyBounds(reader, options);
      }
      LOG.info("min key = " + minKey + ", max key = " + maxKey);

      // From here on the key bounds, not the byte range, limit what is read.
      final ReaderKey baseKey = new ReaderKey();
      final ReaderPair basePair;
      if (isOriginal) {
        final Reader.Options openEnded = options.clone();
        openEnded.range(openEnded.getOffset(), Long.MAX_VALUE);
        basePair = new OriginalReaderPair(baseKey, reader, bucket, minKey, maxKey, openEnded);
      } else {
        basePair = new ReaderPair(baseKey, reader, bucket, minKey, maxKey, eventOptions);
      }

      // Register the base only when it has something to contribute.
      if (basePair.nextRecord != null) {
        readers.put(baseKey, basePair);
      }
      baseReader = basePair.recordReader;
    } else {
      baseReader = null;
    }

    // Deltas are always read in full.
    eventOptions.range(0, Long.MAX_VALUE);
    // Never push a sarg down to a delta: it can give wrong results (when the latest
    // valid version of a record is filtered out by the sarg) or ArrayOutOfBounds
    // errors (when the sarg is applied to a delete record).
    eventOptions.searchArgument(null, null);

    if (deltaDirectory != null) {
      for (Path deltaDir : deltaDirectory) {
        final ReaderKey deltaKey = new ReaderKey();
        final Path bucketFile = AcidUtils.createBucketFile(deltaDir, bucket);
        final FileSystem deltaFs = bucketFile.getFileSystem(conf);
        final long flushedLength = getLastFlushLength(deltaFs, bucketFile);

        // Skip deltas with no bucket file or with a flush length of -1.
        if (!deltaFs.exists(bucketFile) || flushedLength == -1) {
          continue;
        }

        final Reader deltaReader =
            OrcFile.createReader(bucketFile, OrcFile.readerOptions(conf).maxLength(flushedLength));
        final ReaderPair deltaPair =
            new ReaderPair(deltaKey, deltaReader, bucket, minKey, maxKey, eventOptions);
        if (deltaPair.nextRecord != null) {
          readers.put(deltaKey, deltaPair);
        }
      }
    }

    // Pull the smallest reader out as the primary and remember the runner-up key.
    final Map.Entry<ReaderKey, ReaderPair> head = readers.pollFirstEntry();
    primary = (head == null) ? null : head.getValue();
    if (primary != null) {
      secondaryKey = readers.isEmpty() ? null : readers.firstKey();
    }
    // The column count of the user's rows comes from the primary, or is 0 without one.
    columns = (primary == null) ? 0 : primary.getColumns();
  }