@Override public boolean next(RecordIdentifier recordIdentifier, OrcStruct prev) throws IOException { boolean keysSame = true; while (keysSame && primary != null) { // The primary's nextRecord is the next value to return OrcStruct current = primary.nextRecord; recordIdentifier.set(primary.key); // Advance the primary reader to the next record primary.next(extraValue); // Save the current record as the new extraValue for next time so that // we minimize allocations extraValue = current; // now that the primary reader has advanced, we need to see if we // continue to read it or move to the secondary. if (primary.nextRecord == null || primary.key.compareTo(secondaryKey) > 0) { // if the primary isn't done, push it back into the readers if (primary.nextRecord != null) { readers.put(primary.key, primary); } // update primary and secondaryKey Map.Entry<ReaderKey, ReaderPair> entry = readers.pollFirstEntry(); if (entry != null) { primary = entry.getValue(); if (readers.isEmpty()) { secondaryKey = null; } else { secondaryKey = readers.firstKey(); } } else { primary = null; } } // if this transaction isn't ok, skip over it if (!validTxnList.isTxnValid(((ReaderKey) recordIdentifier).getCurrentTransactionId())) { continue; } // if we are collapsing, figure out if this is a new row if (collapse) { keysSame = prevKey.compareRow(recordIdentifier) == 0; if (!keysSame) { prevKey.set(recordIdentifier); } } else { keysSame = false; } // set the output record by fiddling with the pointers so that we can // avoid a copy. prev.linkFields(current); } return !keysSame; }
/** * Create a reader that merge sorts the ACID events together. * * @param conf the configuration * @param collapseEvents should the events on the same row be collapsed * @param isOriginal is the base file a pre-acid file * @param bucket the bucket we are reading * @param options the options to read with * @param deltaDirectory the list of delta directories to include * @throws IOException */ OrcRawRecordMerger( Configuration conf, boolean collapseEvents, Reader reader, boolean isOriginal, int bucket, ValidTxnList validTxnList, Reader.Options options, Path[] deltaDirectory) throws IOException { this.conf = conf; this.collapse = collapseEvents; this.offset = options.getOffset(); this.length = options.getLength(); this.validTxnList = validTxnList; // modify the optins to reflect the event instead of the base row Reader.Options eventOptions = createEventOptions(options); if (reader == null) { baseReader = null; } else { // find the min/max based on the offset and length if (isOriginal) { discoverOriginalKeyBounds(reader, bucket, options); } else { discoverKeyBounds(reader, options); } LOG.info("min key = " + minKey + ", max key = " + maxKey); // use the min/max instead of the byte range ReaderPair pair; ReaderKey key = new ReaderKey(); if (isOriginal) { options = options.clone(); options.range(options.getOffset(), Long.MAX_VALUE); pair = new OriginalReaderPair(key, reader, bucket, minKey, maxKey, options); } else { pair = new ReaderPair(key, reader, bucket, minKey, maxKey, eventOptions); } // if there is at least one record, put it in the map if (pair.nextRecord != null) { readers.put(key, pair); } baseReader = pair.recordReader; } // we always want to read all of the deltas eventOptions.range(0, Long.MAX_VALUE); // Turn off the sarg before pushing it to delta. We never want to push a sarg to a delta as // it can produce wrong results (if the latest valid version of the record is filtered out by // the sarg) or ArrayOutOfBounds errors (when the sarg is applied to a delete record) eventOptions.searchArgument(null, null); if (deltaDirectory != null) { for (Path delta : deltaDirectory) { ReaderKey key = new ReaderKey(); Path deltaFile = AcidUtils.createBucketFile(delta, bucket); FileSystem fs = deltaFile.getFileSystem(conf); long length = getLastFlushLength(fs, deltaFile); if (fs.exists(deltaFile) && length != -1) { Reader deltaReader = OrcFile.createReader(deltaFile, OrcFile.readerOptions(conf).maxLength(length)); ReaderPair deltaPair = new ReaderPair(key, deltaReader, bucket, minKey, maxKey, eventOptions); if (deltaPair.nextRecord != null) { readers.put(key, deltaPair); } } } } // get the first record Map.Entry<ReaderKey, ReaderPair> entry = readers.pollFirstEntry(); if (entry == null) { columns = 0; primary = null; } else { primary = entry.getValue(); if (readers.isEmpty()) { secondaryKey = null; } else { secondaryKey = readers.firstKey(); } // get the number of columns in the user's rows columns = primary.getColumns(); } }