/**
 * Creates {@code orcReader} for the current split if it has not been created yet.
 *
 * <p>When {@code orcReader} is already non-null this method does nothing. Otherwise it resolves
 * the path to read via {@code HdfsUtils.getFileIdPath}, builds reader options carrying the
 * file system and the file metadata, creates the reader through {@code EncodedOrcFile}, and adds
 * the elapsed time to {@code Counter.HDFS_TIME_US}. The counter is only updated when reader
 * creation completes without throwing.
 *
 * @throws IOException declared by this method; thrown by the calls it makes
 */
private void ensureOrcReader() throws IOException {
  if (orcReader == null) {
    final Path readerPath = HdfsUtils.getFileIdPath(fs, split.getPath(), fileId);
    if (DebugUtils.isTraceOrcEnabled()) {
      LOG.info("Creating reader for " + readerPath + " (" + split.getPath() + ")");
    }

    // Time the option building and reader creation together, as a single measurement.
    final long timerStart = counters.startTimeCounter();
    final ReaderOptions readerOptions =
        OrcFile.readerOptions(conf)
            .filesystem(fs)
            .fileMetadata(fileMetadata);
    orcReader = EncodedOrcFile.createReader(readerPath, readerOptions);
    counters.incrTimeCounter(Counter.HDFS_TIME_US, timerStart);
  }
}
private void processKeyValuePairs(Object key, Object value) throws HiveException { String filePath = ""; try { OrcFileValueWrapper v; OrcFileKeyWrapper k; if (key instanceof CombineHiveKey) { k = (OrcFileKeyWrapper) ((CombineHiveKey) key).getKey(); } else { k = (OrcFileKeyWrapper) key; } // skip incompatible file, files that are missing stripe statistics are set to incompatible if (k.isIncompatFile()) { LOG.warn("Incompatible ORC file merge! Stripe statistics is missing. " + k.getInputPath()); incompatFileSet.add(k.getInputPath()); return; } filePath = k.getInputPath().toUri().getPath(); fixTmpPath(k.getInputPath().getParent()); v = (OrcFileValueWrapper) value; if (prevPath == null) { prevPath = k.getInputPath(); reader = OrcFile.createReader(fs, k.getInputPath()); if (isLogInfoEnabled) { LOG.info("ORC merge file input path: " + k.getInputPath()); } } // store the orc configuration from the first file. All other files should // match this configuration before merging else will not be merged if (outWriter == null) { compression = k.getCompression(); compressBuffSize = k.getCompressBufferSize(); version = k.getVersion(); columnCount = k.getTypes().get(0).getSubtypesCount(); rowIndexStride = k.getRowIndexStride(); OrcFile.WriterOptions options = OrcFile.writerOptions(jc) .compress(compression) .version(version) .rowIndexStride(rowIndexStride) .inspector(reader.getObjectInspector()); // compression buffer size should only be set if compression is enabled if (compression != CompressionKind.NONE) { // enforce is required to retain the buffer sizes of old files instead of orc writer // inferring the optimal buffer size options.bufferSize(compressBuffSize).enforceBufferSize(); } outWriter = OrcFile.createWriter(outPath, options); if (isLogDebugEnabled) { LOG.info("ORC merge file output path: " + outPath); } } if (!checkCompatibility(k)) { incompatFileSet.add(k.getInputPath()); return; } // next file in the path if (!k.getInputPath().equals(prevPath)) { reader = 
OrcFile.createReader(fs, k.getInputPath()); } // initialize buffer to read the entire stripe byte[] buffer = new byte[(int) v.getStripeInformation().getLength()]; fdis = fs.open(k.getInputPath()); fdis.readFully( v.getStripeInformation().getOffset(), buffer, 0, (int) v.getStripeInformation().getLength()); // append the stripe buffer to the new ORC file outWriter.appendStripe( buffer, 0, buffer.length, v.getStripeInformation(), v.getStripeStatistics()); if (isLogInfoEnabled) { LOG.info( "Merged stripe from file " + k.getInputPath() + " [ offset : " + v.getStripeInformation().getOffset() + " length: " + v.getStripeInformation().getLength() + " row: " + v.getStripeStatistics().getColStats(0).getNumberOfValues() + " ]"); } // add user metadata to footer in case of any if (v.isLastStripeInFile()) { outWriter.appendUserMetadata(v.getUserMetadata()); } } catch (Throwable e) { this.exception = true; LOG.error("Closing operator..Exception: " + ExceptionUtils.getStackTrace(e)); throw new HiveException(e); } finally { if (exception) { closeOp(true); } if (fdis != null) { try { fdis.close(); } catch (IOException e) { throw new HiveException(String.format("Unable to close file %s", filePath), e); } finally { fdis = null; } } } }