/** * Find the key range for bucket files. * * @param reader the reader * @param options the options for reading with * @throws IOException */ private void discoverKeyBounds(Reader reader, Reader.Options options) throws IOException { RecordIdentifier[] keyIndex = OrcRecordUpdater.parseKeyIndex(reader); long offset = options.getOffset(); long maxOffset = options.getMaxOffset(); int firstStripe = 0; int stripeCount = 0; boolean isTail = true; List<StripeInformation> stripes = reader.getStripes(); for (StripeInformation stripe : stripes) { if (offset > stripe.getOffset()) { firstStripe += 1; } else if (maxOffset > stripe.getOffset()) { stripeCount += 1; } else { isTail = false; break; } } if (firstStripe != 0) { minKey = keyIndex[firstStripe - 1]; } if (!isTail) { maxKey = keyIndex[firstStripe + stripeCount - 1]; } }
void next(OrcStruct next) throws IOException { if (recordReader.hasNext()) { nextRecord = (OrcStruct) recordReader.next(next); // set the key key.setValues( OrcRecordUpdater.getOriginalTransaction(nextRecord), OrcRecordUpdater.getBucket(nextRecord), OrcRecordUpdater.getRowId(nextRecord), OrcRecordUpdater.getCurrentTransaction(nextRecord)); // if this record is larger than maxKey, we need to stop if (maxKey != null && key.compareRow(maxKey) > 0) { LOG.debug("key " + key + " > maxkey " + maxKey); nextRecord = null; recordReader.close(); } } else { nextRecord = null; recordReader.close(); } }
/** * Read the side file to get the last flush length. * * @param fs the file system to use * @param deltaFile the path of the delta file * @return the maximum size of the file to use * @throws IOException */ private static long getLastFlushLength(FileSystem fs, Path deltaFile) throws IOException { Path lengths = OrcRecordUpdater.getSideFile(deltaFile); long result = Long.MAX_VALUE; try { FSDataInputStream stream = fs.open(lengths); result = -1; while (stream.available() > 0) { result = stream.readLong(); } stream.close(); return result; } catch (IOException ioe) { return result; } }
@Override public ObjectInspector getObjectInspector() { // Read the configuration parameters String columnNameProperty = conf.get(serdeConstants.LIST_COLUMNS); // NOTE: if "columns.types" is missing, all columns will be of String type String columnTypeProperty = conf.get(serdeConstants.LIST_COLUMN_TYPES); // Parse the configuration parameters ArrayList<String> columnNames = new ArrayList<String>(); Deque<Integer> virtualColumns = new ArrayDeque<Integer>(); if (columnNameProperty != null && columnNameProperty.length() > 0) { String[] colNames = columnNameProperty.split(","); for (int i = 0; i < colNames.length; i++) { if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(colNames[i])) { virtualColumns.addLast(i); } else { columnNames.add(colNames[i]); } } } if (columnTypeProperty == null) { // Default type: all string StringBuilder sb = new StringBuilder(); for (int i = 0; i < columnNames.size(); i++) { if (i > 0) { sb.append(":"); } sb.append("string"); } columnTypeProperty = sb.toString(); } ArrayList<TypeInfo> fieldTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); while (virtualColumns.size() > 0) { fieldTypes.remove(virtualColumns.removeLast()); } StructTypeInfo rowType = new StructTypeInfo(); rowType.setAllStructFieldNames(columnNames); rowType.setAllStructFieldTypeInfos(fieldTypes); return OrcRecordUpdater.createEventSchema(OrcStruct.createObjectInspector(rowType)); }
@Override void next(OrcStruct next) throws IOException { if (recordReader.hasNext()) { long nextRowId = recordReader.getRowNumber(); // have to do initialization here, because the super's constructor // calls next and thus we need to initialize before our constructor // runs if (next == null) { nextRecord = new OrcStruct(OrcRecordUpdater.FIELDS); IntWritable operation = new IntWritable(OrcRecordUpdater.INSERT_OPERATION); nextRecord.setFieldValue(OrcRecordUpdater.OPERATION, operation); nextRecord.setFieldValue(OrcRecordUpdater.CURRENT_TRANSACTION, new LongWritable(0)); nextRecord.setFieldValue(OrcRecordUpdater.ORIGINAL_TRANSACTION, new LongWritable(0)); nextRecord.setFieldValue(OrcRecordUpdater.BUCKET, new IntWritable(bucket)); nextRecord.setFieldValue(OrcRecordUpdater.ROW_ID, new LongWritable(nextRowId)); nextRecord.setFieldValue(OrcRecordUpdater.ROW, recordReader.next(null)); } else { nextRecord = next; ((IntWritable) next.getFieldValue(OrcRecordUpdater.OPERATION)) .set(OrcRecordUpdater.INSERT_OPERATION); ((LongWritable) next.getFieldValue(OrcRecordUpdater.ORIGINAL_TRANSACTION)).set(0); ((IntWritable) next.getFieldValue(OrcRecordUpdater.BUCKET)).set(bucket); ((LongWritable) next.getFieldValue(OrcRecordUpdater.CURRENT_TRANSACTION)).set(0); ((LongWritable) next.getFieldValue(OrcRecordUpdater.ROW_ID)).set(0); nextRecord.setFieldValue( OrcRecordUpdater.ROW, recordReader.next(OrcRecordUpdater.getRow(next))); } key.setValues(0L, bucket, nextRowId, 0L); if (maxKey != null && key.compareRow(maxKey) > 0) { if (LOG.isDebugEnabled()) { LOG.debug("key " + key + " > maxkey " + maxKey); } nextRecord = null; recordReader.close(); } } else { nextRecord = null; recordReader.close(); } }
@Override public boolean isDelete(OrcStruct value) { return OrcRecordUpdater.getOperation(value) == OrcRecordUpdater.DELETE_OPERATION; }