コード例 #1
0
 /**
  * Derive {@code minKey} and {@code maxKey} for the byte range described by
  * the reader options, using the key index parsed from the file.
  *
  * <p>Stripes are classified by their starting offset: those starting before
  * {@code options.getOffset()} are skipped, those starting before
  * {@code options.getMaxOffset()} are covered, and the first stripe starting
  * at or after the maximum offset ends the scan. {@code minKey} is only
  * assigned when at least one stripe was skipped; {@code maxKey} is only
  * assigned when a stripe past the range was found.
  *
  * @param reader the reader whose stripes and key index are inspected
  * @param options the read options providing the offset and maximum offset
  * @throws IOException declared for the key index lookup
  */
 private void discoverKeyBounds(Reader reader, Reader.Options options) throws IOException {
   // NOTE(review): presumably one key index entry per stripe - confirm
   // against OrcRecordUpdater.parseKeyIndex.
   final RecordIdentifier[] stripeKeys = OrcRecordUpdater.parseKeyIndex(reader);
   final long rangeStart = options.getOffset();
   final long rangeEnd = options.getMaxOffset();

   int skipped = 0;
   int covered = 0;
   boolean reachesEndOfFile = true;
   for (StripeInformation stripe : reader.getStripes()) {
     final long stripeStart = stripe.getOffset();
     if (rangeStart > stripeStart) {
       // stripe begins before the requested range
       skipped++;
       continue;
     }
     if (rangeEnd > stripeStart) {
       // stripe begins inside the requested range
       covered++;
       continue;
     }
     // first stripe at or beyond the end of the range: stop scanning
     reachesEndOfFile = false;
     break;
   }

   if (skipped > 0) {
     minKey = stripeKeys[skipped - 1];
   }
   if (!reachesEndOfFile) {
     maxKey = stripeKeys[skipped + covered - 1];
   }
 }
コード例 #2
0
    /**
     * Advance to the next record of the underlying reader and refresh
     * {@code key} from it.
     *
     * <p>When the reader has no more rows, or the new key compares greater
     * than {@code maxKey}, {@code nextRecord} is set to {@code null} and the
     * reader is closed.
     *
     * @param next a struct passed through to {@code recordReader.next}
     * @throws IOException if reading from or closing the reader fails
     */
    void next(OrcStruct next) throws IOException {
      if (recordReader.hasNext()) {
        nextRecord = (OrcStruct) recordReader.next(next);
        // set the key
        key.setValues(
            OrcRecordUpdater.getOriginalTransaction(nextRecord),
            OrcRecordUpdater.getBucket(nextRecord),
            OrcRecordUpdater.getRowId(nextRecord),
            OrcRecordUpdater.getCurrentTransaction(nextRecord));

        // if this record is larger than maxKey, we need to stop
        if (maxKey != null && key.compareRow(maxKey) > 0) {
          // Guard the log call so the message is only concatenated when
          // debug logging is actually enabled.
          if (LOG.isDebugEnabled()) {
            LOG.debug("key " + key + " > maxkey " + maxKey);
          }
          nextRecord = null;
          recordReader.close();
        }
      } else {
        nextRecord = null;
        recordReader.close();
      }
    }
コード例 #3
0
 /**
  * Read the side file to get the last flush length.
  *
  * <p>The side file is read as a sequence of {@code long} values and the
  * last one read is returned. I/O failures are not propagated:
  * <ul>
  *   <li>if the side file cannot be opened, {@code Long.MAX_VALUE} is
  *       returned;</li>
  *   <li>if it opens but reports no available bytes, {@code -1} is
  *       returned;</li>
  *   <li>if reading or closing fails part-way, the last value read so far
  *       (or {@code -1}) is returned.</li>
  * </ul>
  *
  * @param fs the file system to use
  * @param deltaFile the path of the delta file
  * @return the maximum size of the file to use
  * @throws IOException declared for the side file path lookup
  */
 private static long getLastFlushLength(FileSystem fs, Path deltaFile) throws IOException {
   Path lengths = OrcRecordUpdater.getSideFile(deltaFile);
   long result = Long.MAX_VALUE;
   try {
     FSDataInputStream stream = fs.open(lengths);
     try {
       result = -1;
       while (stream.available() > 0) {
         result = stream.readLong();
       }
     } finally {
       // Always release the stream, even when a read fails part-way.
       stream.close();
     }
     return result;
   } catch (IOException ioe) {
     // Best effort: fall back to whatever was determined before the failure.
     return result;
   }
 }
コード例 #4
0
  /**
   * Build the object inspector from the column names and types in the
   * configuration.
   *
   * <p>Column names listed in {@code VirtualColumn.VIRTUAL_COLUMN_NAMES} are
   * left out of the row struct. When the configured type list has one entry
   * per configured column name, the entries at the virtual column positions
   * are dropped as well so that names and types stay aligned. The resulting
   * struct inspector is wrapped by {@code OrcRecordUpdater.createEventSchema}.
   *
   * @return the object inspector for the event schema around the row type
   */
  @Override
  public ObjectInspector getObjectInspector() {
    // Read the configuration parameters
    String columnNameProperty = conf.get(serdeConstants.LIST_COLUMNS);
    // NOTE: if "columns.types" is missing, all columns will be of String type
    String columnTypeProperty = conf.get(serdeConstants.LIST_COLUMN_TYPES);

    // Parse the configuration parameters
    ArrayList<String> columnNames = new ArrayList<String>();
    Deque<Integer> virtualColumns = new ArrayDeque<Integer>();
    int declaredColumnCount = 0;
    if (columnNameProperty != null && columnNameProperty.length() > 0) {
      String[] colNames = columnNameProperty.split(",");
      declaredColumnCount = colNames.length;
      for (int i = 0; i < colNames.length; i++) {
        if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(colNames[i])) {
          // remember the position so the matching type can be dropped later
          virtualColumns.addLast(i);
        } else {
          columnNames.add(colNames[i]);
        }
      }
    }
    if (columnTypeProperty == null) {
      // Default type: all string (one entry per non-virtual column)
      StringBuilder sb = new StringBuilder();
      for (int i = 0; i < columnNames.size(); i++) {
        if (i > 0) {
          sb.append(":");
        }
        sb.append("string");
      }
      columnTypeProperty = sb.toString();
    }

    ArrayList<TypeInfo> fieldTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    // Only strip types when the type list lines up with the full column list;
    // the generated default above already excludes virtual columns.
    if (fieldTypes.size() == declaredColumnCount) {
      // Walk from the highest position down so earlier removals do not shift
      // the positions still to be removed.
      while (!virtualColumns.isEmpty()) {
        // Unbox explicitly: passing the Integer would select
        // List.remove(Object) and silently remove nothing.
        int position = virtualColumns.removeLast();
        fieldTypes.remove(position);
      }
    }
    StructTypeInfo rowType = new StructTypeInfo();
    rowType.setAllStructFieldNames(columnNames);
    rowType.setAllStructFieldTypeInfos(fieldTypes);
    return OrcRecordUpdater.createEventSchema(OrcStruct.createObjectInspector(rowType));
  }
コード例 #5
0
 /**
  * Advance to the next row of the underlying reader and wrap it in an event
  * struct marked as an insert with transaction ids of 0, the reader's
  * bucket, and the row number as the row id.
  *
  * <p>When {@code next} is {@code null} a new event struct is allocated;
  * otherwise the supplied struct and its writables are reused. When the
  * reader has no more rows, or the new key compares greater than
  * {@code maxKey}, {@code nextRecord} is set to {@code null} and the reader
  * is closed.
  *
  * @param next an event struct to reuse, or {@code null} to allocate one
  * @throws IOException if reading from or closing the reader fails
  */
 @Override
 void next(OrcStruct next) throws IOException {
   if (recordReader.hasNext()) {
     // captured before reading, so it is the number of the row about to be
     // returned by recordReader.next
     long nextRowId = recordReader.getRowNumber();
     // have to do initialization here, because the super's constructor
     // calls next and thus we need to initialize before our constructor
     // runs
     if (next == null) {
       nextRecord = new OrcStruct(OrcRecordUpdater.FIELDS);
       IntWritable operation = new IntWritable(OrcRecordUpdater.INSERT_OPERATION);
       nextRecord.setFieldValue(OrcRecordUpdater.OPERATION, operation);
       nextRecord.setFieldValue(OrcRecordUpdater.CURRENT_TRANSACTION, new LongWritable(0));
       nextRecord.setFieldValue(OrcRecordUpdater.ORIGINAL_TRANSACTION, new LongWritable(0));
       nextRecord.setFieldValue(OrcRecordUpdater.BUCKET, new IntWritable(bucket));
       nextRecord.setFieldValue(OrcRecordUpdater.ROW_ID, new LongWritable(nextRowId));
       nextRecord.setFieldValue(OrcRecordUpdater.ROW, recordReader.next(null));
     } else {
       nextRecord = next;
       ((IntWritable) next.getFieldValue(OrcRecordUpdater.OPERATION))
           .set(OrcRecordUpdater.INSERT_OPERATION);
       ((LongWritable) next.getFieldValue(OrcRecordUpdater.ORIGINAL_TRANSACTION)).set(0);
       ((IntWritable) next.getFieldValue(OrcRecordUpdater.BUCKET)).set(bucket);
       ((LongWritable) next.getFieldValue(OrcRecordUpdater.CURRENT_TRANSACTION)).set(0);
       // Keep the reused struct consistent with the freshly allocated one
       // and with the key below: the row id is the row number, not 0.
       ((LongWritable) next.getFieldValue(OrcRecordUpdater.ROW_ID)).set(nextRowId);
       nextRecord.setFieldValue(
           OrcRecordUpdater.ROW, recordReader.next(OrcRecordUpdater.getRow(next)));
     }
     key.setValues(0L, bucket, nextRowId, 0L);
     // if this record is larger than maxKey, we need to stop
     if (maxKey != null && key.compareRow(maxKey) > 0) {
       if (LOG.isDebugEnabled()) {
         LOG.debug("key " + key + " > maxkey " + maxKey);
       }
       nextRecord = null;
       recordReader.close();
     }
   } else {
     nextRecord = null;
     recordReader.close();
   }
 }
コード例 #6
0
 /**
  * Tell whether the given event struct carries the delete operation code.
  *
  * @param value the event struct to inspect
  * @return {@code true} when the struct's operation equals
  *     {@code OrcRecordUpdater.DELETE_OPERATION}
  */
 @Override
 public boolean isDelete(OrcStruct value) {
   boolean deleteEvent =
       OrcRecordUpdater.DELETE_OPERATION == OrcRecordUpdater.getOperation(value);
   return deleteEvent;
 }