/**
 * Rebuilds {@code outputFields} so that it holds one newly created field for every field in the
 * left side's schema, each created from the source field's path and type.
 */
private void inferOutputFieldsFromLeftSide() {
   outputFields = Lists.newArrayList();
   for (MaterializedField leftField : leftSide.getRecordBatch().getSchema()) {
     outputFields.add(MaterializedField.create(leftField.getPath(), leftField.getType()));
   }
  }
  /** Writes a handful of values into a required UINT4 vector and reads them back. */
  @Test
  public void testFixedType() {
    // Describe a required UINT4 column with a declared width of 4.
    MajorType uint4Type =
        MajorType.newBuilder()
            .setMinorType(MinorType.UINT4)
            .setMode(DataMode.REQUIRED)
            .setWidth(4)
            .build();
    MaterializedField field =
        MaterializedField.create(FieldDef.newBuilder().setMajorType(uint4Type).build());

    // Allocate a vector with room for 1024 values.
    UInt4Vector vector = new UInt4Vector(field, allocator);
    UInt4Vector.Mutator mutator = vector.getMutator();
    vector.allocateNew(1024);

    // Write every sample first, then read them all back.
    final int[] slots = {0, 1, 100, 1022, 1023};
    final int[] samples = {100, 101, 102, 103, 104};
    for (int k = 0; k < slots.length; k++) {
      mutator.set(slots[k], samples[k]);
    }
    for (int k = 0; k < slots.length; k++) {
      assertEquals(samples[k], vector.getAccessor().get(slots[k]));
    }

    // A slot that was never written is expected to read back as 0.
    assertEquals(0, vector.getAccessor().get(3));
  }
  /**
   * Exercises {@code setSafe} on a nullable VARCHAR vector with indexes inside and beyond the
   * initial allocation, checks the resulting capacity and contents, and finally sets a value
   * count above the current capacity.
   */
  @Test
  public void testReAllocNullableVariableWidthVector() {
    final MaterializedField field =
        MaterializedField.create(EMPTY_SCHEMA_PATH, NullableVarCharHolder.TYPE);

    try (final NullableVarCharVector vector =
        (NullableVarCharVector) TypeHelper.getNewVector(field, allocator)) {
      final NullableVarCharVector.Mutator mutator = vector.getMutator();
      vector.allocateNew();

      final int initialCapacity = vector.getValueCapacity();
      final int lastInitialIndex = initialCapacity - 1;
      final int beyondInitialIndex = initialCapacity + 200;

      // The first two writes land inside the initial allocation; the third is past its end.
      mutator.setSafe(0, STR1, 0, STR1.length);
      mutator.setSafe(lastInitialIndex, STR2, 0, STR2.length);
      mutator.setSafe(beyondInitialIndex, STR3, 0, STR3.length);

      // After the out-of-range write the reported capacity must equal this exact value.
      final int expectedCapacity = (initialCapacity + 1) * 2 - 1;
      assertEquals(expectedCapacity, vector.getValueCapacity());

      // All three values must read back unchanged.
      final NullableVarCharVector.Accessor reader = vector.getAccessor();
      assertArrayEquals(STR1, reader.get(0));
      assertArrayEquals(STR2, reader.get(lastInitialIndex));
      assertArrayEquals(STR3, reader.get(beyondInitialIndex));

      // A value count above the current capacity is legitimate for nullable vectors: setSafe is
      // not called for null values, but setValueCount is still called once the batch has been
      // processed.
      final int oversizedCount = vector.getValueCapacity() + 200;
      mutator.setValueCount(oversizedCount);
    }
  }
  /**
   * Stores three sample byte strings in a nullable VARCHAR vector, reads them back, and checks
   * that reading a slot that was never set raises {@link IllegalStateException}.
   */
  @Test
  public void testNullableVarLen2() {
    final MaterializedField field =
        MaterializedField.create(EMPTY_SCHEMA_PATH, NullableVarCharHolder.TYPE);

    try (final NullableVarCharVector vector = new NullableVarCharVector(field, allocator)) {
      final NullableVarCharVector.Mutator mutator = vector.getMutator();
      vector.allocateNew(1024 * 10, 1024);

      // Write all samples into consecutive slots starting at 0.
      final byte[][] samples = {STR1, STR2, STR3};
      for (int slot = 0; slot < samples.length; slot++) {
        mutator.set(slot, samples[slot]);
      }

      // Read them back through a single accessor.
      final NullableVarCharVector.Accessor reader = vector.getAccessor();
      for (int slot = 0; slot < samples.length; slot++) {
        assertArrayEquals(samples[slot], reader.get(slot));
      }

      // Slot 3 was never set, so reading it must throw.
      boolean sawIllegalState = false;
      try {
        vector.getAccessor().get(3);
      } catch (IllegalStateException expected) {
        sawIllegalState = true;
      } finally {
        assertTrue(sawIllegalState);
      }
    }
  }
  /**
   * Drives a {@link VarCharVector} through allocation and {@code reAlloc()} calls at and around
   * {@code BaseValueVector.MAX_ALLOCATION_SIZE}. The test passes only if an
   * {@link OversizedAllocationException} escapes the method.
   */
  @Test(expected = OversizedAllocationException.class)
  public void testVariableVectorReallocation() {
    final MaterializedField field = MaterializedField.create(EMPTY_SCHEMA_PATH, UInt4Holder.TYPE);
    final VarCharVector vector = new VarCharVector(field, allocator);

    // Edge case: the data buffer already starts out at the maximum allocation size.
    final int valueCount = 10;
    final int byteCount = BaseValueVector.MAX_ALLOCATION_SIZE;
    try {
      vector.allocateNew(byteCount, valueCount);
      assertEquals(valueCount, vector.getValueCapacity());
      assertEquals(byteCount, vector.getBuffer().capacity());
      vector.reAlloc();
      assertEquals(2 * valueCount, vector.getValueCapacity());
      assertEquals(2 * byteCount, vector.getBuffer().capacity());
    } finally {
      vector.close();
    }

    // Common case: start at half the maximum. The first reAlloc reaches the maximum and the
    // second one checks for overflow.
    try {
      vector.allocateNew(BaseValueVector.MAX_ALLOCATION_SIZE / 2, 0);
      for (int attempt = 0; attempt < 2; attempt++) {
        vector.reAlloc();
      }
    } finally {
      vector.close();
    }
  }
  /**
   * Drives a {@link BitVector} through allocation and repeated {@code reAlloc()} calls starting
   * from a capacity of {@code 1 << 29} values. The test passes only if an
   * {@link OversizedAllocationException} escapes the method.
   */
  @Test(expected = OversizedAllocationException.class)
  public void testBitVectorReallocation() {
    final MaterializedField field = MaterializedField.create(EMPTY_SCHEMA_PATH, UInt4Holder.TYPE);
    final BitVector vector = new BitVector(field, allocator);
    final int startCapacity = 1 << 29;

    // Edge case: a single reAlloc from the starting capacity should double it.
    try {
      vector.allocateNew(startCapacity);
      assertEquals(startCapacity, vector.getValueCapacity());
      vector.reAlloc();
      assertEquals(2 * startCapacity, vector.getValueCapacity());
    } finally {
      vector.close();
    }

    // Common case: keep growing until the reported capacity saturates, then push once more.
    try {
      vector.allocateNew(startCapacity);
      vector.reAlloc(); // expand buffer size
      vector.reAlloc(); // expand buffer size
      vector.reAlloc(); // expand buffer size
      assertEquals(Integer.MAX_VALUE, vector.getValueCapacity());
      vector.reAlloc(); // buffer size ~ max allocation
      assertEquals(Integer.MAX_VALUE, vector.getValueCapacity());
      vector.reAlloc(); // overflow
    } finally {
      vector.close();
    }
  }
  /**
   * Creates the batch from the sender configuration: copies the partitioning and sampling
   * settings out of {@code pop}, obtains the distributed cache structures, derives the per-query
   * cache key, and creates the required-INT {@code partitionKeyVector}.
   *
   * @param pop operator definition supplying destinations, sending width and sampling settings
   * @param incoming upstream batch this operator reads from
   * @param context fragment context supplying the distributed cache and the fragment handle
   * @throws OutOfMemoryException declared by this constructor; not thrown directly by the
   *     statements below
   */
  public OrderedPartitionRecordBatch(
      OrderedPartitionSender pop, RecordBatch incoming, FragmentContext context)
      throws OutOfMemoryException {
    super(pop, context);
    this.incoming = incoming;

    // Settings taken verbatim from the operator definition.
    this.partitions = pop.getDestinations().size();
    this.sendingMajorFragmentWidth = pop.getSendingWidth();
    this.recordsToSample = pop.getRecordsToSample();
    this.samplingFactor = pop.getSamplingFactor();
    this.completionFactor = pop.getCompletionFactor();

    // Structures obtained from the drillbit's distributed cache.
    final DistributedCache distributedCache = context.getDrillbitContext().getCache();
    this.mmap = distributedCache.getMultiMap(MULTI_CACHE_CONFIG);
    this.tableMap = distributedCache.getMap(SINGLE_CACHE_CONFIG);
    Preconditions.checkNotNull(tableMap);

    // The key is "<queryId>_<majorFragmentId>"; the counter is looked up under the same key.
    this.mapKey =
        String.format(
            "%s_%d", context.getHandle().getQueryId(), context.getHandle().getMajorFragmentId());
    this.minorFragmentSampleCount = cache(distributedCache, mapKey);

    // Required INT vector created at the path configured on the operator.
    this.partitionKeyVector =
        (IntVector)
            TypeHelper.getNewVector(
                MaterializedField.create(
                    popConfig.getRef(), Types.required(TypeProtos.MinorType.INT)),
                oContext.getAllocator());
  }

  /** Looks up the distributed counter registered under {@code key}. */
  private static Counter cache(DistributedCache distributedCache, String key) {
    return distributedCache.getCounter(key);
  }
 /**
  * Returns the vector registered under the field's key, creating and registering a new one when
  * none exists or when the existing vector's class is not exactly {@code clazz}.
  *
  * @param field field describing the vector to look up or create
  * @param clazz vector class the caller expects
  * @return the existing or newly created vector, cast to {@code clazz}
  * @throws SchemaChangeException if the vector created for {@code field} is not assignable to
  *     {@code clazz}
  */
 @Override
 public <T extends ValueVector> T addField(MaterializedField field, Class<T> clazz)
     throws SchemaChangeException {
   final ValueVector existing = fieldVectorMap.get(field.key());
   if (existing != null && existing.getClass() == clazz) {
     return clazz.cast(existing);
   }

   // Missing or of a different class: build a replacement and make sure it fits the request.
   final ValueVector created = TypeHelper.getNewVector(field, oContext.getAllocator());
   final Class<?> createdClass = created.getClass();
   if (!clazz.isAssignableFrom(createdClass)) {
     throw new SchemaChangeException(
         String.format(
             "Class %s was provided, expected %s.",
             clazz.getSimpleName(), createdClass.getSimpleName()));
   }
   fieldVectorMap.put(field.key(), created);
   return clazz.cast(created);
 }
Example #9
0
  /**
   * Builds a new container laid out according to {@code toSchema} from the vectors of {@code in}.
   *
   * <p>For each target field, the incoming vector with the same path (if any) is transferred.
   * When the minor types match, the transferred vector is added as is; a UNION vector first gains
   * any sub-types the target field declares that it does not list yet. When the minor types
   * differ, the target must be a UNION: a new union vector is created, the transferred vector is
   * added to it, and every value is tagged with the source's minor type. Target fields with no
   * incoming vector get a newly allocated vector whose value count is the incoming record count.
   *
   * @param in batch whose vectors are transferred into the result
   * @param toSchema schema the returned container should follow
   * @param context operator context supplying the allocator and passed to the new container
   * @return the populated container with its schema built and record count set
   * @throws IllegalStateException if a type mismatch targets a non-UNION field, or if an incoming
   *     vector has no matching field in {@code toSchema}
   */
  public static VectorContainer coerceContainer(
      VectorAccessible in, BatchSchema toSchema, OperatorContext context) {
    final int recordCount = in.getRecordCount();

    // Index the incoming vectors by path so each target field can claim its source.
    final Map<SchemaPath, ValueVector> unclaimed = Maps.newHashMap();
    for (VectorWrapper wrapper : in) {
      final ValueVector incomingVector = wrapper.getValueVector();
      unclaimed.put(incomingVector.getField().getPath(), incomingVector);
    }

    final VectorContainer result = new VectorContainer(context);

    for (MaterializedField targetField : toSchema) {
      final ValueVector source = unclaimed.remove(targetField.getPath());
      if (source == null) {
        // Nothing incoming for this field: add a freshly allocated vector of the right length.
        final ValueVector placeholder =
            TypeHelper.getNewVector(targetField, context.getAllocator());
        placeholder.allocateNew();
        placeholder.getMutator().setValueCount(recordCount);
        result.add(placeholder);
        continue;
      }

      final int valueCount = source.getAccessor().getValueCount();
      final TransferPair transfer = source.getTransferPair();
      transfer.transfer();

      final MinorType sourceType = source.getField().getType().getMinorType();
      final MinorType targetType = targetField.getType().getMinorType();
      if (sourceType.equals(targetType)) {
        if (targetType == MinorType.UNION) {
          addMissingSubTypes((UnionVector) transfer.getTo(), targetField);
        }
        result.add(transfer.getTo());
        continue;
      }

      // Types differ: wrap the transferred vector in a new union vector.
      final ValueVector created = TypeHelper.getNewVector(targetField, context.getAllocator());
      Preconditions.checkState(
          targetType == MinorType.UNION, "Can only convert vector to Union vector");
      final UnionVector union = (UnionVector) created;
      union.addVector(transfer.getTo());
      for (int row = 0; row < valueCount; row++) {
        union.getMutator().setType(row, sourceType);
      }
      addMissingSubTypes(union, targetField);
      union.getMutator().setValueCount(valueCount);
      result.add(union);
    }

    result.buildSchema(in.getSchema().getSelectionVectorMode());
    result.setRecordCount(recordCount);
    Preconditions.checkState(unclaimed.size() == 0, "Leftover vector from incoming batch");
    return result;
  }

  /**
   * Adds to {@code union} every sub-type declared by {@code targetField} that the union's own
   * field does not list yet.
   */
  private static void addMissingSubTypes(UnionVector union, MaterializedField targetField) {
    for (MinorType declared : targetField.getType().getSubTypeList()) {
      if (!union.getField().getType().getSubTypeList().contains(declared)) {
        union.addSubType(declared);
      }
    }
  }
  /**
   * Runs the given verifier against one instance of each of several {@link ValueVector vector}
   * kinds, closing every vector afterwards whether or not verification succeeded.
   *
   * @param test verification to execute on each vector
   */
  private void testVectors(VectorVerifier test) throws Exception {
    // One field per vector kind; all of them share the empty schema path.
    final MaterializedField uint4Field =
        MaterializedField.create(EMPTY_SCHEMA_PATH, UInt4Holder.TYPE);
    final MaterializedField bitField = MaterializedField.create(EMPTY_SCHEMA_PATH, BitHolder.TYPE);
    final MaterializedField varCharField =
        MaterializedField.create(EMPTY_SCHEMA_PATH, VarCharHolder.TYPE);
    final MaterializedField nullableVarCharField =
        MaterializedField.create(EMPTY_SCHEMA_PATH, NullableVarCharHolder.TYPE);
    final MaterializedField repeatedListField =
        MaterializedField.create(EMPTY_SCHEMA_PATH, RepeatedListVector.TYPE);
    final MaterializedField mapField = MaterializedField.create(EMPTY_SCHEMA_PATH, MapVector.TYPE);
    final MaterializedField repeatedMapField =
        MaterializedField.create(EMPTY_SCHEMA_PATH, RepeatedMapVector.TYPE);

    final ValueVector[] vectors = {
      new UInt4Vector(uint4Field, allocator),
      new BitVector(bitField, allocator),
      new VarCharVector(varCharField, allocator),
      new NullableVarCharVector(nullableVarCharField, allocator),
      new RepeatedListVector(repeatedListField, allocator, null),
      new MapVector(mapField, allocator, null),
      new RepeatedMapVector(repeatedMapField, allocator, null)
    };

    try {
      for (int idx = 0; idx < vectors.length; idx++) {
        test.verify(vectors[idx]);
      }
    } finally {
      AutoCloseables.close(vectors);
    }
  }
Example #11
0
 /**
  * Builds a container holding, for every field of {@code schema}, an array with that field's
  * vector from each batch group, then builds its schema in FOUR_BYTE selection vector mode.
  *
  * @param batchGroupList batch groups contributing one vector per field, in list order
  * @return the assembled container
  */
 private VectorContainer constructHyperBatch(List<BatchGroup> batchGroupList) {
   final VectorContainer hyperBatch = new VectorContainer();
   final int groupCount = batchGroupList.size();
   for (MaterializedField field : schema) {
     // The lookup path depends only on the field, so build it once per field.
     final SchemaPath fieldPath = SchemaPath.getSimplePath(field.getPath());
     final ValueVector[] perGroupVectors = new ValueVector[groupCount];
     int slot = 0;
     for (BatchGroup group : batchGroupList) {
       perGroupVectors[slot] =
           group
               .getValueAccessorById(
                   field.getValueClass(), group.getValueVectorId(fieldPath).getFieldIds())
               .getValueVector();
       slot++;
     }
     hyperBatch.add(perGroupVectors);
   }
   hyperBatch.buildSchema(BatchSchema.SelectionVectorMode.FOUR_BYTE);
   return hyperBatch;
 }
  /**
   * Fills a nullable VARCHAR vector with strings of increasing length, verifies them before and
   * after setting the value count, then loads the combined buffers into a second vector and
   * verifies that one too.
   *
   * <p>All vectors and the combined buffer are released in a {@code finally} block so a failing
   * assertion does not leak them.
   */
  @Test
  public void testNullableVarCharVectorLoad() {
    final MaterializedField field =
        MaterializedField.create(EMPTY_SCHEMA_PATH, NullableVarCharHolder.TYPE);
    final int valueCount = 10;

    final NullableVarCharVector vector1 = new NullableVarCharVector(field, allocator);
    NullableVarCharVector vector2 = null;
    DrillBuf buffer1 = null;
    try {
      final NullableVarCharVector.Mutator mutator = vector1.getMutator();
      vector1.allocateNew(1024 * 10, 1024);

      // Populate the vector: value i is (i + 1) copies of 'x'.
      final StringBuilder stringBuilder = new StringBuilder();
      for (int i = 0; i < valueCount; ++i) {
        stringBuilder.append('x');
        mutator.set(i, stringBuilder.toString().getBytes(utf8Charset));
      }

      // Check the contents.
      final NullableVarCharVector.Accessor accessor1 = vector1.getAccessor();
      assertHoldsGrowingRunsOfX(accessor1, valueCount);

      mutator.setValueCount(valueCount);
      assertEquals(valueCount, vector1.getAccessor().getValueCount());

      // Still ok after setting value count?
      assertHoldsGrowingRunsOfX(accessor1, valueCount);

      // Combine into a single buffer so we can load it into a new vector.
      final DrillBuf[] buffers1 = vector1.getBuffers(false);
      buffer1 = combineBuffers(allocator, buffers1);
      vector2 = new NullableVarCharVector(field, allocator);
      vector2.load(vector1.getMetadata(), buffer1);

      // Check the loaded vector's contents.
      assertHoldsGrowingRunsOfX(vector2.getAccessor(), valueCount);
    } finally {
      // Same release order as on the success path; resources never created are skipped.
      vector1.close();
      if (vector2 != null) {
        vector2.close();
      }
      if (buffer1 != null) {
        buffer1.release();
      }
    }
  }

  /**
   * Asserts that, for each index below {@code count}, the object at that index renders as a
   * string of {@code index + 1} 'x' characters.
   */
  private static void assertHoldsGrowingRunsOfX(
      NullableVarCharVector.Accessor accessor, int count) {
    final StringBuilder expected = new StringBuilder();
    for (int i = 0; i < count; ++i) {
      expected.append('x');
      final Object object = accessor.getObject(i);
      assertEquals(expected.toString(), object.toString());
    }
  }
  /**
   * Fills a repeated INT vector with {@code nRecords} records of {@code nElements} values each
   * and reads every element back.
   *
   * <p>The vector is closed in a {@code finally} block so a failing assertion does not leak it.
   */
  @Test
  public void testRepeatedIntVector() {
    final MaterializedField field =
        MaterializedField.create(EMPTY_SCHEMA_PATH, RepeatedIntHolder.TYPE);

    // Create a new value vector.
    final RepeatedIntVector vector1 = new RepeatedIntVector(field, allocator);
    try {
      // Populate the vector. The sample values are mostly primes (27 is not one).
      final int[] values = {2, 3, 5, 7, 11, 13, 17, 19, 23, 27};
      final int nRecords = 7;
      final int nElements = values.length;
      vector1.allocateNew(nRecords, nRecords * nElements);
      final RepeatedIntVector.Mutator mutator = vector1.getMutator();
      for (int recordIndex = 0; recordIndex < nRecords; ++recordIndex) {
        mutator.startNewValue(recordIndex);
        for (int elementIndex = 0; elementIndex < nElements; ++elementIndex) {
          // Element value is the record index times the sample value.
          mutator.add(recordIndex, recordIndex * values[elementIndex]);
        }
      }
      mutator.setValueCount(nRecords);

      // Verify the contents.
      final RepeatedIntVector.Accessor accessor1 = vector1.getAccessor();
      assertEquals(nRecords, accessor1.getValueCount());
      for (int recordIndex = 0; recordIndex < nRecords; ++recordIndex) {
        for (int elementIndex = 0; elementIndex < nElements; ++elementIndex) {
          final int value = accessor1.get(recordIndex, elementIndex);
          assertEquals(recordIndex * values[elementIndex], value);
        }
      }

      /* TODO(cwestin)
      the interface to load has changed
          // Serialize, reify, and verify.
          final DrillBuf[] buffers1 = vector1.getBuffers(false);
          final DrillBuf buffer1 = combineBuffers(allocator, buffers1);
          final RepeatedIntVector vector2 = new RepeatedIntVector(field, allocator);
          vector2.load(nRecords, nRecords * nElements, buffer1);

          final RepeatedIntVector.Accessor accessor2 = vector2.getAccessor();
          for(int recordIndex = 0; recordIndex < nRecords; ++recordIndex) {
            for(int elementIndex = 0; elementIndex < nElements; ++elementIndex) {
              final int value = accessor2.get(recordIndex, elementIndex);
              assertEquals(accessor1.get(recordIndex,  elementIndex), value);
            }
          }
      */
    } finally {
      vector1.close();
      /* TODO(cwestin)
          vector2.close();
          buffer1.release();
      */
    }
  }
  /**
   * Writes a few floats into an optional FLOAT4 vector, reads them back, and checks that reading
   * an unset slot raises {@link AssertionError}, both before and after re-allocating the vector.
   *
   * <p>The null checks use the {@code assert} keyword, so they only take effect when the JVM
   * runs with assertions enabled.
   */
  @Test
  public void testNullableFloat() {
    // Describe an optional FLOAT4 column with a declared width of 4.
    MajorType float4Type =
        MajorType.newBuilder()
            .setMinorType(MinorType.FLOAT4)
            .setMode(DataMode.OPTIONAL)
            .setWidth(4)
            .build();
    MaterializedField field =
        MaterializedField.create(FieldDef.newBuilder().setMajorType(float4Type).build());

    // Allocate a vector with room for 1024 values.
    NullableFloat4Vector vector =
        (NullableFloat4Vector) TypeHelper.getNewVector(field, allocator);
    NullableFloat4Vector.Mutator mutator = vector.getMutator();
    vector.allocateNew(1024);

    // Write every sample first, then read them all back with a zero tolerance.
    final int[] slots = {0, 1, 100, 1022, 1023};
    final float[] samples = {100.1f, 101.2f, 102.3f, 103.4f, 104.5f};
    for (int k = 0; k < slots.length; k++) {
      mutator.set(slots[k], samples[k]);
    }
    for (int k = 0; k < slots.length; k++) {
      assertEquals(samples[k], vector.getAccessor().get(slots[k]), 0);
    }

    // Slot 3 was never set, so reading it must throw.
    {
      boolean threw = false;
      try {
        vector.getAccessor().get(3);
      } catch (AssertionError expected) {
        threw = true;
      } finally {
        assert threw;
      }
    }

    // After a fresh allocation, slot 0 must read as unset again.
    vector.allocateNew(2048);
    {
      boolean threw = false;
      try {
        vector.getAccessor().get(0);
      } catch (AssertionError expected) {
        threw = true;
      } finally {
        assert threw;
      }
    }
  }
  /**
   * Adds the repeated VARCHAR vector to the record batch and works out which column positions
   * were selected, validating the selected columns along the way.
   *
   * <p>For a star query every position is marked as collected. Otherwise only the array indexes
   * named in {@code columns} are marked, and {@code maxField} becomes the largest of them. When
   * no index is named, {@code maxField} keeps its default of {@code MAXIMUM_NUMBER_COLUMNS}.
   *
   * @param outputMutator used to create/modify schema in the record batch
   * @param columns columns selected in the query; only read when {@code isStarQuery} is false
   * @param isStarQuery whether all fields are selected
   * @throws SchemaChangeException propagated from {@code outputMutator.addField}
   * @throws IllegalArgumentException if a selected column is neither named {@code COL_NAME} nor a
   *     plain {@code *}, or if its child segment is not an array index
   */
  public RepeatedVarCharOutput(
      OutputMutator outputMutator, Collection<SchemaPath> columns, boolean isStarQuery)
      throws SchemaChangeException {
    super();

    MaterializedField field =
        MaterializedField.create(REF, Types.repeated(TypeProtos.MinorType.VARCHAR));
    this.vector = outputMutator.addField(field, RepeatedVarCharVector.class);

    this.mutator = vector.getMutator();

    { // setup fields
      // Array indexes explicitly selected in the query; stays empty for a star query.
      List<Integer> columnIds = new ArrayList<>();
      if (!isStarQuery) {
        String pathStr;
        for (SchemaPath path : columns) {
          assert path.getRootSegment().isNamed() : "root segment should be named";
          pathStr = path.getRootSegment().getPath();
          Preconditions.checkArgument(
              pathStr.equals(COL_NAME)
                  || (pathStr.equals("*") && path.getRootSegment().getChild() == null),
              String.format(
                  "Selected column '%s' must have name 'columns' or must be plain '*'", pathStr));

          if (path.getRootSegment().getChild() != null) {
            Preconditions.checkArgument(
                path.getRootSegment().getChild().isArray(),
                String.format("Selected column '%s' must be an array index", pathStr));
            int index = path.getRootSegment().getChild().getArraySegment().getIndex();
            columnIds.add(index);
          }
        }
        Collections.sort(columnIds);
      }

      boolean[] fields = new boolean[MAXIMUM_NUMBER_COLUMNS];

      // Default used for star queries and when no specific index was selected.
      int maxField = fields.length;

      if (isStarQuery) {
        Arrays.fill(fields, true);
      } else if (!columnIds.isEmpty()) {
        // Reset once, before the loop, so Math.max really accumulates the largest index
        // instead of depending on the list being sorted.
        maxField = 0;
        for (int index : columnIds) {
          maxField = Math.max(maxField, index);
          fields[index] = true;
        }
      }
      this.collectedFields = fields;
      this.maxField = maxField;
    }
  }
  /**
   * Creates a batch flagged as last whose definition lists one serialized field per field of
   * {@code schema}.
   *
   * @param queryId id of the query the batch belongs to
   * @param sendMajorFragmentId major fragment id of the sender
   * @param sendMinorFragmentId minor fragment id of the sender
   * @param receiveMajorFragmentId major fragment id of the receiver
   * @param receiveMinorFragmentId minor fragment id of the receiver
   * @param schema schema whose fields are serialized into the batch definition
   * @return the new writable batch
   */
  public static FragmentWritableBatch getEmptyLastWithSchema(
      QueryId queryId,
      int sendMajorFragmentId,
      int sendMinorFragmentId,
      int receiveMajorFragmentId,
      int receiveMinorFragmentId,
      BatchSchema schema) {

    // Append each field's serialized form straight onto the definition builder.
    final RecordBatchDef.Builder defBuilder = RecordBatchDef.newBuilder();
    for (MaterializedField schemaField : schema) {
      defBuilder.addField(schemaField.getAsBuilder().build());
    }
    final RecordBatchDef batchDef = defBuilder.build();

    final boolean isLast = true;
    return new FragmentWritableBatch(
        isLast,
        queryId,
        sendMajorFragmentId,
        sendMinorFragmentId,
        receiveMajorFragmentId,
        receiveMinorFragmentId,
        batchDef);
  }
  /**
   * Creates nine vectors of different kinds, gives each an initial capacity of 1024, allocates
   * them, and checks that every vector then reports exactly that value capacity.
   */
  @Test
  public void testVVInitialCapacity() throws Exception {
    // Scalar and repeated scalar fields.
    final MaterializedField bitField = MaterializedField.create(EMPTY_SCHEMA_PATH, BitHolder.TYPE);
    final MaterializedField intField = MaterializedField.create(EMPTY_SCHEMA_PATH, IntHolder.TYPE);
    final MaterializedField varCharField =
        MaterializedField.create(EMPTY_SCHEMA_PATH, VarCharHolder.TYPE);
    final MaterializedField nullableVar16CharField =
        MaterializedField.create(EMPTY_SCHEMA_PATH, NullableVar16CharHolder.TYPE);
    final MaterializedField repeatedFloat4Field =
        MaterializedField.create(EMPTY_SCHEMA_PATH, RepeatedFloat4Holder.TYPE);
    final MaterializedField repeatedVarBinaryField =
        MaterializedField.create(EMPTY_SCHEMA_PATH, RepeatedVarBinaryHolder.TYPE);

    // Map with bit and varchar children.
    final MaterializedField mapField = MaterializedField.create(EMPTY_SCHEMA_PATH, MapVector.TYPE);
    mapField.addChild(bitField);
    mapField.addChild(varCharField);

    // Repeated map with int and optional var16char children.
    final MaterializedField repeatedMapField =
        MaterializedField.create(EMPTY_SCHEMA_PATH, RepeatedMapVector.TYPE);
    repeatedMapField.addChild(intField);
    repeatedMapField.addChild(nullableVar16CharField);

    // Repeated list with an int child.
    final MaterializedField repeatedListField =
        MaterializedField.create(EMPTY_SCHEMA_PATH, RepeatedListVector.TYPE);
    repeatedListField.addChild(intField);

    final MaterializedField[] fields = {
      bitField,
      intField,
      varCharField,
      nullableVar16CharField,
      repeatedFloat4Field,
      repeatedVarBinaryField,
      mapField,
      repeatedMapField,
      repeatedListField
    };
    final ValueVector[] valueVectors = new ValueVector[fields.length];

    final int initialCapacity = 1024;

    try {
      // Allocate everything first. Each vector is stored in the array before it is configured
      // so the finally block can close it even if configuration fails.
      for (int i = 0; i < valueVectors.length; i++) {
        valueVectors[i] = TypeHelper.getNewVector(fields[i], allocator);
        valueVectors[i].setInitialCapacity(initialCapacity);
        valueVectors[i].allocateNew();
      }

      // Then check the capacity each vector reports.
      for (final ValueVector vv : valueVectors) {
        final int vvCapacity = vv.getValueCapacity();
        assertEquals(
            String.format("Incorrect value capacity for %s [%d]", vv.getField(), vvCapacity),
            initialCapacity,
            vvCapacity);
      }
    } finally {
      AutoCloseables.close(valueVectors);
    }
  }
  /**
   * Writes a few floats into a nullable FLOAT4 vector, reads them back, and checks that reading
   * an unset slot raises {@link IllegalStateException}, both before and after re-allocating the
   * vector.
   */
  @Test
  public void testNullableFloat() {
    final MaterializedField field =
        MaterializedField.create(EMPTY_SCHEMA_PATH, NullableFloat4Holder.TYPE);

    try (final NullableFloat4Vector vector =
        (NullableFloat4Vector) TypeHelper.getNewVector(field, allocator)) {
      final NullableFloat4Vector.Mutator mutator = vector.getMutator();
      vector.allocateNew(1024);

      // Write every sample first.
      final int[] slots = {0, 1, 100, 1022, 1023};
      final float[] samples = {100.1f, 101.2f, 102.3f, 103.4f, 104.5f};
      for (int k = 0; k < slots.length; k++) {
        mutator.set(slots[k], samples[k]);
      }

      // Then read them all back through one accessor with a zero tolerance.
      final NullableFloat4Vector.Accessor accessor = vector.getAccessor();
      for (int k = 0; k < slots.length; k++) {
        assertEquals(samples[k], accessor.get(slots[k]), 0);
      }

      // Slot 3 was never set, so reading it must throw.
      {
        boolean sawIllegalState = false;
        try {
          vector.getAccessor().get(3);
        } catch (IllegalStateException expected) {
          sawIllegalState = true;
        } finally {
          assertTrue(sawIllegalState);
        }
      }

      // After a fresh allocation, slot 0 must read as unset again via the earlier accessor.
      vector.allocateNew(2048);
      {
        boolean sawIllegalState = false;
        try {
          accessor.get(0);
        } catch (IllegalStateException expected) {
          sawIllegalState = true;
        } finally {
          assertTrue(sawIllegalState);
        }
      }
    }
  }
  /**
   * Creates a copier that does a project for every Nth record from a VectorContainer incoming into
   * VectorContainer outgoing. Each Ordering in orderings generates a column, and evaluation of the
   * expression associated with each Ordering determines the value of each column. These records
   * will later be sorted based on the values in each column, in the same order as the orderings.
   *
   * <p>For each ordering, a new vector of the expression's type (with its mode forced to
   * REQUIRED) is created under the name {@code f<i>}, recorded in {@code localAllocationVectors},
   * and added to {@code outgoing}. The generated evaluation code returns {@code false} as soon as
   * one column's write expression yields 0, and {@code true} otherwise.
   *
   * @param sv4 selection vector handed to the generated copier's {@code setupCopier}
   * @param incoming container the ordering expressions are materialized against and read from
   * @param outgoing container that receives one new vector per ordering; its schema is built
   *     here with selection vector mode NONE
   * @param orderings orderings whose expressions define the output columns, in order
   * @param localAllocationVectors list to which every vector created here is appended
   * @return the generated and set-up copier
   * @throws SchemaChangeException if materializing an expression reported errors, or if
   *     generating the copier class failed (wrapping the underlying exception)
   */
  private SampleCopier getCopier(
      SelectionVector4 sv4,
      VectorContainer incoming,
      VectorContainer outgoing,
      List<Ordering> orderings,
      List<ValueVector> localAllocationVectors)
      throws SchemaChangeException {
    final ErrorCollector collector = new ErrorCollectorImpl();
    final ClassGenerator<SampleCopier> cg =
        CodeGenerator.getRoot(SampleCopier.TEMPLATE_DEFINITION, context.getFunctionRegistry());

    // Running index used to name the output columns f0, f1, ...
    int i = 0;
    for (Ordering od : orderings) {
      final LogicalExpression expr =
          ExpressionTreeMaterializer.materialize(
              od.getExpr(), incoming, collector, context.getFunctionRegistry());
      SchemaPath schemaPath = SchemaPath.getSimplePath("f" + i++);
      // Same type as the expression, except that the mode is replaced with REQUIRED.
      TypeProtos.MajorType.Builder builder =
          TypeProtos.MajorType.newBuilder()
              .mergeFrom(expr.getMajorType())
              .clearMode()
              .setMode(TypeProtos.DataMode.REQUIRED);
      TypeProtos.MajorType newType = builder.build();
      MaterializedField outputField = MaterializedField.create(schemaPath, newType);
      // Materialization errors are accumulated in the collector; fail before allocating a vector.
      if (collector.hasErrors()) {
        throw new SchemaChangeException(
            String.format(
                "Failure while trying to materialize incoming schema.  Errors:\n %s.",
                collector.toErrorString()));
      }

      ValueVector vector = TypeHelper.getNewVector(outputField, oContext.getAllocator());
      localAllocationVectors.add(vector);
      TypedFieldId fid = outgoing.add(vector);
      ValueVectorWriteExpression write = new ValueVectorWriteExpression(fid, expr, true);
      HoldingContainer hc = cg.addExpr(write);
      // Generated code: return false when this column's write expression evaluates to 0.
      cg.getEvalBlock()._if(hc.getValue().eq(JExpr.lit(0)))._then()._return(JExpr.FALSE);
    }
    cg.rotateBlock();
    // Generated code: return true at the end of the evaluation block.
    cg.getEvalBlock()._return(JExpr.TRUE);
    outgoing.buildSchema(BatchSchema.SelectionVectorMode.NONE);
    try {
      SampleCopier sampleCopier = context.getImplementationClass(cg);
      sampleCopier.setupCopier(context, sv4, incoming, outgoing);
      return sampleCopier;
    } catch (ClassTransformationException | IOException e) {
      throw new SchemaChangeException(e);
    }
  }
Example #20
0
  /**
   * Merges the given schemas into one that contains the union of all their columns, in the order
   * the column paths are first seen. A path seen with a single minor type becomes an optional
   * field of that type; a path seen with several minor types becomes an optional UNION field
   * listing all of them. Sub-types of incoming UNION fields are folded in individually.
   *
   * @param schemas schemas to merge; the first one supplies the selection vector mode
   * @return the merged schema
   * @throws RuntimeException if any field is of minor type MAP or LIST
   */
  public static BatchSchema mergeSchemas(BatchSchema... schemas) {
    // Pass 1: collect the set of minor types observed for every path.
    final Map<SchemaPath, Set<MinorType>> typesByPath = Maps.newLinkedHashMap();

    for (BatchSchema schema : schemas) {
      for (MaterializedField schemaField : schema) {
        final SchemaPath path = schemaField.getPath();
        Set<MinorType> seenTypes = typesByPath.get(path);
        if (seenTypes == null) {
          seenTypes = Sets.newHashSet();
          typesByPath.put(path, seenTypes);
        }

        final MinorType minorType = schemaField.getType().getMinorType();
        if (minorType == MinorType.MAP || minorType == MinorType.LIST) {
          throw new RuntimeException(
              "Schema change not currently supported for schemas with complex types");
        }
        if (minorType == MinorType.UNION) {
          seenTypes.addAll(schemaField.getType().getSubTypeList());
        } else {
          seenTypes.add(minorType);
        }
      }
    }

    // Pass 2: turn every path into a single optional field.
    final List<MaterializedField> mergedFields = Lists.newArrayList();

    for (Map.Entry<SchemaPath, Set<MinorType>> entry : typesByPath.entrySet()) {
      final Set<MinorType> types = entry.getValue();
      final MajorType mergedType;
      if (types.size() > 1) {
        final MajorType.Builder unionBuilder =
            MajorType.newBuilder().setMinorType(MinorType.UNION).setMode(DataMode.OPTIONAL);
        for (MinorType subType : types) {
          unionBuilder.addSubType(subType);
        }
        mergedType = unionBuilder.build();
      } else {
        mergedType = Types.optional(types.iterator().next());
      }
      mergedFields.add(MaterializedField.create(entry.getKey(), mergedType));
    }

    return new SchemaBuilder()
        .addFields(mergedFields)
        .setSelectionVectorMode(schemas[0].getSelectionVectorMode())
        .build();
  }
Example #21
0
  /**
   * Creates and allocates one output vector per supported column of {@code schema} and records
   * the vector, the Kudu column and the column index in {@code projectedCols}. Columns whose Kudu
   * type has no entry in {@code TYPES} are logged and skipped.
   *
   * @param schema Kudu schema whose columns are projected
   * @throws SchemaChangeException propagated from {@code output.addField}
   */
  private void initCols(Schema schema) throws SchemaChangeException {
    final ImmutableList.Builder<ProjectedColumnInfo> projected = ImmutableList.builder();

    final int columnCount = schema.getColumnCount();
    for (int columnIndex = 0; columnIndex < columnCount; columnIndex++) {
      final ColumnSchema column = schema.getColumnByIndex(columnIndex);

      final String name = column.getName();
      final Type kuduType = column.getType();
      final MinorType minorType = TYPES.get(kuduType);
      if (minorType == null) {
        // No mapping for this Kudu type: log it and move on to the next column.
        logger.warn(
            "Ignoring column that is unsupported.",
            UserException.unsupportedError()
                .message(
                    "A column you queried has a data type that is not currently supported by the Kudu storage plugin. "
                        + "The column's name was %s and its Kudu data type was %s. ",
                    name, kuduType.toString())
                .addContext("column Name", name)
                .addContext("plugin", "kudu")
                .build(logger));

        continue;
      }

      // Nullable Kudu columns map to optional vectors, all others to required ones.
      final MajorType majorType =
          column.isNullable() ? Types.optional(minorType) : Types.required(minorType);
      final MaterializedField field = MaterializedField.create(name, majorType);
      final Class<? extends ValueVector> vectorClass =
          (Class<? extends ValueVector>)
              TypeHelper.getValueVectorClass(minorType, majorType.getMode());
      final ValueVector vector = output.addField(field, vectorClass);
      vector.allocateNew();

      final ProjectedColumnInfo info = new ProjectedColumnInfo();
      info.vv = vector;
      info.kuduColumn = column;
      info.index = columnIndex;
      projected.add(info);
    }

    projectedCols = projected.build();
  }
  /**
   * Tells whether {@code field} is selected. Star queries select everything; otherwise the
   * field's path is compared, ignoring case, with the unescaped path of each selected column, and
   * the first match is flagged in {@code columnsFound} at that column's position.
   *
   * @param field field to look for among the selected columns
   * @return {@code true} for a star query or when a selected column matches, else {@code false}
   */
  private boolean fieldSelected(MaterializedField field) {
    // TODO - not sure if this is how we want to represent this
    // for now it makes the existing tests pass, simply selecting
    // all available data if no columns are provided
    if (isStarQuery()) {
      return true;
    }

    int position = 0;
    for (SchemaPath column : getColumns()) {
      final boolean matches = field.getPath().equalsIgnoreCase(column.getAsUnescapedPath());
      if (matches) {
        columnsFound[position] = true;
        return true;
      }
      position++;
    }
    return false;
  }
  /** Checks set/get round trips on a required BIT vector: first writes, rewrites and toggles. */
  @Test
  public void testBitVector() {
    // Describe a required boolean field
    final MajorType bitType =
        MajorType.newBuilder()
            .setMinorType(MinorType.BIT)
            .setMode(DataMode.REQUIRED)
            .setWidth(4)
            .build();
    final MaterializedField field =
        MaterializedField.create(FieldDef.newBuilder().setMajorType(bitType).build());

    // Create the vector and allocate it with a size argument of 1024
    final BitVector bits = new BitVector(field, allocator);
    final BitVector.Mutator mutator = bits.getMutator();
    bits.allocateNew(1024);

    // Write a handful of values, then read them back
    mutator.set(0, 1);
    mutator.set(1, 0);
    mutator.set(100, 0);
    mutator.set(1022, 1);
    final BitVector.Accessor reader = bits.getAccessor();
    assertEquals(1, reader.get(0));
    assertEquals(0, reader.get(1));
    assertEquals(0, reader.get(100));
    assertEquals(1, reader.get(1022));

    // Writing the same value twice must leave it unchanged
    mutator.set(0, 1);
    mutator.set(0, 1);
    mutator.set(1, 0);
    mutator.set(1, 0);
    assertEquals(1, reader.get(0));
    assertEquals(0, reader.get(1));

    // Flipping each value must be visible on the next read
    mutator.set(0, 0);
    mutator.set(1, 1);
    assertEquals(0, reader.get(0));
    assertEquals(1, reader.get(1));

    // An index that was never written reads back as 0
    assertEquals(0, reader.get(3));
  }
  /** Checks set/get round trips on a BIT vector: first writes, rewrites and toggles. */
  @Test
  public void testBitVector() {
    final MaterializedField field = MaterializedField.create(EMPTY_SCHEMA_PATH, BitHolder.TYPE);

    // The vector is closed automatically when the block exits
    try (final BitVector bits = new BitVector(field, allocator)) {
      final BitVector.Mutator mutator = bits.getMutator();
      bits.allocateNew(1024);

      // Write a handful of values
      mutator.set(0, 1);
      mutator.set(1, 0);
      mutator.set(100, 0);
      mutator.set(1022, 1);

      // ...and read them back
      final BitVector.Accessor reader = bits.getAccessor();
      assertEquals(1, reader.get(0));
      assertEquals(0, reader.get(1));
      assertEquals(0, reader.get(100));
      assertEquals(1, reader.get(1022));

      // Writing the same value twice must leave it unchanged
      mutator.set(0, 1);
      mutator.set(0, 1);
      mutator.set(1, 0);
      mutator.set(1, 0);
      assertEquals(1, reader.get(0));
      assertEquals(0, reader.get(1));

      // Flipping each value must be visible on the next read
      mutator.set(0, 0);
      mutator.set(1, 1);
      assertEquals(0, reader.get(0));
      assertEquals(1, reader.get(1));

      // An index that was never written reads back as 0
      assertEquals(0, reader.get(3));
    }
  }
  /**
   * Verifies that a nullable VARCHAR vector returns the strings written to it and that reading a
   * slot that was never set raises an {@link AssertionError}.
   *
   * <p>NOTE(review): the null-slot check presumably relies on an {@code assert} inside the
   * accessor, in which case this test needs JVM assertions enabled ({@code -ea}) - confirm.
   */
  @Test
  public void testNullableVarLen2() {
    // Build an optional varchar field definition
    MajorType.Builder typeBuilder = MajorType.newBuilder();
    FieldDef.Builder defBuilder = FieldDef.newBuilder();
    typeBuilder.setMinorType(MinorType.VARCHAR).setMode(DataMode.OPTIONAL).setWidth(2);
    defBuilder.setMajorType(typeBuilder.build());
    MaterializedField field = MaterializedField.create(defBuilder.build());

    // Create and allocate the value vector
    NullableVarCharVector v = new NullableVarCharVector(field, allocator);
    NullableVarCharVector.Mutator m = v.getMutator();
    v.allocateNew(1024 * 10, 1024);

    // Create and set 3 sample strings (one charset lookup shared by every conversion)
    final Charset utf8 = Charset.forName("UTF-8");
    final String str1 = "AAAAA1";
    final String str2 = "BBBBBBBBB2";
    final String str3 = "CCCC3";
    m.set(0, str1.getBytes(utf8));
    m.set(1, str2.getBytes(utf8));
    m.set(2, str3.getBytes(utf8));

    // Check the sample strings
    assertEquals(str1, new String(v.getAccessor().get(0), utf8));
    assertEquals(str2, new String(v.getAccessor().get(1), utf8));
    assertEquals(str3, new String(v.getAccessor().get(2), utf8));

    // Ensure null value throws
    boolean threw = false;
    try {
      v.getAccessor().get(3);
    } catch (AssertionError e) {
      threw = true;
    }
    // Fail with an explicit throw: a bare `assert false` is skipped when assertions are disabled,
    // which would let this test pass without having verified anything.
    if (!threw) {
      throw new AssertionError("Expected an AssertionError when reading unset index 3");
    }
  }
  /**
   * Checks that {@code setSafe} beyond the initial allocation of a nullable FLOAT4 vector grows
   * the vector, keeps earlier values, and that the value count may exceed the current capacity.
   */
  @Test
  public void testReAllocNullableFixedWidthVector() {
    final MaterializedField field =
        MaterializedField.create(EMPTY_SCHEMA_PATH, NullableFloat4Holder.TYPE);

    // Create a new value vector with an initial capacity of 1024 floats
    try (final NullableFloat4Vector floats =
        (NullableFloat4Vector) TypeHelper.getNewVector(field, allocator)) {
      final NullableFloat4Vector.Mutator mutator = floats.getMutator();
      final int initialCapacity = 1024;
      floats.allocateNew(initialCapacity);

      assertEquals(initialCapacity, floats.getValueCapacity());

      // Writes that fit inside the initial allocation
      mutator.setSafe(0, 100.1f);
      mutator.setSafe(100, 102.3f);
      mutator.setSafe(1023, 104.5f);

      // A write past the initial allocation
      mutator.setSafe(2000, 105.5f);

      // The capacity is expected to have doubled
      assertEquals(initialCapacity * 2, floats.getValueCapacity());

      final NullableFloat4Vector.Accessor reader = floats.getAccessor();
      assertEquals(100.1f, reader.get(0), 0);
      assertEquals(102.3f, reader.get(100), 0);
      assertEquals(104.5f, reader.get(1023), 0);
      assertEquals(105.5f, reader.get(2000), 0);

      // Set the valueCount above the current valueCapacity. Nullable vectors allow this because
      // setSafe is not called for null values, while setValueCount is still called once all
      // values have been inserted.
      mutator.setValueCount(floats.getValueCapacity() + 200);
    }
  }
  /** Writes a few values into a UINT4 vector with {@code setSafe} and reads each one back. */
  @Test
  public void testFixedType() {
    final MaterializedField field = MaterializedField.create(EMPTY_SCHEMA_PATH, UInt4Holder.TYPE);

    // Index/value pairs written to and then read from the vector, in this order
    final int[] indexes = {0, 1, 100, 1022, 1023};
    final int[] values = {100, 101, 102, 103, 104};

    // Create a new value vector for 1024 integers.
    try (final UInt4Vector ints = new UInt4Vector(field, allocator)) {
      final UInt4Vector.Mutator mutator = ints.getMutator();
      ints.allocateNew(1024);

      // Put and set a few values
      for (int k = 0; k < indexes.length; k++) {
        mutator.setSafe(indexes[k], values[k]);
      }

      final UInt4Vector.Accessor reader = ints.getAccessor();
      for (int k = 0; k < indexes.length; k++) {
        assertEquals(values[k], reader.get(indexes[k]));
      }
    }
  }
  /**
   * Reallocates a UINT4 vector near {@code BaseValueVector.MAX_ALLOCATION_SIZE}; the test passes
   * only if an {@code OversizedAllocationException} escapes the method.
   *
   * <p>The vector is closed and then allocated again, so it cannot be managed by a single
   * try-with-resources block.
   */
  @Test(expected = OversizedAllocationException.class)
  public void testFixedVectorReallocation() {
    final MaterializedField field = MaterializedField.create(EMPTY_SCHEMA_PATH, UInt4Holder.TYPE);
    final UInt4Vector ints = new UInt4Vector(field, allocator);
    // edge case 1: allocate MAX_ALLOCATION_SIZE / 4 values up front
    final int largestValueCount = BaseValueVector.MAX_ALLOCATION_SIZE / 4;
    try {
      ints.allocateNew(largestValueCount);
      assertEquals(largestValueCount, ints.getValueCapacity());
      ints.reAlloc();
      assertEquals(largestValueCount * 2, ints.getValueCapacity());
    } finally {
      ints.close();
    }

    // common case: start from MAX_ALLOCATION_SIZE / 8 values and grow twice
    try {
      ints.allocateNew(BaseValueVector.MAX_ALLOCATION_SIZE / 8);
      ints.reAlloc(); // first growth step
      ints.reAlloc(); // presumably the call meant to raise OversizedAllocationException
    } finally {
      ints.close();
    }
  }
  /**
   * Prepares this reader to scan one row group: picks the columns to read, sizes the record batch,
   * registers an output vector per selected column and creates the matching column readers.
   *
   * <p>The work happens in two passes over the columns of the footer schema. The first pass counts
   * the selected columns and adds up the lengths of the fixed-width ones so that {@code
   * recordsPerBatch} can be computed. The second pass adds each selected column to {@code output}
   * and wraps it in a reader: non-BINARY, non-repeated columns go to {@code columnStatuses};
   * BINARY columns and repeated fixed-width columns go to the list handed to {@link
   * VarLenBinaryReader}. Finally, for non-star queries, every projected column that was not found
   * (and is not the star column) gets a nullable INT vector recorded in {@code nullFilledVectors}.
   *
   * @param operatorContext the operator context, stored on this reader
   * @param output the mutator used to register the output vectors
   * @throws ExecutionSetupException declared by the signature; any exception raised while
   *     building the readers is passed to {@code handleAndRaise}
   */
  @Override
  public void setup(OperatorContext operatorContext, OutputMutator output)
      throws ExecutionSetupException {
    this.operatorContext = operatorContext;
    // per-projected-column bookkeeping is only needed when specific columns were requested
    if (!isStarQuery()) {
      columnsFound = new boolean[getColumns().size()];
      nullFilledVectors = new ArrayList<>();
    }
    columnStatuses = new ArrayList<>();
    //    totalRecords = footer.getBlocks().get(rowGroupIndex).getRowCount();
    List<ColumnDescriptor> columns = footer.getFileMetaData().getSchema().getColumns();
    allFieldsFixedLength = true;
    ColumnDescriptor column;
    ColumnChunkMetaData columnChunkMetaData;
    int columnsToScan = 0;
    mockRecordsRead = 0;

    MaterializedField field;
    //    ParquetMetadataConverter metaConverter = new ParquetMetadataConverter();
    // NOTE(review): fileMetaData is declared but never used in this method
    FileMetaData fileMetaData;

    logger.debug(
        "Reading row group({}) with {} records in file {}.",
        rowGroupIndex,
        footer.getBlocks().get(rowGroupIndex).getRowCount(),
        hadoopPath.toUri().getPath());
    totalRecordsRead = 0;

    // TODO - figure out how to deal with this better once we add nested reading, note also look
    // where this map is used below
    // store a map from column name to converted types if they are non-null
    Map<String, SchemaElement> schemaElements =
        ParquetReaderUtility.getColNameToSchemaElementMapping(footer);

    // loop to add up the length of the fixed width columns and build the schema
    for (int i = 0; i < columns.size(); ++i) {
      column = columns.get(i);
      // the schema element is looked up by the first segment of the column path only
      SchemaElement se = schemaElements.get(column.getPath()[0]);
      MajorType mt =
          ParquetToDrillTypeConverter.toMajorType(
              column.getType(),
              se.getType_length(),
              getDataMode(column),
              se,
              fragmentContext.getOptions());
      field = MaterializedField.create(toFieldName(column.getPath()), mt);
      if (!fieldSelected(field)) {
        continue;
      }
      columnsToScan++;
      int dataTypeLength = getDataTypeLength(column, se);
      // a length of -1 is treated here as "not fixed width"
      if (dataTypeLength == -1) {
        allFieldsFixedLength = false;
      } else {
        bitWidthAllFixedFields += dataTypeLength;
      }
    }
    //    rowGroupOffset =
    // footer.getBlocks().get(rowGroupIndex).getColumns().get(0).getFirstDataPageOffset();

    // With only fixed-width columns the batch size is derived from batchSize and the summed
    // widths, capped by a value count and by 65535; otherwise a default is used.
    // NOTE(review): the value count is read from block 0 / column 0 rather than from
    // rowGroupIndex - confirm this is intended.
    if (columnsToScan != 0 && allFieldsFixedLength) {
      recordsPerBatch =
          (int)
              Math.min(
                  Math.min(
                      batchSize / bitWidthAllFixedFields,
                      footer.getBlocks().get(0).getColumns().get(0).getValueCount()),
                  65535);
    } else {
      recordsPerBatch = DEFAULT_RECORDS_TO_READ_IF_NOT_FIXED_WIDTH;
    }

    try {
      ValueVector vector;
      SchemaElement schemaElement;
      final ArrayList<VarLengthColumn<? extends ValueVector>> varLengthColumns = new ArrayList<>();
      // initialize all of the column read status objects
      boolean fieldFixedLength;
      // the column chunk meta-data is not guaranteed to be in the same order as the columns in the
      // schema
      // a map is constructed for fast access to the correct columnChunkMetadata to correspond
      // to an element in the schema
      Map<String, Integer> columnChunkMetadataPositionsInList = new HashMap<>();
      BlockMetaData rowGroupMetadata = footer.getBlocks().get(rowGroupIndex);

      // key: string form of the column chunk path, value: its position in the row group
      int colChunkIndex = 0;
      for (ColumnChunkMetaData colChunk : rowGroupMetadata.getColumns()) {
        columnChunkMetadataPositionsInList.put(
            Arrays.toString(colChunk.getPath().toArray()), colChunkIndex);
        colChunkIndex++;
      }
      for (int i = 0; i < columns.size(); ++i) {
        column = columns.get(i);
        // find the chunk metadata whose path string matches this schema column
        columnChunkMetaData =
            rowGroupMetadata
                .getColumns()
                .get(columnChunkMetadataPositionsInList.get(Arrays.toString(column.getPath())));
        schemaElement = schemaElements.get(column.getPath()[0]);
        MajorType type =
            ParquetToDrillTypeConverter.toMajorType(
                column.getType(),
                schemaElement.getType_length(),
                getDataMode(column),
                schemaElement,
                fragmentContext.getOptions());
        field = MaterializedField.create(toFieldName(column.getPath()), type);
        // the field was not requested to be read
        if (!fieldSelected(field)) {
          continue;
        }

        // every primitive type except BINARY is handled as fixed length
        fieldFixedLength = column.getType() != PrimitiveType.PrimitiveTypeName.BINARY;
        vector =
            output.addField(
                field,
                (Class<? extends ValueVector>)
                    TypeHelper.getValueVectorClass(type.getMinorType(), type.getMode()));
        if (column.getType() != PrimitiveType.PrimitiveTypeName.BINARY) {
          if (column.getMaxRepetitionLevel() > 0) {
            // repeated fixed-width column: a fixed-width reader fills the inner data vector and
            // is wrapped in a FixedWidthRepeatedReader that is tracked with the var-length columns
            final RepeatedValueVector repeatedVector = RepeatedValueVector.class.cast(vector);
            ColumnReader<?> dataReader =
                ColumnReaderFactory.createFixedColumnReader(
                    this,
                    fieldFixedLength,
                    column,
                    columnChunkMetaData,
                    recordsPerBatch,
                    repeatedVector.getDataVector(),
                    schemaElement);
            varLengthColumns.add(
                new FixedWidthRepeatedReader(
                    this,
                    dataReader,
                    getTypeLengthInBits(column.getType()),
                    -1,
                    column,
                    columnChunkMetaData,
                    false,
                    repeatedVector,
                    schemaElement));
          } else {
            // plain fixed-width column
            columnStatuses.add(
                ColumnReaderFactory.createFixedColumnReader(
                    this,
                    fieldFixedLength,
                    column,
                    columnChunkMetaData,
                    recordsPerBatch,
                    vector,
                    schemaElement));
          }
        } else {
          // create a reader and add it to the appropriate list
          varLengthColumns.add(
              ColumnReaderFactory.getReader(
                  this, -1, column, columnChunkMetaData, false, vector, schemaElement));
        }
      }
      varLengthReader = new VarLenBinaryReader(this, varLengthColumns);

      // projected columns that matched nothing in the file get a nullable INT vector
      if (!isStarQuery()) {
        List<SchemaPath> projectedColumns = Lists.newArrayList(getColumns());
        SchemaPath col;
        for (int i = 0; i < columnsFound.length; i++) {
          col = projectedColumns.get(i);
          assert col != null;
          if (!columnsFound[i] && !col.equals(STAR_COLUMN)) {
            nullFilledVectors.add(
                (NullableIntVector)
                    output.addField(
                        MaterializedField.create(
                            col.getAsUnescapedPath(), Types.optional(TypeProtos.MinorType.INT)),
                        (Class<? extends ValueVector>)
                            TypeHelper.getValueVectorClass(
                                TypeProtos.MinorType.INT, DataMode.OPTIONAL)));
          }
        }
      }
    } catch (Exception e) {
      // every failure in the block above is funnelled through handleAndRaise
      handleAndRaise("Failure in setting up reader", e);
    }
  }
Example #30
0
  /**
   * Generates, instantiates and sets up the {@link StreamingAggregator} for the current incoming
   * batch.
   *
   * <p>The output container is cleared and refilled with one vector per key expression followed by
   * one vector per value expression. Expressions that materialize to {@code null} are skipped;
   * errors gathered during materialization are reported once both loops have finished.
   *
   * @return the aggregator, already set up against the incoming batch
   * @throws SchemaChangeException if any expression failed to materialize
   * @throws ClassTransformationException declared for the code generation step
   * @throws IOException declared for the code generation step
   */
  private StreamingAggregator createAggregatorInternal()
      throws SchemaChangeException, ClassTransformationException, IOException {
    final ClassGenerator<StreamingAggregator> cg =
        CodeGenerator.getRoot(
            StreamingAggTemplate.TEMPLATE_DEFINITION, context.getFunctionRegistry());
    container.clear();

    final NamedExpression[] keys = popConfig.getKeys();
    final NamedExpression[] aggregates = popConfig.getExprs();
    final LogicalExpression[] keyExprs = new LogicalExpression[keys.length];
    final LogicalExpression[] valueExprs = new LogicalExpression[aggregates.length];
    final TypedFieldId[] keyOutputIds = new TypedFieldId[keys.length];

    final ErrorCollector collector = new ErrorCollectorImpl();

    // group-by keys: materialize each one and give it an output vector
    for (int k = 0; k < keyExprs.length; k++) {
      final NamedExpression named = keys[k];
      final LogicalExpression materialized =
          ExpressionTreeMaterializer.materialize(
              named.getExpr(), incoming, collector, context.getFunctionRegistry());
      if (materialized != null) {
        keyExprs[k] = materialized;
        final MaterializedField keyField =
            MaterializedField.create(named.getRef(), materialized.getMajorType());
        keyOutputIds[k] =
            container.add(TypeHelper.getNewVector(keyField, oContext.getAllocator()));
      }
    }

    // aggregate values: materialize each one and wrap it in a write into its output vector
    for (int v = 0; v < valueExprs.length; v++) {
      final NamedExpression named = aggregates[v];
      final LogicalExpression materialized =
          ExpressionTreeMaterializer.materialize(
              named.getExpr(), incoming, collector, context.getFunctionRegistry());
      if (materialized == null) {
        continue;
      }
      if (materialized instanceof IfExpression) {
        throw UserException.unsupportedError(
                new UnsupportedOperationException(
                    "Union type not supported in aggregate functions"))
            .build(logger);
      }

      final MaterializedField valueField =
          MaterializedField.create(named.getRef(), materialized.getMajorType());
      final TypedFieldId valueId =
          container.add(TypeHelper.getNewVector(valueField, oContext.getAllocator()));
      valueExprs[v] = new ValueVectorWriteExpression(valueId, materialized, true);
    }

    if (collector.hasErrors()) {
      throw new SchemaChangeException(
          "Failure while materializing expression. " + collector.toErrorString());
    }

    // fill in the generated class
    setupIsSame(cg, keyExprs);
    setupIsSameApart(cg, keyExprs);
    addRecordValues(cg, valueExprs);
    outputRecordKeys(cg, keyOutputIds, keyExprs);
    outputRecordKeysPrev(cg, keyOutputIds, keyExprs);

    cg.getBlock("resetValues")._return(JExpr.TRUE);
    getIndex(cg);

    container.buildSchema(SelectionVectorMode.NONE);
    final StreamingAggregator aggregator = context.getImplementationClass(cg);
    aggregator.setup(oContext, incoming, this);
    return aggregator;
  }