@Test
  public void testRoundTrip() {
    // Writes a dictionary-encoded VARCHAR block through DictionaryBlockEncoding, reads it back,
    // and checks that the dictionary, the position count, the ids and the dictionary source id
    // are all preserved.
    int positionCount = 40;

    // build dictionary
    BlockBuilder dictionaryBuilder = VARCHAR.createBlockBuilder(new BlockBuilderStatus(), 4);
    VARCHAR.writeString(dictionaryBuilder, "alice");
    VARCHAR.writeString(dictionaryBuilder, "bob");
    VARCHAR.writeString(dictionaryBuilder, "charlie");
    VARCHAR.writeString(dictionaryBuilder, "dave");
    Block dictionary = dictionaryBuilder.build();

    // build ids: cycle through the four dictionary entries
    int[] ids = new int[positionCount];
    for (int i = 0; i < positionCount; i++) {
      ids[i] = i % 4;
    }

    BlockEncoding blockEncoding = new DictionaryBlockEncoding(new VariableWidthBlockEncoding());
    DictionaryBlock dictionaryBlock = new DictionaryBlock(positionCount, dictionary, ids);

    DynamicSliceOutput sliceOutput = new DynamicSliceOutput(1024);
    blockEncoding.writeBlock(sliceOutput, dictionaryBlock);
    Block actualBlock = blockEncoding.readBlock(sliceOutput.slice().getInput());

    assertTrue(actualBlock instanceof DictionaryBlock);
    DictionaryBlock actualDictionaryBlock = (DictionaryBlock) actualBlock;
    assertBlockEquals(VARCHAR, actualDictionaryBlock.getDictionary(), dictionary);

    // Without this check a block that came back with zero positions would pass the id loop
    // below without comparing anything.
    assertEquals(actualDictionaryBlock.getPositionCount(), positionCount);
    for (int position = 0; position < positionCount; position++) {
      assertEquals(actualDictionaryBlock.getId(position), ids[position]);
    }
    assertEquals(
        actualDictionaryBlock.getDictionarySourceId(), dictionaryBlock.getDictionarySourceId());
  }
 /**
  * Reads every non-null field of the cursor's current position through the accessor that matches
  * the column's declared type. The values are discarded; the point is that each accessor can be
  * called without throwing. This method does not advance the cursor.
  *
  * @param cursor cursor to read from
  * @param schema column metadata; entry {@code i} describes cursor field {@code i}
  * @throws RuntimeException if reading a VARCHAR/VARBINARY slice fails (the original exception is
  *     kept as the cause and the column is added to the message)
  */
 private static void assertReadFields(RecordCursor cursor, List<ColumnMetadata> schema) {
   for (int columnIndex = 0; columnIndex < schema.size(); columnIndex++) {
     ColumnMetadata column = schema.get(columnIndex);
     if (cursor.isNull(columnIndex)) {
       continue;
     }

     Type type = column.getType();
     if (BOOLEAN.equals(type)) {
       cursor.getBoolean(columnIndex);
     } else if (BIGINT.equals(type) || TIMESTAMP.equals(type)) {
       // both types are read through the long accessor
       cursor.getLong(columnIndex);
     } else if (DOUBLE.equals(type)) {
       cursor.getDouble(columnIndex);
     } else if (VARCHAR.equals(type) || VARBINARY.equals(type)) {
       try {
         cursor.getSlice(columnIndex);
       } catch (RuntimeException e) {
         throw new RuntimeException("column " + column, e);
       }
     } else {
       // Report the offending type itself; the bare column index alone does not say what was
       // unsupported.
       fail("Unknown primitive type " + type + " for column " + columnIndex);
     }
   }
 }
  @Setup
  public void setup() {
    // Prepares the benchmark state: an IN-list filter over a single column of the configured
    // type, a 10,000-row input page of random values, and the compiled page processor.
    Random random = new Random();

    // Slot 0 is filled later with the probed field; slots 1..inListCount hold the constants.
    RowExpression[] arguments = new RowExpression[1 + inListCount];
    switch (type) {
      case StandardTypes.BIGINT:
        prestoType = BIGINT;
        for (int slot = 1; slot < arguments.length; slot++) {
          long value = (long) random.nextInt();
          arguments[slot] = constant(value, BIGINT);
        }
        break;
      case StandardTypes.DOUBLE:
        prestoType = DOUBLE;
        for (int slot = 1; slot < arguments.length; slot++) {
          double value = random.nextDouble();
          arguments[slot] = constant(value, DOUBLE);
        }
        break;
      case StandardTypes.VARCHAR:
        prestoType = VARCHAR;
        for (int slot = 1; slot < arguments.length; slot++) {
          String text = Long.toString(random.nextLong());
          arguments[slot] = constant(Slices.utf8Slice(text), VARCHAR);
        }
        break;
      default:
        throw new IllegalStateException();
    }

    arguments[0] = field(0, prestoType);
    RowExpression project = field(0, prestoType);

    // Fill a single-column page with random values of the same type.
    PageBuilder builder = new PageBuilder(ImmutableList.of(prestoType));
    for (int row = 0; row < 10_000; row++) {
      builder.declarePosition();

      switch (type) {
        case StandardTypes.BIGINT:
          BIGINT.writeLong(builder.getBlockBuilder(0), random.nextInt());
          break;
        case StandardTypes.DOUBLE:
          DOUBLE.writeDouble(builder.getBlockBuilder(0), random.nextDouble());
          break;
        case StandardTypes.VARCHAR:
          VARCHAR.writeSlice(
              builder.getBlockBuilder(0), Slices.utf8Slice(Long.toString(random.nextLong())));
          break;
      }
    }
    inputPage = builder.build();

    Signature inSignature = new Signature(IN, SCALAR, parseTypeSignature(StandardTypes.BOOLEAN));
    RowExpression filter = call(inSignature, BOOLEAN, arguments);

    ExpressionCompiler compiler =
        new ExpressionCompiler(MetadataManager.createTestMetadataManager());
    processor = compiler.compilePageProcessor(filter, ImmutableList.of(project)).get();
  }
 /**
  * Fills the given builder with fixed-size VARCHAR values until it reports full, then checks the
  * number of entries it accepted.
  *
  * @param blockBuilder builder under test; must be empty on entry
  */
 private void testIsFull(VariableWidthBlockBuilder blockBuilder) {
   assertTrue(blockBuilder.isEmpty());

   // Re-check fullness before every write, appending one freshly allocated value each time.
   for (boolean full = blockBuilder.isFull(); !full; full = blockBuilder.isFull()) {
     VARCHAR.writeSlice(blockBuilder, Slices.allocate(VARCHAR_VALUE_SIZE));
   }

   assertEquals(blockBuilder.getPositionCount(), EXPECTED_ENTRY_COUNT);
   assertEquals(blockBuilder.isFull(), true);
 }
  /**
   * Advances the cursor {@code numRows} times and, for each row, compares every field against the
   * expected value of the corresponding test column.
   *
   * @param cursor cursor to verify
   * @param testColumns expected columns; entry {@code i} describes cursor field {@code i}
   * @param numRows number of rows the cursor is expected to produce
   * @throws IOException declared for callers; nothing in this method throws it directly
   */
  protected void checkCursor(RecordCursor cursor, List<TestColumn> testColumns, int numRows)
      throws IOException {
    for (int row = 0; row < numRows; row++) {
      assertTrue(cursor.advanceNextPosition());

      int field = 0;
      for (TestColumn testColumn : testColumns) {
        String typeName = testColumn.getObjectInspector().getTypeName();
        Type type = HiveType.valueOf(typeName).getType(TYPE_MANAGER);

        // Read the field through the accessor that matches its type.
        Object actualValue;
        if (cursor.isNull(field)) {
          actualValue = null;
        } else if (BOOLEAN.equals(type)) {
          actualValue = cursor.getBoolean(field);
        } else if (BIGINT.equals(type)
            || DateType.DATE.equals(type)
            || TimestampType.TIMESTAMP.equals(type)) {
          actualValue = cursor.getLong(field);
        } else if (DOUBLE.equals(type)) {
          actualValue = cursor.getDouble(field);
        } else if (VARCHAR.equals(type) || VARBINARY.equals(type)) {
          actualValue = cursor.getSlice(field);
        } else if (isStructuralType(type)) {
          actualValue = cursor.getObject(field);
        } else {
          throw new RuntimeException("unknown type");
        }

        // Compare against the expectation; floating point values get a tolerance.
        boolean floatingPoint = typeName.equals("float") || typeName.equals("double");
        if (actualValue == null) {
          assertEquals(
              null,
              testColumn.getExpectedValue(),
              String.format("Expected null for column %s", testColumn.getName()));
        } else if (floatingPoint) {
          assertEquals((double) actualValue, (double) testColumn.getExpectedValue(), EPSILON);
        } else if (testColumn.getObjectInspector().getCategory() == Category.PRIMITIVE) {
          assertEquals(
              actualValue,
              testColumn.getExpectedValue(),
              String.format("Wrong value for column %s", testColumn.getName()));
        } else {
          Block expectedBlock = (Block) testColumn.getExpectedValue();
          Block actualBlock = (Block) actualValue;
          assertBlockEquals(
              actualBlock,
              expectedBlock,
              String.format("Wrong value for column %s", testColumn.getName()));
        }

        field++;
      }
    }
  }
Пример #6
0
  /**
   * Inserts every row produced by {@code data} into the table named by {@code tableMetadata},
   * using batched prepared statements of at most 1000 rows each. Hidden columns are excluded.
   *
   * @param tableMetadata supplies the target table name and its columns
   * @param handle database handle used to prepare the batches
   * @param data source of the rows to insert
   * @throws IllegalArgumentException if a column has a type this method cannot bind
   */
  private static void insertRows(
      ConnectorTableMetadata tableMetadata, Handle handle, RecordSet data) {
    Predicate<ColumnMetadata> notHidden =
        new Predicate<ColumnMetadata>() {
          @Override
          public boolean apply(ColumnMetadata columnMetadata) {
            return !columnMetadata.isHidden();
          }
        };
    List<ColumnMetadata> columns =
        ImmutableList.copyOf(Iterables.filter(tableMetadata.getColumns(), notHidden));
    int columnCount = columns.size();

    // One "?" placeholder per visible column.
    String placeholders = Joiner.on(',').join(nCopies(columnCount, "?"));
    String sql =
        format(
            "INSERT INTO %s VALUES (%s)", tableMetadata.getTable().getTableName(), placeholders);

    RecordCursor cursor = data.cursor();
    boolean exhausted = false;
    while (!exhausted) {
      // insert 1000 rows at a time
      PreparedBatch batch = handle.prepareBatch(sql);
      for (int row = 0; row < 1000; row++) {
        if (!cursor.advanceNextPosition()) {
          exhausted = true;
          break;
        }

        PreparedBatchPart part = batch.add();
        for (int index = 0; index < columnCount; index++) {
          Type type = columns.get(index).getType();
          if (BOOLEAN.equals(type)) {
            part.bind(index, cursor.getBoolean(index));
          } else if (BIGINT.equals(type)) {
            part.bind(index, cursor.getLong(index));
          } else if (DOUBLE.equals(type)) {
            part.bind(index, cursor.getDouble(index));
          } else if (VARCHAR.equals(type)) {
            part.bind(index, cursor.getSlice(index).toStringUtf8());
          } else if (DATE.equals(type)) {
            long millisUtc = TimeUnit.DAYS.toMillis(cursor.getLong(index));
            // H2 expects dates to be millis at midnight in the JVM timezone
            long localMillis =
                DateTimeZone.UTC.getMillisKeepLocal(DateTimeZone.getDefault(), millisUtc);
            part.bind(index, new Date(localMillis));
          } else {
            throw new IllegalArgumentException("Unsupported type " + type);
          }
        }
      }
      // Runs for full batches and for the final, possibly partial, batch alike.
      batch.execute();
    }
  }
      /**
       * Accumulates one page of training input: appends each label (widened to double when the
       * label channel holds longs), parses each feature vector from its JSON slice while tracking
       * the estimated size, and captures the parameters from the first position of the params
       * channel if they have not been set yet.
       */
      @Override
      public void addInput(Page page) {
        Block labelBlock = page.getBlock(labelChannel);
        int labelCount = labelBlock.getPositionCount();
        for (int i = 0; i < labelCount; i++) {
          double label =
              labelIsLong ? (double) BIGINT.getLong(labelBlock, i) : DOUBLE.getDouble(labelBlock, i);
          labels.add(label);
        }

        Block featuresBlock = page.getBlock(featuresChannel);
        int featureCount = featuresBlock.getPositionCount();
        for (int i = 0; i < featureCount; i++) {
          FeatureVector features = ModelUtils.jsonToFeatures(VARCHAR.getSlice(featuresBlock, i));
          rowsSize += features.getEstimatedSize();
          rows.add(features);
        }

        // Parameters are only read once, from the first page that arrives.
        if (params != null) {
          return;
        }
        Block paramsBlock = page.getBlock(paramsChannel);
        params = LibSvmUtils.parseParameters(VARCHAR.getSlice(paramsBlock, 0).toStringUtf8());
      }
Пример #8
0
 /**
  * Dispatches to the type-specific parser for the given column.
  *
  * @param column index into {@code types} of the column to parse
  * @throws UnsupportedOperationException if the column's type has no parser
  */
 private void parseColumn(int column) {
   Type type = types[column];

   if (BOOLEAN.equals(type)) {
     parseBooleanColumn(column);
     return;
   }
   if (BIGINT.equals(type) || TIMESTAMP.equals(type)) {
     // both types go through the long parser
     parseLongColumn(column);
     return;
   }
   if (DOUBLE.equals(type)) {
     parseDoubleColumn(column);
     return;
   }
   if (VARCHAR.equals(type) || VARBINARY.equals(type)) {
     parseStringColumn(column);
     return;
   }

   throw new UnsupportedOperationException("Unsupported column type: " + type);
 }
/**
 * Table definition for stats data records, mapped to {@link StatsDataModel}.
 *
 * <p>Declares four fields ({@code key}, {@code stat}, {@code timestamp}, {@code data}), uses
 * {@code key} as the auto-increment key and links {@code stat} to the primary key of
 * {@code TABLE_STATS}.
 */
public class TableStatsData extends AutoIncrementTable<StatsDataModel, UInteger> {
  // Set to the instance being built by the constructor; each new instance overwrites it.
  public static TableStatsData TABLE_STATSDATA;

  // Unsigned integer key; registered as the auto-increment key in the constructor.
  public final TableField<StatsDataModel, UInteger> KEY = createField("key", INTEGERUNSIGNED, this);
  // Unsigned integer referencing TABLE_STATS' primary key (see addForeignKey below).
  public final TableField<StatsDataModel, UInteger> STAT =
      createField("stat", INTEGERUNSIGNED, this);
  // Stored in the column named "timestamp".
  public final TableField<StatsDataModel, Timestamp> TIME =
      createField("timestamp", TIMESTAMP, this);
  // VARCHAR column declared with length 64.
  public final TableField<StatsDataModel, String> DATA =
      createField("data", VARCHAR.length(64), this);

  /**
   * Creates the table definition and publishes it through {@link #TABLE_STATSDATA}.
   *
   * @param prefix prepended to {@code "statsdata"} and passed to the superclass constructor
   *     (presumably forming the table name; confirm against AutoIncrementTable)
   */
  public TableStatsData(String prefix) {
    super(prefix + "statsdata", new Version(1));
    setAIKey(KEY);
    // NOTE(review): relies on TABLE_STATS having been initialised before this constructor runs.
    addForeignKey(TABLE_STATS.getPrimaryKey(), STAT);
    addFields(KEY, STAT, TIME, DATA);
    TABLE_STATSDATA = this;
  }

  /** Returns the record class this table maps to. */
  @Override
  public Class<StatsDataModel> getRecordType() {
    return StatsDataModel.class;
  }
}
Пример #10
0
    /**
     * Evaluates a generic literal ({@code type 'value'}) to its runtime value.
     *
     * <p>JSON literals are evaluated with the {@code json_parse} function; every other type is
     * evaluated with the coercion from VARCHAR to that type. In both cases the function is
     * invoked on the literal's text as a UTF-8 slice.
     *
     * @throws SemanticException if the type is unknown or has no coercion from VARCHAR
     */
    @Override
    protected Object visitGenericLiteral(GenericLiteral node, ConnectorSession session) {
      Type type = metadata.getType(parseTypeSignature(node.getType()));
      if (type == null) {
        throw new SemanticException(TYPE_MISMATCH, node, "Unknown type: " + node.getType());
      }

      // Pick the function that turns the literal text into a value of the target type.
      ScalarFunctionImplementation operator;
      if (JSON.equals(type)) {
        Signature jsonParse =
            new Signature(
                "json_parse", SCALAR, JSON.getTypeSignature(), VARCHAR.getTypeSignature());
        operator = metadata.getFunctionRegistry().getScalarFunctionImplementation(jsonParse);
      } else {
        try {
          Signature coercion = metadata.getFunctionRegistry().getCoercion(VARCHAR, type);
          operator = metadata.getFunctionRegistry().getScalarFunctionImplementation(coercion);
        } catch (IllegalArgumentException e) {
          throw new SemanticException(TYPE_MISMATCH, node, "No literal form for type %s", type);
        }
      }

      try {
        return ExpressionInterpreter.invoke(
            session, operator, ImmutableList.<Object>of(utf8Slice(node.getValue())));
      } catch (Throwable throwable) {
        throw Throwables.propagate(throwable);
      }
    }
  /** Creates a cursor over a run-length encoded block that repeats one null VARCHAR 11 times. */
  @Override
  protected RunLengthEncodedBlockCursor createTestCursor() {
    Block nullValue = VARCHAR.createBlockBuilder(new BlockBuilderStatus()).appendNull().build();
    RunLengthEncodedBlock repeated = new RunLengthEncodedBlock(nullValue, 11);
    return repeated.cursor();
  }
Пример #12
0
  /**
   * Creates a cursor over the records produced by {@code recordReader}.
   *
   * <p>Besides storing its arguments, the constructor resolves the struct field and object
   * inspector of every non-partition column, and eagerly parses the value of every requested
   * partition column into the per-type value arrays.
   *
   * @param recordReader source of the key/value records
   * @param totalBytes total size of the input; must not be negative
   * @param splitSchema schema properties used to obtain the deserializer
   * @param partitionKeys partition keys available for this split
   * @param columns requested columns; must not be empty
   * @param hiveStorageTimeZone time zone stored on the cursor
   * @param sessionTimeZone time zone stored on the cursor
   * @throws IllegalArgumentException if an argument check fails, a requested partition key is
   *     unknown, or a partition value cannot be interpreted for its column type
   * @throws UnsupportedOperationException if a partition column has an unsupported type
   */
  public GenericHiveRecordCursor(
      RecordReader<K, V> recordReader,
      long totalBytes,
      Properties splitSchema,
      List<HivePartitionKey> partitionKeys,
      List<HiveColumnHandle> columns,
      DateTimeZone hiveStorageTimeZone,
      DateTimeZone sessionTimeZone) {
    checkNotNull(recordReader, "recordReader is null");
    checkArgument(totalBytes >= 0, "totalBytes is negative");
    checkNotNull(splitSchema, "splitSchema is null");
    checkNotNull(partitionKeys, "partitionKeys is null");
    checkNotNull(columns, "columns is null");
    checkArgument(!columns.isEmpty(), "columns is empty");
    checkNotNull(hiveStorageTimeZone, "hiveStorageTimeZone is null");
    checkNotNull(sessionTimeZone, "sessionTimeZone is null");

    this.recordReader = recordReader;
    this.totalBytes = totalBytes;
    this.key = recordReader.createKey();
    this.value = recordReader.createValue();
    this.hiveStorageTimeZone = hiveStorageTimeZone;
    this.sessionTimeZone = sessionTimeZone;

    this.deserializer = getDeserializer(splitSchema);
    this.rowInspector = getTableObjectInspector(deserializer);

    // Every per-column array is sized to the number of requested columns.
    int columnCount = columns.size();

    String[] names = new String[columnCount];
    this.types = new Type[columnCount];
    this.hiveTypes = new HiveType[columnCount];

    this.structFields = new StructField[columnCount];
    this.fieldInspectors = new ObjectInspector[columnCount];

    this.isPartitionColumn = new boolean[columnCount];

    this.loaded = new boolean[columnCount];
    this.booleans = new boolean[columnCount];
    this.longs = new long[columnCount];
    this.doubles = new double[columnCount];
    this.slices = new Slice[columnCount];
    this.nulls = new boolean[columnCount];

    // initialize data columns
    for (int index = 0; index < columnCount; index++) {
      HiveColumnHandle handle = columns.get(index);
      boolean partitionColumn = handle.isPartitionKey();

      names[index] = handle.getName();
      types[index] = handle.getType();
      hiveTypes[index] = handle.getHiveType();
      isPartitionColumn[index] = partitionColumn;

      if (partitionColumn) {
        // partition columns have no field in the row struct
        continue;
      }
      StructField field = rowInspector.getStructFieldRef(handle.getName());
      structFields[index] = field;
      fieldInspectors[index] = field.getFieldObjectInspector();
    }

    // parse requested partition columns
    Map<String, HivePartitionKey> keysByName =
        uniqueIndex(partitionKeys, HivePartitionKey.nameGetter());
    for (int index = 0; index < columnCount; index++) {
      HiveColumnHandle handle = columns.get(index);
      if (!handle.isPartitionKey()) {
        continue;
      }

      HivePartitionKey partitionKey = keysByName.get(handle.getName());
      checkArgument(partitionKey != null, "Unknown partition key %s", handle.getName());

      byte[] bytes = partitionKey.getValue().getBytes(Charsets.UTF_8);
      int length = bytes.length;

      Type type = types[index];
      if (BOOLEAN.equals(type)) {
        boolean parsedTrue = isTrue(bytes, 0, length);
        if (!parsedTrue && !isFalse(bytes, 0, length)) {
          String valueString = new String(bytes, Charsets.UTF_8);
          throw new IllegalArgumentException(
              String.format(
                  "Invalid partition value '%s' for BOOLEAN partition key %s",
                  valueString, names[index]));
        }
        booleans[index] = parsedTrue;
      } else if (BIGINT.equals(type)) {
        if (length == 0) {
          throw new IllegalArgumentException(
              String.format(
                  "Invalid partition value '' for BIGINT partition key %s", names[index]));
        }
        longs[index] = parseLong(bytes, 0, length);
      } else if (DOUBLE.equals(type)) {
        if (length == 0) {
          throw new IllegalArgumentException(
              String.format(
                  "Invalid partition value '' for DOUBLE partition key %s", names[index]));
        }
        doubles[index] = parseDouble(bytes, 0, length);
      } else if (VARCHAR.equals(type)) {
        slices[index] = Slices.wrappedBuffer(Arrays.copyOf(bytes, length));
      } else {
        throw new UnsupportedOperationException("Unsupported column type: " + type);
      }
    }
  }