@Test public void testRoundTrip() { int positionCount = 40; // build dictionary BlockBuilder dictionaryBuilder = VARCHAR.createBlockBuilder(new BlockBuilderStatus(), 4); VARCHAR.writeString(dictionaryBuilder, "alice"); VARCHAR.writeString(dictionaryBuilder, "bob"); VARCHAR.writeString(dictionaryBuilder, "charlie"); VARCHAR.writeString(dictionaryBuilder, "dave"); Block dictionary = dictionaryBuilder.build(); // build ids int[] ids = new int[positionCount]; for (int i = 0; i < 40; i++) { ids[i] = i % 4; } BlockEncoding blockEncoding = new DictionaryBlockEncoding(new VariableWidthBlockEncoding()); DictionaryBlock dictionaryBlock = new DictionaryBlock(positionCount, dictionary, ids); DynamicSliceOutput sliceOutput = new DynamicSliceOutput(1024); blockEncoding.writeBlock(sliceOutput, dictionaryBlock); Block actualBlock = blockEncoding.readBlock(sliceOutput.slice().getInput()); assertTrue(actualBlock instanceof DictionaryBlock); DictionaryBlock actualDictionaryBlock = (DictionaryBlock) actualBlock; assertBlockEquals(VARCHAR, actualDictionaryBlock.getDictionary(), dictionary); for (int position = 0; position < actualDictionaryBlock.getPositionCount(); position++) { assertEquals(actualDictionaryBlock.getId(position), ids[position]); } assertEquals( actualDictionaryBlock.getDictionarySourceId(), dictionaryBlock.getDictionarySourceId()); }
private static void assertReadFields(RecordCursor cursor, List<ColumnMetadata> schema) { for (int columnIndex = 0; columnIndex < schema.size(); columnIndex++) { ColumnMetadata column = schema.get(columnIndex); if (!cursor.isNull(columnIndex)) { Type type = column.getType(); if (BOOLEAN.equals(type)) { cursor.getBoolean(columnIndex); } else if (BIGINT.equals(type)) { cursor.getLong(columnIndex); } else if (TIMESTAMP.equals(type)) { cursor.getLong(columnIndex); } else if (DOUBLE.equals(type)) { cursor.getDouble(columnIndex); } else if (VARCHAR.equals(type) || VARBINARY.equals(type)) { try { cursor.getSlice(columnIndex); } catch (RuntimeException e) { throw new RuntimeException("column " + column, e); } } else { fail("Unknown primitive type " + columnIndex); } } } }
@Setup public void setup() { Random random = new Random(); RowExpression[] arguments = new RowExpression[1 + inListCount]; switch (type) { case StandardTypes.BIGINT: prestoType = BIGINT; for (int i = 1; i <= inListCount; i++) { arguments[i] = constant((long) random.nextInt(), BIGINT); } break; case StandardTypes.DOUBLE: prestoType = DOUBLE; for (int i = 1; i <= inListCount; i++) { arguments[i] = constant(random.nextDouble(), DOUBLE); } break; case StandardTypes.VARCHAR: prestoType = VARCHAR; for (int i = 1; i <= inListCount; i++) { arguments[i] = constant(Slices.utf8Slice(Long.toString(random.nextLong())), VARCHAR); } break; default: throw new IllegalStateException(); } arguments[0] = field(0, prestoType); RowExpression project = field(0, prestoType); PageBuilder pageBuilder = new PageBuilder(ImmutableList.of(prestoType)); for (int i = 0; i < 10_000; i++) { pageBuilder.declarePosition(); switch (type) { case StandardTypes.BIGINT: BIGINT.writeLong(pageBuilder.getBlockBuilder(0), random.nextInt()); break; case StandardTypes.DOUBLE: DOUBLE.writeDouble(pageBuilder.getBlockBuilder(0), random.nextDouble()); break; case StandardTypes.VARCHAR: VARCHAR.writeSlice( pageBuilder.getBlockBuilder(0), Slices.utf8Slice(Long.toString(random.nextLong()))); break; } } inputPage = pageBuilder.build(); RowExpression filter = call( new Signature(IN, SCALAR, parseTypeSignature(StandardTypes.BOOLEAN)), BOOLEAN, arguments); processor = new ExpressionCompiler(MetadataManager.createTestMetadataManager()) .compilePageProcessor(filter, ImmutableList.of(project)) .get(); }
private void testIsFull(VariableWidthBlockBuilder blockBuilder) { assertTrue(blockBuilder.isEmpty()); while (!blockBuilder.isFull()) { VARCHAR.writeSlice(blockBuilder, Slices.allocate(VARCHAR_VALUE_SIZE)); } assertEquals(blockBuilder.getPositionCount(), EXPECTED_ENTRY_COUNT); assertEquals(blockBuilder.isFull(), true); }
protected void checkCursor(RecordCursor cursor, List<TestColumn> testColumns, int numRows) throws IOException { for (int row = 0; row < numRows; row++) { assertTrue(cursor.advanceNextPosition()); for (int i = 0, testColumnsSize = testColumns.size(); i < testColumnsSize; i++) { TestColumn testColumn = testColumns.get(i); Object fieldFromCursor; Type type = HiveType.valueOf(testColumn.getObjectInspector().getTypeName()).getType(TYPE_MANAGER); if (cursor.isNull(i)) { fieldFromCursor = null; } else if (BOOLEAN.equals(type)) { fieldFromCursor = cursor.getBoolean(i); } else if (BIGINT.equals(type)) { fieldFromCursor = cursor.getLong(i); } else if (DOUBLE.equals(type)) { fieldFromCursor = cursor.getDouble(i); } else if (VARCHAR.equals(type)) { fieldFromCursor = cursor.getSlice(i); } else if (VARBINARY.equals(type)) { fieldFromCursor = cursor.getSlice(i); } else if (DateType.DATE.equals(type)) { fieldFromCursor = cursor.getLong(i); } else if (TimestampType.TIMESTAMP.equals(type)) { fieldFromCursor = cursor.getLong(i); } else if (isStructuralType(type)) { fieldFromCursor = cursor.getObject(i); } else { throw new RuntimeException("unknown type"); } if (fieldFromCursor == null) { assertEquals( null, testColumn.getExpectedValue(), String.format("Expected null for column %s", testColumn.getName())); } else if (testColumn.getObjectInspector().getTypeName().equals("float") || testColumn.getObjectInspector().getTypeName().equals("double")) { assertEquals((double) fieldFromCursor, (double) testColumn.getExpectedValue(), EPSILON); } else if (testColumn.getObjectInspector().getCategory() == Category.PRIMITIVE) { assertEquals( fieldFromCursor, testColumn.getExpectedValue(), String.format("Wrong value for column %s", testColumn.getName())); } else { Block expected = (Block) testColumn.getExpectedValue(); Block actual = (Block) fieldFromCursor; assertBlockEquals( actual, expected, String.format("Wrong value for column %s", testColumn.getName())); } } } }
private static void insertRows( ConnectorTableMetadata tableMetadata, Handle handle, RecordSet data) { List<ColumnMetadata> columns = ImmutableList.copyOf( Iterables.filter( tableMetadata.getColumns(), new Predicate<ColumnMetadata>() { @Override public boolean apply(ColumnMetadata columnMetadata) { return !columnMetadata.isHidden(); } })); String vars = Joiner.on(',').join(nCopies(columns.size(), "?")); String sql = format("INSERT INTO %s VALUES (%s)", tableMetadata.getTable().getTableName(), vars); RecordCursor cursor = data.cursor(); while (true) { // insert 1000 rows at a time PreparedBatch batch = handle.prepareBatch(sql); for (int row = 0; row < 1000; row++) { if (!cursor.advanceNextPosition()) { batch.execute(); return; } PreparedBatchPart part = batch.add(); for (int column = 0; column < columns.size(); column++) { Type type = columns.get(column).getType(); if (BOOLEAN.equals(type)) { part.bind(column, cursor.getBoolean(column)); } else if (BIGINT.equals(type)) { part.bind(column, cursor.getLong(column)); } else if (DOUBLE.equals(type)) { part.bind(column, cursor.getDouble(column)); } else if (VARCHAR.equals(type)) { part.bind(column, cursor.getSlice(column).toStringUtf8()); } else if (DATE.equals(type)) { long millisUtc = TimeUnit.DAYS.toMillis(cursor.getLong(column)); // H2 expects dates in to be millis at midnight in the JVM timezone long localMillis = DateTimeZone.UTC.getMillisKeepLocal(DateTimeZone.getDefault(), millisUtc); part.bind(column, new Date(localMillis)); } else { throw new IllegalArgumentException("Unsupported type " + type); } } } batch.execute(); } }
@Override public void addInput(Page page) { Block block = page.getBlock(labelChannel); for (int position = 0; position < block.getPositionCount(); position++) { if (labelIsLong) { labels.add((double) BIGINT.getLong(block, position)); } else { labels.add(DOUBLE.getDouble(block, position)); } } block = page.getBlock(featuresChannel); for (int position = 0; position < block.getPositionCount(); position++) { FeatureVector featureVector = ModelUtils.jsonToFeatures(VARCHAR.getSlice(block, position)); rowsSize += featureVector.getEstimatedSize(); rows.add(featureVector); } if (params == null) { block = page.getBlock(paramsChannel); params = LibSvmUtils.parseParameters(VARCHAR.getSlice(block, 0).toStringUtf8()); } }
private void parseColumn(int column) { Type type = types[column]; if (BOOLEAN.equals(type)) { parseBooleanColumn(column); } else if (BIGINT.equals(type)) { parseLongColumn(column); } else if (DOUBLE.equals(type)) { parseDoubleColumn(column); } else if (VARCHAR.equals(type) || VARBINARY.equals(type)) { parseStringColumn(column); } else if (TIMESTAMP.equals(type)) { parseLongColumn(column); } else { throw new UnsupportedOperationException("Unsupported column type: " + type); } }
public class TableStatsData extends AutoIncrementTable<StatsDataModel, UInteger> { public static TableStatsData TABLE_STATSDATA; public final TableField<StatsDataModel, UInteger> KEY = createField("key", INTEGERUNSIGNED, this); public final TableField<StatsDataModel, UInteger> STAT = createField("stat", INTEGERUNSIGNED, this); public final TableField<StatsDataModel, Timestamp> TIME = createField("timestamp", TIMESTAMP, this); public final TableField<StatsDataModel, String> DATA = createField("data", VARCHAR.length(64), this); public TableStatsData(String prefix) { super(prefix + "statsdata", new Version(1)); setAIKey(KEY); addForeignKey(TABLE_STATS.getPrimaryKey(), STAT); addFields(KEY, STAT, TIME, DATA); TABLE_STATSDATA = this; } @Override public Class<StatsDataModel> getRecordType() { return StatsDataModel.class; } }
@Override protected Object visitGenericLiteral(GenericLiteral node, ConnectorSession session) { Type type = metadata.getType(parseTypeSignature(node.getType())); if (type == null) { throw new SemanticException(TYPE_MISMATCH, node, "Unknown type: " + node.getType()); } if (JSON.equals(type)) { ScalarFunctionImplementation operator = metadata .getFunctionRegistry() .getScalarFunctionImplementation( new Signature( "json_parse", SCALAR, JSON.getTypeSignature(), VARCHAR.getTypeSignature())); try { return ExpressionInterpreter.invoke( session, operator, ImmutableList.<Object>of(utf8Slice(node.getValue()))); } catch (Throwable throwable) { throw Throwables.propagate(throwable); } } ScalarFunctionImplementation operator; try { Signature signature = metadata.getFunctionRegistry().getCoercion(VARCHAR, type); operator = metadata.getFunctionRegistry().getScalarFunctionImplementation(signature); } catch (IllegalArgumentException e) { throw new SemanticException(TYPE_MISMATCH, node, "No literal form for type %s", type); } try { return ExpressionInterpreter.invoke( session, operator, ImmutableList.<Object>of(utf8Slice(node.getValue()))); } catch (Throwable throwable) { throw Throwables.propagate(throwable); } }
@Override protected RunLengthEncodedBlockCursor createTestCursor() { Block value = VARCHAR.createBlockBuilder(new BlockBuilderStatus()).appendNull().build(); return new RunLengthEncodedBlock(value, 11).cursor(); }
public GenericHiveRecordCursor( RecordReader<K, V> recordReader, long totalBytes, Properties splitSchema, List<HivePartitionKey> partitionKeys, List<HiveColumnHandle> columns, DateTimeZone hiveStorageTimeZone, DateTimeZone sessionTimeZone) { checkNotNull(recordReader, "recordReader is null"); checkArgument(totalBytes >= 0, "totalBytes is negative"); checkNotNull(splitSchema, "splitSchema is null"); checkNotNull(partitionKeys, "partitionKeys is null"); checkNotNull(columns, "columns is null"); checkArgument(!columns.isEmpty(), "columns is empty"); checkNotNull(hiveStorageTimeZone, "hiveStorageTimeZone is null"); checkNotNull(sessionTimeZone, "sessionTimeZone is null"); this.recordReader = recordReader; this.totalBytes = totalBytes; this.key = recordReader.createKey(); this.value = recordReader.createValue(); this.hiveStorageTimeZone = hiveStorageTimeZone; this.sessionTimeZone = sessionTimeZone; this.deserializer = getDeserializer(splitSchema); this.rowInspector = getTableObjectInspector(deserializer); int size = columns.size(); String[] names = new String[size]; this.types = new Type[size]; this.hiveTypes = new HiveType[size]; this.structFields = new StructField[size]; this.fieldInspectors = new ObjectInspector[size]; this.isPartitionColumn = new boolean[size]; this.loaded = new boolean[size]; this.booleans = new boolean[size]; this.longs = new long[size]; this.doubles = new double[size]; this.slices = new Slice[size]; this.nulls = new boolean[size]; // initialize data columns for (int i = 0; i < columns.size(); i++) { HiveColumnHandle column = columns.get(i); names[i] = column.getName(); types[i] = column.getType(); hiveTypes[i] = column.getHiveType(); if (!column.isPartitionKey()) { StructField field = rowInspector.getStructFieldRef(column.getName()); structFields[i] = field; fieldInspectors[i] = field.getFieldObjectInspector(); } isPartitionColumn[i] = column.isPartitionKey(); } // parse requested partition columns Map<String, HivePartitionKey> partitionKeysByName = uniqueIndex(partitionKeys, HivePartitionKey.nameGetter()); for (int columnIndex = 0; columnIndex < columns.size(); columnIndex++) { HiveColumnHandle column = columns.get(columnIndex); if (column.isPartitionKey()) { HivePartitionKey partitionKey = partitionKeysByName.get(column.getName()); checkArgument(partitionKey != null, "Unknown partition key %s", column.getName()); byte[] bytes = partitionKey.getValue().getBytes(Charsets.UTF_8); Type type = types[columnIndex]; if (BOOLEAN.equals(type)) { if (isTrue(bytes, 0, bytes.length)) { booleans[columnIndex] = true; } else if (isFalse(bytes, 0, bytes.length)) { booleans[columnIndex] = false; } else { String valueString = new String(bytes, Charsets.UTF_8); throw new IllegalArgumentException( String.format( "Invalid partition value '%s' for BOOLEAN partition key %s", valueString, names[columnIndex])); } } else if (BIGINT.equals(type)) { if (bytes.length == 0) { throw new IllegalArgumentException( String.format( "Invalid partition value '' for BIGINT partition key %s", names[columnIndex])); } longs[columnIndex] = parseLong(bytes, 0, bytes.length); } else if (DOUBLE.equals(type)) { if (bytes.length == 0) { throw new IllegalArgumentException( String.format( "Invalid partition value '' for DOUBLE partition key %s", names[columnIndex])); } doubles[columnIndex] = parseDouble(bytes, 0, bytes.length); } else if (VARCHAR.equals(type)) { slices[columnIndex] = Slices.wrappedBuffer(Arrays.copyOf(bytes, bytes.length)); } else { throw new UnsupportedOperationException("Unsupported column type: " + type); } } } }