@Test
  public void testBucketedTableDoubleFloat() throws Exception {
    ConnectorTableHandle tableHandle = getTableHandle(tableBucketedDoubleFloat);
    List<ConnectorColumnHandle> columnHandles =
        ImmutableList.copyOf(metadata.getColumnHandles(tableHandle).values());
    Map<String, Integer> columnIndex = indexColumns(columnHandles);

    assertTableIsBucketed(tableHandle);

    ImmutableMap<ConnectorColumnHandle, Comparable<?>> bindings =
        ImmutableMap.<ConnectorColumnHandle, Comparable<?>>builder()
            .put(columnHandles.get(columnIndex.get("t_float")), 406.1000061035156)
            .put(columnHandles.get(columnIndex.get("t_double")), 407.2)
            .build();

    // floats and doubles are not supported for bucket pruning, so we should see all splits
    ConnectorPartitionResult partitionResult =
        splitManager.getPartitions(tableHandle, TupleDomain.withFixedValues(bindings));
    List<ConnectorSplit> splits =
        getAllSplits(splitManager.getPartitionSplits(tableHandle, partitionResult.getPartitions()));
    assertEquals(splits.size(), 32);

    int count = 0;
    for (ConnectorSplit split : splits) {
      try (RecordCursor cursor = recordSetProvider.getRecordSet(split, columnHandles).cursor()) {
        while (cursor.advanceNextPosition()) {
          count++;
        }
      }
    }
    assertEquals(count, 300);
  }
  private List<CassandraPartition> getCassandraPartitions(
      CassandraTable table, TupleDomain<ColumnHandle> tupleDomain) {
    if (tupleDomain.isNone()) {
      return ImmutableList.of();
    }

    Set<List<Comparable<?>>> partitionKeysSet = getPartitionKeysSet(table, tupleDomain);

    // an empty partition key set means we could not prune, so return all partitions
    if (partitionKeysSet.isEmpty()) {
      return schemaProvider.getAllPartitions(table);
    }

    ImmutableList.Builder<ListenableFuture<List<CassandraPartition>>> getPartitionResults =
        ImmutableList.builder();
    for (List<Comparable<?>> partitionKeys : partitionKeysSet) {
      getPartitionResults.add(
          executor.submit(() -> schemaProvider.getPartitions(table, partitionKeys)));
    }

    ImmutableList.Builder<CassandraPartition> partitions = ImmutableList.builder();
    for (ListenableFuture<List<CassandraPartition>> result : getPartitionResults.build()) {
      try {
        partitions.addAll(result.get());
      } catch (InterruptedException e) {
        // restore the interrupt flag before propagating
        Thread.currentThread().interrupt();
        throw Throwables.propagate(e);
      } catch (ExecutionException e) {
        throw new PrestoException(EXTERNAL, "Error fetching cassandra partitions", e);
      }
    }

    return partitions.build();
  }
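
// Hedged sketch, not part of the sources above: the fan-out/join in
// getCassandraPartitions could also be expressed with Guava's Futures.allAsList,
// which collapses the per-future get() loop into one blocking call and fails fast
// if any lookup fails. All names below (fetchPartitions, the demo class) are
// hypothetical stand-ins.
import com.google.common.collect.ImmutableList;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executors;

public class AllAsListDemo {
  public static void main(String[] args) throws InterruptedException, ExecutionException {
    ListeningExecutorService executor =
        MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(4));
    try {
      List<ListenableFuture<List<String>>> futures = new ArrayList<>();
      for (int i = 0; i < 3; i++) {
        int key = i;
        // stand-in for schemaProvider.getPartitions(table, partitionKeys)
        futures.add(executor.submit(() -> fetchPartitions(key)));
      }
      // one future that completes when all inputs complete
      List<List<String>> results = Futures.allAsList(futures).get();
      results.forEach(System.out::println);
    } finally {
      executor.shutdown();
    }
  }

  private static List<String> fetchPartitions(int key) {
    return ImmutableList.of("partition-" + key);
  }
}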
 @Test
 public void testGetPartitionNames() throws Exception {
   ConnectorTableHandle tableHandle = getTableHandle(table);
   ConnectorPartitionResult partitionResult =
       splitManager.getPartitions(tableHandle, TupleDomain.<ConnectorColumnHandle>all());
   assertExpectedPartitions(partitionResult.getPartitions());
 }
  @Test
  public void testGetPartitionSplitsTableOfflinePartition() throws Exception {
    ConnectorTableHandle tableHandle = getTableHandle(tableOfflinePartition);
    assertNotNull(tableHandle);

    ConnectorColumnHandle dsColumn = metadata.getColumnHandle(tableHandle, "ds");
    assertNotNull(dsColumn);

    Domain domain = Domain.singleValue(utf8Slice("2012-12-30"));
    TupleDomain<ConnectorColumnHandle> tupleDomain =
        TupleDomain.withColumnDomains(ImmutableMap.of(dsColumn, domain));
    ConnectorPartitionResult partitionResult = splitManager.getPartitions(tableHandle, tupleDomain);
    for (ConnectorPartition partition : partitionResult.getPartitions()) {
      if (domain.equals(partition.getTupleDomain().getDomains().get(dsColumn))) {
        try {
          getSplitCount(splitManager.getPartitionSplits(tableHandle, ImmutableList.of(partition)));
          fail("Expected PartitionOfflineException");
        } catch (PartitionOfflineException e) {
          assertEquals(e.getTableName(), tableOfflinePartition);
          assertEquals(e.getPartition(), "ds=2012-12-30");
        }
      } else {
        getSplitCount(splitManager.getPartitionSplits(tableHandle, ImmutableList.of(partition)));
      }
    }
  }
  private static Set<List<Comparable<?>>> getPartitionKeysSet(
      CassandraTable table, TupleDomain<ColumnHandle> tupleDomain) {
    ImmutableList.Builder<Set<Comparable<?>>> partitionColumnValues = ImmutableList.builder();
    for (CassandraColumnHandle columnHandle : table.getPartitionKeyColumns()) {
      Domain domain = tupleDomain.getDomains().get(columnHandle);

      // if there is no constraint on a partition key, return an empty set
      if (domain == null) {
        return ImmutableSet.of();
      }

      // todo does cassandra allow null partition keys?
      if (domain.isNullAllowed()) {
        return ImmutableSet.of();
      }

      ImmutableSet.Builder<Comparable<?>> columnValues = ImmutableSet.builder();
      for (Range range : domain.getRanges()) {
        // if the range is not a single value, we cannot perform partition pruning
        if (!range.isSingleValue()) {
          return ImmutableSet.of();
        }
        Comparable<?> value = range.getSingleValue();

        CassandraType valueType = columnHandle.getCassandraType();
        columnValues.add(valueType.getValueForPartitionKey(value));
      }
      partitionColumnValues.add(columnValues.build());
    }
    return Sets.cartesianProduct(partitionColumnValues.build());
  }
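
// Hedged sketch, illustrative only: getPartitionKeysSet relies on Guava's
// Sets.cartesianProduct to expand per-column candidate values into full
// partition-key tuples. A standalone demonstration with made-up values:
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;

import java.util.List;
import java.util.Set;

public class CartesianProductDemo {
  public static void main(String[] args) {
    // candidate values for two hypothetical partition key columns
    Set<Comparable<?>> dsValues = ImmutableSet.<Comparable<?>>of("2012-12-29", "2012-12-30");
    Set<Comparable<?>> dummyValues = ImmutableSet.<Comparable<?>>of(1L, 2L);

    // each element of the product is one fully specified partition key tuple
    Set<List<Comparable<?>>> tuples =
        Sets.cartesianProduct(ImmutableList.of(dsValues, dummyValues));

    // prints four tuples: [2012-12-29, 1], [2012-12-29, 2], [2012-12-30, 1], [2012-12-30, 2]
    tuples.forEach(System.out::println);
  }
}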
 private static TupleDomain spanTupleDomain(TupleDomain tupleDomain) {
   if (tupleDomain.isNone()) {
     return tupleDomain;
   }
   Map<ColumnHandle, Domain> spannedDomains =
       Maps.transformValues(
           tupleDomain.getDomains(),
           // Retain nullability, but collapse each SortedRangeSet into a single span
           domain -> Domain.create(getSortedRangeSpan(domain.getRanges()), domain.isNullAllowed()));
   return TupleDomain.withColumnDomains(spannedDomains);
 }
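
// Hedged sketch, not from the sources above: Maps.transformValues, used by
// spanTupleDomain, returns a lazy view -- the function runs on each read and the
// view tracks later changes to the backing map. TupleDomain.withColumnDomains is
// assumed to copy the map, which would make the laziness safe there.
import com.google.common.collect.Maps;

import java.util.HashMap;
import java.util.Map;

public class TransformValuesDemo {
  public static void main(String[] args) {
    Map<String, Integer> backing = new HashMap<>();
    backing.put("a", 1);

    // the doubling function is applied on access, not at creation time
    Map<String, Integer> doubled = Maps.transformValues(backing, v -> v * 2);
    System.out.println(doubled.get("a")); // 2

    backing.put("b", 10);
    System.out.println(doubled.get("b")); // 20 -- the view reflects the new entry
  }
}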
 @Test
 public void testGetPartitionNamesUnpartitioned() throws Exception {
   ConnectorTableHandle tableHandle = getTableHandle(tableUnpartitioned);
   ConnectorPartitionResult partitionResult =
       splitManager.getPartitions(tableHandle, TupleDomain.<ConnectorColumnHandle>all());
   assertEquals(partitionResult.getPartitions().size(), 1);
   assertEquals(partitionResult.getPartitions(), unpartitionedPartitions);
 }
 @Override
 public ConnectorPartition fromPartitionDto(PartitionDto partitionDto) {
   return new ConnectorPartitionDetailImpl(
       partitionDto.getName().getPartitionName(),
       TupleDomain.none(),
       fromStorageDto(partitionDto.getSerde()),
       partitionDto.getMetadata(),
       fromAuditDto(partitionDto.getAudit()));
 }
  @Test
  public void testGetPartitionSplitsBatchUnpartitioned() throws Exception {
    ConnectorTableHandle tableHandle = getTableHandle(tableUnpartitioned);
    ConnectorPartitionResult partitionResult =
        splitManager.getPartitions(tableHandle, TupleDomain.<ConnectorColumnHandle>all());
    ConnectorSplitSource splitSource =
        splitManager.getPartitionSplits(tableHandle, partitionResult.getPartitions());

    assertEquals(getSplitCount(splitSource), 1);
  }
 @Test
 public void testGetPartitionsWithBindings() throws Exception {
   ConnectorTableHandle tableHandle = getTableHandle(table);
   ConnectorPartitionResult partitionResult =
       splitManager.getPartitions(
           tableHandle,
           TupleDomain.withColumnDomains(
               ImmutableMap.<ConnectorColumnHandle, Domain>of(intColumn, Domain.singleValue(5L))));
   assertExpectedPartitions(partitionResult.getPartitions());
 }
 @Test
 public void testGetPartitionTableOffline() throws Exception {
   ConnectorTableHandle tableHandle = getTableHandle(tableOffline);
   try {
     splitManager.getPartitions(tableHandle, TupleDomain.<ConnectorColumnHandle>all());
     fail("expected TableOfflineException");
   } catch (TableOfflineException e) {
     assertEquals(e.getTableName(), tableOffline);
   }
 }
  private void assertTableIsBucketed(ConnectorTableHandle tableHandle) throws Exception {
    // the bucketed test tables should have exactly 32 splits
    ConnectorPartitionResult partitionResult =
        splitManager.getPartitions(tableHandle, TupleDomain.<ConnectorColumnHandle>all());
    List<ConnectorSplit> splits =
        getAllSplits(splitManager.getPartitionSplits(tableHandle, partitionResult.getPartitions()));
    assertEquals(splits.size(), 32);

    // verify all paths are unique
    Set<String> paths = new HashSet<>();
    for (ConnectorSplit split : splits) {
      assertTrue(paths.add(((HiveSplit) split).getPath()));
    }
  }
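
// Hedged aside, standalone demo: assertTrue(paths.add(...)) above works because
// Set.add returns false when the element is already present, so the add doubles
// as a uniqueness assertion. The path literal below is made up.
import java.util.HashSet;
import java.util.Set;

public class UniquePathsDemo {
  public static void main(String[] args) {
    Set<String> paths = new HashSet<>();
    System.out.println(paths.add("/data/bucket_00000")); // true  -- first occurrence
    System.out.println(paths.add("/data/bucket_00000")); // false -- duplicate detected
  }
}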
 @Test(
     expectedExceptions = RuntimeException.class,
     expectedExceptionsMessageRegExp = ".*" + INVALID_COLUMN + ".*")
 public void testGetRecordsInvalidColumn() throws Exception {
   ConnectorTableHandle table = getTableHandle(tableUnpartitioned);
   ConnectorPartitionResult partitionResult =
       splitManager.getPartitions(table, TupleDomain.<ConnectorColumnHandle>all());
   ConnectorSplit split =
       Iterables.getFirst(
           getAllSplits(splitManager.getPartitionSplits(table, partitionResult.getPartitions())),
           null);
   RecordSet recordSet =
       recordSetProvider.getRecordSet(split, ImmutableList.of(invalidColumnHandle));
   recordSet.cursor();
 }
  @Override
  protected RelationPlan visitTable(Table node, Void context) {
    Query namedQuery = analysis.getNamedQuery(node);
    if (namedQuery != null) {
      RelationPlan subPlan = process(namedQuery, null);
      return new RelationPlan(
          subPlan.getRoot(),
          analysis.getOutputDescriptor(node),
          subPlan.getOutputSymbols(),
          subPlan.getSampleWeight());
    }

    TupleDescriptor descriptor = analysis.getOutputDescriptor(node);
    TableHandle handle = analysis.getTableHandle(node);

    ImmutableList.Builder<Symbol> outputSymbolsBuilder = ImmutableList.builder();
    ImmutableMap.Builder<Symbol, ColumnHandle> columns = ImmutableMap.builder();
    for (Field field : descriptor.getAllFields()) {
      Symbol symbol = symbolAllocator.newSymbol(field.getName().get(), field.getType());

      outputSymbolsBuilder.add(symbol);
      columns.put(symbol, analysis.getColumn(field));
    }

    List<Symbol> planOutputSymbols = outputSymbolsBuilder.build();
    Optional<ColumnHandle> sampleWeightColumn =
        metadata.getSampleWeightColumnHandle(session, handle);
    Symbol sampleWeightSymbol = null;
    if (sampleWeightColumn.isPresent()) {
      sampleWeightSymbol = symbolAllocator.newSymbol("$sampleWeight", BIGINT);
      outputSymbolsBuilder.add(sampleWeightSymbol);
      columns.put(sampleWeightSymbol, sampleWeightColumn.get());
    }

    List<Symbol> nodeOutputSymbols = outputSymbolsBuilder.build();
    PlanNode root =
        new TableScanNode(
            idAllocator.getNextId(),
            handle,
            nodeOutputSymbols,
            columns.build(),
            Optional.empty(),
            TupleDomain.all(),
            null);
    return new RelationPlan(
        root, descriptor, planOutputSymbols, Optional.ofNullable(sampleWeightSymbol));
  }
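
// Hedged sketch, illustrative only: visitTable calls build() twice on the same
// ImmutableList.Builder -- once before and once after optionally adding the
// sample-weight symbol. Each build() returns a snapshot of the elements added so
// far, which is what makes planOutputSymbols and nodeOutputSymbols differ.
import com.google.common.collect.ImmutableList;

public class BuilderSnapshotDemo {
  public static void main(String[] args) {
    ImmutableList.Builder<String> builder = ImmutableList.builder();
    builder.add("col_a").add("col_b");

    ImmutableList<String> withoutExtra = builder.build(); // [col_a, col_b]
    builder.add("$sampleWeight");
    ImmutableList<String> withExtra = builder.build(); // [col_a, col_b, $sampleWeight]

    System.out.println(withoutExtra);
    System.out.println(withExtra);
  }
}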
  @Test
  public void testBucketedTableBigintBoolean() throws Exception {
    ConnectorTableHandle tableHandle = getTableHandle(tableBucketedBigintBoolean);
    List<ConnectorColumnHandle> columnHandles =
        ImmutableList.copyOf(metadata.getColumnHandles(tableHandle).values());
    Map<String, Integer> columnIndex = indexColumns(columnHandles);

    assertTableIsBucketed(tableHandle);

    String testString = "textfile test";
    // This needs to match one of the rows where t_string is not empty or null,
    // and where t_bigint is not null
    // (i.e. (testBigint - 604) % 19 > 1 and (testBigint - 604) % 13 != 0)
    Long testBigint = 608L;
    Boolean testBoolean = true;

    ImmutableMap<ConnectorColumnHandle, Comparable<?>> bindings =
        ImmutableMap.<ConnectorColumnHandle, Comparable<?>>builder()
            .put(columnHandles.get(columnIndex.get("t_string")), utf8Slice(testString))
            .put(columnHandles.get(columnIndex.get("t_bigint")), testBigint)
            .put(columnHandles.get(columnIndex.get("t_boolean")), testBoolean)
            .build();

    ConnectorPartitionResult partitionResult =
        splitManager.getPartitions(tableHandle, TupleDomain.withFixedValues(bindings));
    List<ConnectorSplit> splits =
        getAllSplits(splitManager.getPartitionSplits(tableHandle, partitionResult.getPartitions()));
    assertEquals(splits.size(), 1);

    boolean rowFound = false;
    try (RecordCursor cursor =
        recordSetProvider.getRecordSet(splits.get(0), columnHandles).cursor()) {
      while (cursor.advanceNextPosition()) {
        if (testString.equals(cursor.getSlice(columnIndex.get("t_string")).toStringUtf8())
            && testBigint == cursor.getLong(columnIndex.get("t_bigint"))
            && testBoolean == cursor.getBoolean(columnIndex.get("t_boolean"))) {
          rowFound = true;
          break;
        }
      }
      assertTrue(rowFound);
    }
  }
  @Test
  public void testGetRecordsUnpartitioned() throws Exception {
    ConnectorTableHandle tableHandle = getTableHandle(tableUnpartitioned);
    List<ConnectorColumnHandle> columnHandles =
        ImmutableList.copyOf(metadata.getColumnHandles(tableHandle).values());
    Map<String, Integer> columnIndex = indexColumns(columnHandles);

    ConnectorPartitionResult partitionResult =
        splitManager.getPartitions(tableHandle, TupleDomain.<ConnectorColumnHandle>all());
    List<ConnectorSplit> splits =
        getAllSplits(splitManager.getPartitionSplits(tableHandle, partitionResult.getPartitions()));
    assertEquals(splits.size(), 1);

    for (ConnectorSplit split : splits) {
      HiveSplit hiveSplit = (HiveSplit) split;

      assertEquals(hiveSplit.getPartitionKeys(), ImmutableList.of());

      long rowNumber = 0;
      try (RecordCursor cursor = recordSetProvider.getRecordSet(split, columnHandles).cursor()) {
        assertRecordCursorType(cursor, "textfile");
        assertEquals(cursor.getTotalBytes(), hiveSplit.getLength());

        while (cursor.advanceNextPosition()) {
          rowNumber++;

          if (rowNumber % 19 == 0) {
            assertTrue(cursor.isNull(columnIndex.get("t_string")));
          } else if (rowNumber % 19 == 1) {
            assertEquals(cursor.getSlice(columnIndex.get("t_string")).toStringUtf8(), "");
          } else {
            assertEquals(
                cursor.getSlice(columnIndex.get("t_string")).toStringUtf8(), "unpartitioned");
          }

          assertEquals(cursor.getLong(columnIndex.get("t_tinyint")), 1 + rowNumber);
        }
      }
      assertEquals(rowNumber, 100);
    }
  }
  // TODO: compute the correct set of partitions here
  @Override
  public ConnectorPartitionResult getPartitions(
      ConnectorTableHandle tableHandle, TupleDomain<ColumnHandle> tupleDomain) {
    checkArgument(
        tableHandle instanceof RiakTableHandle,
        "tableHandle is not an instance of RiakTableHandle");
    RiakTableHandle riakTableHandle = (RiakTableHandle) tableHandle;

    log.info("==========================tupleDomain=============================");
    log.info(tupleDomain.toString());

    try {
      String parentTable = PRSubTable.parentTableName(riakTableHandle.getTableName());
      SchemaTableName parentSchemaTable =
          new SchemaTableName(riakTableHandle.getSchemaName(), parentTable);
      PRTable table = riakClient.getTable(parentSchemaTable);
      List<String> indexedColumns = new ArrayList<>();
      for (RiakColumn riakColumn : table.getColumns()) {
        if (riakColumn.getIndex()) {
          indexedColumns.add(riakColumn.getName());
        }
      }

      // Riak connector has only one partition
      List<ConnectorPartition> partitions =
          ImmutableList.<ConnectorPartition>of(
              new RiakPartition(
                  riakTableHandle.getSchemaName(),
                  riakTableHandle.getTableName(),
                  tupleDomain,
                  indexedColumns));

      // Riak connector does not do any additional processing/filtering with the TupleDomain, so
      // just return the whole TupleDomain
      return new ConnectorPartitionResult(partitions, tupleDomain);
    } catch (Exception e) {
      log.error("Failed to get partitions: %s", e.toString());
      throw new TableNotFoundException(riakTableHandle.toSchemaTableName());
    }
  }
  @Test
  public void testBucketedTableStringInt() throws Exception {
    ConnectorTableHandle tableHandle = getTableHandle(tableBucketedStringInt);
    List<ConnectorColumnHandle> columnHandles =
        ImmutableList.copyOf(metadata.getColumnHandles(tableHandle).values());
    Map<String, Integer> columnIndex = indexColumns(columnHandles);

    assertTableIsBucketed(tableHandle);

    String testString = "sequencefile test";
    Long testInt = 413L;
    Long testSmallint = 412L;

    // Reverse the order of bindings as compared to bucketing order
    ImmutableMap<ConnectorColumnHandle, Comparable<?>> bindings =
        ImmutableMap.<ConnectorColumnHandle, Comparable<?>>builder()
            .put(columnHandles.get(columnIndex.get("t_int")), testInt)
            .put(columnHandles.get(columnIndex.get("t_string")), utf8Slice(testString))
            .put(columnHandles.get(columnIndex.get("t_smallint")), testSmallint)
            .build();

    ConnectorPartitionResult partitionResult =
        splitManager.getPartitions(tableHandle, TupleDomain.withFixedValues(bindings));
    List<ConnectorSplit> splits =
        getAllSplits(splitManager.getPartitionSplits(tableHandle, partitionResult.getPartitions()));
    assertEquals(splits.size(), 1);

    boolean rowFound = false;
    try (RecordCursor cursor =
        recordSetProvider.getRecordSet(splits.get(0), columnHandles).cursor()) {
      while (cursor.advanceNextPosition()) {
        if (testString.equals(cursor.getSlice(columnIndex.get("t_string")).toStringUtf8())
            && testInt == cursor.getLong(columnIndex.get("t_int"))
            && testSmallint == cursor.getLong(columnIndex.get("t_smallint"))) {
          rowFound = true;
        }
      }
      assertTrue(rowFound);
    }
  }
  @Test
  public void testGetPartialRecords() throws Exception {
    ConnectorTableHandle tableHandle = getTableHandle(table);
    List<ConnectorColumnHandle> columnHandles =
        ImmutableList.copyOf(metadata.getColumnHandles(tableHandle).values());
    Map<String, Integer> columnIndex = indexColumns(columnHandles);

    ConnectorPartitionResult partitionResult =
        splitManager.getPartitions(tableHandle, TupleDomain.<ConnectorColumnHandle>all());
    List<ConnectorSplit> splits =
        getAllSplits(splitManager.getPartitionSplits(tableHandle, partitionResult.getPartitions()));
    assertEquals(splits.size(), this.partitions.size());
    for (ConnectorSplit split : splits) {
      HiveSplit hiveSplit = (HiveSplit) split;

      List<HivePartitionKey> partitionKeys = hiveSplit.getPartitionKeys();
      String ds = partitionKeys.get(0).getValue();
      String fileType = partitionKeys.get(1).getValue();
      long dummy = Long.parseLong(partitionKeys.get(2).getValue());

      long baseValue = getBaseValueForFileType(fileType);

      long rowNumber = 0;
      try (RecordCursor cursor =
          recordSetProvider.getRecordSet(hiveSplit, columnHandles).cursor()) {
        assertRecordCursorType(cursor, fileType);
        while (cursor.advanceNextPosition()) {
          rowNumber++;

          assertEquals(cursor.getDouble(columnIndex.get("t_double")), baseValue + 6.2 + rowNumber);
          assertEquals(cursor.getSlice(columnIndex.get("ds")).toStringUtf8(), ds);
          assertEquals(cursor.getSlice(columnIndex.get("file_format")).toStringUtf8(), fileType);
          assertEquals(cursor.getLong(columnIndex.get("dummy")), dummy);
        }
      }
      assertEquals(rowNumber, 100);
    }
  }
  private InternalTable buildPartitions(
      Session session, String catalogName, Map<String, SerializableNativeValue> filters) {
    QualifiedTableName tableName = extractQualifiedTableName(catalogName, filters);

    InternalTable.Builder table =
        InternalTable.builder(informationSchemaTableColumns(TABLE_INTERNAL_PARTITIONS));

    Optional<TableHandle> tableHandle = metadata.getTableHandle(session, tableName);
    checkArgument(tableHandle.isPresent(), "Table %s does not exist", tableName);
    Map<ColumnHandle, String> columnHandles =
        ImmutableBiMap.copyOf(metadata.getColumnHandles(session, tableHandle.get())).inverse();

    List<TableLayoutResult> layouts =
        metadata.getLayouts(
            session, tableHandle.get(), Constraint.<ColumnHandle>alwaysTrue(), Optional.empty());

    if (layouts.size() == 1) {
      TableLayout layout = Iterables.getOnlyElement(layouts).getLayout();

      layout
          .getDiscretePredicates()
          .ifPresent(
              domains -> {
                int partitionNumber = 1;
                for (TupleDomain<ColumnHandle> domain : domains) {
                  for (Entry<ColumnHandle, SerializableNativeValue> entry :
                      domain.extractNullableFixedValues().entrySet()) {
                    ColumnHandle columnHandle = entry.getKey();
                    String columnName = columnHandles.get(columnHandle);
                    String value = null;
                    if (entry.getValue().getValue() != null) {
                      ColumnMetadata columnMetadata =
                          metadata.getColumnMetadata(session, tableHandle.get(), columnHandle);
                      try {
                        FunctionInfo operator =
                            metadata
                                .getFunctionRegistry()
                                .getCoercion(columnMetadata.getType(), VARCHAR);
                        value =
                            ((Slice)
                                    operator
                                        .getMethodHandle()
                                        .invokeWithArguments(entry.getValue().getValue()))
                                .toStringUtf8();
                      } catch (OperatorNotFoundException e) {
                        value = "<UNREPRESENTABLE VALUE>";
                      } catch (Throwable throwable) {
                        throw Throwables.propagate(throwable);
                      }
                    }
                    table.add(
                        catalogName,
                        tableName.getSchemaName(),
                        tableName.getTableName(),
                        partitionNumber,
                        columnName,
                        value);
                  }
                  partitionNumber++;
                }
              });
    }
    return table.build();
  }
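
// Hedged sketch, standalone demo: buildPartitions inverts the name -> handle map
// with ImmutableBiMap.copyOf(...).inverse() so partition values can be labeled
// with their column names. The toy keys and values below are hypothetical.
import com.google.common.collect.ImmutableBiMap;
import com.google.common.collect.ImmutableMap;

import java.util.Map;

public class BiMapInverseDemo {
  public static void main(String[] args) {
    Map<String, Integer> nameToHandle = ImmutableMap.of("ds", 1, "file_format", 2);

    // inverse() swaps keys and values; copyOf throws if any value is duplicated
    Map<Integer, String> handleToName = ImmutableBiMap.copyOf(nameToHandle).inverse();

    System.out.println(handleToName.get(2)); // file_format
  }
}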
 @Test(expectedExceptions = TableNotFoundException.class)
 public void testGetPartitionNamesException() throws Exception {
   splitManager.getPartitions(invalidTableHandle, TupleDomain.<ConnectorColumnHandle>all());
 }
  @BeforeMethod
  public void setUp() throws Exception {
    DualMetadata dualMetadata = new DualMetadata();
    TableHandle tableHandle =
        dualMetadata.getTableHandle(new SchemaTableName("default", DualMetadata.NAME));
    assertNotNull(tableHandle, "tableHandle is null");

    ColumnHandle columnHandle = dualMetadata.getColumnHandle(tableHandle, DualMetadata.COLUMN_NAME);
    assertNotNull(columnHandle, "columnHandle is null");
    Symbol symbol = new Symbol(DualMetadata.COLUMN_NAME);

    MetadataManager metadata = new MetadataManager(new FeaturesConfig());
    metadata.addInternalSchemaMetadata(MetadataManager.INTERNAL_CONNECTOR_ID, dualMetadata);

    DualSplitManager dualSplitManager = new DualSplitManager(new InMemoryNodeManager());
    PartitionResult partitionResult =
        dualSplitManager.getPartitions(tableHandle, TupleDomain.all());

    SplitSource splitSource =
        dualSplitManager.getPartitionSplits(tableHandle, partitionResult.getPartitions());
    split = Iterables.getOnlyElement(splitSource.getNextBatch(1));
    assertTrue(splitSource.isFinished());

    LocalExecutionPlanner planner =
        new LocalExecutionPlanner(
            new NodeInfo("test"),
            metadata,
            new DataStreamManager(new DualDataStreamProvider()),
            new MockLocalStorageManager(new File("target/temp")),
            new RecordSinkManager(),
            new MockExchangeClientSupplier(),
            new ExpressionCompiler(metadata));

    taskExecutor = new TaskExecutor(8);
    taskExecutor.start();

    tableScanNodeId = new PlanNodeId("tableScan");
    PlanFragment testFragment =
        new PlanFragment(
            new PlanFragmentId("fragment"),
            new TableScanNode(
                tableScanNodeId,
                tableHandle,
                ImmutableList.of(symbol),
                ImmutableMap.of(symbol, columnHandle),
                null,
                Optional.<GeneratedPartitions>absent()),
            ImmutableMap.of(symbol, Type.VARCHAR),
            PlanDistribution.SOURCE,
            tableScanNodeId,
            OutputPartitioning.NONE,
            ImmutableList.<Symbol>of());

    TaskId taskId = new TaskId("query", "stage", "task");
    Session session = new Session("user", "test", "default", "default", "test", "test");

    taskNotificationExecutor = Executors.newCachedThreadPool(threadsNamed("task-notification-%d"));

    outputBuffers = OutputBuffers.INITIAL_EMPTY_OUTPUT_BUFFERS;

    taskExecution =
        SqlTaskExecution.createSqlTaskExecution(
            session,
            taskId,
            URI.create("fake://task/" + taskId),
            testFragment,
            ImmutableList.<TaskSource>of(),
            outputBuffers,
            planner,
            new DataSize(32, Unit.MEGABYTE),
            taskExecutor,
            taskNotificationExecutor,
            new DataSize(256, Unit.MEGABYTE),
            new DataSize(8, Unit.MEGABYTE),
            new QueryMonitor(
                new ObjectMapperProvider().get(), new NullEventClient(), new NodeInfo("test")),
            false);
  }
  private void doCreateTable() throws InterruptedException {
    // begin creating the table
    List<ColumnMetadata> columns =
        ImmutableList.<ColumnMetadata>builder()
            .add(new ColumnMetadata("id", BIGINT, 1, false))
            .add(new ColumnMetadata("t_string", VARCHAR, 2, false))
            .add(new ColumnMetadata("t_bigint", BIGINT, 3, false))
            .add(new ColumnMetadata("t_double", DOUBLE, 4, false))
            .add(new ColumnMetadata("t_boolean", BOOLEAN, 5, false))
            .build();

    ConnectorTableMetadata tableMetadata =
        new ConnectorTableMetadata(temporaryCreateTable, columns, tableOwner);
    ConnectorOutputTableHandle outputHandle = metadata.beginCreateTable(SESSION, tableMetadata);

    // write the records
    RecordSink sink = recordSinkProvider.getRecordSink(outputHandle);

    sink.beginRecord(1);
    sink.appendLong(1);
    sink.appendString("hello".getBytes(UTF_8));
    sink.appendLong(123);
    sink.appendDouble(43.5);
    sink.appendBoolean(true);
    sink.finishRecord();

    sink.beginRecord(1);
    sink.appendLong(2);
    sink.appendNull();
    sink.appendNull();
    sink.appendNull();
    sink.appendNull();
    sink.finishRecord();

    sink.beginRecord(1);
    sink.appendLong(3);
    sink.appendString("bye".getBytes(UTF_8));
    sink.appendLong(456);
    sink.appendDouble(98.1);
    sink.appendBoolean(false);
    sink.finishRecord();

    String fragment = sink.commit();

    // commit the table
    metadata.commitCreateTable(outputHandle, ImmutableList.of(fragment));

    // load the new table
    ConnectorTableHandle tableHandle = getTableHandle(temporaryCreateTable);
    List<ConnectorColumnHandle> columnHandles =
        ImmutableList.copyOf(metadata.getColumnHandles(tableHandle).values());

    // verify the metadata
    tableMetadata = metadata.getTableMetadata(getTableHandle(temporaryCreateTable));
    assertEquals(tableMetadata.getOwner(), tableOwner);

    Map<String, ColumnMetadata> columnMap =
        uniqueIndex(tableMetadata.getColumns(), columnNameGetter());

    assertPrimitiveField(columnMap, 0, "id", BIGINT, false);
    assertPrimitiveField(columnMap, 1, "t_string", VARCHAR, false);
    assertPrimitiveField(columnMap, 2, "t_bigint", BIGINT, false);
    assertPrimitiveField(columnMap, 3, "t_double", DOUBLE, false);
    assertPrimitiveField(columnMap, 4, "t_boolean", BOOLEAN, false);

    // verify the data
    ConnectorPartitionResult partitionResult =
        splitManager.getPartitions(tableHandle, TupleDomain.<ConnectorColumnHandle>all());
    assertEquals(partitionResult.getPartitions().size(), 1);
    ConnectorSplitSource splitSource =
        splitManager.getPartitionSplits(tableHandle, partitionResult.getPartitions());
    ConnectorSplit split = getOnlyElement(getAllSplits(splitSource));

    try (RecordCursor cursor = recordSetProvider.getRecordSet(split, columnHandles).cursor()) {
      assertRecordCursorType(cursor, "rcfile-binary");

      assertTrue(cursor.advanceNextPosition());
      assertEquals(cursor.getLong(0), 1);
      assertEquals(cursor.getSlice(1).toStringUtf8(), "hello");
      assertEquals(cursor.getLong(2), 123);
      assertEquals(cursor.getDouble(3), 43.5);
      assertEquals(cursor.getBoolean(4), true);

      assertTrue(cursor.advanceNextPosition());
      assertEquals(cursor.getLong(0), 2);
      assertTrue(cursor.isNull(1));
      assertTrue(cursor.isNull(2));
      assertTrue(cursor.isNull(3));
      assertTrue(cursor.isNull(4));

      assertTrue(cursor.advanceNextPosition());
      assertEquals(cursor.getLong(0), 3);
      assertEquals(cursor.getSlice(1).toStringUtf8(), "bye");
      assertEquals(cursor.getLong(2), 456);
      assertEquals(cursor.getDouble(3), 98.1);
      assertEquals(cursor.getBoolean(4), false);

      assertFalse(cursor.advanceNextPosition());
    }
  }
    @Override
    public PlanNode rewriteTableScan(
        TableScanNode node, Expression inheritedPredicate, PlanRewriter<Expression> planRewriter) {
      DomainTranslator.ExtractionResult extractionResult =
          DomainTranslator.fromPredicate(
              inheritedPredicate, symbolAllocator.getTypes(), node.getAssignments());
      Expression extractionRemainingExpression = extractionResult.getRemainingExpression();
      TupleDomain tupleDomain = extractionResult.getTupleDomain();

      if (node.getGeneratedPartitions().isPresent()) {
        // Add back the TupleDomain that was used to generate the previous set of
        // partitions, if present, and intersect the domain summary too, since that
        // can only help prune down the ranges. The domains should never widen
        // between passes.
        tupleDomain =
            tupleDomain
                .intersect(node.getGeneratedPartitions().get().getTupleDomainInput())
                .intersect(node.getPartitionsDomainSummary());
      }

      PartitionResult matchingPartitions =
          splitManager.getPartitions(node.getTable(), Optional.of(tupleDomain));
      List<Partition> partitions = matchingPartitions.getPartitions();
      TupleDomain undeterminedTupleDomain = matchingPartitions.getUndeterminedTupleDomain();

      Expression unevaluatedDomainPredicate =
          DomainTranslator.toPredicate(
              undeterminedTupleDomain, ImmutableBiMap.copyOf(node.getAssignments()).inverse());

      // Construct the post scan predicate. Add the unevaluated TupleDomain back first since those
      // are generally cheaper to evaluate than anything we can't extract
      Expression postScanPredicate =
          combineConjuncts(unevaluatedDomainPredicate, extractionRemainingExpression);

      // Do some early partition pruning
      partitions =
          ImmutableList.copyOf(
              filter(
                  partitions, not(shouldPrunePartition(postScanPredicate, node.getAssignments()))));
      GeneratedPartitions generatedPartitions = new GeneratedPartitions(tupleDomain, partitions);

      PlanNode output = node;
      if (!node.getGeneratedPartitions().equals(Optional.of(generatedPartitions))) {
        // Only overwrite the originalConstraint if it was previously null
        Expression originalConstraint =
            node.getOriginalConstraint() == null
                ? inheritedPredicate
                : node.getOriginalConstraint();
        output =
            new TableScanNode(
                node.getId(),
                node.getTable(),
                node.getOutputSymbols(),
                node.getAssignments(),
                originalConstraint,
                Optional.of(generatedPartitions));
      }
      if (!postScanPredicate.equals(BooleanLiteral.TRUE_LITERAL)) {
        output = new FilterNode(idAllocator.getNextId(), output, postScanPredicate);
      }
      return output;
    }
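
// Hedged sketch, not project code: the early pruning step in rewriteTableScan
// keeps only partitions for which the pruning predicate is false, via
// filter(partitions, not(predicate)). A minimal stand-in with integers:
import com.google.common.base.Predicate;
import com.google.common.collect.ImmutableList;

import java.util.List;

import static com.google.common.base.Predicates.not;
import static com.google.common.collect.Iterables.filter;

public class PruneDemo {
  public static void main(String[] args) {
    List<Integer> partitions = ImmutableList.of(1, 2, 3, 4);

    // hypothetical pruning rule: drop even-numbered partitions
    Predicate<Integer> shouldPrune = partition -> partition % 2 == 0;

    List<Integer> kept = ImmutableList.copyOf(filter(partitions, not(shouldPrune)));
    System.out.println(kept); // [1, 3]
  }
}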
  private void doCreateSampledTable() throws InterruptedException {
    // begin creating the table
    List<ColumnMetadata> columns =
        ImmutableList.<ColumnMetadata>builder()
            .add(new ColumnMetadata("sales", BIGINT, 1, false))
            .build();

    ConnectorTableMetadata tableMetadata =
        new ConnectorTableMetadata(temporaryCreateSampledTable, columns, tableOwner, true);
    ConnectorOutputTableHandle outputHandle = metadata.beginCreateTable(SESSION, tableMetadata);

    // write the records
    RecordSink sink = recordSinkProvider.getRecordSink(outputHandle);

    sink.beginRecord(8);
    sink.appendLong(2);
    sink.finishRecord();

    sink.beginRecord(5);
    sink.appendLong(3);
    sink.finishRecord();

    sink.beginRecord(7);
    sink.appendLong(4);
    sink.finishRecord();

    String fragment = sink.commit();

    // commit the table
    metadata.commitCreateTable(outputHandle, ImmutableList.of(fragment));

    // load the new table
    ConnectorTableHandle tableHandle = getTableHandle(temporaryCreateSampledTable);
    List<ConnectorColumnHandle> columnHandles =
        ImmutableList.<ConnectorColumnHandle>builder()
            .addAll(metadata.getColumnHandles(tableHandle).values())
            .add(metadata.getSampleWeightColumnHandle(tableHandle))
            .build();
    assertEquals(columnHandles.size(), 2);

    // verify the metadata
    tableMetadata = metadata.getTableMetadata(getTableHandle(temporaryCreateSampledTable));
    assertEquals(tableMetadata.getOwner(), tableOwner);

    Map<String, ColumnMetadata> columnMap =
        uniqueIndex(tableMetadata.getColumns(), columnNameGetter());
    assertEquals(columnMap.size(), 1);

    assertPrimitiveField(columnMap, 0, "sales", BIGINT, false);

    // verify the data
    ConnectorPartitionResult partitionResult =
        splitManager.getPartitions(tableHandle, TupleDomain.<ConnectorColumnHandle>all());
    assertEquals(partitionResult.getPartitions().size(), 1);
    ConnectorSplitSource splitSource =
        splitManager.getPartitionSplits(tableHandle, partitionResult.getPartitions());
    ConnectorSplit split = getOnlyElement(getAllSplits(splitSource));

    try (RecordCursor cursor = recordSetProvider.getRecordSet(split, columnHandles).cursor()) {
      assertRecordCursorType(cursor, "rcfile-binary");

      assertTrue(cursor.advanceNextPosition());
      assertEquals(cursor.getLong(0), 2);
      assertEquals(cursor.getLong(1), 8);

      assertTrue(cursor.advanceNextPosition());
      assertEquals(cursor.getLong(0), 3);
      assertEquals(cursor.getLong(1), 5);

      assertTrue(cursor.advanceNextPosition());
      assertEquals(cursor.getLong(0), 4);
      assertEquals(cursor.getLong(1), 7);

      assertFalse(cursor.advanceNextPosition());
    }
  }
  @Test
  public void testGetRecords() throws Exception {
    ConnectorTableHandle tableHandle = getTableHandle(table);
    ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(tableHandle);
    List<ConnectorColumnHandle> columnHandles =
        ImmutableList.copyOf(metadata.getColumnHandles(tableHandle).values());
    Map<String, Integer> columnIndex = indexColumns(columnHandles);

    ConnectorPartitionResult partitionResult =
        splitManager.getPartitions(tableHandle, TupleDomain.<ConnectorColumnHandle>all());
    List<ConnectorSplit> splits =
        getAllSplits(splitManager.getPartitionSplits(tableHandle, partitionResult.getPartitions()));
    assertEquals(splits.size(), this.partitions.size());
    for (ConnectorSplit split : splits) {
      HiveSplit hiveSplit = (HiveSplit) split;

      List<HivePartitionKey> partitionKeys = hiveSplit.getPartitionKeys();
      String ds = partitionKeys.get(0).getValue();
      String fileType = partitionKeys.get(1).getValue();
      long dummy = Long.parseLong(partitionKeys.get(2).getValue());

      long baseValue = getBaseValueForFileType(fileType);
      assertEquals(dummy * 100, baseValue);

      long rowNumber = 0;
      long completedBytes = 0;
      try (RecordCursor cursor =
          recordSetProvider.getRecordSet(hiveSplit, columnHandles).cursor()) {
        assertRecordCursorType(cursor, fileType);
        assertEquals(cursor.getTotalBytes(), hiveSplit.getLength());

        while (cursor.advanceNextPosition()) {
          try {
            assertReadFields(cursor, tableMetadata.getColumns());
          } catch (RuntimeException e) {
            throw new RuntimeException("row " + rowNumber, e);
          }

          rowNumber++;

          if (rowNumber % 19 == 0) {
            assertTrue(cursor.isNull(columnIndex.get("t_string")));
          } else if (rowNumber % 19 == 1) {
            assertEquals(cursor.getSlice(columnIndex.get("t_string")).toStringUtf8(), "");
          } else {
            assertEquals(
                cursor.getSlice(columnIndex.get("t_string")).toStringUtf8(), (fileType + " test"));
          }

          assertEquals(
              cursor.getLong(columnIndex.get("t_tinyint")),
              (long) ((byte) (baseValue + 1 + rowNumber)));
          assertEquals(cursor.getLong(columnIndex.get("t_smallint")), baseValue + 2 + rowNumber);
          assertEquals(cursor.getLong(columnIndex.get("t_int")), baseValue + 3 + rowNumber);

          if (rowNumber % 13 == 0) {
            assertTrue(cursor.isNull(columnIndex.get("t_bigint")));
          } else {
            assertEquals(cursor.getLong(columnIndex.get("t_bigint")), baseValue + 4 + rowNumber);
          }

          assertEquals(
              cursor.getDouble(columnIndex.get("t_float")), baseValue + 5.1 + rowNumber, 0.001);
          assertEquals(cursor.getDouble(columnIndex.get("t_double")), baseValue + 6.2 + rowNumber);

          if (rowNumber % 3 == 2) {
            assertTrue(cursor.isNull(columnIndex.get("t_boolean")));
          } else {
            assertEquals(cursor.getBoolean(columnIndex.get("t_boolean")), rowNumber % 3 != 0);
          }

          if (rowNumber % 17 == 0) {
            assertTrue(cursor.isNull(columnIndex.get("t_timestamp")));
          } else {
            long millis = new DateTime(2011, 5, 6, 7, 8, 9, 123, timeZone).getMillis();
            assertEquals(
                cursor.getLong(columnIndex.get("t_timestamp")), millis, (fileType + " test"));
          }

          if (rowNumber % 23 == 0) {
            assertTrue(cursor.isNull(columnIndex.get("t_binary")));
          } else {
            assertEquals(
                cursor.getSlice(columnIndex.get("t_binary")).toStringUtf8(), (fileType + " test"));
          }

          if (rowNumber % 29 == 0) {
            assertTrue(cursor.isNull(columnIndex.get("t_map")));
          } else {
            String expectedJson = "{\"format\":\"" + fileType + "\"}";
            assertEquals(cursor.getSlice(columnIndex.get("t_map")).toStringUtf8(), expectedJson);
          }

          if (rowNumber % 27 == 0) {
            assertTrue(cursor.isNull(columnIndex.get("t_array_string")));
          } else {
            String expectedJson = "[\"" + fileType + "\",\"test\",\"data\"]";
            assertEquals(
                cursor.getSlice(columnIndex.get("t_array_string")).toStringUtf8(), expectedJson);
          }

          if (rowNumber % 31 == 0) {
            assertTrue(cursor.isNull(columnIndex.get("t_complex")));
          } else {
            String expectedJson =
                "{\"1\":[{\"s_string\":\""
                    + fileType
                    + "-a\",\"s_double\":0.1},{\"s_string\":\""
                    + fileType
                    + "-b\",\"s_double\":0.2}]}";
            assertEquals(
                cursor.getSlice(columnIndex.get("t_complex")).toStringUtf8(), expectedJson);
          }

          assertEquals(cursor.getSlice(columnIndex.get("ds")).toStringUtf8(), ds);
          assertEquals(cursor.getSlice(columnIndex.get("file_format")).toStringUtf8(), fileType);
          assertEquals(cursor.getLong(columnIndex.get("dummy")), dummy);

          long newCompletedBytes = cursor.getCompletedBytes();
          assertTrue(newCompletedBytes >= completedBytes);
          assertTrue(newCompletedBytes <= hiveSplit.getLength());
          completedBytes = newCompletedBytes;
        }
      }
      assertTrue(completedBytes <= hiveSplit.getLength());
      assertEquals(rowNumber, 100);
    }
  }
 @Override
 public TupleDomain getTupleDomain() {
   return TupleDomain.all();
 }
  @Override
  public ConnectorPartitionResult getPartitions(
      ConnectorSession session,
      ConnectorTableHandle tableHandle,
      TupleDomain<ColumnHandle> tupleDomain) {
    CassandraTableHandle cassandraTableHandle =
        checkType(tableHandle, CassandraTableHandle.class, "tableHandle");
    checkNotNull(tupleDomain, "tupleDomain is null");
    CassandraTable table = schemaProvider.getTable(cassandraTableHandle);
    List<CassandraColumnHandle> partitionKeys = table.getPartitionKeyColumns();

    // fetch the partitions
    List<CassandraPartition> allPartitions = getCassandraPartitions(table, tupleDomain);
    log.debug(
        "%s.%s #partitions: %d",
        cassandraTableHandle.getSchemaName(),
        cassandraTableHandle.getTableName(),
        allPartitions.size());

    // do a final pass to filter based on fields that could not be used to build the prefix
    List<ConnectorPartition> partitions =
        allPartitions
            .stream()
            .filter(partition -> tupleDomain.overlaps(partition.getTupleDomain()))
            .collect(toList());

    // All partition key domains will be fully evaluated, so we don't need to include those
    TupleDomain<ColumnHandle> remainingTupleDomain = TupleDomain.none();
    if (!tupleDomain.isNone()) {
      if (partitions.size() == 1 && ((CassandraPartition) partitions.get(0)).isUnpartitioned()) {
        remainingTupleDomain = tupleDomain;
      } else {
        @SuppressWarnings({"rawtypes", "unchecked"})
        List<ColumnHandle> partitionColumns = (List) partitionKeys;
        remainingTupleDomain =
            TupleDomain.withColumnDomains(
                Maps.filterKeys(tupleDomain.getDomains(), not(in(partitionColumns))));
      }
    }

    // Push down indexed-column fixed-value predicates only for the unpartitioned
    // partition, which uses a token range query.
    if (partitions.size() == 1 && ((CassandraPartition) partitions.get(0)).isUnpartitioned()) {
      Map<ColumnHandle, Domain> domains = tupleDomain.getDomains();
      List<ColumnHandle> indexedColumns = new ArrayList<>();
      // compose partitionId by using indexed column
      StringBuilder sb = new StringBuilder();
      for (Map.Entry<ColumnHandle, Domain> entry : domains.entrySet()) {
        CassandraColumnHandle column = (CassandraColumnHandle) entry.getKey();
        Domain domain = entry.getValue();
        if (column.isIndexed() && domain.isSingleValue()) {
          sb.append(CassandraCqlUtils.validColumnName(column.getName()))
              .append(" = ")
              .append(
                  CassandraCqlUtils.cqlValue(
                      toCQLCompatibleString(entry.getValue().getSingleValue()),
                      column.getCassandraType()));
          indexedColumns.add(column);
          // Only one indexed column predicate can be pushed down.
          break;
        }
      }
      if (sb.length() > 0) {
        CassandraPartition partition = (CassandraPartition) partitions.get(0);
        TupleDomain<ColumnHandle> filterIndexedColumn =
            TupleDomain.withColumnDomains(
                Maps.filterKeys(remainingTupleDomain.getDomains(), not(in(indexedColumns))));
        partitions = new ArrayList<>();
        partitions.add(
            new CassandraPartition(partition.getKey(), sb.toString(), filterIndexedColumn, true));
        return new ConnectorPartitionResult(partitions, filterIndexedColumn);
      }
    }
    return new ConnectorPartitionResult(partitions, remainingTupleDomain);
  }
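
// Hedged sketch, standalone demo: the Cassandra getPartitions method drops
// fully evaluated domains with Maps.filterKeys(map, not(in(columns))). The same
// predicate composition with plain strings (all values below are made up):
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;

import java.util.List;
import java.util.Map;

import static com.google.common.base.Predicates.in;
import static com.google.common.base.Predicates.not;
import static com.google.common.collect.Maps.filterKeys;

public class FilterKeysDemo {
  public static void main(String[] args) {
    Map<String, String> domains =
        ImmutableMap.of("key_part", "d1", "clustering_col", "d2", "regular_col", "d3");
    List<String> partitionColumns = ImmutableList.of("key_part");

    // keep only domains whose column is NOT a partition key column
    Map<String, String> remaining = filterKeys(domains, not(in(partitionColumns)));

    System.out.println(remaining); // {clustering_col=d2, regular_col=d3}
  }
}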