Exemplo n.º 1
0
  /**
   * Produces the split source for the given partitions of a Cassandra table.
   *
   * <p>An empty partition list yields an empty split source. A single partition that is either
   * unpartitioned or carries an indexed-column predicate push-down is split by token range; every
   * other input is handed to {@code getSplitsForPartitions}.
   *
   * @param session the connector session (not used by this method)
   * @param tableHandle handle of the table to split; must be a {@code CassandraTableHandle}
   * @param partitions partitions to create splits for; must not be null
   * @return a fixed split source holding the computed splits
   */
  @Override
  public ConnectorSplitSource getPartitionSplits(
      ConnectorSession session,
      ConnectorTableHandle tableHandle,
      List<ConnectorPartition> partitions) {
    checkNotNull(tableHandle, "tableHandle is null");
    CassandraTableHandle handle =
        checkType(tableHandle, CassandraTableHandle.class, "tableHandle");
    checkNotNull(partitions, "partitions is null");

    // Nothing to scan: hand back a source without any splits.
    if (partitions.isEmpty()) {
      return new FixedSplitSource(connectorId, ImmutableList.<ConnectorSplit>of());
    }

    boolean singlePartition = partitions.size() == 1;
    if (singlePartition) {
      CassandraPartition onlyPartition =
          checkType(partitions.get(0), CassandraPartition.class, "partition");
      boolean scanByTokenRange =
          onlyPartition.isUnpartitioned() || onlyPartition.isIndexedColumnPredicatePushdown();

      // An unpartitioned table (or an index push-down) is divided into token ranges.
      if (scanByTokenRange) {
        List<ConnectorSplit> tokenRangeSplits =
            getSplitsByTokenRange(
                schemaProvider.getTable(handle), onlyPartition.getPartitionId());
        return new FixedSplitSource(connectorId, tokenRangeSplits);
      }
    }

    List<ConnectorSplit> partitionSplits = getSplitsForPartitions(handle, partitions);
    return new FixedSplitSource(connectorId, partitionSplits);
  }
Exemplo n.º 2
0
  /**
   * Determines which partitions of a Cassandra table have to be read for {@code tupleDomain} and
   * which part of the constraint is left over.
   *
   * <p>Steps, in order:
   *
   * <ol>
   *   <li>fetch the candidate partitions via {@code getCassandraPartitions};
   *   <li>keep only the candidates whose tuple domain overlaps {@code tupleDomain};
   *   <li>compute the remaining tuple domain: the whole constraint when the only partition is
   *       unpartitioned, otherwise the constraint without the partition key columns;
   *   <li>for a single unpartitioned partition, push down the first indexed column that is
   *       constrained to a single value by encoding it as {@code <column> = <value>} in the
   *       partition id.
   * </ol>
   *
   * @param session the connector session (not used by this method)
   * @param tableHandle handle of the table; must be a {@code CassandraTableHandle}
   * @param tupleDomain constraint to evaluate; must not be null
   * @return the selected partitions together with the tuple domain left over
   */
  @Override
  public ConnectorPartitionResult getPartitions(
      ConnectorSession session,
      ConnectorTableHandle tableHandle,
      TupleDomain<ColumnHandle> tupleDomain) {
    CassandraTableHandle handle =
        checkType(tableHandle, CassandraTableHandle.class, "tableHandle");
    checkNotNull(tupleDomain, "tupleDomain is null");
    CassandraTable table = schemaProvider.getTable(handle);
    List<CassandraColumnHandle> partitionKeys = table.getPartitionKeyColumns();

    // fetch the partitions
    List<CassandraPartition> candidates = getCassandraPartitions(table, tupleDomain);
    log.debug(
        "%s.%s #partitions: %d", handle.getSchemaName(), handle.getTableName(), candidates.size());

    // final pass: filter on the fields that could not be used to build the prefix
    List<ConnectorPartition> partitions = new ArrayList<>();
    for (CassandraPartition candidate : candidates) {
      if (tupleDomain.overlaps(candidate.getTupleDomain())) {
        partitions.add(candidate);
      }
    }

    boolean singleUnpartitioned =
        partitions.size() == 1 && ((CassandraPartition) partitions.get(0)).isUnpartitioned();

    // All partition key domains will be fully evaluated, so they are left out of the remainder.
    TupleDomain<ColumnHandle> remainingTupleDomain;
    if (tupleDomain.isNone()) {
      remainingTupleDomain = TupleDomain.none();
    } else if (singleUnpartitioned) {
      remainingTupleDomain = tupleDomain;
    } else {
      @SuppressWarnings({"rawtypes", "unchecked"})
      List<ColumnHandle> partitionColumns = (List) partitionKeys;
      remainingTupleDomain =
          TupleDomain.withColumnDomains(
              Maps.filterKeys(tupleDomain.getDomains(), not(in(partitionColumns))));
    }

    // Indexed column predicates are only pushed down for the unpartitioned partition, which is
    // read with a token range query.
    if (!singleUnpartitioned) {
      return new ConnectorPartitionResult(partitions, remainingTupleDomain);
    }

    // Look for the first indexed column fixed to a single value; only one such predicate can be
    // pushed down, so the search stops at the first hit.
    String pushedPredicate = null;
    List<ColumnHandle> indexedColumns = new ArrayList<>();
    for (Map.Entry<ColumnHandle, Domain> constraint : tupleDomain.getDomains().entrySet()) {
      CassandraColumnHandle column = (CassandraColumnHandle) constraint.getKey();
      Domain domain = constraint.getValue();
      if (!column.isIndexed() || !domain.isSingleValue()) {
        continue;
      }
      // the predicate doubles as the partition id
      pushedPredicate =
          CassandraCqlUtils.validColumnName(column.getName())
              + " = "
              + CassandraCqlUtils.cqlValue(
                  toCQLCompatibleString(domain.getSingleValue()), column.getCassandraType());
      indexedColumns.add(column);
      break;
    }

    if (pushedPredicate == null) {
      return new ConnectorPartitionResult(partitions, remainingTupleDomain);
    }

    CassandraPartition unpartitioned = (CassandraPartition) partitions.get(0);
    TupleDomain<ColumnHandle> withoutIndexedColumn =
        TupleDomain.withColumnDomains(
            Maps.filterKeys(remainingTupleDomain.getDomains(), not(in(indexedColumns))));
    List<ConnectorPartition> pushdownPartitions = new ArrayList<>();
    pushdownPartitions.add(
        new CassandraPartition(
            unpartitioned.getKey(), pushedPredicate, withoutIndexedColumn, true));
    return new ConnectorPartitionResult(pushdownPartitions, withoutIndexedColumn);
  }
Exemplo n.º 3
0
  /**
   * Creates the splits for an explicit list of partitions.
   *
   * <p>When the first partition's tuple domain holds exactly one column domain, the table is
   * treated as having a single partition key column: partitions that share the same replica host
   * set are merged into splits whose partition id is {@code <column> in (v1,v2,...)}. Otherwise
   * each partition becomes its own split, because several key columns cannot be merged into one
   * select query.
   *
   * <p>Partition ids are expected to look like {@code <column> = <value>}. They are divided at the
   * first {@code " = "}, so a value that itself contains {@code '='} (for example a quoted string
   * ending in base64 padding) is kept intact instead of being cut at its last {@code '='}.
   *
   * @param cassTableHandle handle supplying the schema and table name
   * @param partitions partitions to create splits for; every element must be a {@code
   *     CassandraPartition}
   * @return the splits, in an immutable list
   * @throws IllegalArgumentException if a partition id that has to be parsed has no {@code " = "}
   */
  private List<ConnectorSplit> getSplitsForPartitions(
      CassandraTableHandle cassTableHandle, List<ConnectorPartition> partitions) {
    String schema = cassTableHandle.getSchemaName();
    String table = cassTableHandle.getTableName();
    HostAddressFactory hostAddressFactory = new HostAddressFactory();
    ImmutableList.Builder<ConnectorSplit> builder = ImmutableList.builder();

    // For single partition key column table, we can merge multiple partitions into a single split
    // by using IN CLAUSE in a single select query if the partitions have the same host list.
    // For multiple partition key columns table, we can't merge them into a single select query, so
    // keep them in a separate split.
    boolean singlePartitionKeyColumn = true;
    String partitionKeyColumnName = null;
    if (!partitions.isEmpty()) {
      singlePartitionKeyColumn =
          partitions.get(0).getTupleDomain().getNullableColumnDomains().size() == 1;
      if (singlePartitionKeyColumn) {
        String partitionId = partitions.get(0).getPartitionId();
        partitionKeyColumnName =
            partitionId.substring(0, indexOfPartitionIdSeparator(partitionId));
      }
    }
    // replica host names -> partition key values served by exactly those hosts
    Map<Set<String>, Set<String>> hostsToPartitionKeys = new HashMap<>();
    // replica host names -> the full host addresses to attach to the merged split
    Map<Set<String>, List<HostAddress>> hostMap = new HashMap<>();

    for (ConnectorPartition partition : partitions) {
      CassandraPartition cassandraPartition =
          checkType(partition, CassandraPartition.class, "partition");
      Set<Host> hosts =
          cassandraSession.getReplicas(schema, cassandraPartition.getKeyAsByteBuffer());
      List<HostAddress> addresses = hostAddressFactory.toHostAddressList(hosts);
      if (singlePartitionKeyColumn) {
        // host ip addresses
        ImmutableSet.Builder<String> hostNames = ImmutableSet.builder();
        for (HostAddress address : addresses) {
          hostNames.add(address.getHostText());
        }
        Set<String> hostAddresses = hostNames.build();
        // partition key value: everything after the first " = " of the partition id
        String partitionId = cassandraPartition.getPartitionId();
        String value = partitionId.substring(indexOfPartitionIdSeparator(partitionId) + 3);
        hostsToPartitionKeys.computeIfAbsent(hostAddresses, ignored -> new HashSet<>()).add(value);
        hostMap.put(hostAddresses, addresses);
      } else {
        CassandraSplit split =
            new CassandraSplit(
                connectorId, schema, table, cassandraPartition.getPartitionId(), null, addresses);
        builder.add(split);
      }
    }
    if (singlePartitionKeyColumn) {
      for (Map.Entry<Set<String>, Set<String>> entry : hostsToPartitionKeys.entrySet()) {
        List<HostAddress> addresses = hostMap.get(entry.getKey());
        StringBuilder sb = new StringBuilder(partitionSizeForBatchSelect);
        int size = 0;
        for (String value : entry.getValue()) {
          if (size > 0) {
            sb.append(",");
          }
          sb.append(value);
          size++;
          // NOTE(review): with '>' a batch is only flushed once it holds
          // partitionSizeForBatchSelect + 1 values; confirm whether '>=' was intended.
          if (size > partitionSizeForBatchSelect) {
            String partitionId = String.format("%s in (%s)", partitionKeyColumnName, sb.toString());
            builder.add(
                new CassandraSplit(connectorId, schema, table, partitionId, null, addresses));
            size = 0;
            // reset the buffer for the next batch, keeping its capacity
            sb.setLength(0);
          }
        }
        // flush the values left over after the last full batch
        if (size > 0) {
          String partitionId = String.format("%s in (%s)", partitionKeyColumnName, sb.toString());
          builder.add(
              new CassandraSplit(connectorId, schema, table, partitionId, null, addresses));
        }
      }
    }
    return builder.build();
  }

  /**
   * Locates the first {@code " = "} in a partition id of the form {@code <column> = <value>}.
   *
   * @param partitionId the partition id to inspect
   * @return index of the blank that precedes the first {@code '='} separator
   * @throws IllegalArgumentException if the partition id contains no {@code " = "}
   */
  private static int indexOfPartitionIdSeparator(String partitionId) {
    int separatorIndex = partitionId.indexOf(" = ");
    if (separatorIndex < 0) {
      throw new IllegalArgumentException(
          "Partition id is not of the form '<column> = <value>': " + partitionId);
    }
    return separatorIndex;
  }