@Override public ConnectorSplitSource getPartitionSplits( ConnectorSession session, ConnectorTableHandle tableHandle, List<ConnectorPartition> partitions) { checkNotNull(tableHandle, "tableHandle is null"); CassandraTableHandle cassandraTableHandle = checkType(tableHandle, CassandraTableHandle.class, "tableHandle"); checkNotNull(partitions, "partitions is null"); if (partitions.isEmpty()) { return new FixedSplitSource(connectorId, ImmutableList.<ConnectorSplit>of()); } // if this is an unpartitioned table, split into equal ranges if (partitions.size() == 1) { ConnectorPartition partition = partitions.get(0); CassandraPartition cassandraPartition = checkType(partition, CassandraPartition.class, "partition"); if (cassandraPartition.isUnpartitioned() || cassandraPartition.isIndexedColumnPredicatePushdown()) { CassandraTable table = schemaProvider.getTable(cassandraTableHandle); List<ConnectorSplit> splits = getSplitsByTokenRange(table, cassandraPartition.getPartitionId()); return new FixedSplitSource(connectorId, splits); } } return new FixedSplitSource( connectorId, getSplitsForPartitions(cassandraTableHandle, partitions)); }
@Override public ConnectorPartitionResult getPartitions( ConnectorSession session, ConnectorTableHandle tableHandle, TupleDomain<ColumnHandle> tupleDomain) { CassandraTableHandle cassandraTableHandle = checkType(tableHandle, CassandraTableHandle.class, "tableHandle"); checkNotNull(tupleDomain, "tupleDomain is null"); CassandraTable table = schemaProvider.getTable(cassandraTableHandle); List<CassandraColumnHandle> partitionKeys = table.getPartitionKeyColumns(); // fetch the partitions List<CassandraPartition> allPartitions = getCassandraPartitions(table, tupleDomain); log.debug( "%s.%s #partitions: %d", cassandraTableHandle.getSchemaName(), cassandraTableHandle.getTableName(), allPartitions.size()); // do a final pass to filter based on fields that could not be used to build the prefix List<ConnectorPartition> partitions = allPartitions .stream() .filter(partition -> tupleDomain.overlaps(partition.getTupleDomain())) .collect(toList()); // All partition key domains will be fully evaluated, so we don't need to include those TupleDomain<ColumnHandle> remainingTupleDomain = TupleDomain.none(); if (!tupleDomain.isNone()) { if (partitions.size() == 1 && ((CassandraPartition) partitions.get(0)).isUnpartitioned()) { remainingTupleDomain = tupleDomain; } else { @SuppressWarnings({"rawtypes", "unchecked"}) List<ColumnHandle> partitionColumns = (List) partitionKeys; remainingTupleDomain = TupleDomain.withColumnDomains( Maps.filterKeys(tupleDomain.getDomains(), not(in(partitionColumns)))); } } // push down indexed column fixed value predicates only for unpartitioned partition which uses // token range query if (partitions.size() == 1 && ((CassandraPartition) partitions.get(0)).isUnpartitioned()) { Map<ColumnHandle, Domain> domains = tupleDomain.getDomains(); List<ColumnHandle> indexedColumns = new ArrayList<>(); // compose partitionId by using indexed column StringBuilder sb = new StringBuilder(); for (Map.Entry<ColumnHandle, Domain> entry : domains.entrySet()) { CassandraColumnHandle column = (CassandraColumnHandle) entry.getKey(); Domain domain = entry.getValue(); if (column.isIndexed() && domain.isSingleValue()) { sb.append(CassandraCqlUtils.validColumnName(column.getName())) .append(" = ") .append( CassandraCqlUtils.cqlValue( toCQLCompatibleString(entry.getValue().getSingleValue()), column.getCassandraType())); indexedColumns.add(column); // Only one indexed column predicate can be pushed down. break; } } if (sb.length() > 0) { CassandraPartition partition = (CassandraPartition) partitions.get(0); TupleDomain<ColumnHandle> filterIndexedColumn = TupleDomain.withColumnDomains( Maps.filterKeys(remainingTupleDomain.getDomains(), not(in(indexedColumns)))); partitions = new ArrayList<>(); partitions.add( new CassandraPartition(partition.getKey(), sb.toString(), filterIndexedColumn, true)); return new ConnectorPartitionResult(partitions, filterIndexedColumn); } } return new ConnectorPartitionResult(partitions, remainingTupleDomain); }
private List<ConnectorSplit> getSplitsForPartitions( CassandraTableHandle cassTableHandle, List<ConnectorPartition> partitions) { String schema = cassTableHandle.getSchemaName(); String table = cassTableHandle.getTableName(); HostAddressFactory hostAddressFactory = new HostAddressFactory(); ImmutableList.Builder<ConnectorSplit> builder = ImmutableList.builder(); // For single partition key column table, we can merge multiple partitions into a single split // by using IN CLAUSE in a single select query if the partitions have the same host list. // For multiple partition key columns table, we can't merge them into a single select query, so // keep them in a separate split. boolean singlePartitionKeyColumn = true; String partitionKeyColumnName = null; if (!partitions.isEmpty()) { singlePartitionKeyColumn = partitions.get(0).getTupleDomain().getNullableColumnDomains().size() == 1; if (singlePartitionKeyColumn) { String partitionId = partitions.get(0).getPartitionId(); partitionKeyColumnName = partitionId.substring(0, partitionId.lastIndexOf('=') - 1); } } Map<Set<String>, Set<String>> hostsToPartitionKeys = new HashMap<>(); Map<Set<String>, List<HostAddress>> hostMap = new HashMap<>(); for (ConnectorPartition partition : partitions) { CassandraPartition cassandraPartition = checkType(partition, CassandraPartition.class, "partition"); Set<Host> hosts = cassandraSession.getReplicas(schema, cassandraPartition.getKeyAsByteBuffer()); List<HostAddress> addresses = hostAddressFactory.toHostAddressList(hosts); if (singlePartitionKeyColumn) { // host ip addresses ImmutableSet.Builder<String> sb = ImmutableSet.builder(); for (HostAddress address : addresses) { sb.add(address.getHostText()); } Set<String> hostAddresses = sb.build(); // partition key values Set<String> values = hostsToPartitionKeys.get(hostAddresses); if (values == null) { values = new HashSet<>(); } String partitionId = cassandraPartition.getPartitionId(); values.add(partitionId.substring(partitionId.lastIndexOf('=') + 2)); hostsToPartitionKeys.put(hostAddresses, values); hostMap.put(hostAddresses, addresses); } else { CassandraSplit split = new CassandraSplit( connectorId, schema, table, cassandraPartition.getPartitionId(), null, addresses); builder.add(split); } } if (singlePartitionKeyColumn) { for (Map.Entry<Set<String>, Set<String>> entry : hostsToPartitionKeys.entrySet()) { StringBuilder sb = new StringBuilder(partitionSizeForBatchSelect); int size = 0; for (String value : entry.getValue()) { if (size > 0) { sb.append(","); } sb.append(value); size++; if (size > partitionSizeForBatchSelect) { String partitionId = String.format("%s in (%s)", partitionKeyColumnName, sb.toString()); CassandraSplit split = new CassandraSplit( connectorId, schema, table, partitionId, null, hostMap.get(entry.getKey())); builder.add(split); size = 0; sb.setLength(0); sb.trimToSize(); } } if (size > 0) { String partitionId = String.format("%s in (%s)", partitionKeyColumnName, sb.toString()); CassandraSplit split = new CassandraSplit( connectorId, schema, table, partitionId, null, hostMap.get(entry.getKey())); builder.add(split); } } } return builder.build(); }