@BeforeMethod
public void setUp()
        throws Exception
{
    address = HostAddress.fromParts("localhost", 1234);
    split = new LocalFileSplit(
            address,
            LocalFileTables.HttpRequestLogTable.getSchemaTableName(),
            TupleDomain.all());
}
@Test
public void testJsonRoundTrip()
{
    Properties schema = new Properties();
    schema.setProperty("foo", "bar");
    schema.setProperty("bar", "baz");

    ImmutableList<HivePartitionKey> partitionKeys = ImmutableList.of(
            new HivePartitionKey("a", HIVE_STRING, "apple"),
            new HivePartitionKey("b", HiveType.HIVE_LONG, "42"));
    ImmutableList<HostAddress> addresses = ImmutableList.of(
            HostAddress.fromParts("127.0.0.1", 44),
            HostAddress.fromParts("127.0.0.1", 45));

    HiveSplit expected = new HiveSplit(
            "clientId",
            "db",
            "table",
            "partitionId",
            "path",
            42,
            88,
            schema,
            partitionKeys,
            addresses,
            OptionalInt.empty(),
            true,
            TupleDomain.<HiveColumnHandle>all(),
            ImmutableMap.of(1, HIVE_STRING));

    String json = codec.toJson(expected);
    HiveSplit actual = codec.fromJson(json);

    assertEquals(actual.getClientId(), expected.getClientId());
    assertEquals(actual.getDatabase(), expected.getDatabase());
    assertEquals(actual.getTable(), expected.getTable());
    assertEquals(actual.getPartitionName(), expected.getPartitionName());
    assertEquals(actual.getPath(), expected.getPath());
    assertEquals(actual.getStart(), expected.getStart());
    assertEquals(actual.getLength(), expected.getLength());
    assertEquals(actual.getSchema(), expected.getSchema());
    assertEquals(actual.getPartitionKeys(), expected.getPartitionKeys());
    assertEquals(actual.getAddresses(), expected.getAddresses());
    assertEquals(actual.getColumnCoercions(), expected.getColumnCoercions());
    assertEquals(actual.isForceLocalScheduling(), expected.isForceLocalScheduling());
}
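The codec field exercised by this test is not shown in the excerpt; one plausible setup, assuming Airlift's JsonCodec (the pattern such round-trip tests typically use):

private final JsonCodec<HiveSplit> codec = JsonCodec.jsonCodec(HiveSplit.class);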
@JsonCreator
public KafkaSplit(
        @JsonProperty("clientId") String clientId,
        @JsonProperty("topicName") String topicName,
        @JsonProperty("partitionId") int partitionId,
        @JsonProperty("brokerHost") String brokerHost,
        @JsonProperty("brokerPort") int brokerPort,
        @JsonProperty("sampleRate") int sampleRate,
        @JsonProperty("startTs") long startTs,
        @JsonProperty("endTs") long endTs,
        @JsonProperty("zookeeper") String zookeeper,
        @JsonProperty("zkSessionTimeout") int zkSessionTimeout,
        @JsonProperty("zkConnectTimeout") int zkConnectTimeout)
{
    this.clientId = checkNotNull(clientId, "clientId is null");
    this.topicName = checkNotNull(topicName, "topicName is null");
    // partitionId, startTs, and endTs are primitives and can never be null,
    // so the original null checks on them were no-ops and are dropped
    this.partitionId = partitionId;
    this.brokerHost = brokerHost;
    this.brokerPort = brokerPort;
    this.sampleRate = sampleRate;
    this.startTs = startTs;
    this.endTs = endTs;
    this.zookeeper = zookeeper;
    this.zkSessionTimeout = zkSessionTimeout;
    this.zkConnectTimeout = zkConnectTimeout;

    try {
        // resolve the broker host eagerly; note the address is pinned to
        // port 8080 rather than brokerPort
        InetAddress address = InetAddress.getByName(brokerHost);
        this.address = HostAddress.fromParts(address.getHostAddress(), 8080);
    }
    catch (UnknownHostException ex) {
        // chain the original exception as the cause instead of flattening it
        // to a string, so the stack trace is not lost
        throw new RuntimeException(ex);
    }
}
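Jackson only round-trips the @JsonCreator constructor above if each field is exposed through a matching annotated getter. A representative pair, as a sketch of that convention rather than the full class:

@JsonProperty
public String getTopicName()
{
    return topicName;
}

@JsonProperty
public int getBrokerPort()
{
    return brokerPort;
}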
private List<Node> selectCandidateNodes(NodeMap nodeMap, Split split)
{
    Set<Node> chosen = new LinkedHashSet<>(minCandidates);

    // first look for nodes that match the hint
    for (HostAddress hint : split.getAddresses()) {
        for (Node node : nodeMap.getNodesByHostAndPort().get(hint)) {
            if (chosen.add(node)) {
                scheduleLocal.incrementAndGet();
            }
        }

        InetAddress address;
        try {
            address = hint.toInetAddress();
        }
        catch (UnknownHostException e) {
            // skip addresses that don't resolve
            continue;
        }

        // consider a split with a host hint without a port as being accessible
        // by all nodes in that host
        if (!hint.hasPort() || split.isRemotelyAccessible()) {
            for (Node node : nodeMap.getNodesByHost().get(address)) {
                if (chosen.add(node)) {
                    scheduleLocal.incrementAndGet();
                }
            }
        }
    }

    // add nodes in the same rack, if below the minimum count
    if (split.isRemotelyAccessible() && chosen.size() < minCandidates) {
        for (HostAddress hint : split.getAddresses()) {
            InetAddress address;
            try {
                address = hint.toInetAddress();
            }
            catch (UnknownHostException e) {
                // skip addresses that don't resolve
                continue;
            }
            for (Node node : nodeMap.getNodesByRack().get(Rack.of(address))) {
                if (chosen.add(node)) {
                    scheduleRack.incrementAndGet();
                }
                if (chosen.size() == minCandidates) {
                    break;
                }
            }
            if (chosen.size() == minCandidates) {
                break;
            }
        }
    }

    // add some random nodes if below the minimum count
    if (split.isRemotelyAccessible() && chosen.size() < minCandidates) {
        for (Node node : lazyShuffle(nodeMap.getNodesByHost().values())) {
            if (chosen.add(node)) {
                scheduleRandom.incrementAndGet();
            }
            if (chosen.size() == minCandidates) {
                break;
            }
        }
    }

    return ImmutableList.copyOf(chosen);
}
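The lazyShuffle helper called in the last loop is not part of this excerpt. A minimal sketch of a compatible implementation, assuming Guava's AbstractIterator and Lists; the semantics (walk the collection in random order without shuffling it up front) are inferred from the call site:

private static <T> Iterable<T> lazyShuffle(Iterable<T> iterable)
{
    return () -> new AbstractIterator<T>()
    {
        private final List<T> list = Lists.newArrayList(iterable);
        private int limit = list.size();

        @Override
        protected T computeNext()
        {
            if (limit == 0) {
                return endOfData();
            }
            // pick a random element from the live region and swap it out
            int position = ThreadLocalRandom.current().nextInt(limit);
            T result = list.get(position);
            list.set(position, list.get(limit - 1));
            limit--;
            return result;
        }
    };
}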
@Override
public HostAddress getHostAndPort()
{
    return HostAddress.fromParts(hostname, 8080);
}
@Override
public HostAddress getHostAndPort()
{
    return HostAddress.fromUri(httpUri);
}
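Illustrative only: unlike the variant above that pins the port to 8080, fromUri derives both the host and the port from the node's HTTP URI. A hypothetical example:

HostAddress address = HostAddress.fromUri(URI.create("http://10.0.0.5:8080"));
// address.getHostText() -> "10.0.0.5"; address.getPort() -> 8080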
private List<ConnectorSplit> getSplitsForPartitions(
        CassandraTableHandle cassTableHandle,
        List<ConnectorPartition> partitions)
{
    String schema = cassTableHandle.getSchemaName();
    String table = cassTableHandle.getTableName();
    HostAddressFactory hostAddressFactory = new HostAddressFactory();
    ImmutableList.Builder<ConnectorSplit> builder = ImmutableList.builder();

    // For a table with a single partition key column, multiple partitions can be
    // merged into one split by using an IN clause in a single select query, as long
    // as the partitions share the same host list. For a table with multiple
    // partition key columns, partitions can't be merged into a single select query,
    // so each one stays in its own split.
    boolean singlePartitionKeyColumn = true;
    String partitionKeyColumnName = null;
    if (!partitions.isEmpty()) {
        singlePartitionKeyColumn = partitions.get(0).getTupleDomain().getNullableColumnDomains().size() == 1;
        if (singlePartitionKeyColumn) {
            // partition ids are assumed to have the form "<column> = <value>"
            String partitionId = partitions.get(0).getPartitionId();
            partitionKeyColumnName = partitionId.substring(0, partitionId.lastIndexOf('=') - 1);
        }
    }

    Map<Set<String>, Set<String>> hostsToPartitionKeys = new HashMap<>();
    Map<Set<String>, List<HostAddress>> hostMap = new HashMap<>();

    for (ConnectorPartition partition : partitions) {
        CassandraPartition cassandraPartition = checkType(partition, CassandraPartition.class, "partition");
        Set<Host> hosts = cassandraSession.getReplicas(schema, cassandraPartition.getKeyAsByteBuffer());
        List<HostAddress> addresses = hostAddressFactory.toHostAddressList(hosts);

        if (singlePartitionKeyColumn) {
            // host ip addresses
            ImmutableSet.Builder<String> sb = ImmutableSet.builder();
            for (HostAddress address : addresses) {
                sb.add(address.getHostText());
            }
            Set<String> hostAddresses = sb.build();

            // partition key values, grouped by replica host set
            Set<String> values = hostsToPartitionKeys.get(hostAddresses);
            if (values == null) {
                values = new HashSet<>();
            }
            String partitionId = cassandraPartition.getPartitionId();
            values.add(partitionId.substring(partitionId.lastIndexOf('=') + 2));
            hostsToPartitionKeys.put(hostAddresses, values);
            hostMap.put(hostAddresses, addresses);
        }
        else {
            CassandraSplit split = new CassandraSplit(
                    connectorId,
                    schema,
                    table,
                    cassandraPartition.getPartitionId(),
                    null,
                    addresses);
            builder.add(split);
        }
    }

    if (singlePartitionKeyColumn) {
        for (Map.Entry<Set<String>, Set<String>> entry : hostsToPartitionKeys.entrySet()) {
            StringBuilder sb = new StringBuilder(partitionSizeForBatchSelect);
            int size = 0;
            for (String value : entry.getValue()) {
                if (size > 0) {
                    sb.append(",");
                }
                sb.append(value);
                size++;
                // flush a batch once it exceeds the configured batch size
                if (size > partitionSizeForBatchSelect) {
                    String partitionId = String.format("%s in (%s)", partitionKeyColumnName, sb.toString());
                    CassandraSplit split = new CassandraSplit(
                            connectorId,
                            schema,
                            table,
                            partitionId,
                            null,
                            hostMap.get(entry.getKey()));
                    builder.add(split);
                    size = 0;
                    sb.setLength(0);
                    sb.trimToSize();
                }
            }
            // emit the final partial batch, if any
            if (size > 0) {
                String partitionId = String.format("%s in (%s)", partitionKeyColumnName, sb.toString());
                CassandraSplit split = new CassandraSplit(
                        connectorId,
                        schema,
                        table,
                        partitionId,
                        null,
                        hostMap.get(entry.getKey()));
                builder.add(split);
            }
        }
    }
    return builder.build();
}
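The substring arithmetic above assumes partition ids of the form "<column> = <value>", with one space on each side of the equals sign. A small illustration with a hypothetical partition id:

String partitionId = "user_id = 42";               // hypothetical example
int eq = partitionId.lastIndexOf('=');
String column = partitionId.substring(0, eq - 1);  // "user_id" (drops the space before '=')
String value = partitionId.substring(eq + 2);      // "42" (skips the "= ")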