Example #1
0
 @BeforeMethod
 public void setUp() throws Exception {
   // Fresh fixtures for every test: a fixed local address and a split over the
   // HTTP request log table with no tuple-domain constraint.
   address = HostAddress.fromParts("localhost", 1234);
   split = new LocalFileSplit(
       address,
       LocalFileTables.HttpRequestLogTable.getSchemaTableName(),
       TupleDomain.all());
 }
Example #2
0
  /** Serializes a fully-populated {@code HiveSplit} to JSON and back, then compares every field. */
  @Test
  public void testJsonRoundTrip() {
    // Table properties travelling with the split.
    Properties schema = new Properties();
    schema.setProperty("foo", "bar");
    schema.setProperty("bar", "baz");

    ImmutableList<HivePartitionKey> keys =
        ImmutableList.of(
            new HivePartitionKey("a", HIVE_STRING, "apple"),
            new HivePartitionKey("b", HiveType.HIVE_LONG, "42"));
    ImmutableList<HostAddress> hosts =
        ImmutableList.of(
            HostAddress.fromParts("127.0.0.1", 44),
            HostAddress.fromParts("127.0.0.1", 45));

    HiveSplit original =
        new HiveSplit(
            "clientId",
            "db",
            "table",
            "partitionId",
            "path",
            42,
            88,
            schema,
            keys,
            hosts,
            OptionalInt.empty(),
            true,
            TupleDomain.<HiveColumnHandle>all(),
            ImmutableMap.of(1, HIVE_STRING));

    // Round-trip through the codec; the decoded split must match field for field.
    HiveSplit decoded = codec.fromJson(codec.toJson(original));

    assertEquals(decoded.getClientId(), original.getClientId());
    assertEquals(decoded.getDatabase(), original.getDatabase());
    assertEquals(decoded.getTable(), original.getTable());
    assertEquals(decoded.getPartitionName(), original.getPartitionName());
    assertEquals(decoded.getPath(), original.getPath());
    assertEquals(decoded.getStart(), original.getStart());
    assertEquals(decoded.getLength(), original.getLength());
    assertEquals(decoded.getSchema(), original.getSchema());
    assertEquals(decoded.getPartitionKeys(), original.getPartitionKeys());
    assertEquals(decoded.getAddresses(), original.getAddresses());
    assertEquals(decoded.getColumnCoercions(), original.getColumnCoercions());
    assertEquals(decoded.isForceLocalScheduling(), original.isForceLocalScheduling());
  }
 /**
  * Deserialization constructor used by Jackson.
  *
  * <p>Resolves {@code brokerHost} eagerly and caches the resulting address; resolution
  * failure is surfaced as an unchecked exception since a split with an unresolvable
  * broker is unusable.
  *
  * @throws RuntimeException if {@code brokerHost} cannot be resolved
  */
 @JsonCreator
 public KafkaSplit(
     @JsonProperty("clientId") String clientId,
     @JsonProperty("topicName") String topicName,
     @JsonProperty("partitionId") int partitionId,
     @JsonProperty("brokerHost") String brokerHost,
     @JsonProperty("brokerPort") int brokerPort,
     @JsonProperty("sampleRate") int sampleRate,
     @JsonProperty("startTs") long startTs,
     @JsonProperty("endTs") long endTs,
     @JsonProperty("zookeeper") String zookeeper,
     @JsonProperty("zkSessionTimeout") int zkSessionTimeout,
     @JsonProperty("zkConnectTimeout") int zkConnectTimeout) {
   checkNotNull(clientId, "clientId is null");
   checkNotNull(topicName, "topicName is null");
   // NOTE: the original also called checkNotNull on partitionId, startTs and endTs,
   // but those are primitives and can never be null — the checks were no-ops and
   // have been removed.
   this.clientId = clientId;
   this.topicName = topicName;
   this.partitionId = partitionId;
   this.brokerHost = brokerHost;
   this.brokerPort = brokerPort;
   this.sampleRate = sampleRate;
   this.startTs = startTs;
   this.endTs = endTs;
   this.zookeeper = zookeeper;
   this.zkSessionTimeout = zkSessionTimeout;
   this.zkConnectTimeout = zkConnectTimeout;
   try {
     // NOTE(review): the port is hard-coded to 8080 here rather than using
     // brokerPort — looks intentional (separate HTTP endpoint?) but confirm.
     InetAddress address = InetAddress.getByName(brokerHost);
     this.address = HostAddress.fromParts(address.getHostAddress(), 8080);
   } catch (UnknownHostException ex) {
     // Preserve the full cause chain instead of flattening it to a string
     // (the original threw new RuntimeException(ex.toString()), losing the stack).
     throw new RuntimeException(ex);
   }
 }
Example #4
0
    /**
     * Picks up to {@code minCandidates} nodes to run the given split on, preferring
     * locality: first nodes exactly matching the split's address hints, then (for
     * remotely accessible splits) nodes in the same rack, and finally random nodes.
     * Each tier increments its own scheduling counter for every node it contributes.
     */
    private List<Node> selectCandidateNodes(NodeMap nodeMap, Split split) {
      // LinkedHashSet preserves insertion order, so the most-local nodes stay first
      // in the returned list.
      Set<Node> chosen = new LinkedHashSet<>(minCandidates);

      // first look for nodes that match the hint
      for (HostAddress hint : split.getAddresses()) {
        for (Node node : nodeMap.getNodesByHostAndPort().get(hint)) {
          if (chosen.add(node)) {
            scheduleLocal.incrementAndGet();
          }
        }

        InetAddress address;
        try {
          address = hint.toInetAddress();
        } catch (UnknownHostException e) {
          // skip addresses that don't resolve
          continue;
        }

        // consider a split with a host hint without a port as being accessible
        // by all nodes in that host
        if (!hint.hasPort() || split.isRemotelyAccessible()) {
          for (Node node : nodeMap.getNodesByHost().get(address)) {
            if (chosen.add(node)) {
              scheduleLocal.incrementAndGet();
            }
          }
        }
      }

      // add nodes in same rack, if below the minimum count
      if (split.isRemotelyAccessible() && chosen.size() < minCandidates) {
        for (HostAddress hint : split.getAddresses()) {
          InetAddress address;
          try {
            address = hint.toInetAddress();
          } catch (UnknownHostException e) {
            // skip addresses that don't resolve
            continue;
          }
          for (Node node : nodeMap.getNodesByRack().get(Rack.of(address))) {
            if (chosen.add(node)) {
              scheduleRack.incrementAndGet();
            }
            // stop as soon as the candidate quota is met
            if (chosen.size() == minCandidates) {
              break;
            }
          }
          if (chosen.size() == minCandidates) {
            break;
          }
        }
      }

      // add some random nodes if below the minimum count
      if (split.isRemotelyAccessible()) {
        if (chosen.size() < minCandidates) {
          for (Node node : lazyShuffle(nodeMap.getNodesByHost().values())) {
            if (chosen.add(node)) {
              scheduleRandom.incrementAndGet();
            }

            if (chosen.size() == minCandidates) {
              break;
            }
          }
        }
      }

      return ImmutableList.copyOf(chosen);
    }
Example #5
0
 /** Returns this node's address, always on port 8080. */
 @Override
 public HostAddress getHostAndPort() {
   // NOTE(review): the port is hard-coded rather than configurable — confirm intentional.
   final int port = 8080;
   return HostAddress.fromParts(hostname, port);
 }
Example #6
0
 /** Derives this node's host and port from its HTTP URI. */
 @Override
 public HostAddress getHostAndPort() {
   HostAddress hostAndPort = HostAddress.fromUri(httpUri);
   return hostAndPort;
 }
Example #7
0
  /**
   * Builds one split per partition, except for tables with a single partition key
   * column, where partitions sharing the same replica host set are merged into
   * batched {@code IN (...)} splits of at most {@code partitionSizeForBatchSelect + 1}
   * values each.
   */
  private List<ConnectorSplit> getSplitsForPartitions(
      CassandraTableHandle cassTableHandle, List<ConnectorPartition> partitions) {
    String schema = cassTableHandle.getSchemaName();
    String table = cassTableHandle.getTableName();
    HostAddressFactory hostAddressFactory = new HostAddressFactory();
    ImmutableList.Builder<ConnectorSplit> builder = ImmutableList.builder();

    // For single partition key column table, we can merge multiple partitions into a single split
    // by using IN CLAUSE in a single select query if the partitions have the same host list.
    // For multiple partition key columns table, we can't merge them into a single select query, so
    // keep them in a separate split.
    boolean singlePartitionKeyColumn = true;
    String partitionKeyColumnName = null;
    if (!partitions.isEmpty()) {
      // Single-key iff the first partition's tuple domain constrains exactly one column.
      singlePartitionKeyColumn =
          partitions.get(0).getTupleDomain().getNullableColumnDomains().size() == 1;
      if (singlePartitionKeyColumn) {
        // Partition ids appear to have the form "column = value": the '- 1' here drops
        // the space before '=', and the '+ 2' below skips "= " when extracting the value.
        // NOTE(review): this breaks if the id format ever changes — confirm against
        // CassandraPartition.getPartitionId().
        String partitionId = partitions.get(0).getPartitionId();
        partitionKeyColumnName = partitionId.substring(0, partitionId.lastIndexOf('=') - 1);
      }
    }
    // Both maps are keyed by the set of replica host names for a partition:
    // hostsToPartitionKeys accumulates partition key values per host set,
    // hostMap remembers the full HostAddress list for that host set.
    Map<Set<String>, Set<String>> hostsToPartitionKeys = new HashMap<>();
    Map<Set<String>, List<HostAddress>> hostMap = new HashMap<>();

    for (ConnectorPartition partition : partitions) {
      CassandraPartition cassandraPartition =
          checkType(partition, CassandraPartition.class, "partition");
      Set<Host> hosts =
          cassandraSession.getReplicas(schema, cassandraPartition.getKeyAsByteBuffer());
      List<HostAddress> addresses = hostAddressFactory.toHostAddressList(hosts);
      if (singlePartitionKeyColumn) {
        // host ip addresses
        ImmutableSet.Builder<String> sb = ImmutableSet.builder();
        for (HostAddress address : addresses) {
          sb.add(address.getHostText());
        }
        Set<String> hostAddresses = sb.build();
        // partition key values
        Set<String> values = hostsToPartitionKeys.get(hostAddresses);
        if (values == null) {
          values = new HashSet<>();
        }
        String partitionId = cassandraPartition.getPartitionId();
        values.add(partitionId.substring(partitionId.lastIndexOf('=') + 2));
        hostsToPartitionKeys.put(hostAddresses, values);
        hostMap.put(hostAddresses, addresses);
      } else {
        // Multi-key table: one split per partition, no batching possible.
        CassandraSplit split =
            new CassandraSplit(
                connectorId, schema, table, cassandraPartition.getPartitionId(), null, addresses);
        builder.add(split);
      }
    }
    if (singlePartitionKeyColumn) {
      for (Map.Entry<Set<String>, Set<String>> entry : hostsToPartitionKeys.entrySet()) {
        // NOTE(review): partitionSizeForBatchSelect is a value count, used here as a
        // char capacity — harmless, but probably not what was meant.
        StringBuilder sb = new StringBuilder(partitionSizeForBatchSelect);
        int size = 0;
        for (String value : entry.getValue()) {
          if (size > 0) {
            sb.append(",");
          }
          sb.append(value);
          size++;
          // Flush a batch once the count EXCEEDS the threshold, so each emitted
          // split holds up to partitionSizeForBatchSelect + 1 values.
          if (size > partitionSizeForBatchSelect) {
            String partitionId = String.format("%s in (%s)", partitionKeyColumnName, sb.toString());
            CassandraSplit split =
                new CassandraSplit(
                    connectorId, schema, table, partitionId, null, hostMap.get(entry.getKey()));
            builder.add(split);
            size = 0;
            sb.setLength(0);
            sb.trimToSize();
          }
        }
        // Emit the final, possibly short, batch for this host set.
        if (size > 0) {
          String partitionId = String.format("%s in (%s)", partitionKeyColumnName, sb.toString());
          CassandraSplit split =
              new CassandraSplit(
                  connectorId, schema, table, partitionId, null, hostMap.get(entry.getKey()));
          builder.add(split);
        }
      }
    }
    return builder.build();
  }