Example #1
  /**
   * Get the HRegionInfo from the cache or, if not there, from the hbase:meta table. Be careful:
   * this method does an RPC.
   * Do not hold a lock or synchronize when you call this method.
   *
   * @param regionName name of the region to look up
   * @return HRegionInfo for the region
   */
  @SuppressWarnings("deprecation")
  protected HRegionInfo getRegionInfo(final byte[] regionName) {
    String encodedName = HRegionInfo.encodeRegionName(regionName);
    RegionState regionState = getRegionState(encodedName);
    if (regionState != null) {
      return regionState.getRegion();
    }

    try {
      Pair<HRegionInfo, ServerName> p =
          MetaTableAccessor.getRegion(server.getConnection(), regionName);
      HRegionInfo hri = p == null ? null : p.getFirst();
      if (hri != null) {
        createRegionState(hri);
      }
      return hri;
    } catch (IOException e) {
      server.abort(
          "Aborting because error occoured while reading "
              + Bytes.toStringBinary(regionName)
              + " from hbase:meta",
          e);
      return null;
    }
  }
Example #2
 /**
  * @param catalogTracker catalog tracker to use for the catalog scan
  * @param serverName server whose user regions to collect
  * @return List of user regions installed on this server (does not include catalog regions).
  * @throws IOException
  */
 public static NavigableMap<HRegionInfo, Result> getServerUserRegions(
     CatalogTracker catalogTracker, final ServerName serverName) throws IOException {
   final NavigableMap<HRegionInfo, Result> hris = new TreeMap<HRegionInfo, Result>();
   // Fill the above hris map with entries from .META. that have the passed
   // servername.
   CollectingVisitor<Result> v =
       new CollectingVisitor<Result>() {
         @Override
         void add(Result r) {
           if (r == null || r.isEmpty()) return;
           ServerName sn = getServerNameFromCatalogResult(r);
           if (sn != null && sn.equals(serverName)) this.results.add(r);
         }
       };
   fullScan(catalogTracker, v);
   List<Result> results = v.getResults();
   if (results != null && !results.isEmpty()) {
     // Convert results to Map keyed by HRI
     for (Result r : results) {
       Pair<HRegionInfo, ServerName> p = parseCatalogResult(r);
       if (p != null && p.getFirst() != null) hris.put(p.getFirst(), r);
     }
   }
   return hris;
 }
Example #3
  @Override
  public void mutateMany(Map<String, Map<ByteBuffer, KCVMutation>> mutations, StoreTransaction txh)
      throws StorageException {
    final long delTS = System.currentTimeMillis();
    final long putTS = delTS + 1;

    Map<ByteBuffer, Pair<Put, Delete>> commandsPerKey = convertToCommands(mutations, putTS, delTS);
    List<Row> batch = new ArrayList<Row>(commandsPerKey.size()); // actual batch operation

    // convert sorted commands into representation required for 'batch' operation
    for (Pair<Put, Delete> commands : commandsPerKey.values()) {
      if (commands.getFirst() != null) batch.add(commands.getFirst());

      if (commands.getSecond() != null) batch.add(commands.getSecond());
    }

    try {
      HTableInterface table = null;

      try {
        table = connectionPool.getTable(tableName);
        table.batch(batch);
        table.flushCommits();
      } finally {
        IOUtils.closeQuietly(table);
      }
    } catch (IOException e) {
      throw new TemporaryStorageException(e);
    } catch (InterruptedException e) {
      throw new TemporaryStorageException(e);
    }

    waitUntil(putTS);
  }
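The method above stamps deletes at delTS and puts at putTS = delTS + 1, so a put and a delete
applied in the same batch cannot cancel each other, and then blocks until the put timestamp has
passed. A minimal sketch of what waitUntil could look like, assuming it only needs the wall clock
to move past the given timestamp (the actual Titan implementation may differ):

  // Minimal sketch, assuming waitUntil just waits for the wall clock to pass the
  // given timestamp so that later mutations get strictly newer timestamps.
  private static void waitUntil(long untilTimeMillis) {
    long now = System.currentTimeMillis();
    while (now <= untilTimeMillis) {
      try {
        Thread.sleep(untilTimeMillis - now + 1);
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt(); // preserve interrupt status and stop waiting
        return;
      }
      now = System.currentTimeMillis();
    }
  }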
Example #4
 @Override
 public ByteBuffer getValueShallowCopy() {
   currentBuffer.asSubByteBuffer(current.valueOffset, current.valueLength, tmpPair);
   ByteBuffer dup = tmpPair.getFirst().duplicate();
   dup.position(tmpPair.getSecond());
   dup.limit(tmpPair.getSecond() + current.valueLength);
   return dup.slice();
 }
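The duplicate/position/limit/slice sequence above is the standard zero-copy idiom for exposing a
sub-range of a ByteBuffer without disturbing the source buffer's own cursor. A self-contained
illustration (class and variable names here are ours, not HBase's):

  import java.nio.ByteBuffer;

  public class SliceDemo {
    static ByteBuffer subView(ByteBuffer src, int offset, int length) {
      ByteBuffer dup = src.duplicate(); // shares the bytes, but has independent position/limit
      dup.position(offset);
      dup.limit(offset + length);
      return dup.slice();               // a view whose position 0 maps to src[offset]
    }

    public static void main(String[] args) {
      ByteBuffer buf = ByteBuffer.wrap("hello world".getBytes());
      ByteBuffer view = subView(buf, 6, 5);
      byte[] out = new byte[view.remaining()];
      view.get(out);
      System.out.println(new String(out)); // prints "world"; buf's position is still 0
    }
  }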
Example #5
 private List<HRegionInfo> getListOfHRegionInfos(final List<Pair<HRegionInfo, ServerName>> pairs) {
   if (pairs == null || pairs.isEmpty()) return null;
   List<HRegionInfo> result = new ArrayList<HRegionInfo>(pairs.size());
   for (Pair<HRegionInfo, ServerName> pair : pairs) {
     result.add(pair.getFirst());
   }
   return result;
 }
Example #6
 private void addANDColsToFinalList(FilterList filterList) {
   for (Entry<Column, Pair<Value, Value>> entry : colWithOperators.entrySet()) {
     Pair<Value, Value> value = entry.getValue();
     if (value.getFirst() != null && value.getSecond() != null) {
       // Here we are introducing a new Filter
       SingleColumnRangeFilter rangeFltr =
           new SingleColumnRangeFilter(
               entry.getKey().getFamily(),
               entry.getKey().getQualifier(),
               entry.getKey().getValuePartition(),
               value.getFirst().getValue(),
               value.getFirst().getOperator(),
               value.getSecond().getValue(),
               value.getSecond().getOperator());
       filterList.addFilter(rangeFltr);
     } else if (value.getFirst() != null) {
       if (value.getFirst().getOperator() == CompareOp.EQUAL) {
         filterList.addFilter(value.getFirst().getFilter());
       } else {
         SingleColumnRangeFilter rangeFltr =
             new SingleColumnRangeFilter(
                 entry.getKey().getFamily(),
                 entry.getKey().getQualifier(),
                 entry.getKey().getValuePartition(),
                 value.getFirst().getValue(),
                 value.getFirst().getOperator(),
                 null,
                 null);
         filterList.addFilter(rangeFltr);
       }
     }
   }
 }
Example #7
  // TODO consider parallelizing these operations since they are independent. Right now it's just
  // easier to keep them serial, though.
  @Override
  public void snapshotRegions(List<Pair<HRegionInfo, ServerName>> regionsAndLocations)
      throws IOException, KeeperException {
    try {
      timeoutInjector.start();

      // 1. get all the regions hosting this table.

      // extract each pair to separate lists
      Set<HRegionInfo> regions = new HashSet<HRegionInfo>();
      for (Pair<HRegionInfo, ServerName> p : regionsAndLocations) {
        regions.add(p.getFirst());
      }

      // 2. for each region, write all the info to disk
      String msg =
          "Starting to write region info and WALs for regions for offline snapshot:"
              + ClientSnapshotDescriptionUtils.toString(snapshot);
      LOG.info(msg);
      status.setStatus(msg);

      ThreadPoolExecutor exec = SnapshotManifest.createExecutor(conf, "DisabledTableSnapshot");
      try {
        ModifyRegionUtils.editRegions(
            exec,
            regions,
            new ModifyRegionUtils.RegionEditTask() {
              @Override
              public void editRegion(final HRegionInfo regionInfo) throws IOException {
                snapshotManifest.addRegion(FSUtils.getTableDir(rootDir, snapshotTable), regionInfo);
              }
            });
      } finally {
        exec.shutdown();
      }
    } catch (Exception e) {
      // make sure we capture the exception to propagate back to the client later
      String reason =
          "Failed snapshot "
              + ClientSnapshotDescriptionUtils.toString(snapshot)
              + " due to exception:"
              + e.getMessage();
      ForeignException ee = new ForeignException(reason, e);
      monitor.receive(ee);
      status.abort("Snapshot of table: " + snapshotTable + " failed because " + e.getMessage());
    } finally {
      LOG.debug(
          "Marking snapshot" + ClientSnapshotDescriptionUtils.toString(snapshot) + " as finished.");

      // 3. mark the timer as finished - even if we got an exception, we don't need to time the
      // operation any further
      timeoutInjector.complete();
    }
  }
Example #8
    private ReplicateWALEntryResponse replayToServer(List<Entry> entries, int timeout)
        throws IOException {
      // Check whether we should still replay this entry. If the region has changed, or the
      // entry is not coming from the primary region, filter it out because we do not need it.
      // Regions can change because of (1) region split, (2) region merge, or (3) table re-creation.
      boolean skip = false;

      if (!Bytes.equals(
          location.getRegionInfo().getEncodedNameAsBytes(), initialEncodedRegionName)) {
        skip = true;
      }
      if (!entries.isEmpty() && !skip) {
        Entry[] entriesArray = new Entry[entries.size()];
        entriesArray = entries.toArray(entriesArray);

        // set the region name for the target region replica
        Pair<AdminProtos.ReplicateWALEntryRequest, CellScanner> p =
            ReplicationProtbufUtil.buildReplicateWALEntryRequest(
                entriesArray, location.getRegionInfo().getEncodedNameAsBytes(), null, null, null);
        try {
          PayloadCarryingRpcController controller =
              rpcControllerFactory.newController(p.getSecond());
          controller.setCallTimeout(timeout);
          controller.setPriority(tableName);
          return stub.replay(controller, p.getFirst());
        } catch (ServiceException se) {
          throw ProtobufUtil.getRemoteException(se);
        }
      }

      if (skip) {
        if (LOG.isTraceEnabled()) {
          LOG.trace(
              "Skipping "
                  + entries.size()
                  + " entries in table "
                  + tableName
                  + " because located region "
                  + location.getRegionInfo().getEncodedName()
                  + " is different than the original region "
                  + Bytes.toStringBinary(initialEncodedRegionName)
                  + " from WALEdit");
          for (Entry entry : entries) {
            LOG.trace("Skipping : " + entry);
          }
        }
        skippedEntries.addAndGet(entries.size());
      }
      return ReplicateWALEntryResponse.newBuilder().build();
    }
Example #9
 /**
  * Checks if the given region has a merge qualifier in hbase:meta.
  *
  * @param services region server services, used to reach hbase:meta
  * @param regionName name of the specified region
  * @return true if the given region has a merge qualifier in hbase:meta (it will be cleaned by
  *     CatalogJanitor)
  * @throws IOException
  */
 boolean hasMergeQualifierInMeta(final RegionServerServices services, final byte[] regionName)
     throws IOException {
   if (services == null) return false;
   // Get merge regions if it is a merged region and already has merge
   // qualifier
   Pair<HRegionInfo, HRegionInfo> mergeRegions =
       MetaTableAccessor.getRegionsFromMergeQualifier(services.getConnection(), regionName);
   if (mergeRegions != null
       && (mergeRegions.getFirst() != null || mergeRegions.getSecond() != null)) {
     // It has merge qualifier
     return true;
   }
   return false;
 }
Example #10
 @Override
 public SingleByteBuff put(int offset, ByteBuff src, int srcOffset, int length) {
   if (src instanceof SingleByteBuff) {
     ByteBufferUtils.copyFromBufferToBuffer(
         ((SingleByteBuff) src).buf, this.buf, srcOffset, offset, length);
   } else {
      // TODO: can we do some optimization here? The call to asSubByteBuffer might
      // create a copy.
     Pair<ByteBuffer, Integer> pair = new Pair<ByteBuffer, Integer>();
     src.asSubByteBuffer(srcOffset, length, pair);
     ByteBufferUtils.copyFromBufferToBuffer(
         pair.getFirst(), this.buf, pair.getSecond(), offset, length);
   }
   return this;
 }
Example #11
 @Override
 public RowFilter adapt(FilterAdapterContext context, FuzzyRowFilter filter) throws IOException {
   Interleave.Builder interleaveBuilder = Interleave.newBuilder();
   List<Pair<byte[], byte[]>> pairs = extractFuzzyRowFilterPairs(filter);
   if (pairs.isEmpty()) {
     return ALL_VALUES_FILTER;
   }
   for (Pair<byte[], byte[]> pair : pairs) {
     Preconditions.checkArgument(
         pair.getFirst().length == pair.getSecond().length,
         "Fuzzy info and match mask must have the same length");
     interleaveBuilder.addFilters(createSingleRowFilter(pair.getFirst(), pair.getSecond()));
   }
   return RowFilter.newBuilder().setInterleave(interleaveBuilder).build();
 }
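For context, each pair consumed above carries the fuzzy row bytes and a same-length mask. In the
classic HBase FuzzyRowFilter convention a mask byte of 0 marks a position that must match and 1
marks a wildcard; the values below are ours, for illustration only:

  // Match 4-byte row keys whose last byte is '1'; the first three bytes are wildcards.
  // Mask convention assumed from the classic FuzzyRowFilter API: 0 = fixed, 1 = any.
  byte[] fuzzyInfo = new byte[] {0, 0, 0, '1'};
  byte[] mask = new byte[] {1, 1, 1, 0};
  Pair<byte[], byte[]> pair = new Pair<byte[], byte[]>(fuzzyInfo, mask);
  // The equal-length requirement is exactly what the Preconditions check above enforces.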
Example #12
 private List<ReplicationPeer> listValidReplicationPeers() {
   Map<String, ReplicationPeerConfig> peers = listPeerConfigs();
   if (peers == null || peers.size() <= 0) {
     return null;
   }
   List<ReplicationPeer> validPeers = new ArrayList<ReplicationPeer>(peers.size());
   for (Entry<String, ReplicationPeerConfig> peerEntry : peers.entrySet()) {
     String peerId = peerEntry.getKey();
     String clusterKey = peerEntry.getValue().getClusterKey();
     Configuration peerConf = new Configuration(this.connection.getConfiguration());
     Stat s = null;
     try {
       ZKUtil.applyClusterKeyToConf(peerConf, clusterKey);
       Pair<ReplicationPeerConfig, Configuration> pair = this.replicationPeers.getPeerConf(peerId);
       ReplicationPeer peer = new ReplicationPeerZKImpl(peerConf, peerId, pair.getFirst());
       s =
           zkw.getRecoverableZooKeeper()
               .exists(peerConf.get(HConstants.ZOOKEEPER_ZNODE_PARENT), null);
       if (null == s) {
         LOG.info(peerId + ' ' + clusterKey + " is invalid now.");
         continue;
       }
       validPeers.add(peer);
     } catch (ReplicationException e) {
       LOG.warn(
           "Failed to get valid replication peers. "
               + "Error connecting to peer cluster with peerId="
               + peerId);
       LOG.debug("Failure details to get valid replication peers.", e);
       continue;
     } catch (KeeperException e) {
       LOG.warn(
           "Failed to get valid replication peers. KeeperException code=" + e.code().intValue());
       LOG.debug("Failure details to get valid replication peers.", e);
       continue;
     } catch (InterruptedException e) {
       LOG.warn("Failed to get valid replication peers due to InterruptedException.");
       LOG.debug("Failure details to get valid replication peers.", e);
       Thread.currentThread().interrupt();
       continue;
     } catch (IOException e) {
       LOG.warn("Failed to get valid replication peers due to IOException.");
       LOG.debug("Failure details to get valid replication peers.", e);
       continue;
     }
   }
   return validPeers;
 }
Example #13
 @Test
 public void testMergeWithReplicas() throws Exception {
   final TableName tableName = TableName.valueOf("testMergeWithReplicas");
   // Create table and load data.
   createTableAndLoadData(master, tableName, 5, 2);
   List<Pair<HRegionInfo, ServerName>> initialRegionToServers =
       MetaTableAccessor.getTableRegionsAndLocations(master.getConnection(), tableName);
   // Merge 1st and 2nd region
   PairOfSameType<HRegionInfo> mergedRegions =
       mergeRegionsAndVerifyRegionNum(master, tableName, 0, 2, 5 * 2 - 2);
   List<Pair<HRegionInfo, ServerName>> currentRegionToServers =
       MetaTableAccessor.getTableRegionsAndLocations(master.getConnection(), tableName);
   List<HRegionInfo> initialRegions = new ArrayList<HRegionInfo>();
   for (Pair<HRegionInfo, ServerName> p : initialRegionToServers) {
     initialRegions.add(p.getFirst());
   }
   List<HRegionInfo> currentRegions = new ArrayList<HRegionInfo>();
   for (Pair<HRegionInfo, ServerName> p : currentRegionToServers) {
     currentRegions.add(p.getFirst());
   }
   assertTrue(initialRegions.contains(mergedRegions.getFirst())); // this is the first region
   assertTrue(
       initialRegions.contains(
           RegionReplicaUtil.getRegionInfoForReplica(
               mergedRegions.getFirst(), 1))); // this is the replica of the first region
   assertTrue(initialRegions.contains(mergedRegions.getSecond())); // this is the second region
   assertTrue(
       initialRegions.contains(
           RegionReplicaUtil.getRegionInfoForReplica(
               mergedRegions.getSecond(), 1))); // this is the replica of the second region
   assertTrue(!initialRegions.contains(currentRegions.get(0))); // this is the new region
   assertTrue(
       !initialRegions.contains(
           RegionReplicaUtil.getRegionInfoForReplica(
               currentRegions.get(0), 1))); // replica of the new region
   assertTrue(
       currentRegions.contains(
           RegionReplicaUtil.getRegionInfoForReplica(
               currentRegions.get(0), 1))); // replica of the new region
   assertTrue(
       !currentRegions.contains(
           RegionReplicaUtil.getRegionInfoForReplica(
               mergedRegions.getFirst(), 1))); // replica of the merged region
   assertTrue(
       !currentRegions.contains(
           RegionReplicaUtil.getRegionInfoForReplica(
               mergedRegions.getSecond(), 1))); // replica of the merged region
 }
Example #14
  public static List<Pair<byte[], ServerName>> getStartKeysAndLocations(
      HMaster master, String tableName) throws IOException, InterruptedException {

    List<Pair<HRegionInfo, ServerName>> tableRegionsAndLocations =
        MetaReader.getTableRegionsAndLocations(
            master.getCatalogTracker(), TableName.valueOf(tableName));
    List<Pair<byte[], ServerName>> startKeyAndLocationPairs =
        new ArrayList<Pair<byte[], ServerName>>(tableRegionsAndLocations.size());
    Pair<byte[], ServerName> startKeyAndLocation = null;
    for (Pair<HRegionInfo, ServerName> regionAndLocation : tableRegionsAndLocations) {
      startKeyAndLocation =
          new Pair<byte[], ServerName>(
              regionAndLocation.getFirst().getStartKey(), regionAndLocation.getSecond());
      startKeyAndLocationPairs.add(startKeyAndLocation);
    }
    return startKeyAndLocationPairs;
  }
Example #15
 @Test
 public void testParseCreateTablePrimaryKeyConstraintWithOrder() throws Exception {
   for (String order : new String[] {"asc", "desc"}) {
     String s =
         "create table core.entity_history_archive (id CHAR(15), name VARCHAR(150), constraint pk primary key (id ${o}, name ${o}))"
             .replace("${o}", order);
     CreateTableStatement stmt =
         (CreateTableStatement) new SQLParser(new StringReader(s)).parseStatement();
     PrimaryKeyConstraint pkConstraint = stmt.getPrimaryKeyConstraint();
     List<Pair<ColumnName, SortOrder>> columns = pkConstraint.getColumnNames();
     assertEquals(2, columns.size());
     for (Pair<ColumnName, SortOrder> pair : columns) {
       assertEquals(
           SortOrder.fromDDLValue(order), pkConstraint.getColumn(pair.getFirst()).getSecond());
     }
   }
 }
Example #16
 @Override
 public MetaDataMutationResult getFunctions(
     PName tenantId, List<Pair<byte[], Long>> functionNameAndTimeStampPairs, long clientTimestamp)
     throws SQLException {
   List<PFunction> functions = new ArrayList<PFunction>(functionNameAndTimeStampPairs.size());
   for (Pair<byte[], Long> functionInfo : functionNameAndTimeStampPairs) {
     try {
       PFunction function2 =
           metaData.getFunction(new PTableKey(tenantId, Bytes.toString(functionInfo.getFirst())));
       functions.add(function2);
     } catch (FunctionNotFoundException e) {
       return new MetaDataMutationResult(MutationCode.FUNCTION_NOT_FOUND, 0, null);
     }
   }
   if (functions.isEmpty()) {
     return null;
   }
   return new MetaDataMutationResult(MutationCode.FUNCTION_ALREADY_EXISTS, 0, functions, true);
 }
Example #17
  @Test
  @SuppressWarnings("deprecation")
  public void testMasterOpsWhileSplitting() throws Exception {
    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
    HMaster m = cluster.getMaster();

    try (Table ht = TEST_UTIL.createTable(TABLENAME, FAMILYNAME)) {
      assertTrue(m.getTableStateManager().isTableState(TABLENAME, TableState.State.ENABLED));
      TEST_UTIL.loadTable(ht, FAMILYNAME, false);
    }

    List<Pair<HRegionInfo, ServerName>> tableRegions =
        MetaTableAccessor.getTableRegionsAndLocations(m.getConnection(), TABLENAME);
    LOG.info("Regions after load: " + Joiner.on(',').join(tableRegions));
    assertEquals(1, tableRegions.size());
    assertArrayEquals(HConstants.EMPTY_START_ROW, tableRegions.get(0).getFirst().getStartKey());
    assertArrayEquals(HConstants.EMPTY_END_ROW, tableRegions.get(0).getFirst().getEndKey());

    // Now trigger a split and stop when the split is in progress
    LOG.info("Splitting table");
    TEST_UTIL.getAdmin().split(TABLENAME);
    LOG.info("Waiting for split result to be about to open");
    RegionStates regionStates = m.getAssignmentManager().getRegionStates();
    while (regionStates.getRegionsOfTable(TABLENAME).size() <= 1) {
      Thread.sleep(100);
    }
    LOG.info("Making sure we can call getTableRegions while opening");
    tableRegions =
        MetaTableAccessor.getTableRegionsAndLocations(m.getConnection(), TABLENAME, false);

    LOG.info("Regions: " + Joiner.on(',').join(tableRegions));
    // We have three regions because one is split-in-progress
    assertEquals(3, tableRegions.size());
    LOG.info("Making sure we can call getTableRegionClosest while opening");
    Pair<HRegionInfo, ServerName> pair = m.getTableRegionForRow(TABLENAME, Bytes.toBytes("cde"));
    LOG.info("Result is: " + pair);
    Pair<HRegionInfo, ServerName> tableRegionFromName =
        MetaTableAccessor.getRegion(m.getConnection(), pair.getFirst().getRegionName());
    assertEquals(tableRegionFromName.getFirst(), pair.getFirst());
  }
Example #18
  public static boolean checkForColocation(HMaster master, String tableName, String indexTableName)
      throws IOException, InterruptedException {
    List<Pair<byte[], ServerName>> uTableStartKeysAndLocations =
        getStartKeysAndLocations(master, tableName);
    List<Pair<byte[], ServerName>> iTableStartKeysAndLocations =
        getStartKeysAndLocations(master, indexTableName);

    boolean regionsColocated = true;
    if (uTableStartKeysAndLocations.size() != iTableStartKeysAndLocations.size()) {
      regionsColocated = false;
    } else {
      for (int i = 0; i < uTableStartKeysAndLocations.size(); i++) {
        Pair<byte[], ServerName> uStartKeyAndLocation = uTableStartKeysAndLocations.get(i);
        Pair<byte[], ServerName> iStartKeyAndLocation = iTableStartKeysAndLocations.get(i);

        if (Bytes.compareTo(uStartKeyAndLocation.getFirst(), iStartKeyAndLocation.getFirst())
            == 0) {
          if (uStartKeyAndLocation.getSecond().equals(iStartKeyAndLocation.getSecond())) {
            continue;
          }
        }
        regionsColocated = false;
      }
    }
    return regionsColocated;
  }
Example #19
 public RegionServerAccounting(Configuration conf) {
   this.conf = conf;
   Pair<Long, MemoryType> globalMemstoreSizePair = MemorySizeUtil.getGlobalMemstoreSize(conf);
   this.globalMemStoreLimit = globalMemstoreSizePair.getFirst();
   this.memType = globalMemstoreSizePair.getSecond();
   this.globalMemStoreLimitLowMarkPercent =
       MemorySizeUtil.getGlobalMemStoreHeapLowerMark(conf, this.memType == MemoryType.HEAP);
   // When off heap memstore in use we configure the global off heap space for memstore as bytes
   // not as % of max memory size. In such case, the lower water mark should be specified using the
   // key "hbase.regionserver.global.memstore.size.lower.limit" which says % of the global upper
   // bound and defaults to 95%. In on heap case also specifying this way is ideal. But in the past
   // we used to take lower bound also as the % of xmx (38% as default). For backward compatibility
   // for this deprecated config,we will fall back to read that config when new one is missing.
   // Only for on heap case, do this fallback mechanism. For off heap it makes no sense.
   // TODO When to get rid of the deprecated config? ie
   // "hbase.regionserver.global.memstore.lowerLimit". Can get rid of this boolean passing then.
   this.globalMemStoreLimitLowMark =
       (long) (this.globalMemStoreLimit * this.globalMemStoreLimitLowMarkPercent);
   this.globalOnHeapMemstoreLimit = MemorySizeUtil.getOnheapGlobalMemstoreSize(conf);
   this.globalOnHeapMemstoreLimitLowMark =
       (long) (this.globalOnHeapMemstoreLimit * this.globalMemStoreLimitLowMarkPercent);
 }
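As a worked example of the arithmetic above (numbers are illustrative, not values read from conf):
with a 4 GiB global memstore limit and the default lower-mark fraction of 0.95 mentioned in the
comment, flushes triggered at the upper bound aim to drain memstores back down to roughly 3.8 GiB:

  long globalMemStoreLimit = 4L * 1024 * 1024 * 1024;          // 4 GiB upper bound
  float lowMarkPercent = 0.95f;                                 // default lower-mark fraction
  long lowMark = (long) (globalMemStoreLimit * lowMarkPercent); // ~3.8 GiB drain target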
Example #20
 static GetResponse doMetaGetResponse(
     final SortedMap<byte[], Pair<HRegionInfo, ServerName>> meta, final GetRequest request) {
   ClientProtos.Result.Builder resultBuilder = ClientProtos.Result.newBuilder();
   ByteString row = request.getGet().getRow();
   Pair<HRegionInfo, ServerName> p = meta.get(row.toByteArray());
   if (p == null) {
     if (request.getGet().getClosestRowBefore()) {
       byte[] bytes = row.toByteArray();
        SortedMap<byte[], Pair<HRegionInfo, ServerName>> head =
            bytes != null ? meta.headMap(bytes) : meta;
        p = head.isEmpty() ? null : head.get(head.lastKey());
     }
   }
   if (p != null) {
     resultBuilder.addCell(getRegionInfo(row, p.getFirst()));
     resultBuilder.addCell(getServer(row, p.getSecond()));
   }
   resultBuilder.addCell(getStartCode(row));
   GetResponse.Builder builder = GetResponse.newBuilder();
   builder.setResult(resultBuilder.build());
   return builder.build();
 }
Example #21
    /**
     * Check if the server should be considered bad. Cleans expired entries out of the list as it
     * scans.
     *
     * @return true if the server is in the failed servers list
     */
    public synchronized boolean isFailedServer(final InetSocketAddress address) {
      if (failedServers.isEmpty()) {
        return false;
      }

      final String lookup = address.toString();
      final long now = EnvironmentEdgeManager.currentTimeMillis();

      // iterate, looking for the search entry and cleaning expired entries
      Iterator<Pair<Long, String>> it = failedServers.iterator();
      while (it.hasNext()) {
        Pair<Long, String> cur = it.next();
        if (cur.getFirst() < now) {
          it.remove();
        } else {
          if (lookup.equals(cur.getSecond())) {
            return true;
          }
        }
      }

      return false;
    }
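The expire-while-scanning pattern above keeps the failed-server list self-cleaning without a
background task. A minimal self-contained sketch of the same pattern, assuming entries pair an
expiry timestamp with the address string (the class and constant names are ours, not HBase's):

  import java.net.InetSocketAddress;
  import java.util.Iterator;
  import java.util.LinkedList;

  final class FailedServerList {
    private static final long RETRY_WINDOW_MS = 2000; // assumed quarantine window

    private static final class Entry {
      final long expiresAt;
      final String address;

      Entry(long expiresAt, String address) {
        this.expiresAt = expiresAt;
        this.address = address;
      }
    }

    private final LinkedList<Entry> failed = new LinkedList<Entry>();

    synchronized void add(InetSocketAddress address) {
      failed.add(new Entry(System.currentTimeMillis() + RETRY_WINDOW_MS, address.toString()));
    }

    synchronized boolean isFailed(InetSocketAddress address) {
      String lookup = address.toString();
      long now = System.currentTimeMillis();
      Iterator<Entry> it = failed.iterator();
      while (it.hasNext()) {
        Entry cur = it.next();
        if (cur.expiresAt < now) {
          it.remove(); // expired entry: drop it as we scan
        } else if (lookup.equals(cur.address)) {
          return true;
        }
      }
      return false;
    }
  }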
Example #22
  @Test
  public void testGetRegion() throws IOException, InterruptedException {
    final String name = "testGetRegion";
    LOG.info("Started " + name);
    final byte[] nameBytes = Bytes.toBytes(name);
    HTable t = UTIL.createTable(nameBytes, HConstants.CATALOG_FAMILY);
    int regionCount = UTIL.createMultiRegions(t, HConstants.CATALOG_FAMILY);

    // Test it works getting a region from user table.
    List<HRegionInfo> regions = MetaReader.getTableRegions(ct, nameBytes);
    assertEquals(regionCount, regions.size());
    Pair<HRegionInfo, HServerAddress> pair =
        MetaReader.getRegion(ct, regions.get(0).getRegionName());
    assertEquals(regions.get(0).getEncodedName(), pair.getFirst().getEncodedName());
    // Test get on non-existent region.
    pair = MetaReader.getRegion(ct, Bytes.toBytes("nonexistent-region"));
    assertNull(pair);
    // Test it works getting a region from meta/root.
    pair = MetaReader.getRegion(ct, HRegionInfo.FIRST_META_REGIONINFO.getRegionName());
    assertEquals(
        HRegionInfo.FIRST_META_REGIONINFO.getEncodedName(), pair.getFirst().getEncodedName());
    LOG.info("Finished " + name);
  }
Example #23
  /**
   * Initialize PhoenixPigConfiguration if it is null. Called by {@link #setLocation} and {@link
   * #getSchema}
   *
   * @param location the Pig load location string
   * @param configuration the Hadoop configuration
   * @throws PigException
   */
  private void initializePhoenixPigConfiguration(
      final String location, final Configuration configuration) throws PigException {
    if (this.config != null) {
      return;
    }
    this.config = new PhoenixPigConfiguration(configuration);
    this.config.setServerName(this.zkQuorum);
    Pair<String, String> pair = null;
    try {
      if (location.startsWith(PHOENIX_TABLE_NAME_SCHEME)) {
        String tableSchema = location.substring(PHOENIX_TABLE_NAME_SCHEME.length());
        final TableSchemaParserFunction parseFunction = new TableSchemaParserFunction();
        pair = parseFunction.apply(tableSchema);
        this.config.setSchemaType(SchemaType.TABLE);
      } else if (location.startsWith(PHOENIX_QUERY_SCHEME)) {
        this.selectQuery = location.substring(PHOENIX_QUERY_SCHEME.length());
        final QuerySchemaParserFunction queryParseFunction =
            new QuerySchemaParserFunction(this.config);
        pair = queryParseFunction.apply(this.selectQuery);
        config.setSelectStatement(this.selectQuery);
        this.config.setSchemaType(SchemaType.QUERY);
      }
      if (pair == null) {
        // Location matched neither supported scheme; show usage instead of hitting an NPE below.
        printUsage(location);
        return;
      }
      this.tableName = pair.getFirst();
      final String selectedColumns = pair.getSecond();

      if (isEmpty(this.tableName) && isEmpty(this.selectQuery)) {
        printUsage(location);
      }
      this.config.setTableName(this.tableName);
      if (!isEmpty(selectedColumns)) {
        this.config.setSelectColumns(selectedColumns);
      }
    } catch (IllegalArgumentException iae) {
      printUsage(location);
    }
  }
Example #24
 private List<CompactionRequest> requestCompactionInternal(
     final HRegion r,
     final String why,
     int p,
     List<Pair<CompactionRequest, Store>> requests,
     boolean selectNow)
     throws IOException {
   // not a special compaction request, so make our own list
   List<CompactionRequest> ret = null;
   if (requests == null) {
     ret = selectNow ? new ArrayList<CompactionRequest>(r.getStores().size()) : null;
     for (Store s : r.getStores().values()) {
       CompactionRequest cr = requestCompactionInternal(r, s, why, p, null, selectNow);
       if (selectNow) ret.add(cr);
     }
   } else {
     Preconditions.checkArgument(selectNow); // only system requests have selectNow == false
     ret = new ArrayList<CompactionRequest>(requests.size());
     for (Pair<CompactionRequest, Store> pair : requests) {
       ret.add(requestCompaction(r, pair.getSecond(), why, p, pair.getFirst()));
     }
   }
   return ret;
 }
Example #25
  /**
   * Convert Titan internal Mutation representation into HBase native commands.
   *
   * @param mutations Mutations to convert into HBase commands.
   * @param putTimestamp The timestamp to use for Put commands.
   * @param delTimestamp The timestamp to use for Delete commands.
   * @return Commands sorted by key converted from Titan internal representation.
   */
  private static Map<ByteBuffer, Pair<Put, Delete>> convertToCommands(
      Map<String, Map<ByteBuffer, KCVMutation>> mutations,
      final long putTimestamp,
      final long delTimestamp) {
    Map<ByteBuffer, Pair<Put, Delete>> commandsPerKey =
        new HashMap<ByteBuffer, Pair<Put, Delete>>();

    for (Map.Entry<String, Map<ByteBuffer, KCVMutation>> entry : mutations.entrySet()) {
      byte[] cfName = entry.getKey().getBytes();

      for (Map.Entry<ByteBuffer, KCVMutation> m : entry.getValue().entrySet()) {
        ByteBuffer key = m.getKey();
        KCVMutation mutation = m.getValue();

        Pair<Put, Delete> commands = commandsPerKey.get(key);

        if (commands == null) {
          commands = new Pair<Put, Delete>();
          commandsPerKey.put(key, commands);
        }

        if (mutation.hasDeletions()) {
          if (commands.getSecond() == null)
            commands.setSecond(new Delete(ByteBufferUtil.getArray(key), delTimestamp, null));

          for (ByteBuffer b : mutation.getDeletions()) {
            commands.getSecond().deleteColumns(cfName, ByteBufferUtil.getArray(b), delTimestamp);
          }
        }

        if (mutation.hasAdditions()) {
          if (commands.getFirst() == null)
            commands.setFirst(new Put(ByteBufferUtil.getArray(key), putTimestamp));

          for (Entry e : mutation.getAdditions()) {
            commands
                .getFirst()
                .add(
                    cfName,
                    ByteBufferUtil.getArray(e.getColumn()),
                    putTimestamp,
                    ByteBufferUtil.getArray(e.getValue()));
          }
        }
      }
    }

    return commandsPerKey;
  }
Example #26
  public MutationState createTable(CreateTableStatement statement, byte[][] splits)
      throws SQLException {
    PTableType tableType = statement.getTableType();
    boolean isView = tableType == PTableType.VIEW;
    if (isView && !statement.getProps().isEmpty()) {
      throw new SQLExceptionInfo.Builder(SQLExceptionCode.VIEW_WITH_TABLE_CONFIG)
          .build()
          .buildException();
    }
    connection.rollback();
    boolean wasAutoCommit = connection.getAutoCommit();
    try {
      connection.setAutoCommit(false);
      TableName tableNameNode = statement.getTableName();
      String schemaName = tableNameNode.getSchemaName();
      String tableName = tableNameNode.getTableName();

      PrimaryKeyConstraint pkConstraint = statement.getPrimaryKeyConstraint();
      String pkName = null;
      Set<String> pkColumns = Collections.<String>emptySet();
      Iterator<String> pkColumnsIterator = Iterators.emptyIterator();
      if (pkConstraint != null) {
        pkColumns = pkConstraint.getColumnNames();
        pkColumnsIterator = pkColumns.iterator();
        pkName = pkConstraint.getName();
      }

      List<ColumnDef> colDefs = statement.getColumnDefs();
      List<PColumn> columns = Lists.newArrayListWithExpectedSize(colDefs.size());
      PreparedStatement colUpsert = connection.prepareStatement(INSERT_COLUMN);
      int columnOrdinal = 0;
      Map<String, PName> familyNames = Maps.newLinkedHashMap();
      boolean isPK = false;
      for (ColumnDef colDef : colDefs) {
        if (colDef.isPK()) {
          if (isPK) {
            throw new SQLExceptionInfo.Builder(SQLExceptionCode.PRIMARY_KEY_ALREADY_EXISTS)
                .setColumnName(colDef.getColumnDefName().getColumnName().getName())
                .build()
                .buildException();
          }
          isPK = true;
        }
        PColumn column = newColumn(columnOrdinal++, colDef, pkConstraint);
        if (SchemaUtil.isPKColumn(column)) {
          // TODO: remove this constraint?
          if (!pkColumns.isEmpty()
              && !column.getName().getString().equals(pkColumnsIterator.next())) {
            throw new SQLExceptionInfo.Builder(SQLExceptionCode.PRIMARY_KEY_OUT_OF_ORDER)
                .setSchemaName(schemaName)
                .setTableName(tableName)
                .setColumnName(column.getName().getString())
                .build()
                .buildException();
          }
        }
        columns.add(column);
        if (colDef.getDataType() == PDataType.BINARY && colDefs.size() > 1) {
          throw new SQLExceptionInfo.Builder(SQLExceptionCode.BINARY_IN_ROW_KEY)
              .setSchemaName(schemaName)
              .setTableName(tableName)
              .setColumnName(column.getName().getString())
              .build()
              .buildException();
        }
        if (column.getFamilyName() != null) {
          familyNames.put(column.getFamilyName().getString(), column.getFamilyName());
        }
      }
      if (!isPK && pkColumns.isEmpty()) {
        throw new SQLExceptionInfo.Builder(SQLExceptionCode.PRIMARY_KEY_MISSING)
            .setSchemaName(schemaName)
            .setTableName(tableName)
            .build()
            .buildException();
      }

      List<Pair<byte[], Map<String, Object>>> familyPropList =
          Lists.newArrayListWithExpectedSize(familyNames.size());
      Map<String, Object> commonFamilyProps = Collections.emptyMap();
      Map<String, Object> tableProps = Collections.emptyMap();
      if (!statement.getProps().isEmpty()) {
        if (statement.isView()) {
          throw new SQLExceptionInfo.Builder(SQLExceptionCode.VIEW_WITH_PROPERTIES)
              .build()
              .buildException();
        }
        for (String familyName : statement.getProps().keySet()) {
          if (!familyName.equals(QueryConstants.ALL_FAMILY_PROPERTIES_KEY)) {
            if (familyNames.get(familyName) == null) {
              throw new SQLExceptionInfo.Builder(SQLExceptionCode.PROPERTIES_FOR_FAMILY)
                  .setFamilyName(familyName)
                  .build()
                  .buildException();
            }
          }
        }
        commonFamilyProps = Maps.newHashMapWithExpectedSize(statement.getProps().size());
        tableProps = Maps.newHashMapWithExpectedSize(statement.getProps().size());

        Collection<Pair<String, Object>> props =
            statement.getProps().get(QueryConstants.ALL_FAMILY_PROPERTIES_KEY);
        // Somewhat hacky way of determining if property is for HColumnDescriptor or
        // HTableDescriptor
        HColumnDescriptor defaultDescriptor =
            new HColumnDescriptor(QueryConstants.DEFAULT_COLUMN_FAMILY_BYTES);
        for (Pair<String, Object> prop : props) {
          if (defaultDescriptor.getValue(prop.getFirst()) != null) {
            commonFamilyProps.put(prop.getFirst(), prop.getSecond());
          } else {
            tableProps.put(prop.getFirst(), prop.getSecond());
          }
        }
      }

      for (PName familyName : familyNames.values()) {
        Collection<Pair<String, Object>> props = statement.getProps().get(familyName.getString());
        if (props.isEmpty()) {
          familyPropList.add(
              new Pair<byte[], Map<String, Object>>(familyName.getBytes(), commonFamilyProps));
        } else {
          Map<String, Object> combinedFamilyProps =
              Maps.newHashMapWithExpectedSize(props.size() + commonFamilyProps.size());
          combinedFamilyProps.putAll(commonFamilyProps);
          for (Pair<String, Object> prop : props) {
            combinedFamilyProps.put(prop.getFirst(), prop.getSecond());
          }
          familyPropList.add(
              new Pair<byte[], Map<String, Object>>(familyName.getBytes(), combinedFamilyProps));
        }
      }

      // Bootstrapping for our SYSTEM.TABLE that creates itself before it exists
      if (tableType == PTableType.SYSTEM) {
        PTable table =
            new PTableImpl(
                new PNameImpl(tableName),
                tableType,
                MetaDataProtocol.MIN_TABLE_TIMESTAMP,
                0,
                QueryConstants.SYSTEM_TABLE_PK_NAME,
                null,
                columns);
        connection.addTable(schemaName, table);
      }

      for (PColumn column : columns) {
        addColumnMutation(schemaName, tableName, column, colUpsert);
      }

      Integer saltBucketNum = (Integer) tableProps.remove(PhoenixDatabaseMetaData.SALT_BUCKETS);
      if (saltBucketNum != null
          && (saltBucketNum <= 0 || saltBucketNum > SaltingUtil.MAX_BUCKET_NUM)) {
        throw new SQLExceptionInfo.Builder(SQLExceptionCode.INVALID_BUCKET_NUM)
            .build()
            .buildException();
      }

      PreparedStatement tableUpsert = connection.prepareStatement(CREATE_TABLE);
      tableUpsert.setString(1, schemaName);
      tableUpsert.setString(2, tableName);
      tableUpsert.setString(3, tableType.getSerializedValue());
      tableUpsert.setInt(4, 0);
      tableUpsert.setInt(5, columnOrdinal);
      if (saltBucketNum != null) {
        tableUpsert.setInt(6, saltBucketNum);
      } else {
        tableUpsert.setNull(6, Types.INTEGER);
      }
      tableUpsert.setString(7, pkName);
      tableUpsert.execute();

      final List<Mutation> tableMetaData = connection.getMutationState().toMutations();
      connection.rollback();

      MetaDataMutationResult result =
          connection
              .getQueryServices()
              .createTable(tableMetaData, isView, tableProps, familyPropList, splits);
      MutationCode code = result.getMutationCode();
      switch (code) {
        case TABLE_ALREADY_EXISTS:
          connection.addTable(schemaName, result.getTable());
          if (!statement.ifNotExists()) {
            throw new TableAlreadyExistsException(schemaName, tableName);
          }
          break;
        case NEWER_TABLE_FOUND:
          // TODO: add table if in result?
          throw new NewerTableAlreadyExistsException(schemaName, tableName);
        case UNALLOWED_TABLE_MUTATION:
          throw new SQLExceptionInfo.Builder(SQLExceptionCode.CANNOT_MUTATE_TABLE)
              .setSchemaName(schemaName)
              .setTableName(tableName)
              .build()
              .buildException();
        default:
          PTable table =
              new PTableImpl(
                  new PNameImpl(tableName),
                  tableType,
                  result.getMutationTime(),
                  0,
                  pkName,
                  saltBucketNum,
                  columns);
          connection.addTable(schemaName, table);
          if (tableType == PTableType.USER) {
            connection.setAutoCommit(true);
            // Delete everything in the column. You'll still be able to do queries at earlier
            // timestamps
            Long scn = connection.getSCN();
            long ts = (scn == null ? result.getMutationTime() : scn);
            PSchema schema =
                new PSchemaImpl(
                    schemaName,
                    ImmutableMap.<String, PTable>of(table.getName().getString(), table));
            TableRef tableRef = new TableRef(null, table, schema, ts);
            byte[] emptyCF = SchemaUtil.getEmptyColumnFamily(table.getColumnFamilies());
            MutationPlan plan =
                new PostDDLCompiler(connection).compile(tableRef, emptyCF, null, ts);
            return connection.getQueryServices().updateData(plan);
          }
          break;
      }
      return new MutationState(0, connection);
    } finally {
      connection.setAutoCommit(wasAutoCommit);
    }
  }
Example #27
 private void handleScvf(SingleColumnValueFilter scvf) {
   ValuePartition vp = null;
   if (scvf instanceof SingleColumnValuePartitionFilter) {
     vp = ((SingleColumnValuePartitionFilter) scvf).getValuePartition();
   }
   Column column = new Column(scvf.getFamily(), scvf.getQualifier(), vp);
   Pair<Value, Value> pair = colWithOperators.get(column);
   if (pair == null) {
     pair = new Pair<Value, Value>();
     // The first operator should be set here
     pair.setFirst(new Value(scvf.getOperator(), scvf.getComparator().getValue(), scvf));
     colWithOperators.put(column, pair);
   } else {
     if (pair.getFirst() != null && pair.getSecond() == null) {
        // TODO: as Anoop said, we may also have to check the Value type. We cannot compare and
        // validate this way between, say, "a" and "K"; only for numeric column types is this
        // check valid.
       byte[] curBoundValue = scvf.getComparator().getValue();
       byte[] prevBoundValue = pair.getFirst().getValue();
       int result = Bytes.compareTo(prevBoundValue, curBoundValue);
       CompareOp curBoundOperator = scvf.getOperator();
       CompareOp prevBoundOperator = pair.getFirst().getOperator();
       switch (curBoundOperator) {
         case GREATER:
         case GREATER_OR_EQUAL:
           if (prevBoundOperator == CompareOp.GREATER
               || prevBoundOperator == CompareOp.GREATER_OR_EQUAL) {
             LOG.warn("Wrong usage.  It should be < > || > <.  Cannot be > >");
             if (result > 1) {
               pair.setFirst(new Value(curBoundOperator, curBoundValue, scvf));
             }
             pair.setSecond(null);
           } else if (prevBoundOperator == CompareOp.LESS
               || prevBoundOperator == CompareOp.LESS_OR_EQUAL) {
             if (result < 1) {
               LOG.warn("Possible wrong usage as there cannot be a value < 10 and  > 20");
               pair.setFirst(null);
               pair.setSecond(null);
             } else {
               pair.setSecond(new Value(curBoundOperator, curBoundValue, scvf));
             }
           } else if (prevBoundOperator == CompareOp.EQUAL) {
             LOG.warn("Use the equal operator and ignore the current one");
             pair.setSecond(null);
           }
           break;
         case LESS:
         case LESS_OR_EQUAL:
           if (prevBoundOperator == CompareOp.LESS
               || prevBoundOperator == CompareOp.LESS_OR_EQUAL) {
             LOG.warn("Wrong usage.  It should be < > || > <.  Cannot be > >");
             if (result < 1) {
               pair.setFirst(new Value(curBoundOperator, curBoundValue, scvf));
             }
             pair.setSecond(null);
           } else if (prevBoundOperator == CompareOp.GREATER
               || prevBoundOperator == CompareOp.GREATER_OR_EQUAL) {
             if (result > 1) {
               LOG.warn("Possible wrong usage as there cannot be a value < 10 and  > 20");
               pair.setFirst(null);
               pair.setSecond(null);
             } else {
               pair.setSecond(new Value(curBoundOperator, curBoundValue, scvf));
             }
           } else if (prevBoundOperator == CompareOp.EQUAL) {
             LOG.warn("Use the EQUAL operator only  and ignore the current one.");
             pair.setSecond(null);
           }
           break;
         case EQUAL:
            // For an equal condition, give priority to equals only. If the previous operator is
            // == and the current one is also ==, take the second one (currently).
           if (prevBoundOperator == CompareOp.LESS
               || prevBoundOperator == CompareOp.LESS_OR_EQUAL
               || prevBoundOperator == CompareOp.EQUAL
               || prevBoundOperator == CompareOp.GREATER
               || prevBoundOperator == CompareOp.GREATER_OR_EQUAL) {
             pair.setFirst(new Value(curBoundOperator, curBoundValue, scvf));
             pair.setSecond(null);
           }
           break;
         case NOT_EQUAL:
         case NO_OP:
           // Need to check this
           break;
       }
     } else {
        LOG.warn(
            "Got an extra comparison for the same column. "
                + "Cannot have three conditions on the same column");
       pair.setFirst(null);
       pair.setSecond(null);
     }
   }
 }
Example #28
 @Override
 public void asSubByteBuffer(int offset, int length, Pair<ByteBuffer, Integer> pair) {
   // Just return the single BB that is available
   pair.setFirst(this.buf);
   pair.setSecond(offset);
 }
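A hedged usage sketch tying this back to getValueShallowCopy earlier in this collection: callers
hand in a reusable Pair as an out-parameter and read the backing buffer and offset out of it,
avoiding a fresh allocation per value (variable names are ours):

  Pair<ByteBuffer, Integer> tmp = new Pair<ByteBuffer, Integer>();
  buff.asSubByteBuffer(valueOffset, valueLength, tmp);  // fills tmp: (backing buffer, offset)
  ByteBuffer dup = tmp.getFirst().duplicate();
  dup.position(tmp.getSecond());
  dup.limit(tmp.getSecond() + valueLength);
  ByteBuffer value = dup.slice();                       // zero-copy view of the value bytes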
Example #29
  @Override
  public Integer apply(Pair<RequestHeader, Message> headerAndParam) {
    RequestHeader header = headerAndParam.getFirst();
    String methodName = header.getMethodName();
    Integer priorityByAnnotation = annotatedQos.get(methodName);
    if (priorityByAnnotation != null) {
      return priorityByAnnotation;
    }

    Message param = headerAndParam.getSecond();
    if (param == null) {
      return HConstants.NORMAL_QOS;
    }
    if (methodName.equalsIgnoreCase("multi") && param instanceof MultiRequest) {
      // The multi call has its priority set in the header.  All calls should work this way but
      // only this one has been converted so far.  No priority == NORMAL_QOS.
      return header.hasPriority() ? header.getPriority() : HConstants.NORMAL_QOS;
    }
    String cls = param.getClass().getName();
    Class<? extends Message> rpcArgClass = argumentToClassMap.get(cls);
    RegionSpecifier regionSpecifier = null;
    // check whether the request has a reference to the meta region or not.
    try {
      // Check if the param has a region specifier; the pb methods are hasRegion and getRegion if
      // hasRegion returns true.  Not all listed methods have a region specifier each time.  For
      // example, the ScanRequest has it on setup but thereafter relies on the scanner id rather
      // than sending the region over every time.
      Method hasRegion = methodMap.get("hasRegion").get(rpcArgClass);
      if (hasRegion != null && (Boolean) hasRegion.invoke(param, (Object[]) null)) {
        Method getRegion = methodMap.get("getRegion").get(rpcArgClass);
        regionSpecifier = (RegionSpecifier) getRegion.invoke(param, (Object[]) null);
        HRegion region = hRegionServer.getRegion(regionSpecifier);
        if (region.getRegionInfo().isMetaTable()) {
          if (LOG.isTraceEnabled()) {
            LOG.trace("High priority because region=" + region.getRegionNameAsString());
          }
          return HConstants.HIGH_QOS;
        }
      }
    } catch (Exception ex) {
      // Not good to throw an exception out of here; it would be a runtime exception anyway.  Let
      // the query go into the server and have it throw the exception if still an issue.  Just
      // mark it normal priority.
      if (LOG.isTraceEnabled()) LOG.trace("Marking normal priority after getting exception=" + ex);
      return HConstants.NORMAL_QOS;
    }

    if (methodName.equals("scan")) { // scanner methods...
      ScanRequest request = (ScanRequest) param;
      if (!request.hasScannerId()) {
        return HConstants.NORMAL_QOS;
      }
      RegionScanner scanner = hRegionServer.getScanner(request.getScannerId());
      if (scanner != null && scanner.getRegionInfo().isMetaRegion()) {
        if (LOG.isTraceEnabled()) {
          // Scanner requests are small in size so TextFormat version should not overwhelm log.
          LOG.trace("High priority scanner request " + TextFormat.shortDebugString(request));
        }
        return HConstants.HIGH_QOS;
      }
    }
    return HConstants.NORMAL_QOS;
  }
Example #30
  @Override
  public Result[] call(int timeout) throws IOException {
    // If the active replica callable was closed somewhere, invoke the RPC to
    // really close it. This applies to regular scanners: we make a couple of RPCs
    // to a RegionServer, and when that region is exhausted, we set
    // the closed flag. Then an RPC is required to actually close the scanner.
    if (currentScannerCallable != null && currentScannerCallable.closed) {
      // For closing we target that exact scanner (and not do replica fallback like in
      // the case of normal reads)
      if (LOG.isDebugEnabled()) {
        LOG.debug("Closing scanner " + currentScannerCallable.scannerId);
      }
      Result[] r = currentScannerCallable.call(timeout);
      currentScannerCallable = null;
      return r;
    }
    // We need to do the following:
    // 1. When a scan goes out to a certain replica (default or not), we need to
    //   continue to hit that until there is a failure. So store the last successfully invoked
    //   replica
    // 2. We should close the "losing" scanners (scanners other than the ones we hear back
    //   from first)
    //
    RegionLocations rl =
        RpcRetryingCallerWithReadReplicas.getRegionLocations(
            true,
            RegionReplicaUtil.DEFAULT_REPLICA_ID,
            cConnection,
            tableName,
            currentScannerCallable.getRow());

    // Allocate a bounded-completion pool sized at some multiple of the number of replicas.
    // We want to accommodate some RPCs for redundant replica scans that may still be in progress.
    BoundedCompletionService<Pair<Result[], ScannerCallable>> cs =
        new BoundedCompletionService<Pair<Result[], ScannerCallable>>(pool, rl.size() * 5);

    List<ExecutionException> exceptions = null;
    int submitted = 0, completed = 0;
    AtomicBoolean done = new AtomicBoolean(false);
    replicaSwitched.set(false);
    // submit call for the primary replica.
    submitted += addCallsForCurrentReplica(cs, rl);
    try {
      // wait for the timeout to see whether the primary responds back
      Future<Pair<Result[], ScannerCallable>> f =
          cs.poll(timeBeforeReplicas, TimeUnit.MICROSECONDS); // Yes, microseconds
      if (f != null) {
        Pair<Result[], ScannerCallable> r = f.get();
        if (r != null && r.getSecond() != null) {
          updateCurrentlyServingReplica(r.getSecond(), r.getFirst(), done, pool);
        }
        return r == null ? null : r.getFirst(); // great we got a response
      }
    } catch (ExecutionException e) {
      // the primary call failed with RetriesExhaustedException or DoNotRetryIOException
      // but the secondaries might still succeed. Continue on the replica RPCs.
      exceptions = new ArrayList<ExecutionException>(rl.size());
      exceptions.add(e);
      completed++;
    } catch (CancellationException e) {
      throw new InterruptedIOException(e.getMessage());
    } catch (InterruptedException e) {
      throw new InterruptedIOException(e.getMessage());
    }
    // submit calls for all of the secondaries at once
    // TODO: this may be overkill for large region replication
    submitted += addCallsForOtherReplicas(cs, rl, 0, rl.size() - 1);
    try {
      while (completed < submitted) {
        try {
          Future<Pair<Result[], ScannerCallable>> f = cs.take();
          Pair<Result[], ScannerCallable> r = f.get();
          if (r != null && r.getSecond() != null) {
            updateCurrentlyServingReplica(r.getSecond(), r.getFirst(), done, pool);
          }
          return r == null ? null : r.getFirst(); // great we got an answer
        } catch (ExecutionException e) {
          // if not cancel or interrupt, wait until all RPC's are done
          // one of the tasks failed. Save the exception for later.
          if (exceptions == null) exceptions = new ArrayList<ExecutionException>(rl.size());
          exceptions.add(e);
          completed++;
        }
      }
    } catch (CancellationException e) {
      throw new InterruptedIOException(e.getMessage());
    } catch (InterruptedException e) {
      throw new InterruptedIOException(e.getMessage());
    } finally {
      // We get here because we were interrupted or because one or more of the
      // calls succeeded or failed. In all cases, we stop all our tasks.
      cs.cancelAll(true);
    }

    if (exceptions != null && !exceptions.isEmpty()) {
      RpcRetryingCallerWithReadReplicas.throwEnrichedException(
          exceptions.get(0), retries); // just rethrow the first exception for now.
    }
    return null; // unreachable
  }
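The structure above (submit the primary, poll with a grace period, then hedge out to the replicas
and take the first success) can be reproduced with the JDK's ExecutorCompletionService standing in
for HBase's BoundedCompletionService. A simplified, self-contained sketch under those assumptions,
without the scanner-specific bookkeeping:

  import java.util.List;
  import java.util.concurrent.Callable;
  import java.util.concurrent.CompletionService;
  import java.util.concurrent.ExecutionException;
  import java.util.concurrent.ExecutorCompletionService;
  import java.util.concurrent.ExecutorService;
  import java.util.concurrent.Future;
  import java.util.concurrent.TimeUnit;

  public class HedgedCall {
    static <T> T call(ExecutorService pool, Callable<T> primary, List<Callable<T>> backups,
        long primaryGraceMs) throws InterruptedException, ExecutionException {
      CompletionService<T> cs = new ExecutorCompletionService<T>(pool);
      int submitted = 1;
      int failed = 0;
      ExecutionException last = null;
      cs.submit(primary);
      // Give the primary a head start before fanning out to the backups.
      Future<T> f = cs.poll(primaryGraceMs, TimeUnit.MILLISECONDS);
      if (f != null) {
        try {
          return f.get(); // the primary answered within the grace period
        } catch (ExecutionException e) {
          last = e;       // primary failed fast; fall through to the backups
          failed++;
        }
      }
      for (Callable<T> b : backups) {
        cs.submit(b);
        submitted++;
      }
      while (failed < submitted) {
        try {
          return cs.take().get(); // first successful replica wins
        } catch (ExecutionException e) {
          last = e;               // remember the failure, keep waiting for the rest
          failed++;
        }
      }
      throw last; // every attempt failed; surface the last exception
    }
  }

A production version would also cancel the still-outstanding tasks once a winner is chosen, much as
the original does with cs.cancelAll(true) in its finally block.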