/**
 * Bins the mutations of every table in {@code mutationsToProcess} by tablet server, collecting
 * the result in {@code binnedMutations}.
 *
 * <p>Mutations the locator could not bin are added to {@code failedMutations}. When none of a
 * table's mutations could be binned, the table is checked for having been deleted or taken
 * offline, and the corresponding exception is thrown and handled below. Whenever one of the
 * handled errors occurs, {@code binnedMutations} is cleared before this method returns.
 *
 * @param mutationsToProcess the mutations to bin, grouped per table
 * @param binnedMutations output map that receives the binned mutations
 */
private void binMutations(
        MutationSet mutationsToProcess,
        Map<String, TabletServerMutations<Mutation>> binnedMutations) {
      try {
        for (Entry<String, List<Mutation>> tableEntry :
            mutationsToProcess.getMutations().entrySet()) {
          String table = tableEntry.getKey();
          // the locator is looked up before the null check, so lookup failures surface even
          // when a table has no mutation list
          TabletLocator locator = getLocator(table);

          List<Mutation> pending = tableEntry.getValue();
          if (pending == null) {
            continue;
          }

          ArrayList<Mutation> unbinned = new ArrayList<Mutation>();
          locator.binMutations(credentials, pending, binnedMutations, unbinned);
          if (unbinned.isEmpty()) {
            continue;
          }

          failedMutations.add(table, unbinned);

          // not a single mutation could be binned: find out whether the table is gone or offline
          if (unbinned.size() == pending.size()) {
            if (!Tables.exists(instance, table)) {
              throw new TableDeletedException(table);
            } else if (Tables.getTableState(instance, table) == TableState.OFFLINE) {
              throw new TableOfflineException(instance, table);
            }
          }
        }
        return;
      } catch (AccumuloServerException serverEx) {
        updateServerErrors(serverEx.getServer(), serverEx);
      } catch (AccumuloException accumuloEx) {
        // assume an IOError communicating with !METADATA tablet
        failedMutations.add(mutationsToProcess);
      } catch (AccumuloSecurityException securityEx) {
        KeyExtent metadataExtent = new KeyExtent(new Text(MetadataTable.ID), null, null);
        SecurityErrorCode errorCode =
            SecurityErrorCode.valueOf(securityEx.getSecurityErrorCode().name());
        updateAuthorizationFailures(Collections.singletonMap(metadataExtent, errorCode));
      } catch (TableDeletedException deletedEx) {
        updateUnknownErrors(deletedEx.getMessage(), deletedEx);
      } catch (TableOfflineException offlineEx) {
        updateUnknownErrors(offlineEx.getMessage(), offlineEx);
      } catch (TableNotFoundException notFoundEx) {
        updateUnknownErrors(notFoundEx.getMessage(), notFoundEx);
      }

      // an error occurred, discard whatever was binned so far
      binnedMutations.clear();
    }
  /**
   * Records the given authorization failures and notifies all threads waiting on this object.
   *
   * <p>Before anything is recorded, the table cache is cleared and every table referenced by a
   * failed extent is checked for existence; if one of them no longer exists a
   * {@code TableDeletedException} is thrown and nothing is recorded.
   *
   * @param authorizationFailures security error code per extent; an empty map is a no-op
   */
  private void updateAuthorizationFailures(
      Map<KeyExtent, SecurityErrorCode> authorizationFailures) {
    if (authorizationFailures.isEmpty()) {
      return;
    }

    // was a table deleted?
    HashSet<String> affectedTableIds = new HashSet<String>();
    for (KeyExtent extent : authorizationFailures.keySet()) {
      affectedTableIds.add(extent.getTableId().toString());
    }

    Tables.clearCache(instance);
    for (String affectedTableId : affectedTableIds) {
      if (!Tables.exists(instance, affectedTableId)) {
        throw new TableDeletedException(affectedTableId);
      }
    }

    synchronized (this) {
      somethingFailed = true;
      mergeAuthorizationFailures(this.authorizationFailures, authorizationFailures);
      this.notifyAll();
    }
  }
  /**
   * Gets the splits of the tables that have been set on the job by reading the metadata table for
   * the specified ranges.
   *
   * <p>For every configured table the requested ranges (or a single infinite range when none are
   * configured) are binned per tablet server and tablet. Depending on the table configuration one
   * split is produced per tablet (BatchScanner mode), per range clipped to a tablet (auto-adjust
   * mode), or per original range with all of its locations (auto-adjust disabled).
   *
   * @param job the job configuration the input tables, credentials and options are read from
   * @param numSplits not used by this implementation
   * @return the splits from the tables based on the ranges.
   * @throws java.io.IOException if a table set on the job doesn't exist or an error occurs
   *     initializing the tablet locator
   * @throws IllegalArgumentException if the BatchScanner optimization is requested together with
   *     an offline scan, isolated scanners or local iterators, or without auto-adjusted ranges
   */
  @Override
  public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    Level logLevel = getLogLevel(job);
    log.setLevel(logLevel);
    validateOptions(job);

    Random random = new Random();
    // splits are only appended and then copied into an array, so an array-backed list suffices
    List<InputSplit> splits = new ArrayList<InputSplit>();
    // tablet server ip -> canonical host name; shared by all tables so that every address is
    // resolved at most once per call
    Map<String, String> hostNameCache = new HashMap<String, String>();
    Map<String, InputTableConfig> tableConfigs = getInputTableConfigs(job);
    for (Map.Entry<String, InputTableConfig> tableConfigEntry : tableConfigs.entrySet()) {
      String tableName = tableConfigEntry.getKey();
      InputTableConfig tableConfig = tableConfigEntry.getValue();

      Instance instance = getInstance(job);
      String tableId;
      // resolve table name to id once, and use id from this point forward
      if (DeprecationUtil.isMockInstance(instance)) {
        tableId = "";
      } else {
        try {
          tableId = Tables.getTableId(instance, tableName);
        } catch (TableNotFoundException e) {
          throw new IOException(e);
        }
      }

      Authorizations auths = getScanAuthorizations(job);
      String principal = getPrincipal(job);
      AuthenticationToken token = getAuthenticationToken(job);

      boolean batchScan = InputConfigurator.isBatchScan(CLASS, job);
      boolean supportBatchScan =
          !(tableConfig.isOfflineScan()
              || tableConfig.shouldUseIsolatedScanners()
              || tableConfig.shouldUseLocalIterators());
      if (batchScan && !supportBatchScan) {
        throw new IllegalArgumentException(
            "BatchScanner optimization not available for offline scan, isolated, or local iterators");
      }

      boolean autoAdjust = tableConfig.shouldAutoAdjustRanges();
      if (batchScan && !autoAdjust) {
        throw new IllegalArgumentException(
            "AutoAdjustRanges must be enabled when using BatchScanner optimization");
      }

      List<Range> ranges =
          autoAdjust ? Range.mergeOverlapping(tableConfig.getRanges()) : tableConfig.getRanges();
      if (ranges.isEmpty()) {
        // no ranges configured: scan the whole table
        ranges = new ArrayList<Range>(1);
        ranges.add(new Range());
      }

      // get the metadata information for these ranges
      Map<String, Map<KeyExtent, List<Range>>> binnedRanges =
          new HashMap<String, Map<KeyExtent, List<Range>>>();
      try {
        if (tableConfig.isOfflineScan()) {
          binnedRanges = binOfflineTable(job, tableId, ranges);
          while (binnedRanges == null) {
            // Some tablets were still online, try again
            // sleep randomly between 100 and 200 ms
            sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
            binnedRanges = binOfflineTable(job, tableId, ranges);
          }
        } else {
          TabletLocator tl = InputConfigurator.getTabletLocator(CLASS, job, tableId);
          // its possible that the cache could contain complete, but old information about a tables
          // tablets... so clear it
          tl.invalidateCache();

          // reuse the instance, principal and token already resolved for this table instead of
          // reading them from the job a second time
          ClientContext context =
              new ClientContext(
                  instance, new Credentials(principal, token), getClientConfiguration(job));
          while (!tl.binRanges(context, ranges, binnedRanges).isEmpty()) {
            if (!DeprecationUtil.isMockInstance(instance)) {
              if (!Tables.exists(instance, tableId)) {
                throw new TableDeletedException(tableId);
              }
              if (Tables.getTableState(instance, tableId) == TableState.OFFLINE) {
                throw new TableOfflineException(instance, tableId);
              }
            }
            binnedRanges.clear();
            log.warn("Unable to locate bins for specified ranges. Retrying.");
            // sleep randomly between 100 and 200 ms
            sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
            tl.invalidateCache();
          }
        }
      } catch (Exception e) {
        throw new IOException(e);
      }

      // only needed when ranges are kept whole: original range -> every location serving it
      HashMap<Range, ArrayList<String>> splitsToAdd = null;
      if (!autoAdjust) {
        splitsToAdd = new HashMap<Range, ArrayList<String>>();
      }

      for (Map.Entry<String, Map<KeyExtent, List<Range>>> tserverBin : binnedRanges.entrySet()) {
        // bin keys look like "ip:port"; only the address part is resolved
        String ip = tserverBin.getKey().split(":", 2)[0];
        String location = hostNameCache.get(ip);
        if (location == null) {
          InetAddress inetAddress = InetAddress.getByName(ip);
          location = inetAddress.getCanonicalHostName();
          hostNameCache.put(ip, location);
        }
        for (Map.Entry<KeyExtent, List<Range>> extentRanges : tserverBin.getValue().entrySet()) {
          Range ke = extentRanges.getKey().toDataRange();
          if (batchScan) {
            // group ranges by tablet to be read by a BatchScanner
            ArrayList<Range> clippedRanges = new ArrayList<Range>();
            for (Range r : extentRanges.getValue()) {
              clippedRanges.add(ke.clip(r));
            }

            BatchInputSplit split =
                new BatchInputSplit(tableName, tableId, clippedRanges, new String[] {location});
            SplitUtils.updateSplit(split, instance, tableConfig, principal, token, auths, logLevel);

            splits.add(split);
          } else {
            // not grouping by tablet
            for (Range r : extentRanges.getValue()) {
              if (autoAdjust) {
                // divide ranges into smaller ranges, based on the tablets
                RangeInputSplit split =
                    new RangeInputSplit(tableName, tableId, ke.clip(r), new String[] {location});
                SplitUtils.updateSplit(
                    split, instance, tableConfig, principal, token, auths, logLevel);
                split.setOffline(tableConfig.isOfflineScan());
                split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
                split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());

                splits.add(split);
              } else {
                // don't divide ranges, just remember where this range is served
                ArrayList<String> locations = splitsToAdd.get(r);
                if (locations == null) {
                  locations = new ArrayList<String>(1);
                  splitsToAdd.put(r, locations);
                }
                locations.add(location);
              }
            }
          }
        }
      }

      if (!autoAdjust) {
        for (Map.Entry<Range, ArrayList<String>> entry : splitsToAdd.entrySet()) {
          RangeInputSplit split =
              new RangeInputSplit(
                  tableName, tableId, entry.getKey(), entry.getValue().toArray(new String[0]));
          SplitUtils.updateSplit(split, instance, tableConfig, principal, token, auths, logLevel);
          split.setOffline(tableConfig.isOfflineScan());
          split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
          split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());

          splits.add(split);
        }
      }
    }

    return splits.toArray(new InputSplit[splits.size()]);
  }
Ejemplo n.º 4
0
  /**
   * Fetches the next batch of key/values for {@code scanState}, locating the tablet to read from
   * and retrying until a batch is obtained, the scan is finished, or the time out is exceeded.
   *
   * <p>Failures that allow a retry (tablet not located, tablet not served, unknown scan id, too
   * many files, thrift errors) are logged and retried after {@code pause(...)}; for an isolated
   * scan most of them raise an {@code IsolationException} instead.
   *
   * @param context client context used to locate tablets and to talk to tablet servers
   * @param scanState state of the scan; its start row, scan id and related fields are updated
   * @param timeOut maximum time in seconds, measured from entry into this method and checked
   *     before each locate/scan attempt
   * @return the next batch of results, or {@code null} when the scan finished without returning
   *     any further data
   * @throws ScanTimedOutException if more than {@code timeOut} seconds have elapsed
   * @throws AccumuloException if the calling thread is interrupted (its interrupt status stays
   *     set) or a server side error occurs
   * @throws AccumuloSecurityException if the scan is rejected for security reasons while the
   *     table still exists
   * @throws TableNotFoundException declared by the signature; presumably raised by the tablet
   *     locator lookup
   */
  public static List<KeyValue> scan(ClientContext context, ScanState scanState, int timeOut)
      throws ScanTimedOutException, AccumuloException, AccumuloSecurityException,
          TableNotFoundException {
    TabletLocation loc = null;
    Instance instance = context.getInstance();
    long startTime = System.currentTimeMillis();
    String lastError = null;
    String error = null;
    int tooManyFilesCount = 0;
    long sleepMillis = 100;

    List<KeyValue> results = null;

    Span span = Trace.start("scan");
    try {
      while (results == null && !scanState.finished) {
        if (Thread.currentThread().isInterrupted()) {
          throw new AccumuloException("Thread interrupted");
        }

        if ((System.currentTimeMillis() - startTime) / 1000.0 > timeOut)
          throw new ScanTimedOutException();

        while (loc == null) {
          long currentTime = System.currentTimeMillis();
          if ((currentTime - startTime) / 1000.0 > timeOut) throw new ScanTimedOutException();

          Span locateSpan = Trace.start("scan:locateTablet");
          try {
            loc =
                TabletLocator.getLocator(context, scanState.tableId)
                    .locateTablet(context, scanState.startRow, scanState.skipStartRow, false);

            if (loc == null) {
              if (!Tables.exists(instance, scanState.tableId))
                throw new TableDeletedException(scanState.tableId);
              else if (Tables.getTableState(instance, scanState.tableId) == TableState.OFFLINE)
                throw new TableOfflineException(instance, scanState.tableId);

              error =
                  "Failed to locate tablet for table : "
                      + scanState.tableId
                      + " row : "
                      + scanState.startRow;
              logRepeatableError(error, lastError);
              lastError = error;
              sleepMillis = pause(sleepMillis);
            } else {
              // when a tablet splits we do want to continue scanning the low child
              // of the split if we are already passed it
              Range dataRange = loc.tablet_extent.toDataRange();

              if (scanState.range.getStartKey() != null
                  && dataRange.afterEndKey(scanState.range.getStartKey())) {
                // go to the next tablet
                scanState.startRow = loc.tablet_extent.getEndRow();
                scanState.skipStartRow = true;
                loc = null;
              } else if (scanState.range.getEndKey() != null
                  && dataRange.beforeStartKey(scanState.range.getEndKey())) {
                // should not happen
                throw new RuntimeException(
                    "Unexpected tablet, extent : "
                        + loc.tablet_extent
                        + "  range : "
                        + scanState.range
                        + " startRow : "
                        + scanState.startRow);
              }
            }
          } catch (AccumuloServerException e) {
            log.debug("Scan failed, server side exception : {}", e.getMessage());
            throw e;
          } catch (AccumuloException e) {
            error = "exception from tablet loc " + e.getMessage();
            logRepeatableError(error, lastError);

            lastError = error;
            sleepMillis = pause(sleepMillis);
          } finally {
            locateSpan.stop();
          }
        }

        Span scanLocation = Trace.start("scan:location");
        scanLocation.data("tserver", loc.tablet_location);
        try {
          results = scan(loc, scanState, context);
        } catch (AccumuloSecurityException e) {
          // a security failure may really be a deleted table, so refresh the cache and check
          Tables.clearCache(instance);
          if (!Tables.exists(instance, scanState.tableId))
            throw new TableDeletedException(scanState.tableId);
          e.setTableInfo(Tables.getPrintableTableInfoFromId(instance, scanState.tableId));
          throw e;
        } catch (TApplicationException tae) {
          throw new AccumuloServerException(loc.tablet_location, tae);
        } catch (TSampleNotPresentException tsnpe) {
          String message =
              "Table "
                  + Tables.getPrintableTableInfoFromId(instance, scanState.tableId)
                  + " does not have sampling configured or built";
          throw new SampleNotPresentException(message, tsnpe);
        } catch (NotServingTabletException e) {
          error = "Scan failed, not serving tablet " + loc;
          logRepeatableError(error, lastError);
          lastError = error;

          TabletLocator.getLocator(context, scanState.tableId).invalidateCache(loc.tablet_extent);
          loc = null;

          // no need to try the current scan id somewhere else
          scanState.scanID = null;

          if (scanState.isolated) throw new IsolationException();

          sleepMillis = pause(sleepMillis);
        } catch (NoSuchScanIDException e) {
          error = "Scan failed, no such scan id " + scanState.scanID + " " + loc;
          logRepeatableError(error, lastError);
          lastError = error;

          if (scanState.isolated) throw new IsolationException();

          scanState.scanID = null;
        } catch (TooManyFilesException e) {
          error = "Tablet has too many files " + loc + " retrying...";
          if (!error.equals(lastError)) {
            log.debug("{}", error);
            tooManyFilesCount = 0;
          } else {
            tooManyFilesCount++;
            // escalate to a warning once the same error has repeated 300 times in a row
            if (tooManyFilesCount == 300) log.warn("{}", error);
            else if (log.isTraceEnabled()) log.trace("{}", error);
          }
          lastError = error;

          // not sure what state the scan session on the server side is
          // in after this occurs, so lets be cautious and start a new
          // scan session
          scanState.scanID = null;

          if (scanState.isolated) throw new IsolationException();

          sleepMillis = pause(sleepMillis);
        } catch (TException e) {
          TabletLocator.getLocator(context, scanState.tableId)
              .invalidateCache(context.getInstance(), loc.tablet_location);
          error =
              "Scan failed, thrift error "
                  + e.getClass().getName()
                  + "  "
                  + e.getMessage()
                  + " "
                  + loc;
          logRepeatableError(error, lastError);
          lastError = error;
          loc = null;

          // do not want to continue using the same scan id, if a timeout occurred could cause a
          // batch to be skipped
          // because a thread on the server side may still be processing the timed out continue scan
          scanState.scanID = null;

          if (scanState.isolated) throw new IsolationException();

          sleepMillis = pause(sleepMillis);
        } finally {
          scanLocation.stop();
        }
      }

      // an empty final batch is reported as "no more data"
      if (results != null && results.size() == 0 && scanState.finished) {
        results = null;
      }

      return results;
    } catch (InterruptedException ex) {
      // restore the interrupt status that was cleared when the exception was raised, so callers
      // see the same thread state as with the explicit interruption check at the top of the loop
      Thread.currentThread().interrupt();
      throw new AccumuloException(ex);
    } finally {
      span.stop();
    }
  }

  /**
   * Logs a retryable scan error: at debug level when it differs from the previously logged error,
   * otherwise at trace level (if enabled) so that repeated failures do not flood the log.
   *
   * @param error the error message to log
   * @param lastError the error message logged on the previous attempt, may be {@code null}
   */
  private static void logRepeatableError(String error, String lastError) {
    if (!error.equals(lastError)) {
      log.debug("{}", error);
    } else if (log.isTraceEnabled()) {
      log.trace("{}", error);
    }
  }