/**
  * Checks whether a configuration carries all of the information required to use it with an
  * Accumulo {@link InputFormat}.
  *
  * @param job the Hadoop context for the configured job
  * @throws java.io.IOException if the context is improperly configured
  * @since 1.5.0
  */
 protected static void validateOptions(JobConf job) throws IOException {
   final Instance inst = InputConfigurator.validateInstance(CLASS, job);
   String principal = InputConfigurator.getPrincipal(CLASS, job);
   AuthenticationToken token = InputConfigurator.getAuthenticationToken(CLASS, job);
   // In secure mode, we need to convert the DelegationTokenStub into a real DelegationToken
   token = ConfiguratorBase.unwrapAuthenticationToken(job, token);
   Connector conn;
   try {
     conn = inst.getConnector(principal, token);
   } catch (Exception e) {
     throw new IOException(e);
   }
   InputConfigurator.validatePermissions(CLASS, job, conn);
 }
  /**
   * Sets the connector information needed to communicate with Accumulo in this job.
   *
   * <p><b>WARNING:</b> Some tokens, when serialized, divulge sensitive information in the
   * configuration as a means to pass the token to MapReduce tasks. This information is BASE64
    * encoded to provide a charset-safe conversion to a string, but this conversion is not
    * intended to be secure. {@link PasswordToken} is one example that is insecure in this way;
    * however, {@link DelegationToken}s, acquired using {@link
    * SecurityOperations#getDelegationToken(DelegationTokenConfig)}, are not subject to this
    * concern.
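    *
    * <p>If a {@link KerberosToken} is supplied, this method attempts to replace it with a newly
    * acquired DelegationToken before storing the connector information. A minimal sketch of
    * direct configuration (the principal and password below are illustrative only):
    *
    * <pre>{@code
    * JobConf job = new JobConf();
    * // Simple but insecure: the PasswordToken is BASE64-serialized into the configuration.
    * setConnectorInfo(job, "reader", new PasswordToken("secret"));
    * }</pre>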
   *
   * @param job the Hadoop job instance to be configured
   * @param principal a valid Accumulo user name (user must have Table.CREATE permission)
    * @param token the principal's authentication token
    * @throws AccumuloSecurityException if a security error occurs
    * @since 1.5.0
   */
  public static void setConnectorInfo(JobConf job, String principal, AuthenticationToken token)
      throws AccumuloSecurityException {
    if (token instanceof KerberosToken) {
      log.info("Received KerberosToken, attempting to fetch DelegationToken");
      try {
        Instance instance = getInstance(job);
        Connector conn = instance.getConnector(principal, token);
        token = conn.securityOperations().getDelegationToken(new DelegationTokenConfig());
      } catch (Exception e) {
        log.warn(
            "Failed to automatically obtain DelegationToken, Mappers/Reducers will likely fail to communicate with Accumulo",
            e);
      }
    }
    // DelegationTokens can be passed securely from user to task without serializing insecurely in
    // the configuration
    if (token instanceof DelegationTokenImpl) {
      DelegationTokenImpl delegationToken = (DelegationTokenImpl) token;

      // Convert it into a Hadoop Token
      AuthenticationTokenIdentifier identifier = delegationToken.getIdentifier();
      Token<AuthenticationTokenIdentifier> hadoopToken =
          new Token<>(
              identifier.getBytes(),
              delegationToken.getPassword(),
              identifier.getKind(),
              delegationToken.getServiceName());

      // Add the Hadoop Token to the Job so it gets serialized and passed along.
      job.getCredentials().addToken(hadoopToken.getService(), hadoopToken);
    }

    InputConfigurator.setConnectorInfo(CLASS, job, principal, token);
  }
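  /**
   * Bins the given ranges by tablet for an offline table by reading the metadata table directly.
   * Returns null if some of the table's tablets are still online, in which case {@link
   * #getSplits(JobConf, int)} retries until the table is completely offline.
   */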
  Map<String, Map<KeyExtent, List<Range>>> binOfflineTable(
      JobConf job, String tableId, List<Range> ranges)
      throws TableNotFoundException, AccumuloException, AccumuloSecurityException {

    Instance instance = getInstance(job);
    Connector conn = instance.getConnector(getPrincipal(job), getAuthenticationToken(job));

    return InputConfigurator.binOffline(tableId, ranges, instance, conn);
  }
 /**
  * Configures a {@link org.apache.accumulo.core.client.mock.MockInstance} for this job.
  *
  * @param job the Hadoop job instance to be configured
  * @param instanceName the Accumulo instance name
  * @since 1.5.0
  * @deprecated since 1.8.0; use MiniAccumuloCluster or a standard mock framework
  */
 @Deprecated
 public static void setMockInstance(JobConf job, String instanceName) {
   InputConfigurator.setMockInstance(CLASS, job, instanceName);
 }
 /**
  * Initializes an Accumulo {@link org.apache.accumulo.core.client.Instance} based on the
  * configuration.
  *
  * @param job the Hadoop context for the configured job
  * @return an Accumulo instance
  * @since 1.5.0
  * @see #setZooKeeperInstance(JobConf, ClientConfiguration)
  */
 protected static Instance getInstance(JobConf job) {
   return InputConfigurator.getInstance(CLASS, job);
 }
  /**
   * Gets the splits of the tables that have been set on the job by reading the metadata table for
   * the specified ranges.
   *
   * @return the splits from the tables based on the ranges.
   * @throws java.io.IOException if a table set on the job doesn't exist or an error occurs
   *     initializing the tablet locator
   */
  @Override
  public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    Level logLevel = getLogLevel(job);
    log.setLevel(logLevel);
    validateOptions(job);

    Random random = new Random();
    LinkedList<InputSplit> splits = new LinkedList<InputSplit>();
    Map<String, InputTableConfig> tableConfigs = getInputTableConfigs(job);
    for (Map.Entry<String, InputTableConfig> tableConfigEntry : tableConfigs.entrySet()) {
      String tableName = tableConfigEntry.getKey();
      InputTableConfig tableConfig = tableConfigEntry.getValue();

      Instance instance = getInstance(job);
      String tableId;
      // resolve table name to id once, and use id from this point forward
      if (DeprecationUtil.isMockInstance(instance)) {
        tableId = "";
      } else {
        try {
          tableId = Tables.getTableId(instance, tableName);
        } catch (TableNotFoundException e) {
          throw new IOException(e);
        }
      }

      Authorizations auths = getScanAuthorizations(job);
      String principal = getPrincipal(job);
      AuthenticationToken token = getAuthenticationToken(job);

      boolean batchScan = InputConfigurator.isBatchScan(CLASS, job);
      boolean supportBatchScan =
          !(tableConfig.isOfflineScan()
              || tableConfig.shouldUseIsolatedScanners()
              || tableConfig.shouldUseLocalIterators());
      if (batchScan && !supportBatchScan)
        throw new IllegalArgumentException(
            "BatchScanner optimization not available for offline scan, isolated, or local iterators");

      boolean autoAdjust = tableConfig.shouldAutoAdjustRanges();
      if (batchScan && !autoAdjust)
        throw new IllegalArgumentException(
            "AutoAdjustRanges must be enabled when using BatchScanner optimization");

      List<Range> ranges =
          autoAdjust ? Range.mergeOverlapping(tableConfig.getRanges()) : tableConfig.getRanges();
      if (ranges.isEmpty()) {
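        // no ranges were set for this table, so scan the entire table by default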
        ranges = new ArrayList<Range>(1);
        ranges.add(new Range());
      }

      // get the metadata information for these ranges
      Map<String, Map<KeyExtent, List<Range>>> binnedRanges =
          new HashMap<String, Map<KeyExtent, List<Range>>>();
      TabletLocator tl;
      try {
        if (tableConfig.isOfflineScan()) {
          binnedRanges = binOfflineTable(job, tableId, ranges);
          while (binnedRanges == null) {
            // Some tablets were still online, try again
            // sleep randomly between 100 and 200 ms
            sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
            binnedRanges = binOfflineTable(job, tableId, ranges);
          }
        } else {
          tl = InputConfigurator.getTabletLocator(CLASS, job, tableId);
          // it's possible that the cache could contain complete but stale information about a
          // table's tablets... so clear it
          tl.invalidateCache();

          ClientContext context =
              new ClientContext(
                  getInstance(job),
                  new Credentials(getPrincipal(job), getAuthenticationToken(job)),
                  getClientConfiguration(job));
          while (!tl.binRanges(context, ranges, binnedRanges).isEmpty()) {
            if (!DeprecationUtil.isMockInstance(instance)) {
              if (!Tables.exists(instance, tableId)) throw new TableDeletedException(tableId);
              if (Tables.getTableState(instance, tableId) == TableState.OFFLINE)
                throw new TableOfflineException(instance, tableId);
            }
            binnedRanges.clear();
            log.warn("Unable to locate bins for specified ranges. Retrying.");
            // sleep randomly between 100 and 200 ms
            sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS);
            tl.invalidateCache();
          }
        }
      } catch (Exception e) {
        throw new IOException(e);
      }

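      // When ranges are not auto-adjusted, one range may span several tablets; collect every
      // tablet location per range here and emit a single split per range afterwards.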
      HashMap<Range, ArrayList<String>> splitsToAdd = null;

      if (!autoAdjust) splitsToAdd = new HashMap<Range, ArrayList<String>>();

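      // Resolve each tablet server's IP to a canonical hostname (cached per IP) so split
      // locations match the hostnames Hadoop compares against when scheduling local tasks.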
      HashMap<String, String> hostNameCache = new HashMap<String, String>();
      for (Map.Entry<String, Map<KeyExtent, List<Range>>> tserverBin : binnedRanges.entrySet()) {
        String ip = tserverBin.getKey().split(":", 2)[0];
        String location = hostNameCache.get(ip);
        if (location == null) {
          InetAddress inetAddress = InetAddress.getByName(ip);
          location = inetAddress.getCanonicalHostName();
          hostNameCache.put(ip, location);
        }
        for (Map.Entry<KeyExtent, List<Range>> extentRanges : tserverBin.getValue().entrySet()) {
          Range ke = extentRanges.getKey().toDataRange();
          if (batchScan) {
            // group ranges by tablet to be read by a BatchScanner
            ArrayList<Range> clippedRanges = new ArrayList<Range>();
            for (Range r : extentRanges.getValue()) clippedRanges.add(ke.clip(r));

            BatchInputSplit split =
                new BatchInputSplit(tableName, tableId, clippedRanges, new String[] {location});
            SplitUtils.updateSplit(split, instance, tableConfig, principal, token, auths, logLevel);

            splits.add(split);
          } else {
            // not grouping by tablet
            for (Range r : extentRanges.getValue()) {
              if (autoAdjust) {
                // divide ranges into smaller ranges, based on the tablets
                RangeInputSplit split =
                    new RangeInputSplit(tableName, tableId, ke.clip(r), new String[] {location});
                SplitUtils.updateSplit(
                    split, instance, tableConfig, principal, token, auths, logLevel);
                split.setOffline(tableConfig.isOfflineScan());
                split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
                split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());

                splits.add(split);
              } else {
                // don't divide ranges
                ArrayList<String> locations = splitsToAdd.get(r);
                if (locations == null) locations = new ArrayList<String>(1);
                locations.add(location);
                splitsToAdd.put(r, locations);
              }
            }
          }
        }
      }

      if (!autoAdjust)
        for (Map.Entry<Range, ArrayList<String>> entry : splitsToAdd.entrySet()) {
          RangeInputSplit split =
              new RangeInputSplit(
                  tableName, tableId, entry.getKey(), entry.getValue().toArray(new String[0]));
          SplitUtils.updateSplit(split, instance, tableConfig, principal, token, auths, logLevel);
          split.setOffline(tableConfig.isOfflineScan());
          split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners());
          split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators());

          splits.add(split);
        }
    }

    return splits.toArray(new InputSplit[splits.size()]);
  }
 /**
  * Initializes an Accumulo {@link org.apache.accumulo.core.client.Instance} based on the
  * configuration.
  *
  * @param context the Hadoop context for the configured job
  * @return an Accumulo instance
  * @since 1.5.0
  * @see #setZooKeeperInstance(Job, ClientConfiguration)
  */
 protected static Instance getInstance(JobContext context) {
   return InputConfigurator.getInstance(CLASS, context.getConfiguration());
 }
 /**
  * Sets the log level for this job.
  *
  * @param job the Hadoop job instance to be configured
  * @param level the logging level
  * @since 1.5.0
  */
 public static void setLogLevel(Job job, Level level) {
   InputConfigurator.setLogLevel(CLASS, job.getConfiguration(), level);
 }
 /**
   * Fetches an {@link InputTableConfig} that has been set on the configuration for a specific table.
   *
   * <p>null is returned if no configuration has been set for the given table.
  *
  * @param job the Hadoop job instance to be configured
  * @param tableName the table name for which to grab the config object
  * @return the {@link InputTableConfig} for the given table
  * @since 1.6.0
  */
 public static InputTableConfig getInputTableConfig(JobConf job, String tableName) {
   return InputConfigurator.getInputTableConfig(CLASS, job, tableName);
 }
 /**
  * Sets the {@link org.apache.accumulo.core.security.Authorizations} used to scan. Must be a
   * subset of the user's authorizations. Defaults to the empty set.
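   *
   * <p>For example (the authorization string below is illustrative):
   *
   * <pre>{@code
   * setScanAuthorizations(job, new Authorizations("public"));
   * }</pre>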
  *
  * @param job the Hadoop job instance to be configured
  * @param auths the user's authorizations
  * @since 1.5.0
  */
 public static void setScanAuthorizations(JobConf job, Authorizations auths) {
   InputConfigurator.setScanAuthorizations(CLASS, job, auths);
 }
 /**
  * Gets the authorizations to set for the scans from the configuration.
  *
  * @param context the Hadoop context for the configured job
  * @return the Accumulo scan authorizations
  * @since 1.5.0
  * @see #setScanAuthorizations(Job, Authorizations)
  */
 protected static Authorizations getScanAuthorizations(JobContext context) {
   return InputConfigurator.getScanAuthorizations(CLASS, context.getConfiguration());
 }
 /**
   * Constructs the {@link ClientConfiguration} from the provided context.
   *
   * @param context the Hadoop context for the configured job
   * @return the client configuration for the job
  * @since 1.7.0
  */
 protected static ClientConfiguration getClientConfiguration(JobContext context) {
   return InputConfigurator.getClientConfiguration(CLASS, context.getConfiguration());
 }
 /**
   * Returns the name of the classloader context currently set on this job's configuration.
  *
  * @param job the Hadoop job instance to be configured
  * @return name of the current context
  * @since 1.8.0
  */
 public static String getClassLoaderContext(JobConf job) {
   return InputConfigurator.getClassLoaderContext(CLASS, job);
 }
 /**
  * Initializes an Accumulo {@link org.apache.accumulo.core.client.impl.TabletLocator} based on the
  * configuration.
  *
  * @param context the Hadoop context for the configured job
  * @param table the table for which to initialize the locator
  * @return an Accumulo tablet locator
  * @throws org.apache.accumulo.core.client.TableNotFoundException if the table name set on the
  *     configuration doesn't exist
  * @since 1.6.0
  * @deprecated since 1.7.0 This method returns a type that is not part of the public API and is
  *     not guaranteed to be stable. The method was deprecated to discourage its use.
  */
 @Deprecated
 protected static TabletLocator getTabletLocator(JobContext context, String table)
     throws TableNotFoundException {
   return InputConfigurator.getTabletLocator(CLASS, context.getConfiguration(), table);
 }
 /**
   * Fetches an {@link InputTableConfig} that has been set on the configuration for a specific table.
   *
   * <p>null is returned if no configuration has been set for the given table.
   *
   * @param context the Hadoop context for the configured job
  * @param tableName the table name for which to grab the config object
  * @return the {@link InputTableConfig} for the given table
  * @since 1.6.0
  */
 protected static InputTableConfig getInputTableConfig(JobContext context, String tableName) {
   return InputConfigurator.getInputTableConfig(CLASS, context.getConfiguration(), tableName);
 }
 /**
  * Fetches all {@link InputTableConfig}s that have been set on the given job.
  *
   * @param context the Hadoop context for the configured job
  * @return the {@link InputTableConfig} objects for the job
  * @since 1.6.0
  */
 protected static Map<String, InputTableConfig> getInputTableConfigs(JobContext context) {
   return InputConfigurator.getInputTableConfigs(CLASS, context.getConfiguration());
 }
 /**
  * Sets the log level for this job.
  *
  * @param job the Hadoop job instance to be configured
  * @param level the logging level
  * @since 1.5.0
  */
 public static void setLogLevel(JobConf job, Level level) {
   InputConfigurator.setLogLevel(CLASS, job, level);
 }
 /**
  * Initializes an Accumulo {@link org.apache.accumulo.core.client.impl.TabletLocator} based on the
  * configuration.
  *
   * @param job the Hadoop context for the configured job
   * @param tableId the id of the table for which to initialize the locator
   * @return an Accumulo tablet locator
  * @throws org.apache.accumulo.core.client.TableNotFoundException if the table name set on the
  *     configuration doesn't exist
  * @since 1.6.0
  * @deprecated since 1.7.0 This method returns a type that is not part of the public API and is
  *     not guaranteed to be stable. The method was deprecated to discourage its use.
  */
 @Deprecated
 protected static TabletLocator getTabletLocator(JobConf job, String tableId)
     throws TableNotFoundException {
   return InputConfigurator.getTabletLocator(CLASS, job, tableId);
 }
 /**
  * Gets the log level from this configuration.
  *
  * @param job the Hadoop context for the configured job
  * @return the log level
  * @since 1.5.0
  * @see #setLogLevel(JobConf, Level)
  */
 protected static Level getLogLevel(JobConf job) {
   return InputConfigurator.getLogLevel(CLASS, job);
 }
 /**
  * Gets the log level from this configuration.
  *
  * @param context the Hadoop context for the configured job
  * @return the log level
  * @since 1.5.0
  * @see #setLogLevel(Job, Level)
  */
 protected static Level getLogLevel(JobContext context) {
   return InputConfigurator.getLogLevel(CLASS, context.getConfiguration());
 }
 /**
  * Gets the authorizations to set for the scans from the configuration.
  *
  * @param job the Hadoop context for the configured job
  * @return the Accumulo scan authorizations
  * @since 1.5.0
  * @see #setScanAuthorizations(JobConf, Authorizations)
  */
 protected static Authorizations getScanAuthorizations(JobConf job) {
   return InputConfigurator.getScanAuthorizations(CLASS, job);
 }
 /**
  * Sets the connector information needed to communicate with Accumulo in this job.
  *
  * <p>Stores the password in a file in HDFS and pulls that into the Distributed Cache in an
  * attempt to be more secure than storing it in the Configuration.
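   *
   * <p>A minimal sketch (the HDFS path below is illustrative and must point to an existing token
   * file):
   *
   * <pre>{@code
   * setConnectorInfo(job, "reader", "/user/reader/accumulo.tokenfile");
   * }</pre>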
  *
  * @param job the Hadoop job instance to be configured
  * @param principal a valid Accumulo user name (user must have Table.CREATE permission)
  * @param tokenFile the path to the token file
   * @throws AccumuloSecurityException if a security error occurs
   * @since 1.6.0
  */
 public static void setConnectorInfo(JobConf job, String principal, String tokenFile)
     throws AccumuloSecurityException {
   InputConfigurator.setConnectorInfo(CLASS, job, principal, tokenFile);
 }
 /**
   * Fetches the client configuration from the job.
   *
   * @param job the Hadoop job instance
   * @return the client configuration for the job
  * @since 1.7.0
  */
 protected static ClientConfiguration getClientConfiguration(JobConf job) {
   return InputConfigurator.getClientConfiguration(CLASS, job);
 }
 /**
  * Determines if the connector has been configured.
  *
  * @param job the Hadoop context for the configured job
  * @return true if the connector has been configured, false otherwise
  * @since 1.5.0
  * @see #setConnectorInfo(JobConf, String, AuthenticationToken)
  */
 protected static Boolean isConnectorInfoSet(JobConf job) {
   return InputConfigurator.isConnectorInfoSet(CLASS, job);
 }
 /**
  * Fetches all {@link InputTableConfig}s that have been set on the given Hadoop job.
  *
  * @param job the Hadoop job instance to be configured
  * @return the {@link InputTableConfig} objects set on the job
  * @since 1.6.0
  */
 public static Map<String, InputTableConfig> getInputTableConfigs(JobConf job) {
   return InputConfigurator.getInputTableConfigs(CLASS, job);
 }
 /**
  * Gets the user name from the configuration.
  *
  * @param job the Hadoop context for the configured job
  * @return the user name
  * @since 1.5.0
  * @see #setConnectorInfo(JobConf, String, AuthenticationToken)
  */
 protected static String getPrincipal(JobConf job) {
   return InputConfigurator.getPrincipal(CLASS, job);
 }
 /**
  * Gets the authenticated token from either the specified token file or directly from the
  * configuration, whichever was used when the job was configured.
  *
  * @param job the Hadoop context for the configured job
  * @return the principal's authentication token
  * @since 1.6.0
  * @see #setConnectorInfo(JobConf, String, AuthenticationToken)
  * @see #setConnectorInfo(JobConf, String, String)
  */
 protected static AuthenticationToken getAuthenticationToken(JobConf job) {
   AuthenticationToken token = InputConfigurator.getAuthenticationToken(CLASS, job);
   return ConfiguratorBase.unwrapAuthenticationToken(job, token);
 }
 /**
  * Configures a {@link org.apache.accumulo.core.client.ZooKeeperInstance} for this job.
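   *
   * <p>A minimal sketch (the instance name and ZooKeeper hosts are illustrative):
   *
   * <pre>{@code
   * ClientConfiguration clientConfig =
   *     ClientConfiguration.loadDefault().withInstance("myInstance").withZkHosts("zoo1:2181");
   * setZooKeeperInstance(job, clientConfig);
   * }</pre>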
  *
  * @param job the Hadoop job instance to be configured
  * @param clientConfig client configuration containing connection options
  * @since 1.6.0
  */
 public static void setZooKeeperInstance(JobConf job, ClientConfiguration clientConfig) {
   InputConfigurator.setZooKeeperInstance(CLASS, job, clientConfig);
 }
 /**
   * Sets the name of the classloader context on this job's configuration.
  *
  * @param job the Hadoop job instance to be configured
  * @param context name of the classloader context
  * @since 1.8.0
  */
 public static void setClassLoaderContext(JobConf job, String context) {
   InputConfigurator.setClassLoaderContext(CLASS, job, context);
 }
 /**
  * Gets the authenticated token from either the specified token file or directly from the
  * configuration, whichever was used when the job was configured.
  *
  * @param context the Hadoop context for the configured job
  * @return the principal's authentication token
  * @since 1.6.0
  * @see #setConnectorInfo(Job, String, AuthenticationToken)
  * @see #setConnectorInfo(Job, String, String)
  */
 protected static AuthenticationToken getAuthenticationToken(JobContext context) {
   AuthenticationToken token =
       InputConfigurator.getAuthenticationToken(CLASS, context.getConfiguration());
   return ConfiguratorBase.unwrapAuthenticationToken(context, token);
 }