/** Initialize a scanner over the given input split using this task attempt configuration. */ public void initialize(InputSplit inSplit, JobConf job) throws IOException { baseSplit = (org.apache.accumulo.core.client.mapreduce.RangeInputSplit) inSplit; log.debug("Initializing input split: " + baseSplit.toString()); Instance instance = baseSplit.getInstance(getClientConfiguration(job)); if (null == instance) { instance = getInstance(job); } String principal = baseSplit.getPrincipal(); if (null == principal) { principal = getPrincipal(job); } AuthenticationToken token = baseSplit.getToken(); if (null == token) { token = getAuthenticationToken(job); } Authorizations authorizations = baseSplit.getAuths(); if (null == authorizations) { authorizations = getScanAuthorizations(job); } String classLoaderContext = getClassLoaderContext(job); String table = baseSplit.getTableName(); // in case the table name changed, we can still use the previous name for terms of // configuration, // but the scanner will use the table id resolved at job setup time InputTableConfig tableConfig = getInputTableConfig(job, baseSplit.getTableName()); log.debug("Creating connector with user: "******"Creating scanner for table: " + table); log.debug("Authorizations are: " + authorizations); if (baseSplit instanceof BatchInputSplit) { BatchScanner scanner; BatchInputSplit multiRangeSplit = (BatchInputSplit) baseSplit; try { // Note: BatchScanner will use at most one thread per tablet, currently BatchInputSplit // will not span tablets int scanThreads = 1; scanner = instance .getConnector(principal, token) .createBatchScanner(baseSplit.getTableName(), authorizations, scanThreads); setupIterators(job, scanner, baseSplit.getTableName(), baseSplit); if (null != classLoaderContext) { scanner.setClassLoaderContext(classLoaderContext); } } catch (Exception e) { throw new IOException(e); } scanner.setRanges(multiRangeSplit.getRanges()); scannerBase = scanner; } else if (baseSplit instanceof RangeInputSplit) { split = (RangeInputSplit) baseSplit; Boolean isOffline = baseSplit.isOffline(); if (null == isOffline) { isOffline = tableConfig.isOfflineScan(); } Boolean isIsolated = baseSplit.isIsolatedScan(); if (null == isIsolated) { isIsolated = tableConfig.shouldUseIsolatedScanners(); } Boolean usesLocalIterators = baseSplit.usesLocalIterators(); if (null == usesLocalIterators) { usesLocalIterators = tableConfig.shouldUseLocalIterators(); } Scanner scanner; try { if (isOffline) { scanner = new OfflineScanner( instance, new Credentials(principal, token), baseSplit.getTableId(), authorizations); } else if (DeprecationUtil.isMockInstance(instance)) { scanner = instance .getConnector(principal, token) .createScanner(baseSplit.getTableName(), authorizations); } else { ClientConfiguration clientConf = getClientConfiguration(job); ClientContext context = new ClientContext(instance, new Credentials(principal, token), clientConf); scanner = new ScannerImpl(context, baseSplit.getTableId(), authorizations); } if (isIsolated) { log.info("Creating isolated scanner"); scanner = new IsolatedScanner(scanner); } if (usesLocalIterators) { log.info("Using local iterators"); scanner = new ClientSideIteratorScanner(scanner); } setupIterators(job, scanner, baseSplit.getTableName(), baseSplit); } catch (Exception e) { throw new IOException(e); } scanner.setRange(baseSplit.getRange()); scannerBase = scanner; } else { throw new IllegalArgumentException( "Can not initialize from " + baseSplit.getClass().toString()); } Collection<Pair<Text, Text>> columns = baseSplit.getFetchedColumns(); if (null == columns) { columns = tableConfig.getFetchedColumns(); } // setup a scanner within the bounds of this split for (Pair<Text, Text> c : columns) { if (c.getSecond() != null) { log.debug("Fetching column " + c.getFirst() + ":" + c.getSecond()); scannerBase.fetchColumn(c.getFirst(), c.getSecond()); } else { log.debug("Fetching column family " + c.getFirst()); scannerBase.fetchColumnFamily(c.getFirst()); } } SamplerConfiguration samplerConfig = baseSplit.getSamplerConfiguration(); if (null == samplerConfig) { samplerConfig = tableConfig.getSamplerConfiguration(); } if (samplerConfig != null) { scannerBase.setSamplerConfiguration(samplerConfig); } scannerIterator = scannerBase.iterator(); numKeysRead = 0; }