private void binMutations( MutationSet mutationsToProcess, Map<String, TabletServerMutations<Mutation>> binnedMutations) { try { Set<Entry<String, List<Mutation>>> es = mutationsToProcess.getMutations().entrySet(); for (Entry<String, List<Mutation>> entry : es) { TabletLocator locator = getLocator(entry.getKey()); String table = entry.getKey(); List<Mutation> tableMutations = entry.getValue(); if (tableMutations != null) { ArrayList<Mutation> tableFailures = new ArrayList<Mutation>(); locator.binMutations(credentials, tableMutations, binnedMutations, tableFailures); if (tableFailures.size() > 0) { failedMutations.add(table, tableFailures); if (tableFailures.size() == tableMutations.size()) if (!Tables.exists(instance, entry.getKey())) throw new TableDeletedException(entry.getKey()); else if (Tables.getTableState(instance, table) == TableState.OFFLINE) throw new TableOfflineException(instance, entry.getKey()); } } } return; } catch (AccumuloServerException ase) { updateServerErrors(ase.getServer(), ase); } catch (AccumuloException ae) { // assume an IOError communicating with !METADATA tablet failedMutations.add(mutationsToProcess); } catch (AccumuloSecurityException e) { updateAuthorizationFailures( Collections.singletonMap( new KeyExtent(new Text(MetadataTable.ID), null, null), SecurityErrorCode.valueOf(e.getSecurityErrorCode().name()))); } catch (TableDeletedException e) { updateUnknownErrors(e.getMessage(), e); } catch (TableOfflineException e) { updateUnknownErrors(e.getMessage(), e); } catch (TableNotFoundException e) { updateUnknownErrors(e.getMessage(), e); } // an error ocurred binnedMutations.clear(); }
private void updateAuthorizationFailures( Map<KeyExtent, SecurityErrorCode> authorizationFailures) { if (authorizationFailures.size() > 0) { // was a table deleted? HashSet<String> tableIds = new HashSet<String>(); for (KeyExtent ke : authorizationFailures.keySet()) tableIds.add(ke.getTableId().toString()); Tables.clearCache(instance); for (String tableId : tableIds) if (!Tables.exists(instance, tableId)) throw new TableDeletedException(tableId); synchronized (this) { somethingFailed = true; mergeAuthorizationFailures(this.authorizationFailures, authorizationFailures); this.notifyAll(); } } }
/** * Gets the splits of the tables that have been set on the job by reading the metadata table for * the specified ranges. * * @return the splits from the tables based on the ranges. * @throws java.io.IOException if a table set on the job doesn't exist or an error occurs * initializing the tablet locator */ @Override public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { Level logLevel = getLogLevel(job); log.setLevel(logLevel); validateOptions(job); Random random = new Random(); LinkedList<InputSplit> splits = new LinkedList<InputSplit>(); Map<String, InputTableConfig> tableConfigs = getInputTableConfigs(job); for (Map.Entry<String, InputTableConfig> tableConfigEntry : tableConfigs.entrySet()) { String tableName = tableConfigEntry.getKey(); InputTableConfig tableConfig = tableConfigEntry.getValue(); Instance instance = getInstance(job); String tableId; // resolve table name to id once, and use id from this point forward if (DeprecationUtil.isMockInstance(instance)) { tableId = ""; } else { try { tableId = Tables.getTableId(instance, tableName); } catch (TableNotFoundException e) { throw new IOException(e); } } Authorizations auths = getScanAuthorizations(job); String principal = getPrincipal(job); AuthenticationToken token = getAuthenticationToken(job); boolean batchScan = InputConfigurator.isBatchScan(CLASS, job); boolean supportBatchScan = !(tableConfig.isOfflineScan() || tableConfig.shouldUseIsolatedScanners() || tableConfig.shouldUseLocalIterators()); if (batchScan && !supportBatchScan) throw new IllegalArgumentException( "BatchScanner optimization not available for offline scan, isolated, or local iterators"); boolean autoAdjust = tableConfig.shouldAutoAdjustRanges(); if (batchScan && !autoAdjust) throw new IllegalArgumentException( "AutoAdjustRanges must be enabled when using BatchScanner optimization"); List<Range> ranges = autoAdjust ? 
Range.mergeOverlapping(tableConfig.getRanges()) : tableConfig.getRanges(); if (ranges.isEmpty()) { ranges = new ArrayList<Range>(1); ranges.add(new Range()); } // get the metadata information for these ranges Map<String, Map<KeyExtent, List<Range>>> binnedRanges = new HashMap<String, Map<KeyExtent, List<Range>>>(); TabletLocator tl; try { if (tableConfig.isOfflineScan()) { binnedRanges = binOfflineTable(job, tableId, ranges); while (binnedRanges == null) { // Some tablets were still online, try again // sleep randomly between 100 and 200 ms sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS); binnedRanges = binOfflineTable(job, tableId, ranges); } } else { tl = InputConfigurator.getTabletLocator(CLASS, job, tableId); // its possible that the cache could contain complete, but old information about a tables // tablets... so clear it tl.invalidateCache(); ClientContext context = new ClientContext( getInstance(job), new Credentials(getPrincipal(job), getAuthenticationToken(job)), getClientConfiguration(job)); while (!tl.binRanges(context, ranges, binnedRanges).isEmpty()) { if (!DeprecationUtil.isMockInstance(instance)) { if (!Tables.exists(instance, tableId)) throw new TableDeletedException(tableId); if (Tables.getTableState(instance, tableId) == TableState.OFFLINE) throw new TableOfflineException(instance, tableId); } binnedRanges.clear(); log.warn("Unable to locate bins for specified ranges. 
Retrying."); // sleep randomly between 100 and 200 ms sleepUninterruptibly(100 + random.nextInt(100), TimeUnit.MILLISECONDS); tl.invalidateCache(); } } } catch (Exception e) { throw new IOException(e); } HashMap<Range, ArrayList<String>> splitsToAdd = null; if (!autoAdjust) splitsToAdd = new HashMap<Range, ArrayList<String>>(); HashMap<String, String> hostNameCache = new HashMap<String, String>(); for (Map.Entry<String, Map<KeyExtent, List<Range>>> tserverBin : binnedRanges.entrySet()) { String ip = tserverBin.getKey().split(":", 2)[0]; String location = hostNameCache.get(ip); if (location == null) { InetAddress inetAddress = InetAddress.getByName(ip); location = inetAddress.getCanonicalHostName(); hostNameCache.put(ip, location); } for (Map.Entry<KeyExtent, List<Range>> extentRanges : tserverBin.getValue().entrySet()) { Range ke = extentRanges.getKey().toDataRange(); if (batchScan) { // group ranges by tablet to be read by a BatchScanner ArrayList<Range> clippedRanges = new ArrayList<Range>(); for (Range r : extentRanges.getValue()) clippedRanges.add(ke.clip(r)); BatchInputSplit split = new BatchInputSplit(tableName, tableId, clippedRanges, new String[] {location}); SplitUtils.updateSplit(split, instance, tableConfig, principal, token, auths, logLevel); splits.add(split); } else { // not grouping by tablet for (Range r : extentRanges.getValue()) { if (autoAdjust) { // divide ranges into smaller ranges, based on the tablets RangeInputSplit split = new RangeInputSplit(tableName, tableId, ke.clip(r), new String[] {location}); SplitUtils.updateSplit( split, instance, tableConfig, principal, token, auths, logLevel); split.setOffline(tableConfig.isOfflineScan()); split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners()); split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators()); splits.add(split); } else { // don't divide ranges ArrayList<String> locations = splitsToAdd.get(r); if (locations == null) locations = new ArrayList<String>(1); 
locations.add(location); splitsToAdd.put(r, locations); } } } } } if (!autoAdjust) for (Map.Entry<Range, ArrayList<String>> entry : splitsToAdd.entrySet()) { RangeInputSplit split = new RangeInputSplit( tableName, tableId, entry.getKey(), entry.getValue().toArray(new String[0])); SplitUtils.updateSplit(split, instance, tableConfig, principal, token, auths, logLevel); split.setOffline(tableConfig.isOfflineScan()); split.setIsolatedScan(tableConfig.shouldUseIsolatedScanners()); split.setUsesLocalIterators(tableConfig.shouldUseLocalIterators()); splits.add(split); } } return splits.toArray(new InputSplit[splits.size()]); }
/**
 * Fetches the next batch of key/values for a scan, retrying until a batch is obtained, the scan
 * is marked finished, or the time budget is exhausted.
 *
 * <p>Each pass of the outer loop first locates the tablet for {@code scanState.startRow}
 * (retrying inside the inner loop) and then delegates to the three-argument {@code scan}
 * overload for that location. Failures judged retryable are logged, followed by a call to
 * {@code pause(sleepMillis)}, and the loop runs again; most of them also reset
 * {@code scanState.scanID}, and some reset {@code loc} so the tablet is located afresh.
 * Repeated identical error messages are demoted from debug to trace level.
 *
 * @param context client context used for tablet location and passed to the underlying scan
 * @param scanState mutable scan state; this method updates {@code startRow},
 *     {@code skipStartRow} and {@code scanID} on it
 * @param timeOut time budget, compared against elapsed wall-clock seconds
 * @return the batch returned by the underlying scan, or {@code null} if the loop ended without
 *     a batch or the scan is finished and the final batch was empty
 * @throws ScanTimedOutException if the elapsed time exceeds {@code timeOut}
 * @throws AccumuloException if the calling thread is interrupted, or (as
 *     {@code AccumuloServerException}) on a server-side failure
 * @throws AccumuloSecurityException rethrown from the underlying scan with table info attached,
 *     provided the table still exists
 * @throws TableNotFoundException declared; the call that raises it is not evident in this
 *     method - presumably the tablet locator (TODO confirm)
 */
public static List<KeyValue> scan(ClientContext context, ScanState scanState, int timeOut)
    throws ScanTimedOutException, AccumuloException, AccumuloSecurityException,
        TableNotFoundException {
  TabletLocation loc = null;
  Instance instance = context.getInstance();
  long startTime = System.currentTimeMillis();
  // last message logged at debug level; used to avoid repeating identical messages
  String lastError = null;
  String error = null;
  int tooManyFilesCount = 0;
  // value passed to and replaced by pause() - presumably a growing back-off (TODO confirm)
  long sleepMillis = 100;
  List<KeyValue> results = null;
  Span span = Trace.start("scan");
  try {
    while (results == null && !scanState.finished) {
      if (Thread.currentThread().isInterrupted()) {
        throw new AccumuloException("Thread interrupted");
      }
      if ((System.currentTimeMillis() - startTime) / 1000.0 > timeOut)
        throw new ScanTimedOutException();
      // locate the tablet holding scanState.startRow, retrying until found or timed out
      while (loc == null) {
        long currentTime = System.currentTimeMillis();
        if ((currentTime - startTime) / 1000.0 > timeOut) throw new ScanTimedOutException();
        Span locateSpan = Trace.start("scan:locateTablet");
        try {
          loc =
              TabletLocator.getLocator(context, scanState.tableId)
                  .locateTablet(context, scanState.startRow, scanState.skipStartRow, false);
          if (loc == null) {
            // no location: fail fast if the table is gone or offline, otherwise retry
            if (!Tables.exists(instance, scanState.tableId))
              throw new TableDeletedException(scanState.tableId);
            else if (Tables.getTableState(instance, scanState.tableId) == TableState.OFFLINE)
              throw new TableOfflineException(instance, scanState.tableId);
            error =
                "Failed to locate tablet for table : "
                    + scanState.tableId
                    + " row : "
                    + scanState.startRow;
            if (!error.equals(lastError)) log.debug("{}", error);
            else if (log.isTraceEnabled()) log.trace("{}", error);
            lastError = error;
            sleepMillis = pause(sleepMillis);
          } else {
            // when a tablet splits we do want to continue scanning the low child
            // of the split if we are already past it
            Range dataRange = loc.tablet_extent.toDataRange();
            if (scanState.range.getStartKey() != null
                && dataRange.afterEndKey(scanState.range.getStartKey())) {
              // go to the next tablet
              scanState.startRow = loc.tablet_extent.getEndRow();
              scanState.skipStartRow = true;
              loc = null;
            } else if (scanState.range.getEndKey() != null
                && dataRange.beforeStartKey(scanState.range.getEndKey())) {
              // should not happen
              throw new RuntimeException(
                  "Unexpected tablet, extent : "
                      + loc.tablet_extent
                      + " range : "
                      + scanState.range
                      + " startRow : "
                      + scanState.startRow);
            }
          }
        } catch (AccumuloServerException e) {
          // server-side failures are not retried
          log.debug("Scan failed, server side exception : {}", e.getMessage());
          throw e;
        } catch (AccumuloException e) {
          // any other locator failure: log, pause and retry the lookup
          error = "exception from tablet loc " + e.getMessage();
          if (!error.equals(lastError)) log.debug("{}", error);
          else if (log.isTraceEnabled()) log.trace("{}", error);
          lastError = error;
          sleepMillis = pause(sleepMillis);
        } finally {
          locateSpan.stop();
        }
      }
      Span scanLocation = Trace.start("scan:location");
      scanLocation.data("tserver", loc.tablet_location);
      try {
        results = scan(loc, scanState, context);
      } catch (AccumuloSecurityException e) {
        // a security failure on a table that no longer exists is reported as a deletion
        Tables.clearCache(instance);
        if (!Tables.exists(instance, scanState.tableId))
          throw new TableDeletedException(scanState.tableId);
        e.setTableInfo(Tables.getPrintableTableInfoFromId(instance, scanState.tableId));
        throw e;
      } catch (TApplicationException tae) {
        throw new AccumuloServerException(loc.tablet_location, tae);
      } catch (TSampleNotPresentException tsnpe) {
        String message =
            "Table "
                + Tables.getPrintableTableInfoFromId(instance, scanState.tableId)
                + " does not have sampling configured or built";
        throw new SampleNotPresentException(message, tsnpe);
      } catch (NotServingTabletException e) {
        error = "Scan failed, not serving tablet " + loc;
        if (!error.equals(lastError)) log.debug("{}", error);
        else if (log.isTraceEnabled()) log.trace("{}", error);
        lastError = error;
        // drop the cached location for this extent and look the tablet up again
        TabletLocator.getLocator(context, scanState.tableId).invalidateCache(loc.tablet_extent);
        loc = null;
        // no need to try the current scan id somewhere else
        scanState.scanID = null;
        if (scanState.isolated) throw new IsolationException();
        sleepMillis = pause(sleepMillis);
      } catch (NoSuchScanIDException e) {
        error = "Scan failed, no such scan id " + scanState.scanID + " " + loc;
        if (!error.equals(lastError)) log.debug("{}", error);
        else if (log.isTraceEnabled()) log.trace("{}", error);
        lastError = error;
        if (scanState.isolated) throw new IsolationException();
        // loc is kept and there is no pause: the same location is retried at once without a
        // scan id
        scanState.scanID = null;
      } catch (TooManyFilesException e) {
        error = "Tablet has too many files " + loc + " retrying...";
        if (!error.equals(lastError)) {
          log.debug("{}", error);
          tooManyFilesCount = 0;
        } else {
          // escalate to a warning once, on the 300th consecutive repeat of the same message
          tooManyFilesCount++;
          if (tooManyFilesCount == 300) log.warn("{}", error);
          else if (log.isTraceEnabled()) log.trace("{}", error);
        }
        lastError = error;
        // not sure what state the scan session on the server side is
        // in after this occurs, so let's be cautious and start a new
        // scan session
        scanState.scanID = null;
        if (scanState.isolated) throw new IsolationException();
        sleepMillis = pause(sleepMillis);
      } catch (TException e) {
        // this call invalidates by server location (tablet_location), not by extent
        TabletLocator.getLocator(context, scanState.tableId)
            .invalidateCache(context.getInstance(), loc.tablet_location);
        error =
            "Scan failed, thrift error "
                + e.getClass().getName()
                + " "
                + e.getMessage()
                + " "
                + loc;
        if (!error.equals(lastError)) log.debug("{}", error);
        else if (log.isTraceEnabled()) log.trace("{}", error);
        lastError = error;
        loc = null;
        // do not continue using the same scan id: if a timeout occurred, a thread on the
        // server side may still be processing the timed-out continue scan, which could cause
        // a batch to be skipped
        scanState.scanID = null;
        if (scanState.isolated) throw new IsolationException();
        sleepMillis = pause(sleepMillis);
      } finally {
        scanLocation.stop();
      }
    }
    // an empty final batch is reported as null rather than as an empty list
    if (results != null && results.size() == 0 && scanState.finished) {
      results = null;
    }
    return results;
  } catch (InterruptedException ex) {
    // NOTE(review): presumably raised by pause(); the thread's interrupt flag is not restored
    // here before wrapping - confirm whether callers rely on that.
    throw new AccumuloException(ex);
  } finally {
    span.stop();
  }
}