/**
 * Uploads {@code source} to the configured S3 bucket as a compressed multipart upload.
 *
 * <p>The file is compressed chunk by chunk; each chunk is throttled and then uploaded as one
 * part (with retries). If anything fails after the multipart upload has been initiated, the
 * upload is aborted and the exception is rethrown. After a successful upload, every
 * previously available backup with the same name is deleted.
 *
 * @param exhibitor passed through to {@code getAvailableBackups} and {@code deleteBackup}
 * @param backup meta data of the backup being uploaded; also determines the S3 key
 * @param source the local file to compress and upload
 * @param configValues provider configuration (bucket, retry and throttle settings)
 * @return {@code DUPLICATE} if {@code backup} is already among the available backups (nothing
 *     is uploaded), {@code REPLACED_OLD_VERSION} if at least one older backup with the same name
 *     was deleted, otherwise {@code SUCCEEDED}
 * @throws Exception if listing, compressing, uploading or deleting fails
 */
@Override
public UploadResult uploadBackup(
    Exhibitor exhibitor,
    BackupMetaData backup,
    File source,
    final Map<String, String> configValues)
    throws Exception {
  List<BackupMetaData> availableBackups = getAvailableBackups(exhibitor, configValues);
  if (availableBackups.contains(backup)) {
    return UploadResult.DUPLICATE;
  }

  RetryPolicy retryPolicy = makeRetryPolicy(configValues);
  Throttle throttle = makeThrottle(configValues);

  String key = toKey(backup);
  InitiateMultipartUploadRequest initRequest =
      new InitiateMultipartUploadRequest(configValues.get(CONFIG_BUCKET.getKey()), key);
  InitiateMultipartUploadResult initResponse = s3Client.initiateMultipartUpload(initRequest);

  // The compressor is opened inside the guarded region: once the multipart upload exists,
  // every failure (including one while opening the source) must abort it.
  CompressorIterator compressorIterator = null;
  try {
    compressorIterator = compressor.compress(source);

    List<PartETag> eTags = Lists.newArrayList();
    int index = 0;
    for (; ; ) {
      ByteBuffer chunk = compressorIterator.next();
      if (chunk == null) {
        // the compressor signals end of input with a null chunk
        break;
      }
      throttle.throttle(chunk.limit());

      PartETag eTag = uploadChunkWithRetry(chunk, initResponse, index++, retryPolicy);
      eTags.add(eTag);
    }

    completeUpload(initResponse, eTags);
  } catch (Exception e) {
    abortUpload(initResponse);
    throw e;
  } finally {
    // may still be null if compress() itself failed
    Closeables.closeQuietly(compressorIterator);
  }

  // Only after the new backup is safely stored are older versions with the same name removed.
  UploadResult result = UploadResult.SUCCEEDED;
  for (BackupMetaData existing : availableBackups) {
    if (existing.getName().equals(backup.getName())) {
      deleteBackup(exhibitor, existing, configValues);
      result = UploadResult.REPLACED_OLD_VERSION;
    }
  }
  return result;
}
/**
 * Downloads a backup from the configured S3 bucket, decompresses it and writes it to
 * {@code destination}.
 *
 * <p>Each attempt fetches the object afresh and rewrites {@code destination} from the start,
 * so a retry never reads from a stream that a previous attempt already closed. Attempts are
 * repeated for as long as the retry policy allows; once it refuses, the last failure is
 * rethrown so the caller is not left believing a partial file is a complete download.
 *
 * @param exhibitor not read by this method
 * @param backup meta data of the backup to download; determines the S3 key
 * @param destination local file that receives the decompressed content
 * @param configValues provider configuration (bucket, retry and throttle settings)
 * @throws Exception the failure of the final attempt, once retries are exhausted
 */
@Override
public void downloadBackup(
    Exhibitor exhibitor,
    BackupMetaData backup,
    File destination,
    Map<String, String> configValues)
    throws Exception {
  long startMs = System.currentTimeMillis();
  RetryPolicy retryPolicy = makeRetryPolicy(configValues);
  int retryCount = 0;
  boolean done = false;
  while (!done) {
    Throttle throttle = makeThrottle(configValues);
    InputStream in = null;
    FileOutputStream out = null;
    try {
      // Fetch per attempt: the content stream of a previous attempt has been closed.
      S3Object object =
          s3Client.getObject(configValues.get(CONFIG_BUCKET.getKey()), toKey(backup));
      in = object.getObjectContent();
      out = new FileOutputStream(destination);

      FileChannel channel = out.getChannel();
      CompressorIterator compressorIterator = compressor.decompress(in);
      for (; ; ) {
        ByteBuffer bytes = compressorIterator.next();
        if (bytes == null) {
          // the decompressor signals end of input with a null buffer
          break;
        }

        throttle.throttle(bytes.limit());
        channel.write(bytes);
      }

      done = true;
    } catch (Exception e) {
      if (!retryPolicy.allowRetry(retryCount++, System.currentTimeMillis() - startMs)) {
        // Out of retries: surface the failure instead of returning as if it had worked.
        throw e;
      }
    } finally {
      Closeables.closeQuietly(in);
      Closeables.closeQuietly(out);
    }
  }
}
/**
 * Sets up the one throttle this manager can hold for the requested address.
 *
 * <p>When a throttle already exists, the request is rejected through
 * {@code failedThrottleRequest}. Otherwise a new {@code Throttle} is created from the address
 * number, stored as the current throttle and announced through {@code notifyThrottleKnown}.
 *
 * @param address the requested locomotive address; cast to {@code DccLocoAddress}
 * @param control not read by this method
 */
public void requestThrottleSetup(LocoAddress address, boolean control) {
  if (currentThrottle == null) {
    log.warn("requestThrottleSetup should preserve actual address object, not use ints");
    final int number = ((DccLocoAddress) address).getNumber();
    currentThrottle = new Throttle(number);
    notifyThrottleKnown(currentThrottle, currentThrottle.getLocoAddress());
  } else {
    // Only a single throttle is supported, so a second request is refused.
    log.error("DCC direct cannot handle more than one throttle now");
    final DccLocoAddress rejected = (DccLocoAddress) address;
    final String reason = "DCC direct cannot handle more than one throttle " + address;
    failedThrottleRequest(rejected, reason);
  }
}
/** * Helper method for handling role based Access throttling * * @param messageContext MessageContext - message level states * @return true if access is allowed through concurrent throttling ,o.w false */ private boolean doRoleBasedAccessThrottling( Throttle throttle, MessageContext messageContext, boolean isClusteringEnable) throws AxisFault, ThrottleException { boolean canAccess = true; ConfigurationContext cc = messageContext.getConfigurationContext(); String throttleId = throttle.getId(); String key = null; ConcurrentAccessController cac = null; if (isClusteringEnable) { // for clustered env.,gets it from axis configuration context key = ThrottleConstants.THROTTLE_PROPERTY_PREFIX + throttleId + ThrottleConstants.CAC_SUFFIX; cac = (ConcurrentAccessController) cc.getProperty(key); } if (messageContext.getFLOW() == MessageContext.IN_FLOW) { // gets the remote caller role name String consumerKey = null; boolean isAuthenticated = false; String roleID = null; HttpServletRequest request = (HttpServletRequest) messageContext.getPropertyNonReplicable(HTTPConstants.MC_HTTP_SERVLETREQUEST); if (request != null) { String oAuthHeader = request.getHeader("OAuth"); // consumerKey = Utils.extractCustomerKeyFromAuthHeader(oAuthHeader); // roleID = Utils.extractCustomerKeyFromAuthHeader(oAuthHeader); DummyAuthenticator authFuture = new DummyAuthenticator(oAuthHeader); consumerKey = authFuture.getAPIKey(); new DummyHandler().authenticateUser(authFuture); roleID = (String) authFuture.getAuthorizedRoles().get(0); isAuthenticated = authFuture.isAuthenticated(); } if (!isAuthenticated) { throw new AxisFault( " Access deny for a " + "caller with consumer Key: " + consumerKey + " " + " : Reason : Authentication failure"); } // Domain name based throttling // check whether a configuration has been defined for this role name or not String consumerRoleID = null; if (consumerKey != null && isAuthenticated) { // loads the ThrottleContext ThrottleContext context = 
throttle.getThrottleContext(ThrottleConstants.ROLE_BASED_THROTTLE_KEY); if (context != null) { // Loads the ThrottleConfiguration ThrottleConfiguration config = context.getThrottleConfiguration(); if (config != null) { // check for configuration for this caller consumerRoleID = config.getConfigurationKeyOfCaller(roleID); if (consumerRoleID != null) { // If this is a clustered env. if (isClusteringEnable) { context.setConfigurationContext(cc); context.setThrottleId(throttleId); } AccessInformation infor = roleBasedAccessController.canAccess(context, consumerKey, consumerRoleID); StatCollector.collect(infor, consumerKey, ThrottleConstants.ROLE_BASE); // check for the permission for access if (!infor.isAccessAllowed()) { // In the case of both of concurrency throttling and // rate based throttling have enabled , // if the access rate less than maximum concurrent access , // then it is possible to occur death situation.To avoid that reset, // if the access has denied by rate based throttling if (cac != null) { cac.incrementAndGet(); // set back if this is a clustered env if (isClusteringEnable) { cc.setProperty(key, cac); // replicate the current state of ConcurrentAccessController try { if (debugOn) { log.debug( "Going to replicates the " + "states of the ConcurrentAccessController" + " with key : " + key); } Replicator.replicate(cc, new String[] {key}); } catch (ClusteringFault clusteringFault) { log.error("Error during replicating states ", clusteringFault); } } } throw new AxisFault( " Access deny for a " + "caller with Domain " + consumerKey + " " + " : Reason : " + infor.getFaultReason()); } } else { if (debugOn) { log.debug( "Could not find the Throttle Context for role-Based " + "Throttling for role name " + consumerKey + " Throttling for this " + "role name may not be configured from policy"); } } } } } else { if (debugOn) { log.debug("Could not find the role of the caller - role based throttling NOT applied"); } } } return canAccess; }
/** * processing through the throttle 1) concurrent throttling 2) access rate based throttling - * domain or ip * * @param throttle The Throttle object - holds all configuration and state data of the throttle * @param messageContext The MessageContext , that holds all data per message basis * @throws AxisFault Throws when access must deny for caller * @throws ThrottleException ThrottleException */ public void process(Throttle throttle, MessageContext messageContext) throws ThrottleException, AxisFault { String throttleId = throttle.getId(); ConfigurationContext cc = messageContext.getConfigurationContext(); // check the env - whether clustered or not boolean isClusteringEnable = false; ClusteringAgent clusteringAgent = null; if (cc != null) { clusteringAgent = cc.getAxisConfiguration().getClusteringAgent(); } if (clusteringAgent != null && clusteringAgent.getStateManager() != null) { isClusteringEnable = true; } // Get the concurrent access controller ConcurrentAccessController cac; String key = null; if (isClusteringEnable) { // for clustered env.,gets it from axis configuration context key = ThrottleConstants.THROTTLE_PROPERTY_PREFIX + throttleId + ThrottleConstants.CAC_SUFFIX; cac = (ConcurrentAccessController) cc.getProperty(key); } else { // for non-clustered env.,gets it from axis configuration context cac = throttle.getConcurrentAccessController(); } // check for concurrent access boolean canAccess = doConcurrentThrottling(cac, messageContext); if (canAccess) { // if the concurrent access is success then // do the access rate based throttling if (messageContext.getFLOW() == MessageContext.IN_FLOW) { // gets the remote caller domain name String domain = null; HttpServletRequest request = (HttpServletRequest) messageContext.getPropertyNonReplicable(HTTPConstants.MC_HTTP_SERVLETREQUEST); if (request != null) { domain = request.getRemoteHost(); } // Domain name based throttling // check whether a configuration has been defined for this domain name or not String 
callerId = null; if (domain != null) { // loads the ThrottleContext ThrottleContext context = throttle.getThrottleContext(ThrottleConstants.DOMAIN_BASED_THROTTLE_KEY); if (context != null) { // Loads the ThrottleConfiguration ThrottleConfiguration config = context.getThrottleConfiguration(); if (config != null) { // check for configuration for this caller callerId = config.getConfigurationKeyOfCaller(domain); if (callerId != null) { // If this is a clustered env. if (isClusteringEnable) { context.setConfigurationContext(cc); context.setThrottleId(throttleId); } AccessInformation infor = accessRateController.canAccess( context, callerId, ThrottleConstants.DOMAIN_BASE); StatCollector.collect(infor, domain, ThrottleConstants.DOMAIN_BASE); // check for the permission for access if (!infor.isAccessAllowed()) { // In the case of both of concurrency throttling and // rate based throttling have enabled , // if the access rate less than maximum concurrent access , // then it is possible to occur death situation.To avoid that reset, // if the access has denied by rate based throttling if (cac != null) { cac.incrementAndGet(); // set back if this is a clustered env if (isClusteringEnable) { cc.setProperty(key, cac); // replicate the current state of ConcurrentAccessController try { if (debugOn) { log.debug( "Going to replicates the " + "states of the ConcurrentAccessController" + " with key : " + key); } Replicator.replicate(cc, new String[] {key}); } catch (ClusteringFault clusteringFault) { log.error("Error during replicating states ", clusteringFault); } } } throw new AxisFault( " Access deny for a " + "caller with Domain " + domain + " " + " : Reason : " + infor.getFaultReason()); } } else { if (debugOn) { log.debug( "Could not find the Throttle Context for domain-Based " + "Throttling for domain name " + domain + " Throttling for this " + "domain name may not be configured from policy"); } } } } } else { if (debugOn) { log.debug("Could not find the domain of the caller - 
IP-based throttling may occur"); } } // IP based throttling - Only if there is no configuration for caller domain name if (callerId == null) { String ip = (String) messageContext.getProperty(MessageContext.REMOTE_ADDR); if (ip != null) { // loads IP based throttle context ThrottleContext context = throttle.getThrottleContext(ThrottleConstants.IP_BASED_THROTTLE_KEY); if (context != null) { // Loads the ThrottleConfiguration ThrottleConfiguration config = context.getThrottleConfiguration(); if (config != null) { // check for configuration for this ip callerId = config.getConfigurationKeyOfCaller(ip); if (callerId != null) { // for clustered env. if (isClusteringEnable) { context.setConfigurationContext(cc); context.setThrottleId(throttleId); } AccessInformation infor = accessRateController.canAccess(context, callerId, ThrottleConstants.IP_BASE); // check for the permission for access StatCollector.collect(infor, ip, ThrottleConstants.IP_BASE); if (!infor.isAccessAllowed()) { // In the case of both of concurrency throttling and // rate based throttling have enabled , // if the access rate less than maximum concurrent access , // then it is possible to occur death situation.To avoid that reset, // if the access has denied by rate based throttling if (cac != null) { cac.incrementAndGet(); // set back if this is a clustered env if (isClusteringEnable) { cc.setProperty(key, cac); // replicate the current state of ConcurrentAccessController try { if (debugOn) { log.debug( "Going to replicates the " + "states of the ConcurrentAccessController" + " with key : " + key); } Replicator.replicate(cc, new String[] {key}); } catch (ClusteringFault clusteringFault) { log.error("Error during replicating states ", clusteringFault); } } } throw new AxisFault( " Access deny for a " + "caller with IP " + ip + " " + " : Reason : " + infor.getFaultReason()); } } } } else { if (debugOn) { log.debug("Could not find the throttle Context for IP-Based throttling"); } } } else { if (debugOn) { 
log.debug( "Could not find the IP address of the caller " + "- throttling will not occur"); } } } } // all the replication functionality of the access rate based throttling handles by itself // just replicate the current state of ConcurrentAccessController if (isClusteringEnable && cac != null) { try { if (debugOn) { log.debug( "Going to replicates the states of the ConcurrentAccessController" + " with key : " + key); } Replicator.replicate(cc, new String[] {key}); } catch (ClusteringFault clusteringFault) { log.error("Error during replicating states ", clusteringFault); } } // finally engage rolebased access throttling if available doRoleBasedAccessThrottling(throttle, messageContext, isClusteringEnable); } else { // replicate the current state of ConcurrentAccessController if (isClusteringEnable) { try { if (debugOn) { log.debug( "Going to replicates the states of the ConcurrentAccessController" + " with key : " + key); } Replicator.replicate(cc, new String[] {key}); } catch (ClusteringFault clusteringFault) { log.error("Error during replicating states ", clusteringFault); } } throw new AxisFault( "Access has currently been denied since " + " maximum concurrent access have exceeded"); } }
/** Execute a query plan using Hadoop. */ @SuppressWarnings({"deprecation", "unchecked"}) @Override public int execute(DriverContext driverContext) { IOPrepareCache ioPrepareCache = IOPrepareCache.get(); ioPrepareCache.clear(); boolean success = true; Context ctx = driverContext.getCtx(); boolean ctxCreated = false; Path emptyScratchDir; MapWork mWork = work.getMapWork(); ReduceWork rWork = work.getReduceWork(); try { if (ctx == null) { ctx = new Context(job); ctxCreated = true; } emptyScratchDir = ctx.getMRTmpPath(); FileSystem fs = emptyScratchDir.getFileSystem(job); fs.mkdirs(emptyScratchDir); } catch (IOException e) { e.printStackTrace(); console.printError( "Error launching map-reduce job", "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); return 5; } HiveFileFormatUtils.prepareJobOutput(job); // See the javadoc on HiveOutputFormatImpl and HadoopShims.prepareJobOutput() job.setOutputFormat(HiveOutputFormatImpl.class); job.setMapperClass(ExecMapper.class); job.setMapOutputKeyClass(HiveKey.class); job.setMapOutputValueClass(BytesWritable.class); try { String partitioner = HiveConf.getVar(job, ConfVars.HIVEPARTITIONER); job.setPartitionerClass(JavaUtils.loadClass(partitioner)); } catch (ClassNotFoundException e) { throw new RuntimeException(e.getMessage(), e); } if (mWork.getNumMapTasks() != null) { job.setNumMapTasks(mWork.getNumMapTasks().intValue()); } if (mWork.getMaxSplitSize() != null) { HiveConf.setLongVar( job, HiveConf.ConfVars.MAPREDMAXSPLITSIZE, mWork.getMaxSplitSize().longValue()); } if (mWork.getMinSplitSize() != null) { HiveConf.setLongVar( job, HiveConf.ConfVars.MAPREDMINSPLITSIZE, mWork.getMinSplitSize().longValue()); } if (mWork.getMinSplitSizePerNode() != null) { HiveConf.setLongVar( job, HiveConf.ConfVars.MAPREDMINSPLITSIZEPERNODE, mWork.getMinSplitSizePerNode().longValue()); } if (mWork.getMinSplitSizePerRack() != null) { HiveConf.setLongVar( job, HiveConf.ConfVars.MAPREDMINSPLITSIZEPERRACK, 
mWork.getMinSplitSizePerRack().longValue()); } job.setNumReduceTasks(rWork != null ? rWork.getNumReduceTasks().intValue() : 0); job.setReducerClass(ExecReducer.class); // set input format information if necessary setInputAttributes(job); // Turn on speculative execution for reducers boolean useSpeculativeExecReducers = HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVESPECULATIVEEXECREDUCERS); HiveConf.setBoolVar( job, HiveConf.ConfVars.HADOOPSPECULATIVEEXECREDUCERS, useSpeculativeExecReducers); String inpFormat = HiveConf.getVar(job, HiveConf.ConfVars.HIVEINPUTFORMAT); if (mWork.isUseBucketizedHiveInputFormat()) { inpFormat = BucketizedHiveInputFormat.class.getName(); } LOG.info("Using " + inpFormat); try { job.setInputFormat(JavaUtils.loadClass(inpFormat)); } catch (ClassNotFoundException e) { throw new RuntimeException(e.getMessage(), e); } // No-Op - we don't really write anything here .. job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); // Transfer HIVEAUXJARS and HIVEADDEDJARS to "tmpjars" so hadoop understands // it String auxJars = HiveConf.getVar(job, HiveConf.ConfVars.HIVEAUXJARS); String addedJars = HiveConf.getVar(job, HiveConf.ConfVars.HIVEADDEDJARS); if (StringUtils.isNotBlank(auxJars) || StringUtils.isNotBlank(addedJars)) { String allJars = StringUtils.isNotBlank(auxJars) ? (StringUtils.isNotBlank(addedJars) ? 
addedJars + "," + auxJars : auxJars) : addedJars; LOG.info("adding libjars: " + allJars); initializeFiles("tmpjars", allJars); } // Transfer HIVEADDEDFILES to "tmpfiles" so hadoop understands it String addedFiles = HiveConf.getVar(job, HiveConf.ConfVars.HIVEADDEDFILES); if (StringUtils.isNotBlank(addedFiles)) { initializeFiles("tmpfiles", addedFiles); } int returnVal = 0; boolean noName = StringUtils.isEmpty(HiveConf.getVar(job, HiveConf.ConfVars.HADOOPJOBNAME)); if (noName) { // This is for a special case to ensure unit tests pass HiveConf.setVar(job, HiveConf.ConfVars.HADOOPJOBNAME, "JOB" + Utilities.randGen.nextInt()); } String addedArchives = HiveConf.getVar(job, HiveConf.ConfVars.HIVEADDEDARCHIVES); // Transfer HIVEADDEDARCHIVES to "tmparchives" so hadoop understands it if (StringUtils.isNotBlank(addedArchives)) { initializeFiles("tmparchives", addedArchives); } try { MapredLocalWork localwork = mWork.getMapRedLocalWork(); if (localwork != null && localwork.hasStagedAlias()) { if (!ShimLoader.getHadoopShims().isLocalMode(job)) { Path localPath = localwork.getTmpPath(); Path hdfsPath = mWork.getTmpHDFSPath(); FileSystem hdfs = hdfsPath.getFileSystem(job); FileSystem localFS = localPath.getFileSystem(job); FileStatus[] hashtableFiles = localFS.listStatus(localPath); int fileNumber = hashtableFiles.length; String[] fileNames = new String[fileNumber]; for (int i = 0; i < fileNumber; i++) { fileNames[i] = hashtableFiles[i].getPath().getName(); } // package and compress all the hashtable files to an archive file String stageId = this.getId(); String archiveFileName = Utilities.generateTarFileName(stageId); localwork.setStageID(stageId); CompressionUtils.tar(localPath.toUri().getPath(), fileNames, archiveFileName); Path archivePath = Utilities.generateTarPath(localPath, stageId); LOG.info("Archive " + hashtableFiles.length + " hash table files to " + archivePath); // upload archive file to hdfs Path hdfsFilePath = Utilities.generateTarPath(hdfsPath, stageId); short 
replication = (short) job.getInt("mapred.submit.replication", 10); hdfs.copyFromLocalFile(archivePath, hdfsFilePath); hdfs.setReplication(hdfsFilePath, replication); LOG.info("Upload 1 archive file from" + archivePath + " to: " + hdfsFilePath); // add the archive file to distributed cache DistributedCache.createSymlink(job); DistributedCache.addCacheArchive(hdfsFilePath.toUri(), job); LOG.info( "Add 1 archive file to distributed cache. Archive file: " + hdfsFilePath.toUri()); } } work.configureJobConf(job); List<Path> inputPaths = Utilities.getInputPaths(job, mWork, emptyScratchDir, ctx, false); Utilities.setInputPaths(job, inputPaths); Utilities.setMapRedWork(job, work, ctx.getMRTmpPath()); if (mWork.getSamplingType() > 0 && rWork != null && job.getNumReduceTasks() > 1) { try { handleSampling(ctx, mWork, job); job.setPartitionerClass(HiveTotalOrderPartitioner.class); } catch (IllegalStateException e) { console.printInfo("Not enough sampling data.. Rolling back to single reducer task"); rWork.setNumReduceTasks(1); job.setNumReduceTasks(1); } catch (Exception e) { LOG.error("Sampling error", e); console.printError( e.toString(), "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); rWork.setNumReduceTasks(1); job.setNumReduceTasks(1); } } // remove the pwd from conf file so that job tracker doesn't show this // logs String pwd = HiveConf.getVar(job, HiveConf.ConfVars.METASTOREPWD); if (pwd != null) { HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, "HIVE"); } JobClient jc = new JobClient(job); // make this client wait if job tracker is not behaving well. 
Throttle.checkJobTracker(job, LOG); if (mWork.isGatheringStats() || (rWork != null && rWork.isGatheringStats())) { // initialize stats publishing table StatsPublisher statsPublisher; StatsFactory factory = StatsFactory.newFactory(job); if (factory != null) { statsPublisher = factory.getStatsPublisher(); List<String> statsTmpDir = Utilities.getStatsTmpDirs(mWork, job); if (rWork != null) { statsTmpDir.addAll(Utilities.getStatsTmpDirs(rWork, job)); } StatsCollectionContext sc = new StatsCollectionContext(job); sc.setStatsTmpDirs(statsTmpDir); if (!statsPublisher.init(sc)) { // creating stats table if not exists if (HiveConf.getBoolVar(job, HiveConf.ConfVars.HIVE_STATS_RELIABLE)) { throw new HiveException( ErrorMsg.STATSPUBLISHER_INITIALIZATION_ERROR.getErrorCodedMsg()); } } } } Utilities.createTmpDirs(job, mWork); Utilities.createTmpDirs(job, rWork); SessionState ss = SessionState.get(); if (HiveConf.getVar(job, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez") && ss != null) { TezSessionState session = ss.getTezSession(); TezSessionPoolManager.getInstance().close(session, true); } // Finally SUBMIT the JOB! 
rj = jc.submitJob(job); // replace it back if (pwd != null) { HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, pwd); } returnVal = jobExecHelper.progress(rj, jc, ctx.getHiveTxnManager()); success = (returnVal == 0); } catch (Exception e) { e.printStackTrace(); String mesg = " with exception '" + Utilities.getNameMessage(e) + "'"; if (rj != null) { mesg = "Ended Job = " + rj.getJobID() + mesg; } else { mesg = "Job Submission failed" + mesg; } // Has to use full name to make sure it does not conflict with // org.apache.commons.lang.StringUtils console.printError(mesg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); success = false; returnVal = 1; } finally { Utilities.clearWork(job); try { if (ctxCreated) { ctx.clear(); } if (rj != null) { if (returnVal != 0) { rj.killJob(); } jobID = rj.getID().toString(); } } catch (Exception e) { LOG.warn("Failed while cleaning up ", e); } finally { HadoopJobExecHelper.runningJobs.remove(rj); } } // get the list of Dynamic partition paths try { if (rj != null) { if (mWork.getAliasToWork() != null) { for (Operator<? extends OperatorDesc> op : mWork.getAliasToWork().values()) { op.jobClose(job, success); } } if (rWork != null) { rWork.getReducer().jobClose(job, success); } } } catch (Exception e) { // jobClose needs to execute successfully otherwise fail task if (success) { success = false; returnVal = 3; String mesg = "Job Commit failed with exception '" + Utilities.getNameMessage(e) + "'"; console.printError(mesg, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e)); } } return (returnVal); }