public static void setupDistributedCache( Configuration conf, Map<String, LocalResource> localResources) throws IOException { // Cache archives parseDistributedCacheArtifacts( conf, localResources, LocalResourceType.ARCHIVE, DistributedCache.getCacheArchives(conf), parseTimeStamps(DistributedCache.getArchiveTimestamps(conf)), getFileSizes(conf, MRJobConfig.CACHE_ARCHIVES_SIZES), DistributedCache.getArchiveVisibilities(conf), DistributedCache.getArchiveClassPaths(conf)); // Cache files parseDistributedCacheArtifacts( conf, localResources, LocalResourceType.FILE, DistributedCache.getCacheFiles(conf), parseTimeStamps(DistributedCache.getFileTimestamps(conf)), getFileSizes(conf, MRJobConfig.CACHE_FILES_SIZES), DistributedCache.getFileVisibilities(conf), DistributedCache.getFileClassPaths(conf)); }
/**
 * Entry point of the "sampleToMySql" job: reads text input and writes the
 * reducer output to the {@code sofa_wf_sample} table through
 * {@code DBOutputFormat}.
 *
 * <p>After the generic Hadoop options are stripped, exactly seven arguments
 * are required:
 * <ol>
 *   <li>input path</li>
 *   <li>dataset ID (stored in the configuration under {@code datasetid})</li>
 *   <li>sample number (stored in the configuration under {@code sampleNum})</li>
 *   <li>JDBC driver class, e.g. {@code com.mysql.jdbc.Driver}</li>
 *   <li>JDBC URL, e.g. {@code jdbc:mysql://127.0.0.1:3306/etl}</li>
 *   <li>user for mysql</li>
 *   <li>password for mysql</li>
 * </ol>
 *
 * <p>The process exits with status 7 when the argument count is wrong,
 * 0 when the job completes successfully and 1 otherwise.
 *
 * @param args command-line arguments, including any generic Hadoop options
 * @throws IOException if job setup or submission fails
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be loaded
 */
public static void main(String[] args)
    throws IOException, InterruptedException, ClassNotFoundException {
  // Single source of truth for the argument count, so the check and the
  // usage message cannot drift apart (the message used to claim 3).
  final int requiredArgCount = 7;

  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

  // The MySQL connector must be on the task classpath for DBOutputFormat.
  DistributedCache.addFileToClassPath(
      new Path("/user/hdfs/examples/libs/mysql-connector-java-5.1.22-bin.jar"), conf);

  if (otherArgs.length != requiredArgCount) {
    System.out.println("args:" + otherArgs.length + ",is should be " + requiredArgCount);
    // Echo what was actually received to help diagnose quoting problems.
    for (int i = 0; i < otherArgs.length; i++) {
      System.out.println(otherArgs[i]);
    }
    System.exit(7);
  }

  conf.set("datasetid", otherArgs[1]);
  conf.set("sampleNum", otherArgs[2]);
  // Arguments 4-7: driver class, JDBC URL, user name, password.
  DBConfiguration.configureDB(conf, otherArgs[3], otherArgs[4], otherArgs[5], otherArgs[6]);

  Job job = new Job(conf, "sampleToMySql");
  job.setJarByClass(toMysql.class);
  job.setMapperClass(ConnMysqlMapper.class);
  job.setReducerClass(ConnMysqlReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(DBOutputFormat.class);

  FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
  // Target table followed by the column names written for each record.
  DBOutputFormat.setOutput(job, "sofa_wf_sample", "datasetid", "columns", "deltag");

  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
// configures -files, -libjars and -archives. //// default replication is 10 private void copyAndConfigureFiles(Job job, Path submitJobDir, short replication) throws IOException { Configuration conf = job.getConfiguration(); if (!(conf.getBoolean(Job.USED_GENERIC_PARSER, false))) { LOG.warn( "Use GenericOptionsParser for parsing the arguments. " + "Applications should implement Tool for the same."); } // get all the command line arguments passed in by the user conf String files = conf.get("tmpfiles"); String libjars = conf.get("tmpjars"); String archives = conf.get("tmparchives"); //// "mapreduce.job.jar" String jobJar = job.getJar(); // // Figure out what fs the JobTracker is using. Copy the // job to it, under a temporary name. This allows DFS to work, // and under the local fs also provides UNIX-like object loading // semantics. (that is, if the job file is deleted right after // submission, we can still run the submission to completion) // // Create a number of filenames in the JobTracker's fs namespace LOG.debug("default FileSystem: " + jtFs.getUri()); if (jtFs.exists(submitJobDir)) { throw new IOException( "Not submitting job. Job directory " + submitJobDir + " already exists!! 
This is unexpected.Please check what's there in" + " that directory"); } submitJobDir = jtFs.makeQualified(submitJobDir); submitJobDir = new Path(submitJobDir.toUri().getPath()); FsPermission mapredSysPerms = new FsPermission(JobSubmissionFiles.JOB_DIR_PERMISSION); FileSystem.mkdirs(jtFs, submitJobDir, mapredSysPerms); Path filesDir = JobSubmissionFiles.getJobDistCacheFiles(submitJobDir); Path archivesDir = JobSubmissionFiles.getJobDistCacheArchives(submitJobDir); Path libjarsDir = JobSubmissionFiles.getJobDistCacheLibjars(submitJobDir); // add all the command line files/ jars and archive // first copy them to jobtrackers filesystem if (files != null) { FileSystem.mkdirs(jtFs, filesDir, mapredSysPerms); String[] fileArr = files.split(","); for (String tmpFile : fileArr) { URI tmpURI = null; try { tmpURI = new URI(tmpFile); } catch (URISyntaxException e) { throw new IllegalArgumentException(e); } Path tmp = new Path(tmpURI); Path newPath = copyRemoteFiles(filesDir, tmp, conf, replication); try { URI pathURI = getPathURI(newPath, tmpURI.getFragment()); DistributedCache.addCacheFile(pathURI, conf); } catch (URISyntaxException ue) { // should not throw a uri exception throw new IOException("Failed to create uri for " + tmpFile, ue); } DistributedCache.createSymlink(conf); } } if (libjars != null) { FileSystem.mkdirs(jtFs, libjarsDir, mapredSysPerms); String[] libjarsArr = libjars.split(","); for (String tmpjars : libjarsArr) { Path tmp = new Path(tmpjars); Path newPath = copyRemoteFiles(libjarsDir, tmp, conf, replication); DistributedCache.addFileToClassPath(new Path(newPath.toUri().getPath()), conf); } } if (archives != null) { FileSystem.mkdirs(jtFs, archivesDir, mapredSysPerms); String[] archivesArr = archives.split(","); for (String tmpArchives : archivesArr) { URI tmpURI; try { tmpURI = new URI(tmpArchives); } catch (URISyntaxException e) { throw new IllegalArgumentException(e); } Path tmp = new Path(tmpURI); Path newPath = copyRemoteFiles(archivesDir, tmp, conf, 
replication); try { URI pathURI = getPathURI(newPath, tmpURI.getFragment()); DistributedCache.addCacheArchive(pathURI, conf); } catch (URISyntaxException ue) { // should not throw an uri excpetion throw new IOException("Failed to create uri for " + tmpArchives, ue); } DistributedCache.createSymlink(conf); } } if (jobJar != null) { // copy jar to JobTracker's fs // use jar name if job is not named. if ("".equals(job.getJobName())) { job.setJobName(new Path(jobJar).getName()); } copyJar(new Path(jobJar), JobSubmissionFiles.getJobJar(submitJobDir), replication); job.setJar(JobSubmissionFiles.getJobJar(submitJobDir).toString()); } else { LOG.warn( "No job jar file set. User classes may not be found. " + "See Job or Job#setJar(String)."); } // set the timestamps of the archives and files ClientDistributedCacheManager.determineTimestamps(conf); // set the public/private visibility of the archives and files ClientDistributedCacheManager.determineCacheVisibilities(conf); // get DelegationToken for each cached file ClientDistributedCacheManager.getDelegationTokens(conf, job.getCredentials()); }