/**
 * Processes the distributed-cache entries recorded in {@code conf} by delegating to
 * {@code parseDistributedCacheArtifacts} twice: first for cache archives, then for cache files.
 *
 * @param conf configuration from which the cache URIs, timestamps, sizes, visibilities and
 *     classpath entries are read
 * @param localResources map handed through to {@code parseDistributedCacheArtifacts}
 * @throws IOException propagated from the helpers invoked here
 */
public static void setupDistributedCache(
      Configuration conf, Map<String, LocalResource> localResources) throws IOException {

    // Archives: gather every per-archive attribute, then hand them over in one call.
    URI[] archiveUris = DistributedCache.getCacheArchives(conf);
    long[] archiveTimestamps = parseTimeStamps(DistributedCache.getArchiveTimestamps(conf));
    long[] archiveSizes = getFileSizes(conf, MRJobConfig.CACHE_ARCHIVES_SIZES);
    boolean[] archiveVisibilities = DistributedCache.getArchiveVisibilities(conf);
    Path[] archiveClassPaths = DistributedCache.getArchiveClassPaths(conf);
    parseDistributedCacheArtifacts(
        conf,
        localResources,
        LocalResourceType.ARCHIVE,
        archiveUris,
        archiveTimestamps,
        archiveSizes,
        archiveVisibilities,
        archiveClassPaths);

    // Plain files: same sequence, using the file-specific getters and size key.
    URI[] fileUris = DistributedCache.getCacheFiles(conf);
    long[] fileTimestamps = parseTimeStamps(DistributedCache.getFileTimestamps(conf));
    long[] fileSizes = getFileSizes(conf, MRJobConfig.CACHE_FILES_SIZES);
    boolean[] fileVisibilities = DistributedCache.getFileVisibilities(conf);
    Path[] fileClassPaths = DistributedCache.getFileClassPaths(conf);
    parseDistributedCacheArtifacts(
        conf,
        localResources,
        LocalResourceType.FILE,
        fileUris,
        fileTimestamps,
        fileSizes,
        fileVisibilities,
        fileClassPaths);
  }
Exemple #2
0
  /**
   * Configures and runs the "sampleToMySql" job: text input is read from a path and the output
   * is sent through {@code DBOutputFormat} to the table {@code sofa_wf_sample} (fields
   * {@code datasetid}, {@code columns}, {@code deltag}).
   *
   * <p>Exactly seven arguments must remain after the generic Hadoop options are stripped:
   *
   * <ol>
   *   <li>input path
   *   <li>dataset id (stored in the configuration under {@code datasetid})
   *   <li>sample number (stored in the configuration under {@code sampleNum})
   *   <li>JDBC driver class, e.g. {@code com.mysql.jdbc.Driver}
   *   <li>JDBC URL, e.g. {@code jdbc:mysql://127.0.0.1:3306/etl}
   *   <li>MySQL user
   *   <li>MySQL password
   * </ol>
   *
   * <p>The process exits with status 7 when the argument count is wrong, 0 when the job
   * completes successfully and 1 otherwise.
   *
   * @param args command-line arguments, optionally preceded by generic Hadoop options
   * @throws IOException propagated from job setup or submission
   * @throws InterruptedException propagated from {@code waitForCompletion}
   * @throws ClassNotFoundException propagated from {@code waitForCompletion}
   */
  public static void main(String args[])
      throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    // Validate the arguments before doing any job setup so a bad invocation fails fast.
    if (otherArgs.length != 7) {
      // The required count is 7; the message previously claimed 3.
      System.out.println("args:" + otherArgs.length + ", it should be 7");
      for (String arg : otherArgs) {
        System.out.println(arg);
      }
      System.exit(7);
    }

    // Put the MySQL JDBC driver jar on the task classpath.
    DistributedCache.addFileToClassPath(
        new Path("/user/hdfs/examples/libs/mysql-connector-java-5.1.22-bin.jar"), conf);

    conf.set("datasetid", otherArgs[1]);
    conf.set("sampleNum", otherArgs[2]);
    // Remaining four arguments, in order: driver class, JDBC URL, user, password.
    DBConfiguration.configureDB(conf, otherArgs[3], otherArgs[4], otherArgs[5], otherArgs[6]);

    // The Job is created only after conf is fully populated, as in the original ordering.
    Job job = new Job(conf, "sampleToMySql");
    job.setJarByClass(toMysql.class);

    job.setMapperClass(ConnMysqlMapper.class);
    job.setReducerClass(ConnMysqlReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(DBOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));

    DBOutputFormat.setOutput(job, "sofa_wf_sample", "datasetid", "columns", "deltag");
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
  // Handles the resources supplied through -files, -libjars and -archives, plus the job jar:
  // each is passed to the copy helpers for the job submission directory and then registered
  // on the job's configuration via DistributedCache.
  // Note carried over from the original: the default replication is 10.
  private void copyAndConfigureFiles(Job job, Path submitJobDir, short replication)
      throws IOException {
    final Configuration conf = job.getConfiguration();
    final boolean usedGenericParser = conf.getBoolean(Job.USED_GENERIC_PARSER, false);
    if (!usedGenericParser) {
      LOG.warn(
          "Use GenericOptionsParser for parsing the arguments. "
              + "Applications should implement Tool for the same.");
    }

    // Comma-separated resource lists taken from the user's configuration.
    final String fileList = conf.get("tmpfiles");
    final String libjarList = conf.get("tmpjars");
    final String archiveList = conf.get("tmparchives");
    // Per the original note, this corresponds to "mapreduce.job.jar".
    final String jobJarLocation = job.getJar();

    // The job is copied to the JobTracker's file system under a temporary name. That lets
    // DFS work, and on the local fs it gives UNIX-like object loading semantics: if the job
    // file is deleted right after submission, the submission can still run to completion.
    LOG.debug("default FileSystem: " + jtFs.getUri());
    if (jtFs.exists(submitJobDir)) {
      throw new IOException(
          "Not submitting job. Job directory "
              + submitJobDir
              + " already exists!! This is unexpected.Please check what's there in"
              + " that directory");
    }

    // Qualify the directory against jtFs, then keep only the path component of the result.
    final Path stagingDir = new Path(jtFs.makeQualified(submitJobDir).toUri().getPath());
    final FsPermission dirPermission = new FsPermission(JobSubmissionFiles.JOB_DIR_PERMISSION);
    FileSystem.mkdirs(jtFs, stagingDir, dirPermission);

    final Path stagedFilesDir = JobSubmissionFiles.getJobDistCacheFiles(stagingDir);
    final Path stagedArchivesDir = JobSubmissionFiles.getJobDistCacheArchives(stagingDir);
    final Path stagedLibjarsDir = JobSubmissionFiles.getJobDistCacheLibjars(stagingDir);

    // -files: copy each entry, then register it as a cache file, keeping its URI fragment.
    if (fileList != null) {
      FileSystem.mkdirs(jtFs, stagedFilesDir, dirPermission);
      for (String fileEntry : fileList.split(",")) {
        URI sourceUri;
        try {
          sourceUri = new URI(fileEntry);
        } catch (URISyntaxException e) {
          throw new IllegalArgumentException(e);
        }
        Path stagedFile = copyRemoteFiles(stagedFilesDir, new Path(sourceUri), conf, replication);
        try {
          DistributedCache.addCacheFile(getPathURI(stagedFile, sourceUri.getFragment()), conf);
        } catch (URISyntaxException ue) {
          // A URI syntax failure is not expected here.
          throw new IOException("Failed to create uri for " + fileEntry, ue);
        }
        DistributedCache.createSymlink(conf);
      }
    }

    // -libjars: copy each jar and add the path component of the copy to the classpath.
    if (libjarList != null) {
      FileSystem.mkdirs(jtFs, stagedLibjarsDir, dirPermission);
      for (String jarEntry : libjarList.split(",")) {
        Path stagedJar = copyRemoteFiles(stagedLibjarsDir, new Path(jarEntry), conf, replication);
        Path classpathEntry = new Path(stagedJar.toUri().getPath());
        DistributedCache.addFileToClassPath(classpathEntry, conf);
      }
    }

    // -archives: same sequence as -files, registered as cache archives instead.
    if (archiveList != null) {
      FileSystem.mkdirs(jtFs, stagedArchivesDir, dirPermission);
      for (String archiveEntry : archiveList.split(",")) {
        URI sourceUri;
        try {
          sourceUri = new URI(archiveEntry);
        } catch (URISyntaxException e) {
          throw new IllegalArgumentException(e);
        }
        Path stagedArchive =
            copyRemoteFiles(stagedArchivesDir, new Path(sourceUri), conf, replication);
        try {
          DistributedCache.addCacheArchive(
              getPathURI(stagedArchive, sourceUri.getFragment()), conf);
        } catch (URISyntaxException ue) {
          // A URI syntax failure is not expected here.
          throw new IOException("Failed to create uri for " + archiveEntry, ue);
        }
        DistributedCache.createSymlink(conf);
      }
    }

    if (jobJarLocation == null) {
      LOG.warn(
          "No job jar file set.  User classes may not be found. "
              + "See Job or Job#setJar(String).");
    } else {
      // An unnamed job takes the jar's file name as its job name.
      if ("".equals(job.getJobName())) {
        job.setJobName(new Path(jobJarLocation).getName());
      }
      // Copy the jar to the JobTracker's fs and point the job at the copy.
      final Path localJobJar = new Path(jobJarLocation);
      final Path stagedJobJar = JobSubmissionFiles.getJobJar(stagingDir);
      copyJar(localJobJar, stagedJobJar, replication);
      job.setJar(stagedJobJar.toString());
    }

    // Set the timestamps of the archives and files,
    ClientDistributedCacheManager.determineTimestamps(conf);
    // then their public/private visibility,
    ClientDistributedCacheManager.determineCacheVisibilities(conf);
    // and finally get a delegation token for each cached file.
    ClientDistributedCacheManager.getDelegationTokens(conf, job.getCredentials());
  }