示例#1
0
  public static void main(String[] args) throws Exception {
    String inputDirectory = "/home/cs246/Desktop/HW2/input";
    String outputDirectory = "/home/cs246/Desktop/HW2/output";
    String centroidDirectory = "/home/cs246/Desktop/HW2/config";

    int iterations = 20;

    for (int i = 1; i <= iterations; i++) {
      Configuration conf = new Configuration();

      String cFile = centroidDirectory + "/c" + i + ".txt";
      String nextCFile = centroidDirectory + "/c" + (i + 1) + ".txt";
      conf.set("CFILE", cFile);
      conf.set("NEXTCFILE", nextCFile);

      String cFile = centroidDirectory + "/c" + i + ".txt";
      String nextCFile = centroidDirectory + "/c" + (i + 1) + ".txt";
      conf.set("CFILE", cFile);
      conf.set("NEXTCFILE", nextCFile);

      Job job = new Job(conf, "HW2_Q4." + i);
      job.setJarByClass(HW2_Q4.class);
      job.setOutputKeyClass(IntWritable.class);
      job.setOutputValueClass(Text.class);
      job.setMapperClass(Map1.class);
      job.setReducerClass(Reduce1.class);
      job.setInputFormatClass(TextInputFormat.class);
      job.setOutputFormatClass(TextOutputFormat.class);

      FileInputFormat.addInputPath(job, new Path(inputDirectory));
      FileOutputFormat.setOutputPath(job, new Path(outputDirectory + "/output" + i));

      job.waitForCompletion(true);
    }
  }
示例#2
0
  /** Initialize SecondaryNameNode. */
  private void initialize(Configuration conf) throws IOException {
    // initiate Java VM metrics
    JvmMetrics.init("SecondaryNameNode", conf.get("session.id"));

    // Create connection to the namenode.
    shouldRun = true;
    nameNodeAddr = NameNode.getAddress(conf);

    this.conf = conf;
    this.namenode =
        (NamenodeProtocol)
            RPC.waitForProxy(
                NamenodeProtocol.class, NamenodeProtocol.versionID, nameNodeAddr, conf);

    // initialize checkpoint directories
    fsName = getInfoServer();
    checkpointDirs = FSImage.getCheckpointDirs(conf, "/tmp/hadoop/dfs/namesecondary");
    checkpointEditsDirs = FSImage.getCheckpointEditsDirs(conf, "/tmp/hadoop/dfs/namesecondary");
    checkpointImage = new CheckpointStorage(conf);
    checkpointImage.recoverCreate(checkpointDirs, checkpointEditsDirs);

    // Initialize other scheduling parameters from the configuration
    checkpointPeriod = conf.getLong("fs.checkpoint.period", 3600);
    checkpointSize = conf.getLong("fs.checkpoint.size", 4194304);

    // initialize the webserver for uploading files.
    String infoAddr =
        NetUtils.getServerAddress(
            conf,
            "dfs.secondary.info.bindAddress",
            "dfs.secondary.info.port",
            "dfs.secondary.http.address");
    InetSocketAddress infoSocAddr = NetUtils.createSocketAddr(infoAddr);
    infoBindAddress = infoSocAddr.getHostName();
    int tmpInfoPort = infoSocAddr.getPort();
    infoServer = new HttpServer("secondary", infoBindAddress, tmpInfoPort, tmpInfoPort == 0, conf);
    infoServer.setAttribute("name.system.image", checkpointImage);
    this.infoServer.setAttribute("name.conf", conf);
    infoServer.addInternalServlet("getimage", "/getimage", GetImageServlet.class);
    infoServer.start();

    // The web-server port can be ephemeral... ensure we have the correct info
    infoPort = infoServer.getPort();
    conf.set("dfs.secondary.http.address", infoBindAddress + ":" + infoPort);
    LOG.info("Secondary Web-server up at: " + infoBindAddress + ":" + infoPort);
    LOG.warn(
        "Checkpoint Period   :"
            + checkpointPeriod
            + " secs "
            + "("
            + checkpointPeriod / 60
            + " min)");
    LOG.warn(
        "Log Size Trigger    :"
            + checkpointSize
            + " bytes "
            + "("
            + checkpointSize / 1024
            + " KB)");
  }
 /**
  * Add a file path to the current set of classpath entries. It adds the file to cache as well.
  * Intended to be used by user code.
  *
  * @param file Path of the file to be added
  * @param conf Configuration that contains the classpath setting
  * @param fs FileSystem with respect to which {@code archivefile} should be interpreted.
  */
 public static void addFileToClassPath(Path file, Configuration conf, FileSystem fs)
     throws IOException {
   String filepath = file.toUri().getPath();
   String classpath = conf.get("mapred.job.classpath.files");
   conf.set(
       "mapred.job.classpath.files",
       classpath == null ? filepath : classpath + System.getProperty("path.separator") + filepath);
   URI uri = fs.makeQualified(file).toUri();
   addCacheFile(uri, conf);
 }
  /**
   * Add an archive path to the current set of classpath entries. It adds the archive to cache as
   * well. Intended to be used by user code.
   *
   * @param archive Path of the archive to be added
   * @param conf Configuration that contains the classpath setting
   * @param fs FileSystem with respect to which {@code archive} should be interpreted.
   */
  public static void addArchiveToClassPath(Path archive, Configuration conf, FileSystem fs)
      throws IOException {
    String archivepath = archive.toUri().getPath();
    String classpath = conf.get("mapred.job.classpath.archives");
    conf.set(
        "mapred.job.classpath.archives",
        classpath == null
            ? archivepath
            : classpath + System.getProperty("path.separator") + archivepath);
    URI uri = fs.makeQualified(archive).toUri();

    addCacheArchive(uri, conf);
  }
  @Override
  public int run(String[] args) throws Exception {

    String locatorHost = args[0];
    int locatorPort = Integer.parseInt(args[1]);
    String hdfsHomeDir = args[2];

    System.out.println(
        "KnownKeysMRv2 invoked with args (locatorHost = "
            + locatorHost
            + " locatorPort = "
            + locatorPort
            + " hdfsHomeDir = "
            + hdfsHomeDir);

    Configuration conf = getConf();
    conf.set(GFInputFormat.INPUT_REGION, "partitionedRegion");
    conf.set(GFInputFormat.HOME_DIR, hdfsHomeDir);
    conf.setBoolean(GFInputFormat.CHECKPOINT, false);
    conf.set(GFOutputFormat.REGION, "validationRegion");
    conf.set(GFOutputFormat.LOCATOR_HOST, locatorHost);
    conf.setInt(GFOutputFormat.LOCATOR_PORT, locatorPort);

    Job job = Job.getInstance(conf, "knownKeysMRv2");
    job.setInputFormatClass(GFInputFormat.class);
    job.setOutputFormatClass(GFOutputFormat.class);

    job.setMapperClass(KnownKeysMRv2Mapper.class);
    job.setMapOutputKeyClass(GFKey.class);
    job.setMapOutputValueClass(PEIWritable.class);

    job.setReducerClass(KnownKeysMRv2Reducer.class);
    // job.setOutputKeyClass(String.class);
    // job.setOutputValueClass(ValueHolder.class);

    return job.waitForCompletion(false) ? 0 : 1;
  }
示例#6
0
  private static URI addArchiveToClassPathHelper(Path archive, Configuration conf)
      throws IOException {

    String classpath = conf.get("mapred.job.classpath.archives");

    // the scheme/authority use ':' as separator. put the unqualified path in classpath
    String archivePath = archive.toUri().getPath();

    conf.set(
        "mapred.job.classpath.archives",
        classpath == null
            ? archivePath
            : classpath + System.getProperty("path.separator") + archivePath);
    return archive.makeQualified(archive.getFileSystem(conf)).toUri();
  }
示例#7
0
 /**
  * This is to check the timestamp of the archives to be localized
  *
  * @param conf Configuration which stores the timestamp's
  * @param timestamps comma separated list of timestamps of archives. The order should be the same
  *     as the order in which the archives are added.
  */
 public static void setArchiveTimestamps(Configuration conf, String timestamps) {
   conf.set("mapred.cache.archives.timestamps", timestamps);
 }
示例#8
0
 /**
  * Set the configuration with the given set of files
  *
  * @param files The list of files that need to be localized
  * @param conf Configuration which will be changed
  */
 public static void setCacheFiles(URI[] files, Configuration conf) {
   String sfiles = StringUtils.uriToString(files);
   conf.set("mapred.cache.files", sfiles);
 }
示例#9
0
 /**
  * Set the configuration with the given set of archives
  *
  * @param archives The list of archives that need to be localized
  * @param conf Configuration which will be changed
  */
 public static void setCacheArchives(URI[] archives, Configuration conf) {
   String sarchives = StringUtils.uriToString(archives);
   conf.set("mapred.cache.archives", sarchives);
 }
 /**
  * Add a archive that has been localized to the conf. Used by internal DistributedCache code.
  *
  * @param conf The conf to modify to contain the localized caches
  * @param str a comma separated list of local archives
  */
 public static void addLocalArchives(Configuration conf, String str) {
   String archives = conf.get(CACHE_LOCALARCHIVES);
   conf.set(CACHE_LOCALARCHIVES, archives == null ? str : archives + "," + str);
 }
示例#11
0
 FileDataGenNew(String HDFSMaster) {
   fsConf.set("fs.default.name", HDFSMaster);
 }
示例#12
0
 /**
  * Add a archives to be localized to the conf
  *
  * @param uri The uri of the cache to be localized
  * @param conf Configuration to add the cache to
  */
 public static void addSharedCacheArchive(URI uri, Configuration conf) {
   String archives = conf.get("mapred.cache.shared.archives");
   conf.set(
       "mapred.cache.shared.archives",
       archives == null ? uri.toString() : archives + "," + uri.toString());
 }
示例#13
0
 /**
  * Set the conf to contain the location for localized archives
  *
  * @param conf The conf to modify to contain the localized caches
  * @param str a comma separated list of local archives
  */
 public static void setLocalArchives(Configuration conf, String str) {
   conf.set("mapred.cache.localArchives", str);
 }
 /**
  * This method allows you to create symlinks in the current working directory of the task to all
  * the cache files/archives. Intended to be used by user code.
  *
  * @param conf the jobconf
  */
 public static void createSymlink(Configuration conf) {
   conf.set(CACHE_SYMLINK, "yes");
 }
 /**
  * This is to check the timestamp of the files to be localized. Used by internal MapReduce code.
  *
  * @param conf Configuration which stores the timestamp's
  * @param timestamps comma separated list of timestamps of files. The order should be the same as
  *     the order in which the files are added.
  */
 public static void setFileTimestamps(Configuration conf, String timestamps) {
   conf.set(CACHE_FILES_TIMESTAMPS, timestamps);
 }
 /**
  * Set the conf to contain the location for localized archives. Used by internal DistributedCache
  * code.
  *
  * @param conf The conf to modify to contain the localized caches
  * @param str a comma separated list of local archives
  */
 public static void setLocalArchives(Configuration conf, String str) {
   conf.set(CACHE_LOCALARCHIVES, str);
 }
 /**
  * Add a file to be localized to the conf. Intended to be used by user code.
  *
  * @param uri The uri of the cache to be localized
  * @param conf Configuration to add the cache to
  */
 public static void addCacheFile(URI uri, Configuration conf) {
   String files = conf.get(CACHE_FILES);
   conf.set(CACHE_FILES, files == null ? uri.toString() : files + "," + uri.toString());
 }
 /**
  * Add a archives to be localized to the conf. Intended to be used by user code.
  *
  * @param uri The uri of the cache to be localized
  * @param conf Configuration to add the cache to
  */
 public static void addCacheArchive(URI uri, Configuration conf) {
   String archives = conf.get(CACHE_ARCHIVES);
   conf.set(CACHE_ARCHIVES, archives == null ? uri.toString() : archives + "," + uri.toString());
 }
 /**
  * Add a file that has been localized to the conf.. Used by internal DistributedCache code.
  *
  * @param conf The conf to modify to contain the localized caches
  * @param str a comma separated list of local files
  */
 public static void addLocalFiles(Configuration conf, String str) {
   String files = conf.get(CACHE_LOCALFILES);
   conf.set(CACHE_LOCALFILES, files == null ? str : files + "," + str);
 }
示例#20
0
 /**
  * This is to check the timestamp of the files to be localized
  *
  * @param conf Configuration which stores the timestamp's
  * @param timestamps comma separated list of timestamps of files. The order should be the same as
  *     the order in which the files are added.
  */
 public static void setFileTimestamps(Configuration conf, String timestamps) {
   conf.set("mapred.cache.files.timestamps", timestamps);
 }
示例#21
0
 public static void setSharedFileLength(Configuration conf, String length) {
   conf.set("mapred.cache.shared.files.length", length);
 }
 /**
  * This is to check the timestamp of the archives to be localized. Used by internal MapReduce
  * code.
  *
  * @param conf Configuration which stores the timestamp's
  * @param timestamps comma separated list of timestamps of archives. The order should be the same
  *     as the order in which the archives are added.
  */
 public static void setArchiveTimestamps(Configuration conf, String timestamps) {
   conf.set(CACHE_ARCHIVES_TIMESTAMPS, timestamps);
 }
示例#23
0
 /**
  * Set the conf to contain the location for localized files
  *
  * @param conf The conf to modify to contain the localized caches
  * @param str a comma separated list of local files
  */
 public static void setLocalSharedFiles(Configuration conf, String str) {
   conf.set("mapred.cache.shared.localFiles", str);
 }
 /**
  * Set the configuration with the given set of files. Intended to be used by user code.
  *
  * @param files The list of files that need to be localized
  * @param conf Configuration which will be changed
  */
 public static void setCacheFiles(URI[] files, Configuration conf) {
   String sfiles = StringUtils.uriToString(files);
   conf.set(CACHE_FILES, sfiles);
 }
示例#25
0
 /**
  * Add a file to be localized to the conf
  *
  * @param uri The uri of the cache to be localized
  * @param conf Configuration to add the cache to
  */
 public static void addCacheFile(URI uri, Configuration conf) {
   String files = conf.get("mapred.cache.files");
   conf.set("mapred.cache.files", files == null ? uri.toString() : files + "," + uri.toString());
 }
示例#26
0
 /**
  * This method allows you to create symlinks in the current working directory of the task to all
  * the cache files/archives
  *
  * @param conf the jobconf
  */
 public static void createSymlink(Configuration conf) {
   conf.set("mapred.create.symlink", "yes");
 }
 /**
  * Set the configuration with the given set of archives. Intended to be used by user code.
  *
  * @param archives The list of archives that need to be localized
  * @param conf Configuration which will be changed
  */
 public static void setCacheArchives(URI[] archives, Configuration conf) {
   String sarchives = StringUtils.uriToString(archives);
   conf.set(CACHE_ARCHIVES, sarchives);
 }
 /**
  * Set the conf to contain the location for localized files. Used by internal DistributedCache
  * code.
  *
  * @param conf The conf to modify to contain the localized caches
  * @param str a comma separated list of local files
  */
 public static void setLocalFiles(Configuration conf, String str) {
   conf.set(CACHE_LOCALFILES, str);
 }