/**
 * Runs the HW2_Q4 MapReduce job for a fixed number of iterations.
 *
 * <p>Iteration {@code i} publishes {@code c<i>.txt} as {@code CFILE} and {@code c<i+1>.txt} as
 * {@code NEXTCFILE} (both under the config directory) through the job configuration, reads the
 * input directory and writes to {@code output<i>} under the output directory.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if a job cannot be configured or submitted
 * @throws IllegalStateException if a job completes unsuccessfully
 */
public static void main(String[] args) throws Exception {
  String inputDirectory = "/home/cs246/Desktop/HW2/input";
  String outputDirectory = "/home/cs246/Desktop/HW2/output";
  String centroidDirectory = "/home/cs246/Desktop/HW2/config";
  int iterations = 20;

  for (int i = 1; i <= iterations; i++) {
    Configuration conf = new Configuration();

    // Declared and registered exactly once per iteration; the original repeated these four
    // statements, which redeclared both locals in the same scope and did not compile.
    String cFile = centroidDirectory + "/c" + i + ".txt";
    String nextCFile = centroidDirectory + "/c" + (i + 1) + ".txt";
    conf.set("CFILE", cFile);
    conf.set("NEXTCFILE", nextCFile);

    Job job = new Job(conf, "HW2_Q4." + i);
    job.setJarByClass(HW2_Q4.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(Map1.class);
    job.setReducerClass(Reduce1.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(inputDirectory));
    FileOutputFormat.setOutputPath(job, new Path(outputDirectory + "/output" + i));

    // This iteration's NEXTCFILE path is the next iteration's CFILE path, so stop instead of
    // running the remaining iterations after a failed job.
    if (!job.waitForCompletion(true)) {
      throw new IllegalStateException("Job HW2_Q4." + i + " failed; aborting remaining iterations");
    }
  }
}
/** Initialize SecondaryNameNode. */ private void initialize(Configuration conf) throws IOException { // initiate Java VM metrics JvmMetrics.init("SecondaryNameNode", conf.get("session.id")); // Create connection to the namenode. shouldRun = true; nameNodeAddr = NameNode.getAddress(conf); this.conf = conf; this.namenode = (NamenodeProtocol) RPC.waitForProxy( NamenodeProtocol.class, NamenodeProtocol.versionID, nameNodeAddr, conf); // initialize checkpoint directories fsName = getInfoServer(); checkpointDirs = FSImage.getCheckpointDirs(conf, "/tmp/hadoop/dfs/namesecondary"); checkpointEditsDirs = FSImage.getCheckpointEditsDirs(conf, "/tmp/hadoop/dfs/namesecondary"); checkpointImage = new CheckpointStorage(conf); checkpointImage.recoverCreate(checkpointDirs, checkpointEditsDirs); // Initialize other scheduling parameters from the configuration checkpointPeriod = conf.getLong("fs.checkpoint.period", 3600); checkpointSize = conf.getLong("fs.checkpoint.size", 4194304); // initialize the webserver for uploading files. String infoAddr = NetUtils.getServerAddress( conf, "dfs.secondary.info.bindAddress", "dfs.secondary.info.port", "dfs.secondary.http.address"); InetSocketAddress infoSocAddr = NetUtils.createSocketAddr(infoAddr); infoBindAddress = infoSocAddr.getHostName(); int tmpInfoPort = infoSocAddr.getPort(); infoServer = new HttpServer("secondary", infoBindAddress, tmpInfoPort, tmpInfoPort == 0, conf); infoServer.setAttribute("name.system.image", checkpointImage); this.infoServer.setAttribute("name.conf", conf); infoServer.addInternalServlet("getimage", "/getimage", GetImageServlet.class); infoServer.start(); // The web-server port can be ephemeral... 
ensure we have the correct info infoPort = infoServer.getPort(); conf.set("dfs.secondary.http.address", infoBindAddress + ":" + infoPort); LOG.info("Secondary Web-server up at: " + infoBindAddress + ":" + infoPort); LOG.warn( "Checkpoint Period :" + checkpointPeriod + " secs " + "(" + checkpointPeriod / 60 + " min)"); LOG.warn( "Log Size Trigger :" + checkpointSize + " bytes " + "(" + checkpointSize / 1024 + " KB)"); }
/**
 * Appends a file path to the current set of classpath entries and also adds the file to the
 * cache. Intended to be used by user code.
 *
 * <p>The unqualified path is appended to {@code mapred.job.classpath.files}, separated from any
 * existing value by the {@code path.separator} system property; the qualified URI is passed to
 * {@code addCacheFile}.
 *
 * @param file Path of the file to be added
 * @param conf Configuration that contains the classpath setting
 * @param fs FileSystem with respect to which {@code file} should be interpreted.
 * @throws IOException propagated from the calls made here
 */
public static void addFileToClassPath(Path file, Configuration conf, FileSystem fs)
    throws IOException {
  final String key = "mapred.job.classpath.files";
  String entry = file.toUri().getPath();
  String existing = conf.get(key);

  String updated;
  if (existing == null) {
    updated = entry;
  } else {
    updated = existing + System.getProperty("path.separator") + entry;
  }
  conf.set(key, updated);

  addCacheFile(fs.makeQualified(file).toUri(), conf);
}
/**
 * Appends an archive path to the current set of classpath entries and also adds the archive to
 * the cache. Intended to be used by user code.
 *
 * <p>The unqualified path is appended to {@code mapred.job.classpath.archives}, separated from
 * any existing value by the {@code path.separator} system property; the qualified URI is passed
 * to {@code addCacheArchive}.
 *
 * @param archive Path of the archive to be added
 * @param conf Configuration that contains the classpath setting
 * @param fs FileSystem with respect to which {@code archive} should be interpreted.
 * @throws IOException propagated from the calls made here
 */
public static void addArchiveToClassPath(Path archive, Configuration conf, FileSystem fs)
    throws IOException {
  final String key = "mapred.job.classpath.archives";
  String entry = archive.toUri().getPath();
  String existing = conf.get(key);

  String updated;
  if (existing == null) {
    updated = entry;
  } else {
    updated = existing + System.getProperty("path.separator") + entry;
  }
  conf.set(key, updated);

  addCacheArchive(fs.makeQualified(archive).toUri(), conf);
}
@Override public int run(String[] args) throws Exception { String locatorHost = args[0]; int locatorPort = Integer.parseInt(args[1]); String hdfsHomeDir = args[2]; System.out.println( "KnownKeysMRv2 invoked with args (locatorHost = " + locatorHost + " locatorPort = " + locatorPort + " hdfsHomeDir = " + hdfsHomeDir); Configuration conf = getConf(); conf.set(GFInputFormat.INPUT_REGION, "partitionedRegion"); conf.set(GFInputFormat.HOME_DIR, hdfsHomeDir); conf.setBoolean(GFInputFormat.CHECKPOINT, false); conf.set(GFOutputFormat.REGION, "validationRegion"); conf.set(GFOutputFormat.LOCATOR_HOST, locatorHost); conf.setInt(GFOutputFormat.LOCATOR_PORT, locatorPort); Job job = Job.getInstance(conf, "knownKeysMRv2"); job.setInputFormatClass(GFInputFormat.class); job.setOutputFormatClass(GFOutputFormat.class); job.setMapperClass(KnownKeysMRv2Mapper.class); job.setMapOutputKeyClass(GFKey.class); job.setMapOutputValueClass(PEIWritable.class); job.setReducerClass(KnownKeysMRv2Reducer.class); // job.setOutputKeyClass(String.class); // job.setOutputValueClass(ValueHolder.class); return job.waitForCompletion(false) ? 0 : 1; }
private static URI addArchiveToClassPathHelper(Path archive, Configuration conf) throws IOException { String classpath = conf.get("mapred.job.classpath.archives"); // the scheme/authority use ':' as separator. put the unqualified path in classpath String archivePath = archive.toUri().getPath(); conf.set( "mapred.job.classpath.archives", classpath == null ? archivePath : classpath + System.getProperty("path.separator") + archivePath); return archive.makeQualified(archive.getFileSystem(conf)).toUri(); }
/**
 * Stores the timestamps of the archives to be localized under
 * {@code mapred.cache.archives.timestamps}, so they can be checked later.
 *
 * @param conf Configuration which stores the timestamps
 * @param timestamps comma separated list of timestamps of archives. The order should be the same
 *     as the order in which the archives are added.
 */
public static void setArchiveTimestamps(Configuration conf, String timestamps) {
  final String key = "mapred.cache.archives.timestamps";
  conf.set(key, timestamps);
}
/**
 * Replaces the {@code mapred.cache.files} setting with the given set of files.
 *
 * @param files The list of files that need to be localized
 * @param conf Configuration which will be changed
 */
public static void setCacheFiles(URI[] files, Configuration conf) {
  conf.set("mapred.cache.files", StringUtils.uriToString(files));
}
/**
 * Replaces the {@code mapred.cache.archives} setting with the given set of archives.
 *
 * @param archives The list of archives that need to be localized
 * @param conf Configuration which will be changed
 */
public static void setCacheArchives(URI[] archives, Configuration conf) {
  conf.set("mapred.cache.archives", StringUtils.uriToString(archives));
}
/**
 * Adds an archive that has been localized to the conf. Used by internal DistributedCache code.
 *
 * <p>If a value is already present, {@code str} is appended to it after a comma; otherwise
 * {@code str} becomes the value.
 *
 * @param conf The conf to modify to contain the localized caches
 * @param str a comma separated list of local archives
 */
public static void addLocalArchives(Configuration conf, String str) {
  String existing = conf.get(CACHE_LOCALARCHIVES);
  if (existing == null) {
    conf.set(CACHE_LOCALARCHIVES, str);
  } else {
    conf.set(CACHE_LOCALARCHIVES, existing + "," + str);
  }
}
FileDataGenNew(String HDFSMaster) { fsConf.set("fs.default.name", HDFSMaster); }
/**
 * Adds an archive to be localized to the conf.
 *
 * <p>The URI's string form is appended to {@code mapred.cache.shared.archives} after a comma, or
 * becomes the value when none is present.
 *
 * @param uri The uri of the cache to be localized
 * @param conf Configuration to add the cache to
 */
public static void addSharedCacheArchive(URI uri, Configuration conf) {
  final String key = "mapred.cache.shared.archives";
  String existing = conf.get(key);
  if (existing == null) {
    conf.set(key, uri.toString());
  } else {
    conf.set(key, existing + "," + uri.toString());
  }
}
/**
 * Sets the location for localized archives in the conf, under
 * {@code mapred.cache.localArchives}.
 *
 * @param conf The conf to modify to contain the localized caches
 * @param str a comma separated list of local archives
 */
public static void setLocalArchives(Configuration conf, String str) {
  final String key = "mapred.cache.localArchives";
  conf.set(key, str);
}
/**
 * Requests creation of symlinks in the current working directory of the task to all the cache
 * files/archives, by setting {@code CACHE_SYMLINK} to "yes". Intended to be used by user code.
 *
 * @param conf the jobconf
 */
public static void createSymlink(Configuration conf) {
  final String enabled = "yes";
  conf.set(CACHE_SYMLINK, enabled);
}
/**
 * Stores the timestamps of the files to be localized under {@code CACHE_FILES_TIMESTAMPS}, so
 * they can be checked later. Used by internal MapReduce code.
 *
 * @param conf Configuration which stores the timestamps
 * @param timestamps comma separated list of timestamps of files. The order should be the same as
 *     the order in which the files are added.
 */
public static void setFileTimestamps(Configuration conf, String timestamps) {
  final String key = CACHE_FILES_TIMESTAMPS;
  conf.set(key, timestamps);
}
/**
 * Sets the location for localized archives in the conf, under {@code CACHE_LOCALARCHIVES}. Used
 * by internal DistributedCache code.
 *
 * @param conf The conf to modify to contain the localized caches
 * @param str a comma separated list of local archives
 */
public static void setLocalArchives(Configuration conf, String str) {
  final String key = CACHE_LOCALARCHIVES;
  conf.set(key, str);
}
/**
 * Adds a file to be localized to the conf. Intended to be used by user code.
 *
 * <p>The URI's string form is appended to the {@code CACHE_FILES} value after a comma, or
 * becomes the value when none is present.
 *
 * @param uri The uri of the cache to be localized
 * @param conf Configuration to add the cache to
 */
public static void addCacheFile(URI uri, Configuration conf) {
  String existing = conf.get(CACHE_FILES);
  if (existing == null) {
    conf.set(CACHE_FILES, uri.toString());
  } else {
    conf.set(CACHE_FILES, existing + "," + uri.toString());
  }
}
/**
 * Adds an archive to be localized to the conf. Intended to be used by user code.
 *
 * <p>The URI's string form is appended to the {@code CACHE_ARCHIVES} value after a comma, or
 * becomes the value when none is present.
 *
 * @param uri The uri of the cache to be localized
 * @param conf Configuration to add the cache to
 */
public static void addCacheArchive(URI uri, Configuration conf) {
  String existing = conf.get(CACHE_ARCHIVES);
  if (existing == null) {
    conf.set(CACHE_ARCHIVES, uri.toString());
  } else {
    conf.set(CACHE_ARCHIVES, existing + "," + uri.toString());
  }
}
/**
 * Adds a file that has been localized to the conf. Used by internal DistributedCache code.
 *
 * <p>If a value is already present, {@code str} is appended to it after a comma; otherwise
 * {@code str} becomes the value.
 *
 * @param conf The conf to modify to contain the localized caches
 * @param str a comma separated list of local files
 */
public static void addLocalFiles(Configuration conf, String str) {
  String existing = conf.get(CACHE_LOCALFILES);
  if (existing == null) {
    conf.set(CACHE_LOCALFILES, str);
  } else {
    conf.set(CACHE_LOCALFILES, existing + "," + str);
  }
}
/**
 * Stores the timestamps of the files to be localized under
 * {@code mapred.cache.files.timestamps}, so they can be checked later.
 *
 * @param conf Configuration which stores the timestamps
 * @param timestamps comma separated list of timestamps of files. The order should be the same as
 *     the order in which the files are added.
 */
public static void setFileTimestamps(Configuration conf, String timestamps) {
  final String key = "mapred.cache.files.timestamps";
  conf.set(key, timestamps);
}
/**
 * Stores the given value under {@code mapred.cache.shared.files.length} in the conf.
 *
 * @param conf Configuration which will be changed
 * @param length value to store; NOTE(review): presumably the length(s) of the shared cache
 *     files, format not visible from here - confirm against callers
 */
public static void setSharedFileLength(Configuration conf, String length) {
  final String key = "mapred.cache.shared.files.length";
  conf.set(key, length);
}
/**
 * Stores the timestamps of the archives to be localized under
 * {@code CACHE_ARCHIVES_TIMESTAMPS}, so they can be checked later. Used by internal MapReduce
 * code.
 *
 * @param conf Configuration which stores the timestamps
 * @param timestamps comma separated list of timestamps of archives. The order should be the same
 *     as the order in which the archives are added.
 */
public static void setArchiveTimestamps(Configuration conf, String timestamps) {
  final String key = CACHE_ARCHIVES_TIMESTAMPS;
  conf.set(key, timestamps);
}
/**
 * Sets the location for localized files in the conf, under
 * {@code mapred.cache.shared.localFiles}.
 *
 * @param conf The conf to modify to contain the localized caches
 * @param str a comma separated list of local files
 */
public static void setLocalSharedFiles(Configuration conf, String str) {
  final String key = "mapred.cache.shared.localFiles";
  conf.set(key, str);
}
/**
 * Replaces the {@code CACHE_FILES} setting with the given set of files. Intended to be used by
 * user code.
 *
 * @param files The list of files that need to be localized
 * @param conf Configuration which will be changed
 */
public static void setCacheFiles(URI[] files, Configuration conf) {
  conf.set(CACHE_FILES, StringUtils.uriToString(files));
}
/**
 * Adds a file to be localized to the conf.
 *
 * <p>The URI's string form is appended to {@code mapred.cache.files} after a comma, or becomes
 * the value when none is present.
 *
 * @param uri The uri of the cache to be localized
 * @param conf Configuration to add the cache to
 */
public static void addCacheFile(URI uri, Configuration conf) {
  final String key = "mapred.cache.files";
  String existing = conf.get(key);
  if (existing == null) {
    conf.set(key, uri.toString());
  } else {
    conf.set(key, existing + "," + uri.toString());
  }
}
/**
 * Requests creation of symlinks in the current working directory of the task to all the cache
 * files/archives, by setting {@code mapred.create.symlink} to "yes".
 *
 * @param conf the jobconf
 */
public static void createSymlink(Configuration conf) {
  final String key = "mapred.create.symlink";
  conf.set(key, "yes");
}
/**
 * Replaces the {@code CACHE_ARCHIVES} setting with the given set of archives. Intended to be
 * used by user code.
 *
 * @param archives The list of archives that need to be localized
 * @param conf Configuration which will be changed
 */
public static void setCacheArchives(URI[] archives, Configuration conf) {
  conf.set(CACHE_ARCHIVES, StringUtils.uriToString(archives));
}
/**
 * Sets the location for localized files in the conf, under {@code CACHE_LOCALFILES}. Used by
 * internal DistributedCache code.
 *
 * @param conf The conf to modify to contain the localized caches
 * @param str a comma separated list of local files
 */
public static void setLocalFiles(Configuration conf, String str) {
  final String key = CACHE_LOCALFILES;
  conf.set(key, str);
}