public int run(String[] args) throws Exception { Configuration argConf = getConf(); // JobConf conf = new JobConf(diffdb.class); Configuration config = HBaseConfiguration.create(); HBaseAdmin hbAdmin = new HBaseAdmin(config); dbutil db_util = new dbutil(config); HTable runTable = new HTable(config, "gestore_runs"); Get runGet = new Get(argConf.get("id").getBytes()); Result pipeline = runTable.get(runGet); NavigableMap<byte[], byte[]> pipeMap = pipeline.getFamilyMap("d".getBytes()); Map.Entry<byte[], byte[]> results = pipeMap.pollFirstEntry(); HashMap<String, HashMap<String, String>> resultMap = new HashMap<String, HashMap<String, String>>(); while (results != null) { String resultKey = new String(results.getKey()); String resultValue = new String(results.getValue()); String field = "type"; HashMap<String, String> tempMap = new HashMap<String, String>(); String entry = resultKey; if (resultKey.endsWith("_db_timestamp")) { field = "db_timestamp"; entry = resultKey.substring(0, resultKey.lastIndexOf("_db_timestamp")); } else if (resultKey.endsWith("_filename")) { field = "filename"; entry = resultKey.substring(0, resultKey.lastIndexOf("_filename")); } else if (resultKey.endsWith("_regex")) { field = "regex"; entry = resultKey.substring(0, resultKey.lastIndexOf("_regex")); } if (resultMap.containsKey(entry)) { tempMap = resultMap.get(entry); } tempMap.put(field, resultValue); resultMap.put(entry, tempMap); // System.out.println("Key: " + resultKey + " Value: " + resultValue); results = pipeMap.pollFirstEntry(); } for (String key : resultMap.keySet()) { System.out.println("File ID: " + key); for (String subKey : resultMap.get(key).keySet()) { // System.out.println("\t " + subKey + "\t\t" + resultMap.get(key).get(subKey)); System.out.format(" %1$-20s %2$s\n", subKey, resultMap.get(key).get(subKey)); } } return 0; }
/**
 * Appends a file to the job's classpath entries and registers it as a cache file.
 *
 * <p>The unqualified path of {@code file} is added to the {@code mapred.job.classpath.files}
 * property, using the {@code path.separator} system property as delimiter. The URI of the file,
 * qualified against {@code fs}, is then passed to {@code addCacheFile}. Intended to be used by
 * user code.
 *
 * @param file Path of the file to be added
 * @param conf Configuration that contains the classpath setting
 * @param fs FileSystem with respect to which {@code file} should be interpreted.
 */
public static void addFileToClassPath(Path file, Configuration conf, FileSystem fs)
    throws IOException {
  final String plainPath = file.toUri().getPath();
  final String existing = conf.get("mapred.job.classpath.files");

  String updated;
  if (existing == null) {
    updated = plainPath;
  } else {
    updated = existing + System.getProperty("path.separator") + plainPath;
  }
  conf.set("mapred.job.classpath.files", updated);

  addCacheFile(fs.makeQualified(file).toUri(), conf);
}
/**
 * Appends an archive to the job's classpath entries and registers it as a cache archive.
 *
 * <p>The unqualified path of {@code archive} is added to the
 * {@code mapred.job.classpath.archives} property, using the {@code path.separator} system
 * property as delimiter. The URI of the archive, qualified against {@code fs}, is then passed to
 * {@code addCacheArchive}. Intended to be used by user code.
 *
 * @param archive Path of the archive to be added
 * @param conf Configuration that contains the classpath setting
 * @param fs FileSystem with respect to which {@code archive} should be interpreted.
 */
public static void addArchiveToClassPath(Path archive, Configuration conf, FileSystem fs)
    throws IOException {
  final String plainPath = archive.toUri().getPath();
  final String existing = conf.get("mapred.job.classpath.archives");

  String updated;
  if (existing == null) {
    updated = plainPath;
  } else {
    updated = existing + System.getProperty("path.separator") + plainPath;
  }
  conf.set("mapred.job.classpath.archives", updated);

  addCacheArchive(fs.makeQualified(archive).toUri(), conf);
}
private static URI addArchiveToClassPathHelper(Path archive, Configuration conf) throws IOException { String classpath = conf.get("mapred.job.classpath.archives"); // the scheme/authority use ':' as separator. put the unqualified path in classpath String archivePath = archive.toUri().getPath(); conf.set( "mapred.job.classpath.archives", classpath == null ? archivePath : classpath + System.getProperty("path.separator") + archivePath); return archive.makeQualified(archive.getFileSystem(conf)).toUri(); }
/**
 * Tells whether symlinks are to be created for the localized cache files in the current working
 * directory.
 *
 * @param conf the jobconf
 * @return true only if the {@code mapred.create.symlink} property equals {@code "yes"}; false
 *     otherwise, including when it is unset
 */
public static boolean getSymlink(Configuration conf) {
  final String setting = conf.get("mapred.create.symlink");
  return "yes".equals(setting);
}
/**
 * Tells whether symlinks are to be created for the localized cache files in the current working
 * directory. Used by internal DistributedCache code.
 *
 * @param conf the jobconf
 * @return true only if the property named by {@code CACHE_SYMLINK} equals {@code "yes"}; false
 *     otherwise, including when it is unset
 */
public static boolean getSymlink(Configuration conf) {
  final String setting = conf.get(CACHE_SYMLINK);
  return "yes".equals(setting);
}
/**
 * Renders every entry of a configuration as text, one {@code Key:} line and one {@code Value:}
 * line per entry, in the order the configuration's iterator yields them.
 *
 * @param config the configuration to dump
 * @return the formatted entries, or an empty string if the configuration has none
 */
private static String getConfigString(Configuration config) {
  // A builder avoids re-copying the accumulated text on every entry.
  StringBuilder output = new StringBuilder();
  for (Map.Entry<String, String> curEntry : config) {
    output
        .append("Key: \t")
        .append(curEntry.getKey())
        .append("\nValue: \t")
        .append(curEntry.getValue())
        .append("\n");
  }
  return output.toString();
}
/**
 * Resolves the job tracker address from the {@code mapred.job.tracker} property.
 *
 * <p>The value is expected in {@code host:port} form and is split at the first colon. When the
 * property is unset, {@code localhost:8012} is used.
 *
 * @param conf configuration holding the job tracker setting
 * @return the socket address built from the host and port parts
 * @throws RuntimeException if the value contains no colon
 * @throws NumberFormatException if the part after the colon is not an integer
 */
public static InetSocketAddress getAddress(Configuration conf) {
  final String trackerSpec = conf.get("mapred.job.tracker", "localhost:8012");

  final int separator = trackerSpec.indexOf(':');
  if (separator < 0) {
    throw new RuntimeException("Bad mapred.job.tracker: " + trackerSpec);
  }

  final String host = trackerSpec.substring(0, separator);
  final String portText = trackerSpec.substring(separator + 1);
  return new InetSocketAddress(host, Integer.parseInt(portText));
}
/**
 * Returns the archive entries of the classpath as an array of Path.
 *
 * <p>The {@code mapred.job.classpath.archives} property is split on the {@code path.separator}
 * system property and each token becomes one Path.
 *
 * @param conf Configuration that contains the classpath setting
 * @return one Path per classpath entry, or {@code null} if the property is not set
 */
public static Path[] getArchiveClassPaths(Configuration conf) {
  final String classpath = conf.get("mapred.job.classpath.archives");
  if (classpath == null) {
    return null;
  }

  StringTokenizer entries =
      new StringTokenizer(classpath, System.getProperty("path.separator"));
  Path[] paths = new Path[entries.countTokens()];
  for (int slot = 0; slot < paths.length; slot++) {
    paths[slot] = new Path(entries.nextToken());
  }
  return paths;
}
/**
 * Creates a dataset over the storage directories of {@code storage}.
 *
 * <p>One FSVolume is created for the current directory of each storage directory. The volumes
 * then fill the block map, and an MBean is registered under the storage ID.
 *
 * @param storage source of the storage directories and of the storage ID
 * @param conf configuration; {@code dfs.datanode.numblocks} (default 64) sets the maximum number
 *     of blocks per directory
 * @throws IOException if a volume cannot be set up
 */
public FSDataset(DataStorage storage, Configuration conf) throws IOException {
  this.maxBlocksPerDir = conf.getInt("dfs.datanode.numblocks", 64);

  // One volume per storage directory, in storage order.
  FSVolume[] perDirVolumes = new FSVolume[storage.getNumStorageDirs()];
  int dirIndex = 0;
  while (dirIndex < storage.getNumStorageDirs()) {
    perDirVolumes[dirIndex] =
        new FSVolume(storage.getStorageDir(dirIndex).getCurrentDir(), conf);
    dirIndex++;
  }

  volumes = new FSVolumeSet(perDirVolumes);
  volumeMap = new HashMap<Block, DatanodeBlockInfo>();
  volumes.getVolumeMap(volumeMap);

  registerMBean(storage.getStorageID());
}
/**
 * Sets up a volume rooted at the parent of {@code currentDir}.
 *
 * <p>Blocks left in an existing {@code detach} directory are recovered. An existing {@code tmp}
 * directory is recovered the same way when {@code dfs.support.append} is true, and deleted
 * otherwise. Both directories are then (re)created and disk usage tracking is started.
 *
 * @param currentDir the data directory of this volume
 * @param conf configuration; also supplies {@code dfs.datanode.du.reserved} (default 0)
 * @throws IOException if the {@code tmp} or {@code detach} directory cannot be created
 */
FSVolume(File currentDir, Configuration conf) throws IOException {
  this.reserved = conf.getLong("dfs.datanode.du.reserved", 0);
  final boolean appendsEnabled = conf.getBoolean("dfs.support.append", false);
  final File volumeRoot = currentDir.getParentFile();

  this.detachDir = new File(volumeRoot, "detach");
  if (detachDir.exists()) {
    recoverDetachedBlocks(currentDir, detachDir);
  }

  // Files that were being written when the datanode last shut down are moved back to the data
  // directory. A datanode-local recovery step for these blocks (for example crc validation)
  // might be added in the future.
  this.tmpDir = new File(volumeRoot, "tmp");
  if (tmpDir.exists() && appendsEnabled) {
    recoverDetachedBlocks(currentDir, tmpDir);
  } else if (tmpDir.exists()) {
    FileUtil.fullyDelete(tmpDir);
  }

  this.dataDir = new FSDir(currentDir);

  // mkdirs() also returns false when the directory already exists, so only fail if it is
  // really missing afterwards.
  if (!tmpDir.mkdirs() && !tmpDir.isDirectory()) {
    throw new IOException("Mkdirs failed to create " + tmpDir.toString());
  }
  if (!detachDir.mkdirs() && !detachDir.isDirectory()) {
    throw new IOException("Mkdirs failed to create " + detachDir.toString());
  }

  this.usage = new DF(volumeRoot, conf);
  this.dfsUsage = new DU(volumeRoot, conf);
  this.dfsUsage.start();
}
/**
 * Constructs a bean over a named directory.
 *
 * <p>If {@code search-servers.txt} or {@code solr-servers.txt} exists in the directory, a
 * DistributedSearchBean is used; otherwise a LuceneSearchBean over the {@code index} and
 * {@code indexes} sub-directories is used. The HBase table named by {@code table.name}
 * (default {@code webtable}) is opened as well.
 *
 * @param conf the configuration to use
 * @param dir the search directory; when {@code null}, the {@code searcher.dir} property
 *     (default {@code crawl}) is used instead
 * @throws IOException if the file system or one of the beans cannot be set up
 */
public NutchBean(Configuration conf, Path dir) throws IOException {
  this.conf = conf;
  this.fs = FileSystem.get(this.conf);

  final Path baseDir =
      (dir != null) ? dir : new Path(this.conf.get("searcher.dir", "crawl"));

  final Path luceneConfig = new Path(baseDir, "search-servers.txt");
  final Path solrConfig = new Path(baseDir, "solr-servers.txt");

  final boolean distributed = fs.exists(luceneConfig) || fs.exists(solrConfig);
  if (!distributed) {
    final Path indexDir = new Path(baseDir, "index");
    final Path indexesDir = new Path(baseDir, "indexes");
    searchBean = new LuceneSearchBean(conf, indexDir, indexesDir);
  } else {
    searchBean = new DistributedSearchBean(conf, luceneConfig, solrConfig);
  }

  table = new HTable(conf.get("table.name", "webtable"));
  summarizerFactory = new SummarizerFactory(conf);
}
/** Start the JobTracker process, listen on the indicated port */ JobTracker(Configuration conf) throws IOException { // // Grab some static constants // maxCurrentTasks = conf.getInt("mapred.tasktracker.tasks.maximum", 2); RETIRE_JOB_INTERVAL = conf.getLong("mapred.jobtracker.retirejob.interval", 24 * 60 * 60 * 1000); RETIRE_JOB_CHECK_INTERVAL = conf.getLong("mapred.jobtracker.retirejob.check", 60 * 1000); TASK_ALLOC_EPSILON = conf.getFloat("mapred.jobtracker.taskalloc.loadbalance.epsilon", 0.2f); PAD_FRACTION = conf.getFloat("mapred.jobtracker.taskalloc.capacitypad", 0.1f); MIN_SLOTS_FOR_PADDING = 3 * maxCurrentTasks; // This is a directory of temporary submission files. We delete it // on startup, and can delete any files that we're done with this.conf = conf; JobConf jobConf = new JobConf(conf); this.systemDir = jobConf.getSystemDir(); this.fs = FileSystem.get(conf); FileUtil.fullyDelete(fs, systemDir); fs.mkdirs(systemDir); // Same with 'localDir' except it's always on the local disk. jobConf.deleteLocalFiles(SUBDIR); // Set ports, start RPC servers, etc. InetSocketAddress addr = getAddress(conf); this.localMachine = addr.getHostName(); this.port = addr.getPort(); this.interTrackerServer = RPC.getServer(this, addr.getPort(), 10, false, conf); this.interTrackerServer.start(); Properties p = System.getProperties(); for (Iterator it = p.keySet().iterator(); it.hasNext(); ) { String key = (String) it.next(); String val = (String) p.getProperty(key); LOG.info("Property '" + key + "' is " + val); } this.infoPort = conf.getInt("mapred.job.tracker.info.port", 50030); this.infoServer = new JobTrackerInfoServer(this, infoPort); this.infoServer.start(); this.startTime = System.currentTimeMillis(); new Thread(this.expireTrackers).start(); new Thread(this.retireJobs).start(); new Thread(this.initJobs).start(); }
/** * Test how IPC cache map works. * * @throws Exception If failed. */ @SuppressWarnings("unchecked") public void testIpcCache() throws Exception { Field cacheField = GridGgfsHadoopIpcIo.class.getDeclaredField("ipcCache"); cacheField.setAccessible(true); Field activeCntField = GridGgfsHadoopIpcIo.class.getDeclaredField("activeCnt"); activeCntField.setAccessible(true); Map<String, GridGgfsHadoopIpcIo> cache = (Map<String, GridGgfsHadoopIpcIo>) cacheField.get(null); String name = "ggfs:" + getTestGridName(0) + "@"; Configuration cfg = new Configuration(); cfg.addResource(U.resolveGridGainUrl(HADOOP_FS_CFG)); cfg.setBoolean("fs.ggfs.impl.disable.cache", true); cfg.setBoolean(String.format(GridGgfsHadoopUtils.PARAM_GGFS_ENDPOINT_NO_EMBED, name), true); // Ensure that existing IO is reused. FileSystem fs1 = FileSystem.get(new URI("ggfs://" + name + "/"), cfg); assertEquals(1, cache.size()); GridGgfsHadoopIpcIo io = null; System.out.println("CACHE: " + cache); for (String key : cache.keySet()) { if (key.contains("10500")) { io = cache.get(key); break; } } assert io != null; assertEquals(1, ((AtomicInteger) activeCntField.get(io)).get()); // Ensure that when IO is used by multiple file systems and one of them is closed, IO is not // stopped. FileSystem fs2 = FileSystem.get(new URI("ggfs://" + name + "/abc"), cfg); assertEquals(1, cache.size()); assertEquals(2, ((AtomicInteger) activeCntField.get(io)).get()); fs2.close(); assertEquals(1, cache.size()); assertEquals(1, ((AtomicInteger) activeCntField.get(io)).get()); Field stopField = GridGgfsHadoopIpcIo.class.getDeclaredField("stopping"); stopField.setAccessible(true); assert !(Boolean) stopField.get(io); // Ensure that IO is stopped when nobody else is need it. fs1.close(); assert cache.isEmpty(); assert (Boolean) stopField.get(io); }
FileDataGenNew(String HDFSMaster) { fsConf.set("fs.default.name", HDFSMaster); }
/**
 * Adds a file to be localized to the conf by appending its URI to the comma-separated
 * {@code mapred.cache.files} property.
 *
 * @param uri The uri of the cache to be localized
 * @param conf Configuration to add the cache to
 */
public static void addCacheFile(URI uri, Configuration conf) {
  final String existing = conf.get("mapred.cache.files");

  String updated;
  if (existing == null) {
    updated = uri.toString();
  } else {
    updated = existing + "," + uri.toString();
  }
  conf.set("mapred.cache.files", updated);
}
/**
 * Adds an archive to be localized to the conf by appending its URI to the comma-separated
 * {@code mapred.cache.shared.archives} property.
 *
 * @param uri The uri of the cache to be localized
 * @param conf Configuration to add the cache to
 */
public static void addSharedCacheArchive(URI uri, Configuration conf) {
  final String existing = conf.get("mapred.cache.shared.archives");

  String updated;
  if (existing == null) {
    updated = uri.toString();
  } else {
    updated = existing + "," + uri.toString();
  }
  conf.set("mapred.cache.shared.archives", updated);
}
/**
 * Records the location of the localized shared files in the conf, under the
 * {@code mapred.cache.shared.localFiles} property.
 *
 * @param conf The conf to modify to contain the localized caches
 * @param str a comma separated list of local files
 */
public static void setLocalSharedFiles(Configuration conf, String str) {
  final String property = "mapred.cache.shared.localFiles";
  conf.set(property, str);
}
/**
 * Records the location of the localized archives in the conf, under the
 * {@code mapred.cache.localArchives} property.
 *
 * @param conf The conf to modify to contain the localized caches
 * @param str a comma separated list of local archives
 */
public static void setLocalArchives(Configuration conf, String str) {
  final String property = "mapred.cache.localArchives";
  conf.set(property, str);
}
/**
 * Returns the shared cache files set in the Configuration, read from the
 * {@code mapred.cache.shared.files} property.
 *
 * @param conf The configuration which contains the files
 * @return A URI array of the files set in the Configuration
 * @throws IOException
 */
public static URI[] getSharedCacheFiles(Configuration conf) throws IOException {
  final String[] rawUris = conf.getStrings("mapred.cache.shared.files");
  return StringUtils.stringToURI(rawUris);
}
/**
 * Stores the timestamps of the files to be localized under the
 * {@code mapred.cache.files.timestamps} property, so that they can be checked later.
 *
 * @param conf Configuration which stores the timestamps
 * @param timestamps comma separated list of timestamps of files. The order should be the same as
 *     the order in which the files are added.
 */
public static void setFileTimestamps(Configuration conf, String timestamps) {
  final String property = "mapred.cache.files.timestamps";
  conf.set(property, timestamps);
}
/**
 * Stores the timestamps of the archives to be localized under the
 * {@code mapred.cache.archives.timestamps} property, so that they can be checked later.
 *
 * @param conf Configuration which stores the timestamps
 * @param timestamps comma separated list of timestamps of archives. The order should be the same
 *     as the order in which the archives are added.
 */
public static void setArchiveTimestamps(Configuration conf, String timestamps) {
  final String property = "mapred.cache.archives.timestamps";
  conf.set(property, timestamps);
}
/**
 * Returns the values stored under the {@code mapred.cache.shared.files.length} property.
 *
 * @param conf The configuration to read from
 * @return the property's values as a string array, as produced by {@code conf.getStrings}
 */
public static String[] getSharedFileLength(Configuration conf) {
  final String[] lengths = conf.getStrings("mapred.cache.shared.files.length");
  return lengths;
}
/**
 * Returns the timestamps of the files, read from the {@code mapred.cache.files.timestamps}
 * property.
 *
 * @param conf The configuration which stored the timestamps
 * @return a string array of timestamps
 */
public static String[] getFileTimestamps(Configuration conf) {
  final String[] stamps = conf.getStrings("mapred.cache.files.timestamps");
  return stamps;
}
/**
 * Returns the timestamps of the archives, read from the {@code mapred.cache.archives.timestamps}
 * property.
 *
 * @param conf The configuration which stored the timestamps
 * @return a string array of timestamps
 */
public static String[] getArchiveTimestamps(Configuration conf) {
  final String[] stamps = conf.getStrings("mapred.cache.archives.timestamps");
  return stamps;
}
/**
 * Replaces the {@code mapred.cache.files} property with the given set of files.
 *
 * @param files The list of files that need to be localized
 * @param conf Configuration which will be changed
 */
public static void setCacheFiles(URI[] files, Configuration conf) {
  conf.set("mapred.cache.files", StringUtils.uriToString(files));
}
/**
 * Returns the path array of the localized archives, read from the
 * {@code mapred.cache.localArchives} property.
 *
 * @param conf Configuration that contains the localized archives
 * @return A path array of localized caches
 * @throws IOException
 */
public static Path[] getLocalCacheArchives(Configuration conf) throws IOException {
  final String[] rawPaths = conf.getStrings("mapred.cache.localArchives");
  return StringUtils.stringToPath(rawPaths);
}
/**
 * Stores the given value under the {@code mapred.cache.shared.files.length} property.
 *
 * @param conf Configuration which will be changed
 * @param length the value to store
 */
public static void setSharedFileLength(Configuration conf, String length) {
  final String property = "mapred.cache.shared.files.length";
  conf.set(property, length);
}
/**
 * Returns the path array of the localized shared files, read from the
 * {@code mapred.cache.shared.localFiles} property.
 *
 * @param conf Configuration that contains the localized files
 * @return A path array of localized files
 * @throws IOException
 */
public static Path[] getLocalSharedCacheFiles(Configuration conf) throws IOException {
  final String[] rawPaths = conf.getStrings("mapred.cache.shared.localFiles");
  return StringUtils.stringToPath(rawPaths);
}
/**
 * Returns the cache archives set in the Configuration, read from the
 * {@code mapred.cache.archives} property.
 *
 * @param conf The configuration which contains the archives
 * @return A URI array of the caches set in the Configuration
 * @throws IOException
 */
public static URI[] getCacheArchives(Configuration conf) throws IOException {
  final String[] rawUris = conf.getStrings("mapred.cache.archives");
  return StringUtils.stringToURI(rawUris);
}