/**
 * Reports whether symlinks should be created in the current working directory for the localized
 * cache files.
 *
 * @param conf the job configuration to inspect
 * @return {@code true} only when the {@code mapred.create.symlink} setting is exactly
 *     {@code "yes"}; {@code false} for any other value, including {@code null}
 */
public static boolean getSymlink(Configuration conf) {
  final String symlinkSetting = conf.get("mapred.create.symlink");
  return "yes".equals(symlinkSetting);
}
/**
 * Tells whether symlinks to the localized cache files are to be created in the current working
 * directory. Used by internal DistributedCache code.
 *
 * @param conf the job configuration to inspect
 * @return {@code true} when the value stored under {@code CACHE_SYMLINK} equals {@code "yes"};
 *     {@code false} in every other case, including a {@code null} value
 */
public static boolean getSymlink(Configuration conf) {
  // Constant-first comparison keeps a null lookup result from throwing.
  return "yes".equals(conf.get(CACHE_SYMLINK));
}
public int run(String[] args) throws Exception { Configuration argConf = getConf(); // JobConf conf = new JobConf(diffdb.class); Configuration config = HBaseConfiguration.create(); HBaseAdmin hbAdmin = new HBaseAdmin(config); dbutil db_util = new dbutil(config); HTable runTable = new HTable(config, "gestore_runs"); Get runGet = new Get(argConf.get("id").getBytes()); Result pipeline = runTable.get(runGet); NavigableMap<byte[], byte[]> pipeMap = pipeline.getFamilyMap("d".getBytes()); Map.Entry<byte[], byte[]> results = pipeMap.pollFirstEntry(); HashMap<String, HashMap<String, String>> resultMap = new HashMap<String, HashMap<String, String>>(); while (results != null) { String resultKey = new String(results.getKey()); String resultValue = new String(results.getValue()); String field = "type"; HashMap<String, String> tempMap = new HashMap<String, String>(); String entry = resultKey; if (resultKey.endsWith("_db_timestamp")) { field = "db_timestamp"; entry = resultKey.substring(0, resultKey.lastIndexOf("_db_timestamp")); } else if (resultKey.endsWith("_filename")) { field = "filename"; entry = resultKey.substring(0, resultKey.lastIndexOf("_filename")); } else if (resultKey.endsWith("_regex")) { field = "regex"; entry = resultKey.substring(0, resultKey.lastIndexOf("_regex")); } if (resultMap.containsKey(entry)) { tempMap = resultMap.get(entry); } tempMap.put(field, resultValue); resultMap.put(entry, tempMap); // System.out.println("Key: " + resultKey + " Value: " + resultValue); results = pipeMap.pollFirstEntry(); } for (String key : resultMap.keySet()) { System.out.println("File ID: " + key); for (String subKey : resultMap.get(key).keySet()) { // System.out.println("\t " + subKey + "\t\t" + resultMap.get(key).get(subKey)); System.out.format(" %1$-20s %2$s\n", subKey, resultMap.get(key).get(subKey)); } } return 0; }
/**
 * Builds the job tracker socket address from the {@code mapred.job.tracker} setting, which is
 * expected in {@code host:port} form. When the setting is absent, {@code localhost:8012} is
 * used.
 *
 * @param conf configuration holding the {@code mapred.job.tracker} setting
 * @return the address made of the text before the first colon and the port parsed from the text
 *     after it
 * @throws RuntimeException if the setting contains no colon
 * @throws NumberFormatException if the text after the first colon is not an integer
 */
public static InetSocketAddress getAddress(Configuration conf) {
  final String trackerSpec = conf.get("mapred.job.tracker", "localhost:8012");
  final int separatorIndex = trackerSpec.indexOf(':');
  if (separatorIndex >= 0) {
    String host = trackerSpec.substring(0, separatorIndex);
    String portText = trackerSpec.substring(separatorIndex + 1);
    return new InetSocketAddress(host, Integer.parseInt(portText));
  }
  throw new RuntimeException("Bad mapred.job.tracker: " + trackerSpec);
}
/**
 * Appends a file path to the classpath entries kept in the configuration and registers the file
 * as a cache file as well. Intended to be used by user code.
 *
 * <p>Only the path component of {@code file} is stored under {@code mapred.job.classpath.files};
 * entries are joined with the {@code path.separator} system property. The URI handed to
 * {@code addCacheFile} is {@code file} qualified against {@code fs}.
 *
 * @param file Path of the file to be added
 * @param conf Configuration that contains the classpath setting
 * @param fs FileSystem with respect to which {@code file} should be interpreted.
 * @throws IOException declared by the signature; propagated from the calls made here
 */
public static void addFileToClassPath(Path file, Configuration conf, FileSystem fs)
    throws IOException {
  final String key = "mapred.job.classpath.files";
  final String newEntry = file.toUri().getPath();
  final String existing = conf.get(key);

  String updated;
  if (existing == null) {
    updated = newEntry;
  } else {
    updated = existing + System.getProperty("path.separator") + newEntry;
  }
  conf.set(key, updated);

  addCacheFile(fs.makeQualified(file).toUri(), conf);
}
/**
 * Returns the archive classpath entries as an array of {@code Path}.
 *
 * <p>The value of {@code mapred.job.classpath.archives} is split on the characters of the
 * {@code path.separator} system property; each token becomes one {@code Path}, in order.
 *
 * @param conf Configuration that contains the classpath setting
 * @return one {@code Path} per token, or {@code null} when the setting is absent
 */
public static Path[] getArchiveClassPaths(Configuration conf) {
  final String joined = conf.get("mapred.job.classpath.archives");
  if (joined == null) {
    return null;
  }
  final StringTokenizer tokens =
      new StringTokenizer(joined, System.getProperty("path.separator"));
  // countTokens() reports how many tokens remain, which sizes the array exactly.
  final Path[] result = new Path[tokens.countTokens()];
  for (int idx = 0; tokens.hasMoreTokens(); idx++) {
    result[idx] = new Path(tokens.nextToken());
  }
  return result;
}
/**
 * Appends an archive path to the classpath entries kept in the configuration and registers the
 * archive as a cache archive as well. Intended to be used by user code.
 *
 * <p>Only the path component of {@code archive} is stored under
 * {@code mapred.job.classpath.archives}; entries are joined with the {@code path.separator}
 * system property. The URI handed to {@code addCacheArchive} is {@code archive} qualified
 * against {@code fs}.
 *
 * @param archive Path of the archive to be added
 * @param conf Configuration that contains the classpath setting
 * @param fs FileSystem with respect to which {@code archive} should be interpreted.
 * @throws IOException declared by the signature; propagated from the calls made here
 */
public static void addArchiveToClassPath(Path archive, Configuration conf, FileSystem fs)
    throws IOException {
  final String key = "mapred.job.classpath.archives";
  final String newEntry = archive.toUri().getPath();
  final String existing = conf.get(key);

  String updated;
  if (existing == null) {
    updated = newEntry;
  } else {
    updated = existing + System.getProperty("path.separator") + newEntry;
  }
  conf.set(key, updated);

  addCacheArchive(fs.makeQualified(archive).toUri(), conf);
}
private static URI addArchiveToClassPathHelper(Path archive, Configuration conf) throws IOException { String classpath = conf.get("mapred.job.classpath.archives"); // the scheme/authority use ':' as separator. put the unqualified path in classpath String archivePath = archive.toUri().getPath(); conf.set( "mapred.job.classpath.archives", classpath == null ? archivePath : classpath + System.getProperty("path.separator") + archivePath); return archive.makeQualified(archive.getFileSystem(conf)).toUri(); }
/**
 * Constructs a bean rooted at a named directory.
 *
 * <p>If {@code search-servers.txt} or {@code solr-servers.txt} exists in the directory a
 * {@code DistributedSearchBean} is created; otherwise a {@code LuceneSearchBean} is created over
 * the {@code index} and {@code indexes} sub-directories. An {@code HTable} is then opened for
 * the table named by {@code table.name} (default {@code webtable}).
 *
 * @param conf configuration used for the file system, the search bean and the summarizer
 * @param dir base directory; when {@code null}, the {@code searcher.dir} setting is used
 *     (default {@code crawl})
 * @throws IOException declared by the signature; propagated from the calls made here
 */
public NutchBean(Configuration conf, Path dir) throws IOException {
  this.conf = conf;
  this.fs = FileSystem.get(this.conf);

  final Path baseDir = (dir != null) ? dir : new Path(this.conf.get("searcher.dir", "crawl"));
  final Path luceneConfig = new Path(baseDir, "search-servers.txt");
  final Path solrConfig = new Path(baseDir, "solr-servers.txt");

  final boolean hasServerList = fs.exists(luceneConfig) || fs.exists(solrConfig);
  if (!hasServerList) {
    final Path indexDir = new Path(baseDir, "index");
    final Path indexesDir = new Path(baseDir, "indexes");
    searchBean = new LuceneSearchBean(conf, indexDir, indexesDir);
  } else {
    searchBean = new DistributedSearchBean(conf, luceneConfig, solrConfig);
  }

  table = new HTable(conf.get("table.name", "webtable"));
  summarizerFactory = new SummarizerFactory(conf);
}
/**
 * Registers a file to be localized by appending its URI to the comma separated
 * {@code mapred.cache.files} setting.
 *
 * @param uri The uri of the cache to be localized
 * @param conf Configuration to add the cache to
 */
public static void addCacheFile(URI uri, Configuration conf) {
  final String key = "mapred.cache.files";
  final String existing = conf.get(key);
  if (existing == null) {
    conf.set(key, uri.toString());
  } else {
    conf.set(key, existing + "," + uri.toString());
  }
}
/**
 * Registers an archive to be localized by appending its URI to the comma separated
 * {@code mapred.cache.shared.archives} setting.
 *
 * @param uri The uri of the cache to be localized
 * @param conf Configuration to add the cache to
 */
public static void addSharedCacheArchive(URI uri, Configuration conf) {
  final String key = "mapred.cache.shared.archives";
  final String existing = conf.get(key);
  if (existing == null) {
    conf.set(key, uri.toString());
  } else {
    conf.set(key, existing + "," + uri.toString());
  }
}
/**
 * Fills {@code curConf} with the working settings derived from {@code argConf}.
 *
 * <p>Settings without a fallback value are checked first; if one is missing a fatal message is
 * logged and the JVM exits with status 1. The three HBase table names are part of that check
 * because {@code Hashtable} rejects {@code null} values: without it a missing table name would
 * fail inside {@code put} with a {@code NullPointerException}.
 *
 * @param curConf table that receives the derived settings
 * @param argConf configuration holding the command line and site settings
 * @return always {@code true}
 * @throws NumberFormatException if the effective {@code timestamp_stop} is not a valid long
 */
private static boolean setup(Hashtable<String, String> curConf, Configuration argConf) {
  // { setting name, message logged when it is missing }, checked in this order.
  String[][] required = {
    {"file", "Missing file parameter"},
    {"hdfs_base_path", "Missing HDFS base path, check gestore-conf.xml"},
    {"hdfs_temp_path", "Missing HDFS temp path, check gestore-conf.xml"},
    {"local_temp_path", "Missing local temp path, check gestore-conf.xml"},
    {"hbase_file_table", "Missing HBase file table name, check gestore-conf.xml"},
    {"hbase_run_table", "Missing HBase run table name, check gestore-conf.xml"},
    {"hbase_db_update_table", "Missing HBase update table name, check gestore-conf.xml"},
  };
  for (String[] setting : required) {
    if (argConf.get(setting[0]) == null) {
      logger.fatal(setting[1]);
      System.exit(1);
    }
  }

  // Input parameters
  curConf.put("run_id", argConf.get("run", ""));
  curConf.put("task_id", argConf.get("task", ""));
  curConf.put("file_id", argConf.get("file"));
  curConf.put("local_path", argConf.get("path", ""));
  curConf.put("type", argConf.get("type", "l2r"));
  curConf.put("timestamp_start", argConf.get("timestamp_start", "1"));
  curConf.put(
      "timestamp_stop", argConf.get("timestamp_stop", Integer.toString(Integer.MAX_VALUE)));
  // The "regex" argument is stored under the "delimiter" key.
  curConf.put("delimiter", argConf.get("regex", "ID=.*"));
  curConf.put("taxon", argConf.get("taxon", "all"));
  // The "full_run" argument is stored under the "intermediate" key.
  curConf.put("intermediate", argConf.get("full_run", "false"));
  curConf.put("quick_add", argConf.get("quick_add", "false"));
  curConf.put("format", argConf.get("format", "unknown"));
  curConf.put("split", argConf.get("split", "1"));
  curConf.put("copy", argConf.get("copy", "true"));

  // Constants
  curConf.put("base_path", argConf.get("hdfs_base_path"));
  curConf.put("temp_path", argConf.get("hdfs_temp_path"));
  curConf.put("local_temp_path", argConf.get("local_temp_path"));
  curConf.put("db_name_files", argConf.get("hbase_file_table"));
  curConf.put("db_name_runs", argConf.get("hbase_run_table"));
  curConf.put("db_name_updates", argConf.get("hbase_db_update_table"));

  // Timestamps
  // The parsed value is not used; the call is kept so that a non-numeric timestamp_stop is
  // still rejected here with a NumberFormatException.
  Long.parseLong(curConf.get("timestamp_stop"));
  curConf.put("timestamp_real", Long.toString(System.currentTimeMillis()));
  return true;
}
/**
 * Registers a file to be localized by appending its URI to the comma separated value stored
 * under {@code CACHE_FILES}. Intended to be used by user code.
 *
 * @param uri The uri of the cache to be localized
 * @param conf Configuration to add the cache to
 */
public static void addCacheFile(URI uri, Configuration conf) {
  final String existing = conf.get(CACHE_FILES);
  final String updated;
  if (existing == null) {
    updated = uri.toString();
  } else {
    updated = existing + "," + uri.toString();
  }
  conf.set(CACHE_FILES, updated);
}
/**
 * Registers an archive to be localized by appending its URI to the comma separated value stored
 * under {@code CACHE_ARCHIVES}. Intended to be used by user code.
 *
 * @param uri The uri of the cache to be localized
 * @param conf Configuration to add the cache to
 */
public static void addCacheArchive(URI uri, Configuration conf) {
  final String existing = conf.get(CACHE_ARCHIVES);
  final String updated;
  if (existing == null) {
    updated = uri.toString();
  } else {
    updated = existing + "," + uri.toString();
  }
  conf.set(CACHE_ARCHIVES, updated);
}
/**
 * Records already-localized files by appending {@code str} to the comma separated value stored
 * under {@code CACHE_LOCALFILES}. Used by internal DistributedCache code.
 *
 * @param conf The conf to modify to contain the localized caches
 * @param str a comma separated list of local files
 */
public static void addLocalFiles(Configuration conf, String str) {
  final String existing = conf.get(CACHE_LOCALFILES);
  if (existing == null) {
    conf.set(CACHE_LOCALFILES, str);
    return;
  }
  conf.set(CACHE_LOCALFILES, existing + "," + str);
}
/**
 * Records already-localized archives by appending {@code str} to the comma separated value
 * stored under {@code CACHE_LOCALARCHIVES}. Used by internal DistributedCache code.
 *
 * @param conf The conf to modify to contain the localized caches
 * @param str a comma separated list of local archives
 */
public static void addLocalArchives(Configuration conf, String str) {
  final String existing = conf.get(CACHE_LOCALARCHIVES);
  if (existing == null) {
    conf.set(CACHE_LOCALARCHIVES, str);
    return;
  }
  conf.set(CACHE_LOCALARCHIVES, existing + "," + str);
}