public static void main(final String[] pArgs) throws Exception { TreasuryYieldMulti tym = new TreasuryYieldMulti(); // Here is an example of how to use multiple collections as the input to // a hadoop job, from within Java code directly. MultiCollectionSplitBuilder mcsb = new MultiCollectionSplitBuilder(); mcsb.add( new MongoURI("mongodb://localhost:27017/mongo_hadoop.yield_historical.in"), (MongoURI) null, // authuri true, // notimeout (DBObject) null, // fields (DBObject) null, // sort (DBObject) null, // query false) .add( new MongoURI("mongodb://localhost:27017/mongo_hadoop.yield_historical.in"), (MongoURI) null, // authuri true, // notimeout (DBObject) null, // fields (DBObject) null, // sort new BasicDBObject("_id", new BasicDBObject("$gt", new Date(883440000000L))), false); // range query Configuration conf = new Configuration(); conf.set(MultiMongoCollectionSplitter.MULTI_COLLECTION_CONF_KEY, mcsb.toJSON()); conf.setSplitterClass(conf, MultiMongoCollectionSplitter.class); System.exit(ToolRunner.run(conf, new TreasuryYieldXMLConfig(), pArgs)); }
public int run(String[] args) throws Exception { Configuration argConf = getConf(); // JobConf conf = new JobConf(diffdb.class); Configuration config = HBaseConfiguration.create(); HBaseAdmin hbAdmin = new HBaseAdmin(config); dbutil db_util = new dbutil(config); HTable runTable = new HTable(config, "gestore_runs"); Get runGet = new Get(argConf.get("id").getBytes()); Result pipeline = runTable.get(runGet); NavigableMap<byte[], byte[]> pipeMap = pipeline.getFamilyMap("d".getBytes()); Map.Entry<byte[], byte[]> results = pipeMap.pollFirstEntry(); HashMap<String, HashMap<String, String>> resultMap = new HashMap<String, HashMap<String, String>>(); while (results != null) { String resultKey = new String(results.getKey()); String resultValue = new String(results.getValue()); String field = "type"; HashMap<String, String> tempMap = new HashMap<String, String>(); String entry = resultKey; if (resultKey.endsWith("_db_timestamp")) { field = "db_timestamp"; entry = resultKey.substring(0, resultKey.lastIndexOf("_db_timestamp")); } else if (resultKey.endsWith("_filename")) { field = "filename"; entry = resultKey.substring(0, resultKey.lastIndexOf("_filename")); } else if (resultKey.endsWith("_regex")) { field = "regex"; entry = resultKey.substring(0, resultKey.lastIndexOf("_regex")); } if (resultMap.containsKey(entry)) { tempMap = resultMap.get(entry); } tempMap.put(field, resultValue); resultMap.put(entry, tempMap); // System.out.println("Key: " + resultKey + " Value: " + resultValue); results = pipeMap.pollFirstEntry(); } for (String key : resultMap.keySet()) { System.out.println("File ID: " + key); for (String subKey : resultMap.get(key).keySet()) { // System.out.println("\t " + subKey + "\t\t" + resultMap.get(key).get(subKey)); System.out.format(" %1$-20s %2$s\n", subKey, resultMap.get(key).get(subKey)); } } return 0; }
/** * Add a file path to the current set of classpath entries. It adds the file to cache as well. * Intended to be used by user code. * * @param file Path of the file to be added * @param conf Configuration that contains the classpath setting * @param fs FileSystem with respect to which {@code archivefile} should be interpreted. */ public static void addFileToClassPath(Path file, Configuration conf, FileSystem fs) throws IOException { String filepath = file.toUri().getPath(); String classpath = conf.get("mapred.job.classpath.files"); conf.set( "mapred.job.classpath.files", classpath == null ? filepath : classpath + System.getProperty("path.separator") + filepath); URI uri = fs.makeQualified(file).toUri(); addCacheFile(uri, conf); }
/** * Add an archive path to the current set of classpath entries. It adds the archive to cache as * well. Intended to be used by user code. * * @param archive Path of the archive to be added * @param conf Configuration that contains the classpath setting * @param fs FileSystem with respect to which {@code archive} should be interpreted. */ public static void addArchiveToClassPath(Path archive, Configuration conf, FileSystem fs) throws IOException { String archivepath = archive.toUri().getPath(); String classpath = conf.get("mapred.job.classpath.archives"); conf.set( "mapred.job.classpath.archives", classpath == null ? archivepath : classpath + System.getProperty("path.separator") + archivepath); URI uri = fs.makeQualified(archive).toUri(); addCacheArchive(uri, conf); }
private static URI addArchiveToClassPathHelper(Path archive, Configuration conf) throws IOException { String classpath = conf.get("mapred.job.classpath.archives"); // the scheme/authority use ':' as separator. put the unqualified path in classpath String archivePath = archive.toUri().getPath(); conf.set( "mapred.job.classpath.archives", classpath == null ? archivePath : classpath + System.getProperty("path.separator") + archivePath); return archive.makeQualified(archive.getFileSystem(conf)).toUri(); }
/** * This method checks to see if symlinks are to be create for the localized cache files in the * current working directory * * @param conf the jobconf * @return true if symlinks are to be created- else return false */ public static boolean getSymlink(Configuration conf) { String result = conf.get("mapred.create.symlink"); if ("yes".equals(result)) { return true; } return false; }
/** * This method checks to see if symlinks are to be create for the localized cache files in the * current working directory Used by internal DistributedCache code. * * @param conf the jobconf * @return true if symlinks are to be created- else return false */ public static boolean getSymlink(Configuration conf) { String result = conf.get(CACHE_SYMLINK); if ("yes".equals(result)) { return true; } return false; }
private static String getConfigString(Configuration config) { String output = ""; Iterator<Map.Entry<String, String>> iterConfig = config.iterator(); while (iterConfig.hasNext()) { Map.Entry<String, String> curEntry = iterConfig.next(); output = output + "Key: \t" + curEntry.getKey() + "\nValue: \t" + curEntry.getValue() + "\n"; } return output; }
/** * Get the archive entries in classpath as an array of Path * * @param conf Configuration that contains the classpath setting */ public static Path[] getArchiveClassPaths(Configuration conf) { String classpath = conf.get("mapred.job.classpath.archives"); if (classpath == null) return null; ArrayList list = Collections.list(new StringTokenizer(classpath, System.getProperty("path.separator"))); Path[] paths = new Path[list.size()]; for (int i = 0; i < list.size(); i++) { paths[i] = new Path((String) list.get(i)); } return paths; }
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(conf); Path matFile = new Path(args[0]); FSDataInputStream matData = fs.open(matFile); BufferedReader br = new BufferedReader(new InputStreamReader(matData)); int i = 0; String line; while ((line = br.readLine()) != null) { StringTokenizer tokenizer = new StringTokenizer(line); String iRow = tokenizer.nextToken(); String iCol = tokenizer.nextToken(); if (Integer.parseInt(iRow) == Integer.parseInt(iCol)) { i++; } } br.close(); int dimention = i; conf.setInt("DIMENTION", dimention); Path xFile = new Path("preX/Result"); FSDataOutputStream xData = fs.create(xFile); BufferedWriter iniX = new BufferedWriter(new OutputStreamWriter(xData)); for (int j = 0; j < dimention; j++) { iniX.write(String.valueOf(j) + " 0"); iniX.newLine(); } iniX.close(); URI matVec = new URI(args[0]); DistributedCache.addCacheFile(matVec, conf); int iteration = 0; do { ToolRunner.run(conf, new Jacobi(), args); } while (iteration++ < max_iter && (!stopIteration(conf))); }
@Override public int run(String[] args) throws Exception { String locatorHost = args[0]; int locatorPort = Integer.parseInt(args[1]); String hdfsHomeDir = args[2]; System.out.println( "KnownKeysMRv2 invoked with args (locatorHost = " + locatorHost + " locatorPort = " + locatorPort + " hdfsHomeDir = " + hdfsHomeDir); Configuration conf = getConf(); conf.set(GFInputFormat.INPUT_REGION, "partitionedRegion"); conf.set(GFInputFormat.HOME_DIR, hdfsHomeDir); conf.setBoolean(GFInputFormat.CHECKPOINT, false); conf.set(GFOutputFormat.REGION, "validationRegion"); conf.set(GFOutputFormat.LOCATOR_HOST, locatorHost); conf.setInt(GFOutputFormat.LOCATOR_PORT, locatorPort); Job job = Job.getInstance(conf, "knownKeysMRv2"); job.setInputFormatClass(GFInputFormat.class); job.setOutputFormatClass(GFOutputFormat.class); job.setMapperClass(KnownKeysMRv2Mapper.class); job.setMapOutputKeyClass(GFKey.class); job.setMapOutputValueClass(PEIWritable.class); job.setReducerClass(KnownKeysMRv2Reducer.class); // job.setOutputKeyClass(String.class); // job.setOutputValueClass(ValueHolder.class); return job.waitForCompletion(false) ? 0 : 1; }
/** * Set the configuration with the given set of archives * * @param archives The list of archives that need to be localized * @param conf Configuration which will be changed */ public static void setCacheArchives(URI[] archives, Configuration conf) { String sarchives = StringUtils.uriToString(archives); conf.set("mapred.cache.archives", sarchives); }
/** * Add a file to be localized to the conf * * @param uri The uri of the cache to be localized * @param conf Configuration to add the cache to */ public static void addCacheFile(URI uri, Configuration conf) { String files = conf.get("mapred.cache.files"); conf.set("mapred.cache.files", files == null ? uri.toString() : files + "," + uri.toString()); }
/** * Set the conf to contain the location for localized files * * @param conf The conf to modify to contain the localized caches * @param str a comma separated list of local files */ public static void setLocalSharedFiles(Configuration conf, String str) { conf.set("mapred.cache.shared.localFiles", str); }
/** * Add a archives to be localized to the conf * * @param uri The uri of the cache to be localized * @param conf Configuration to add the cache to */ public static void addSharedCacheArchive(URI uri, Configuration conf) { String archives = conf.get("mapred.cache.shared.archives"); conf.set( "mapred.cache.shared.archives", archives == null ? uri.toString() : archives + "," + uri.toString()); }
public void setConf(Configuration conf) { this.scriptName = conf.get(SCRIPT_FILENAME_KEY); this.maxArgs = conf.getInt(SCRIPT_ARG_COUNT_KEY, DEFAULT_ARG_COUNT); this.conf = conf; }
/** * Get cache files set in the Configuration * * @param conf The configuration which contains the files * @return A URI array of the files set in the Configuration * @throws IOException */ public static URI[] getSharedCacheFiles(Configuration conf) throws IOException { return StringUtils.stringToURI(conf.getStrings("mapred.cache.shared.files")); }
/** * This is to check the timestamp of the archives to be localized * * @param conf Configuration which stores the timestamp's * @param timestamps comma separated list of timestamps of archives. The order should be the same * as the order in which the archives are added. */ public static void setArchiveTimestamps(Configuration conf, String timestamps) { conf.set("mapred.cache.archives.timestamps", timestamps); }
/** * This is to check the timestamp of the files to be localized * * @param conf Configuration which stores the timestamp's * @param timestamps comma separated list of timestamps of files. The order should be the same as * the order in which the files are added. */ public static void setFileTimestamps(Configuration conf, String timestamps) { conf.set("mapred.cache.files.timestamps", timestamps); }
/** * Set the configuration with the given set of files * * @param files The list of files that need to be localized * @param conf Configuration which will be changed */ public static void setCacheFiles(URI[] files, Configuration conf) { String sfiles = StringUtils.uriToString(files); conf.set("mapred.cache.files", sfiles); }
public static String[] getSharedFileLength(Configuration conf) { return conf.getStrings("mapred.cache.shared.files.length"); }
/** * Get the timestamps of the archives * * @param conf The configuration which stored the timestamps * @return a string array of timestamps * @throws IOException */ public static String[] getArchiveTimestamps(Configuration conf) { return conf.getStrings("mapred.cache.archives.timestamps"); }
/** * Return the path array of the localized files * * @param conf Configuration that contains the localized files * @return A path array of localized files * @throws IOException */ public static Path[] getLocalSharedCacheFiles(Configuration conf) throws IOException { return StringUtils.stringToPath(conf.getStrings("mapred.cache.shared.localFiles")); }
/** * Return the path array of the localized caches * * @param conf Configuration that contains the localized archives * @return A path array of localized caches * @throws IOException */ public static Path[] getLocalCacheArchives(Configuration conf) throws IOException { return StringUtils.stringToPath(conf.getStrings("mapred.cache.localArchives")); }
static { // Load the XML config defined in hadoop-local.xml Configuration.addDefaultResource("src/examples/hadoop-local.xml"); Configuration.addDefaultResource("src/examples/mongo-defaults.xml"); }
public static void setSharedFileLength(Configuration conf, String length) { conf.set("mapred.cache.shared.files.length", length); }
/** * Get the locally cached file or archive; it could either be previously cached (and valid) or * copy it from the {@link FileSystem} now. * * @param cache the cache to be localized, this should be specified as new * URI(hdfs://hostname:port/absolute_path_to_file#LINKNAME). If no schema or hostname:port is * provided the file is assumed to be in the filesystem being used in the Configuration * @param conf The Confguration file which contains the filesystem * @param subDir The sub cache Dir where you want to localize the files/archives * @param fileStatus The file status on the dfs. * @param isArchive if the cache is an archive or a file. In case it is an archive with a .zip or * .jar or .tar or .tgz or .tar.gz extension it will be unzipped/unjarred/untarred * automatically and the directory where the archive is unzipped/unjarred/untarred is returned * as the Path. In case of a file, the path to the file is returned * @param confFileStamp this is the hdfs file modification timestamp to verify that the file to be * cached hasn't changed since the job started * @param fileLength this is the length of the cache file * @param currentWorkDir this is the directory where you would want to create symlinks for the * locally cached files/archives * @param honorSymLinkConf if this is false, then the symlinks are not created even if conf says * so (this is required for an optimization in task launches * @param lDirAllocator LocalDirAllocator of the tracker * @return the path to directory where the archives are unjarred in case of archives, the path to * the file where the file is copied locally * @throws IOException */ private static Path getLocalCache( URI cache, Configuration conf, Path subDir, FileStatus fileStatus, boolean isArchive, long confFileStamp, long fileLength, Path currentWorkDir, boolean honorSymLinkConf, MRAsyncDiskService asyncDiskService, LocalDirAllocator lDirAllocator) throws IOException { String key = getKey(cache, conf, confFileStamp); CacheStatus lcacheStatus; Path localizedPath; synchronized (cachedArchives) { lcacheStatus = cachedArchives.get(key); if (lcacheStatus == null) { // was never localized Path uniqueParentDir = new Path(subDir, String.valueOf(random.nextLong())); String cachePath = new Path(uniqueParentDir, makeRelative(cache, conf)).toString(); Path localPath = lDirAllocator.getLocalPathForWrite(cachePath, fileLength, conf); lcacheStatus = new CacheStatus( new Path(localPath.toString().replace(cachePath, "")), localPath, uniqueParentDir); cachedArchives.put(key, lcacheStatus); } lcacheStatus.refcount++; } boolean initSuccessful = false; try { synchronized (lcacheStatus) { if (!lcacheStatus.isInited()) { localizedPath = localizeCache(conf, cache, confFileStamp, lcacheStatus, isArchive); lcacheStatus.initComplete(); } else { if (fileStatus != null) { localizedPath = checkCacheStatusValidity( conf, cache, confFileStamp, lcacheStatus, fileStatus, isArchive); } else { // if fileStatus is null, then the md5 must be correct // so there is no need to check for cache validity localizedPath = lcacheStatus.localizedLoadPath; } } createSymlink(conf, cache, lcacheStatus, isArchive, currentWorkDir, honorSymLinkConf); } // try deleting stuff if you can long size = 0; int numberSubDir = 0; synchronized (lcacheStatus) { synchronized (baseDirSize) { Long get = baseDirSize.get(lcacheStatus.getBaseDir()); if (get != null) { size = get.longValue(); } else { LOG.warn("Cannot find size of baseDir: " + lcacheStatus.getBaseDir()); } } synchronized (baseDirNumberSubDir) { Integer get = baseDirNumberSubDir.get(lcacheStatus.getBaseDir()); if (get != null) { numberSubDir = get.intValue(); } else { LOG.warn("Cannot find subdirectories limit of baseDir: " + lcacheStatus.getBaseDir()); } } } // setting the cache size to a default of 10GB long allowedSize = conf.getLong("local.cache.size", DEFAULT_CACHE_SIZE); long allowedNumberSubDir = conf.getLong("local.cache.numbersubdir", DEFAULT_CACHE_SUBDIR_LIMIT); if (allowedSize < size || allowedNumberSubDir < numberSubDir) { // try some cache deletions LOG.debug( "Start deleting released cache because" + " [size, allowedSize, numberSubDir, allowedNumberSubDir] =" + " [" + size + ", " + allowedSize + ", " + numberSubDir + ", " + allowedNumberSubDir + "]"); deleteCache(conf, asyncDiskService); } initSuccessful = true; return localizedPath; } finally { if (!initSuccessful) { synchronized (cachedArchives) { lcacheStatus.refcount--; } } } }
/** * Set the conf to contain the location for localized archives * * @param conf The conf to modify to contain the localized caches * @param str a comma separated list of local archives */ public static void setLocalArchives(Configuration conf, String str) { conf.set("mapred.cache.localArchives", str); }
/** * Get the timestamps of the files * * @param conf The configuration which stored the timestamps * @return a string array of timestamps * @throws IOException */ public static String[] getFileTimestamps(Configuration conf) { return conf.getStrings("mapred.cache.files.timestamps"); }
/** * Get cache archives set in the Configuration * * @param conf The configuration which contains the archives * @return A URI array of the caches set in the Configuration * @throws IOException */ public static URI[] getCacheArchives(Configuration conf) throws IOException { return StringUtils.stringToURI(conf.getStrings("mapred.cache.archives")); }