/**
 * Opens the {@code part-*} files directly under {@code pt} as one logical stream that starts
 * {@code offset} bytes into their concatenation.
 *
 * <p>Files are visited in the order produced by {@code fs.listFiles(pt, false)}. A file that lies
 * entirely before the offset is skipped without being opened; the file that contains the offset
 * is opened and positioned with {@code seek}; every later file is opened from its beginning.
 *
 * @param fs file system used to list and open the files
 * @param pt directory whose direct children are scanned
 * @param offset number of bytes of the concatenated part files to skip
 * @return the single opened stream, a {@link SequenceInputStream} over several streams, or
 *     {@code null} (after printing a message to stderr) when no file was opened
 * @throws IOException if listing, opening or seeking fails; streams opened so far are closed
 */
private InputStream OpenMultiplePartsWithOffset(FileSystem fs, Path pt, long offset)
    throws IOException {
  RemoteIterator<LocatedFileStatus> rit = fs.listFiles(pt, false);
  Vector<FSDataInputStream> fileHandleList = new Vector<FSDataInputStream>();
  boolean completed = false;
  try {
    while (rit.hasNext()) {
      LocatedFileStatus status = rit.next();
      Path path = status.getPath();
      // Match on the file name itself. Comparing against "/part-" after stripping the parent
      // string breaks when the parent is the root, whose string form already ends in '/'.
      if (!path.getName().startsWith("part-")) {
        continue;
      }
      // The listing already carries the length; no extra getFileStatus round trip is needed.
      long filesize = status.getLen();
      if (offset < filesize) {
        FSDataInputStream handle = fs.open(path);
        // Register before seeking so the handle is closed if seek throws.
        fileHandleList.add(handle);
        if (offset > 0) {
          handle.seek(offset);
        }
      }
      // Once the offset has been consumed this goes negative, so later files start at byte 0.
      offset -= filesize;
    }
    completed = true;
  } finally {
    if (!completed) {
      for (FSDataInputStream opened : fileHandleList) {
        try {
          opened.close();
        } catch (IOException ignored) {
          // Best effort: the original failure is the one that propagates.
        }
      }
    }
  }
  if (fileHandleList.size() == 1) {
    return fileHandleList.get(0);
  } else if (fileHandleList.size() > 1) {
    Enumeration<FSDataInputStream> enu = fileHandleList.elements();
    return new SequenceInputStream(enu);
  } else {
    System.err.println("Error, no source file loaded. run genSeedDataset.sh fisrt!");
    return null;
  }
}
/**
 * Configures and runs the "MapTestID" job.
 *
 * @param args {@code args[0]} and {@code args[1]} are added as input paths (train and test),
 *     {@code args[2]} is the output path
 * @return 0 when the job completes successfully, 1 otherwise
 */
public int run(String[] args) throws Exception {
  Configuration configuration = getConf();
  configuration.setLong("mapreduce.task.timeout", 10000 * 60 * 60);

  Path trainInput = new Path(args[0]);
  Path testInput = new Path(args[1]);
  // The name of the directory holding the training file is published under "train_file".
  String trainDirName = trainInput.getParent().getName();
  configuration.set("train_file", trainDirName);

  Job mapTestIdJob = new Job(configuration, "MapTestID");
  mapTestIdJob.setJarByClass(MapTestID.class);

  mapTestIdJob.setOutputKeyClass(Text.class);
  mapTestIdJob.setOutputValueClass(Text.class);

  mapTestIdJob.setMapperClass(MapTestID.MapTestIDMap.class);
  mapTestIdJob.setReducerClass(MapTestID.MapTestIDReduce.class);
  mapTestIdJob.setNumReduceTasks(300);

  FileInputFormat.addInputPath(mapTestIdJob, trainInput);
  FileInputFormat.addInputPath(mapTestIdJob, testInput);
  FileOutputFormat.setOutputPath(mapTestIdJob, new Path(args[2]));

  boolean succeeded = mapTestIdJob.waitForCompletion(true);
  if (succeeded) {
    return 0;
  }
  return 1;
}
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException { FileSplit split = (FileSplit) genericSplit; Configuration job = context.getConfiguration(); m_Sb.setLength(0); m_Start = split.getStart(); m_End = m_Start + split.getLength(); final Path file = split.getPath(); compressionCodecs = new CompressionCodecFactory(job); final CompressionCodec codec = compressionCodecs.getCodec(file); // open the file and seek to the m_Start of the split FileSystem fs = file.getFileSystem(job); // getFileStatus fileStatus = fs.getFileStatus(split.getPath()); //noinspection deprecation @SuppressWarnings(value = "deprecated") long length = fs.getLength(file); FSDataInputStream fileIn = fs.open(split.getPath()); if (m_Start > 0) fileIn.seek(m_Start); if (codec != null) { CompressionInputStream inputStream = codec.createInputStream(fileIn); m_Input = new BufferedReader(new InputStreamReader(inputStream)); m_End = length; } else { m_Input = new BufferedReader(new InputStreamReader(fileIn)); } m_Current = m_Start; m_Key = split.getPath().getName(); }
/**
 * Builds a slash-separated string for {@code p} by walking up its parents.
 *
 * @param p path to render; may be {@code null}
 * @return {@code ""} for {@code null}, {@code "/"} for a path without a parent, otherwise the
 *     rendered parent followed by this path's name (with a {@code "/"} between them unless the
 *     rendered parent is at most one character long)
 */
public static String absolutePath(Path p) {
  if (p == null) {
    return "";
  }
  Path parent = p.getParent();
  if (parent == null) {
    return "/";
  }
  String prefix = absolutePath(parent);
  // A prefix of "/" (or empty) must not get a second separator appended.
  String separator = prefix.length() > 1 ? "/" : "";
  return prefix + separator + p.getName();
}
/** * Generate the list of files and make them into FileSplits. This needs to be copied to insert a * filter on acceptable data */ @Override public List<InputSplit> getSplits(JobContext job) throws IOException { long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job)); long maxSize = getMaxSplitSize(job); long desiredMappers = job.getConfiguration().getLong("org.systemsbiology.jxtandem.DesiredXMLInputMappers", 0); // generate splits List<InputSplit> splits = new ArrayList<InputSplit>(); List<FileStatus> fileStatuses = listStatus(job); boolean forceNumberMappers = fileStatuses.size() == 1; for (FileStatus file : fileStatuses) { Path path = file.getPath(); if (!isPathAcceptable(path)) // filter acceptable data continue; FileSystem fs = path.getFileSystem(job.getConfiguration()); long length = file.getLen(); BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length); if ((length != 0) && isSplitable(job, path)) { long blockSize = file.getBlockSize(); // use desired mappers to force more splits if (forceNumberMappers && desiredMappers > 0) maxSize = Math.min(maxSize, (length / desiredMappers)); long splitSize = computeSplitSize(blockSize, minSize, maxSize); long bytesRemaining = length; while (withinSlop(splitSize, bytesRemaining)) { int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining); splits.add( new FileSplit( path, length - bytesRemaining, splitSize, blkLocations[blkIndex].getHosts())); bytesRemaining -= splitSize; } if (bytesRemaining != 0) { splits.add( new FileSplit( path, length - bytesRemaining, bytesRemaining, blkLocations[blkLocations.length - 1].getHosts())); } } else if (length != 0) { splits.add(new FileSplit(path, 0, length, blkLocations[0].getHosts())); } else { // Create empty hosts array for zero length files splits.add(new FileSplit(path, 0, length, new String[0])); } } System.out.println("Total # of splits: " + splits.size()); // LOG.debug("Total # of splits: " + splits.size()); return splits; }
/**
 * Appends a file to the {@code mapred.job.classpath.files} setting and registers it as a cache
 * file.
 *
 * @param file path of the file to be added
 * @param conf configuration that holds the classpath setting
 * @throws IOException if the file's file system cannot be obtained
 */
public static void addFileToClassPath(Path file, Configuration conf) throws IOException {
  String existing = conf.get("mapred.job.classpath.files");
  String updated;
  if (existing == null) {
    updated = file.toString();
  } else {
    updated = existing + System.getProperty("path.separator") + file.toString();
  }
  conf.set("mapred.job.classpath.files", updated);

  // Qualify the path against its own file system before handing it to the cache.
  FileSystem fileFs = file.getFileSystem(conf);
  URI qualified = file.makeQualified(fileFs).toUri();
  addCacheFile(qualified, conf);
}
private static URI addArchiveToClassPathHelper(Path archive, Configuration conf) throws IOException { String classpath = conf.get("mapred.job.classpath.archives"); // the scheme/authority use ':' as separator. put the unqualified path in classpath String archivePath = archive.toUri().getPath(); conf.set( "mapred.job.classpath.archives", classpath == null ? archivePath : classpath + System.getProperty("path.separator") + archivePath); return archive.makeQualified(archive.getFileSystem(conf)).toUri(); }
@Override protected boolean isSplitable(JobContext context, Path file) { String fname = file.getName().toLowerCase(); //noinspection SimplifiableIfStatementf,RedundantIfStatement if (fname.endsWith(".gz")) return false; return true; }
/**
 * Creates a fresh procedure store under the test data directory and brings it up.
 *
 * <p>Any existing {@code proc-logs} directory is deleted first. The store implementation is
 * chosen by {@code syncType}; it is then started, its lease recovered and its content loaded.
 *
 * @throws IOException if the file system or the store fails
 */
private void setupProcedureStore() throws IOException {
  final Path baseDir = UTIL.getDataTestDir();
  final FileSystem fs = baseDir.getFileSystem(conf);
  final Path walDir = new Path(baseDir, "proc-logs");

  System.out.println("Logs directory : " + walDir.toString());
  fs.delete(walDir, true);

  if (!"nosync".equals(syncType)) {
    store = ProcedureTestingUtility.createWalStore(conf, fs, walDir);
  } else {
    store = new NoSyncWalProcedureStore(conf, fs, walDir);
  }

  store.start(numThreads);
  store.recoverLease();
  store.load(new ProcedureTestingUtility.LoadCounter());

  // Report the last entry of the active log list.
  System.out.println(
      "Starting new log : " + store.getActiveLogs().get(store.getActiveLogs().size() - 1));
}
@Override public void run() { for (CacheStatus lcacheStatus : toBeDeletedCache) { synchronized (lcacheStatus) { Path fullUniqueParentDir = new Path(lcacheStatus.localizedBaseDir, lcacheStatus.uniqueParentDir); try { LOG.info("Deleting local cached path: " + fullUniqueParentDir.toString()); deleteLocalPath(asyncDiskService, fs, fullUniqueParentDir); // decrement the size of the cache from baseDirSize deleteCacheInfoUpdate(lcacheStatus); LOG.info("Removed cache " + lcacheStatus.localizedLoadPath); } catch (IOException e) { LOG.warn("Error when deleting " + fullUniqueParentDir, e); } } } }
protected boolean isPathAcceptable(final Path pPath1) { String path = pPath1.toString().toLowerCase(); if (path.startsWith("part-r-")) return true; String extension = getExtension(); if (extension != null && path.endsWith(extension.toLowerCase())) return true; if (extension != null && path.endsWith(extension.toLowerCase() + ".gz")) return true; //noinspection SimplifiableIfStatement,RedundantIfStatement if (extension == null) return true; return false; }
// the method which actually copies the caches locally and unjars/unzips them // and does chmod for the files private static Path localizeCache( Configuration conf, URI cache, long confFileStamp, CacheStatus cacheStatus, boolean isArchive) throws IOException { FileSystem fs = getFileSystem(cache, conf); FileSystem localFs = FileSystem.getLocal(conf); Path parchive = null; if (isArchive) { parchive = new Path( cacheStatus.localizedLoadPath, new Path(cacheStatus.localizedLoadPath.getName())); } else { parchive = cacheStatus.localizedLoadPath; } if (!localFs.mkdirs(parchive.getParent())) { throw new IOException( "Mkdirs failed to create directory " + cacheStatus.localizedLoadPath.toString()); } String cacheId = cache.getPath(); fs.copyToLocalFile(new Path(cacheId), parchive); if (isArchive) { String tmpArchive = parchive.toString().toLowerCase(); File srcFile = new File(parchive.toString()); File destDir = new File(parchive.getParent().toString()); if (tmpArchive.endsWith(".jar")) { RunJar.unJar(srcFile, destDir); } else if (tmpArchive.endsWith(".zip")) { FileUtil.unZip(srcFile, destDir); } else if (isTarFile(tmpArchive)) { FileUtil.unTar(srcFile, destDir); } // else will not do anyhting // and copy the file into the dir as it is } long cacheSize = FileUtil.getDU(new File(parchive.getParent().toString())); cacheStatus.size = cacheSize; addCacheInfoUpdate(cacheStatus); // do chmod here try { // Setting recursive permission to grant everyone read and execute Path localDir = new Path(cacheStatus.localizedBaseDir, cacheStatus.uniqueParentDir); LOG.info("Doing chmod on localdir :" + localDir); FileUtil.chmod(localDir.toString(), "ugo+rx", true); } catch (InterruptedException e) { LOG.warn("Exception in chmod" + e.toString()); } // update cacheStatus to reflect the newly cached file cacheStatus.mtime = getTimestamp(conf, cache); return cacheStatus.localizedLoadPath; }
/**
 * Appends an archive to the {@code mapred.job.classpath.archives} setting and registers it as a
 * cache archive. Intended to be used by user code.
 *
 * @param archive path of the archive to be added
 * @param conf configuration that holds the classpath setting
 * @param fs file system with respect to which {@code archive} should be interpreted
 * @throws IOException declared for callers; propagated from the cache registration
 */
public static void addArchiveToClassPath(Path archive, Configuration conf, FileSystem fs)
    throws IOException {
  // Only the path component goes on the classpath; scheme and authority are dropped.
  String unqualified = archive.toUri().getPath();
  String existing = conf.get("mapred.job.classpath.archives");
  String updated;
  if (existing == null) {
    updated = unqualified;
  } else {
    updated = existing + System.getProperty("path.separator") + unqualified;
  }
  conf.set("mapred.job.classpath.archives", updated);

  URI qualified = fs.makeQualified(archive).toUri();
  addCacheArchive(qualified, conf);
}
public static void recursePath(Configuration conf, Path path, Job job) { try { FileSystem fs = path.getFileSystem(conf); FileStatus[] fstats = fs.listStatus(path); if (fstats != null) { for (FileStatus f : fstats) { Path p = f.getPath(); ; if (fs.isFile(p)) { // connection times out otherwise System.err.println("file:" + p.toString()); FileInputFormat.addInputPath(job, p); } else { System.err.println("dir:" + p.toString()); recursePath(conf, p, job); } } } } catch (IOException e) { // shouldn't be here throw new RuntimeException(e); } }
/**
 * Writes {@code checksum} as UTF-8 text to the file {@code <localPath>.crc}.
 *
 * @param localPath path of the data file; {@code ".crc"} is appended to its string form
 * @param checksum text to store
 * @return {@code true} if the file was written and closed, {@code false} on any failure (the
 *     failure is logged as a warning)
 */
private static boolean writeChecksum(Path localPath, String checksum) {
  // try-with-resources closes (and thereby flushes) the writer even when write() fails; a
  // failure while closing is also caught below and reported as false.
  try (java.io.BufferedWriter writer =
      Files.newBufferedWriter(
          java.nio.file.Paths.get(localPath.toString() + ".crc"), Charset.forName("UTF-8"))) {
    writer.write(checksum);
    return true;
  } catch (Exception e) {
    logger.warn("Unable to write CRC file!", e);
    return false;
  }
}
/** * open a file for writing * * @param hdfsPath !null path - * @return !null stream */ @Override public OutputStream openFileForWrite(final Path src) { if (isRunningAsUser()) { return super.openFileForWrite(src); } if (true) throw new UnsupportedOperationException("Fix This"); // ToDo final FileSystem fs = getDFS(); try { Path parent = src.getParent(); guaranteeDirectory(parent); return FileSystem.create(fs, src, FULL_FILE_ACCESS); } catch (IOException e) { throw new RuntimeException(e); } }
/**
 * Checks whether the file {@code <localPath>.crc} contains a line equal to {@code checksum}.
 *
 * @param localPath path of the data file; {@code ".crc"} is appended to its string form
 * @param checksum expected checksum text
 * @return {@code true} only if the CRC file is readable and one of its UTF-8 lines equals
 *     {@code checksum}; {@code false} otherwise, including when reading fails
 */
private static boolean checksumLocalTest(Path localPath, String checksum) {
  java.nio.file.Path crcFile = java.nio.file.Paths.get(localPath.toString() + ".crc");
  if (!Files.isReadable(crcFile)) {
    return false;
  }
  try {
    return Files.readAllLines(crcFile, Charset.forName("UTF-8")).contains(checksum);
  } catch (Exception e) {
    return false;
  }
}
/**
 * Creates a symlink in {@code currentWorkDir}, named after the cache URI's fragment, that
 * points at the localized cache path.
 *
 * <p>The link is created only when {@code honorSymLinkConf} is set, the configuration asks for
 * symlinks, the URI has a fragment, and the link does not exist yet.
 *
 * @param conf configuration consulted for the symlink setting
 * @param cache cache URI whose fragment names the link
 * @param cacheStatus supplies the link target ({@code localizedLoadPath})
 * @param isArchive not used by this method
 * @param currentWorkDir directory in which the link is created
 * @param honorSymLinkConf when {@code false}, no link is created regardless of configuration
 * @throws IOException if the link cannot be created
 */
private static void createSymlink(
    Configuration conf,
    URI cache,
    CacheStatus cacheStatus,
    boolean isArchive,
    Path currentWorkDir,
    boolean honorSymLinkConf)
    throws IOException {
  boolean requested = honorSymLinkConf && DistributedCache.getSymlink(conf);
  boolean hasFragment = cache.getFragment() != null;

  // The link name is computed unconditionally, exactly as before.
  String linkName = currentWorkDir.toString() + Path.SEPARATOR + cache.getFragment();
  File linkFile = new File(linkName);

  if (requested && hasFragment && !linkFile.exists()) {
    FileUtil.symLink(cacheStatus.localizedLoadPath.toString(), linkName);
  }
}
/** * Delete a local path with asyncDiskService if available, or otherwise synchronously with local * file system. */ private static void deleteLocalPath( MRAsyncDiskService asyncDiskService, LocalFileSystem fs, Path path) throws IOException { boolean deleted = false; if (asyncDiskService != null) { // Try to delete using asyncDiskService String localPathToDelete = path.toUri().getPath(); deleted = asyncDiskService.moveAndDeleteAbsolutePath(localPathToDelete); if (!deleted) { LOG.warn( "Cannot find DistributedCache path " + localPathToDelete + " on any of the asyncDiskService volumes!"); } } if (!deleted) { // If no asyncDiskService, we will delete the files synchronously fs.delete(path, true); } LOG.info("Deleted path " + path); }
/** * open a file for reading * * @param hdfsPath !null path - probably of an existing file * @return !null stream */ @Override public InputStream openFileForRead(Path src) { if (isRunningAsUser()) { return super.openFileForRead(src); } String hdfsPath = src.toString(); if (isFileNameLocal(hdfsPath)) { try { return new FileInputStream(hdfsPath); // better be local } catch (FileNotFoundException e) { throw new RuntimeException(e); } } if (true) throw new UnsupportedOperationException("Fix This"); // ToDo final FileSystem fs = getDFS(); try { return fs.open(src); } catch (IOException e) { throw new RuntimeException(e); } }
/**
 * Get the locally cached file or archive; it could either be previously cached (and valid) or
 * copy it from the {@link FileSystem} now.
 *
 * <p>The matching {@code CacheStatus} entry has its reference count incremented on entry; the
 * increment is undone if this method exits without completing successfully.
 *
 * @param cache the cache to be localized, this should be specified as new
 *     URI(hdfs://hostname:port/absolute_path_to_file#LINKNAME). If no schema or hostname:port is
 *     provided the file is assumed to be in the filesystem being used in the Configuration
 * @param conf The Configuration file which contains the filesystem
 * @param subDir The sub cache Dir where you want to localize the files/archives
 * @param fileStatus The file status on the dfs; when {@code null}, an already-initialized entry
 *     is returned without a validity check
 * @param isArchive if the cache is an archive or a file. In case it is an archive with a .zip or
 *     .jar or .tar or .tgz or .tar.gz extension it will be unzipped/unjarred/untarred
 *     automatically and the directory where the archive is unzipped/unjarred/untarred is returned
 *     as the Path. In case of a file, the path to the file is returned
 * @param confFileStamp this is the hdfs file modification timestamp to verify that the file to be
 *     cached hasn't changed since the job started
 * @param fileLength this is the length of the cache file
 * @param currentWorkDir this is the directory where you would want to create symlinks for the
 *     locally cached files/archives
 * @param honorSymLinkConf if this is false, then the symlinks are not created even if conf says
 *     so (this is required for an optimization in task launches)
 * @param asyncDiskService passed on to {@code deleteCache} when the cache limits are exceeded
 * @param lDirAllocator LocalDirAllocator of the tracker
 * @return the path to directory where the archives are unjarred in case of archives, the path to
 *     the file where the file is copied locally
 * @throws IOException if localizing, validating or symlinking the cache entry fails
 */
private static Path getLocalCache(
    URI cache,
    Configuration conf,
    Path subDir,
    FileStatus fileStatus,
    boolean isArchive,
    long confFileStamp,
    long fileLength,
    Path currentWorkDir,
    boolean honorSymLinkConf,
    MRAsyncDiskService asyncDiskService,
    LocalDirAllocator lDirAllocator)
    throws IOException {
  String key = getKey(cache, conf, confFileStamp);
  CacheStatus lcacheStatus;
  Path localizedPath;
  // Look up (or create) the bookkeeping entry and take a reference, all under the map lock.
  synchronized (cachedArchives) {
    lcacheStatus = cachedArchives.get(key);
    if (lcacheStatus == null) {
      // was never localized
      // A random number is used as the name of the entry's parent directory.
      Path uniqueParentDir = new Path(subDir, String.valueOf(random.nextLong()));
      String cachePath = new Path(uniqueParentDir, makeRelative(cache, conf)).toString();
      Path localPath = lDirAllocator.getLocalPathForWrite(cachePath, fileLength, conf);
      // The base dir is the allocated local path with the relative cache path removed.
      lcacheStatus =
          new CacheStatus(
              new Path(localPath.toString().replace(cachePath, "")), localPath, uniqueParentDir);
      cachedArchives.put(key, lcacheStatus);
    }
    lcacheStatus.refcount++;
  }
  // Set to true only on the success path; the finally block uses it to undo the refcount.
  boolean initSuccessful = false;
  try {
    synchronized (lcacheStatus) {
      if (!lcacheStatus.isInited()) {
        // First use of this entry: copy it locally and mark it initialized.
        localizedPath = localizeCache(conf, cache, confFileStamp, lcacheStatus, isArchive);
        lcacheStatus.initComplete();
      } else {
        if (fileStatus != null) {
          localizedPath =
              checkCacheStatusValidity(
                  conf, cache, confFileStamp, lcacheStatus, fileStatus, isArchive);
        } else {
          // if fileStatus is null, then the md5 must be correct
          // so there is no need to check for cache validity
          localizedPath = lcacheStatus.localizedLoadPath;
        }
      }
      createSymlink(conf, cache, lcacheStatus, isArchive, currentWorkDir, honorSymLinkConf);
    }

    // try deleting stuff if you can
    // Read the current size and subdirectory count recorded for this entry's base dir.
    long size = 0;
    int numberSubDir = 0;
    synchronized (lcacheStatus) {
      synchronized (baseDirSize) {
        Long get = baseDirSize.get(lcacheStatus.getBaseDir());
        if (get != null) {
          size = get.longValue();
        } else {
          LOG.warn("Cannot find size of baseDir: " + lcacheStatus.getBaseDir());
        }
      }
      synchronized (baseDirNumberSubDir) {
        Integer get = baseDirNumberSubDir.get(lcacheStatus.getBaseDir());
        if (get != null) {
          numberSubDir = get.intValue();
        } else {
          LOG.warn("Cannot find subdirectories limit of baseDir: " + lcacheStatus.getBaseDir());
        }
      }
    }
    // setting the cache size to a default of 10GB
    // NOTE(review): the 10GB figure comes from the original comment; DEFAULT_CACHE_SIZE is
    // defined outside this method - confirm.
    long allowedSize = conf.getLong("local.cache.size", DEFAULT_CACHE_SIZE);
    long allowedNumberSubDir =
        conf.getLong("local.cache.numbersubdir", DEFAULT_CACHE_SUBDIR_LIMIT);
    if (allowedSize < size || allowedNumberSubDir < numberSubDir) {
      // try some cache deletions
      LOG.debug(
          "Start deleting released cache because"
              + " [size, allowedSize, numberSubDir, allowedNumberSubDir] ="
              + " ["
              + size
              + ", "
              + allowedSize
              + ", "
              + numberSubDir
              + ", "
              + allowedNumberSubDir
              + "]");
      deleteCache(conf, asyncDiskService);
    }
    initSuccessful = true;
    return localizedPath;
  } finally {
    if (!initSuccessful) {
      // Something above threw: give back the reference taken at the top.
      synchronized (cachedArchives) {
        lcacheStatus.refcount--;
      }
    }
  }
}
/**
 * Adds a file to the classpath entries and to the cache, resolving the file system from the
 * path itself. Intended to be used by user code.
 *
 * @deprecated Please use {@link #addFileToClassPath(Path, Configuration, FileSystem)} instead.
 *     The {@code FileSystem} should be obtained within an appropriate {@code doAs}.
 * @param file Path of the file to be added
 * @param conf Configuration that contains the classpath setting
 * @throws IOException if the file system for {@code file} cannot be obtained
 */
@Deprecated
public static void addFileToClassPath(Path file, Configuration conf) throws IOException {
  FileSystem owningFs = file.getFileSystem(conf);
  addFileToClassPath(file, conf, owningFs);
}
public int run(String[] args) throws Exception { // printUsage(); /* * SETUP */ Configuration argConf = getConf(); Hashtable<String, String> confArg = new Hashtable<String, String>(); setup(confArg, argConf); Date currentTime = new Date(); Date endDate = new Date(new Long(confArg.get("timestamp_stop"))); Boolean full_run = confArg.get("intermediate").matches("(?i).*true.*"); Boolean quick_add = confArg.get("quick_add").matches("(?i).*true.*"); logger.info("Running GeStore"); // ZooKeeper setup Configuration config = HBaseConfiguration.create(); zkWatcher = new ZooKeeperWatcher(config, "Testing", new HBaseAdmin(config)); zkInstance = new ZooKeeper( ZKConfig.getZKQuorumServersString(config), config.getInt("zookeeper.session.timeout", -1), zkWatcher); if (!confArg.get("task_id").isEmpty()) { confArg.put("temp_path", confArg.get("temp_path") + confArg.get("task_id")); } String lockRequest = confArg.get("file_id"); if (!confArg.get("run_id").isEmpty()) lockRequest = lockRequest + "_" + confArg.get("run_id") + "_"; if (!confArg.get("task_id").isEmpty()) lockRequest = lockRequest + "_" + confArg.get("task_id") + "_"; // Get type of movement toFrom type_move = checkArgs(confArg); if (type_move == toFrom.LOCAL2REMOTE && !confArg.get("format").equals("unknown")) { List<String> arguments = new ArrayList<String>(); arguments.add("-Dinput=" + confArg.get("local_path")); arguments.add("-Dtable=" + confArg.get("file_id")); arguments.add("-Dtimestamp=" + confArg.get("timestamp_stop")); arguments.add("-Dtype=" + confArg.get("format")); arguments.add("-Dtarget_dir=" + confArg.get("base_path") + "_" + confArg.get("file_id")); arguments.add("-Dtemp_hdfs_path=" + confArg.get("temp_path")); arguments.add("-Drun_id=" + confArg.get("run_id")); if (!confArg.get("run_id").isEmpty()) arguments.add("-Drun_id=" + confArg.get("run_id")); if (!confArg.get("task_id").isEmpty()) arguments.add("-Dtask_id=" + confArg.get("task_id")); if (quick_add) arguments.add("-Dquick_add=" + 
confArg.get("quick_add")); String lockName = lock(lockRequest); String[] argumentString = arguments.toArray(new String[arguments.size()]); adddb.main(argumentString); unlock(lockName); System.exit(0); } // Database registration dbutil db_util = new dbutil(config); db_util.register_database(confArg.get("db_name_files"), true); db_util.register_database(confArg.get("db_name_runs"), true); db_util.register_database(confArg.get("db_name_updates"), true); FileSystem hdfs = FileSystem.get(config); FileSystem localFS = FileSystem.getLocal(config); // Get source type confArg.put("source", getSource(db_util, confArg.get("db_name_files"), confArg.get("file_id"))); confArg.put( "database", isDatabase(db_util, confArg.get("db_name_files"), confArg.get("file_id"))); if (!confArg.get("source").equals("local") && type_move == toFrom.REMOTE2LOCAL && !confArg.get("timestamp_stop").equals(Integer.toString(Integer.MAX_VALUE))) { confArg.put("timestamp_stop", Long.toString(latestVersion(confArg, db_util))); } /* * Get previous timestamp */ Get run_id_get = new Get(confArg.get("run_id").getBytes()); Result run_get = db_util.doGet(confArg.get("db_name_runs"), run_id_get); KeyValue run_file_prev = run_get.getColumnLatest( "d".getBytes(), (confArg.get("file_id") + "_db_timestamp").getBytes()); String last_timestamp = new String("0"); if (null != run_file_prev && !confArg.get("source").equals("local")) { long last_timestamp_real = run_file_prev.getTimestamp(); Long current_timestamp = new Long(confArg.get("timestamp_real")); if ((current_timestamp - last_timestamp_real) > 36000) { last_timestamp = new String(run_file_prev.getValue()); Integer lastTimestamp = new Integer(last_timestamp); lastTimestamp += 1; last_timestamp = lastTimestamp.toString(); logger.info("Last timestamp: " + last_timestamp + " End data: " + endDate); Date last_run = new Date(run_file_prev.getTimestamp()); if (last_run.before(endDate) && !full_run) { confArg.put("timestamp_start", last_timestamp); } } } Integer tse = 
new Integer(confArg.get("timestamp_stop")); Integer tss = new Integer(confArg.get("timestamp_start")); if (tss > tse) { logger.info("No new version of requested file."); return 0; } /* * Generate file */ String lockName = lock(lockRequest); Get file_id_get = new Get(confArg.get("file_id").getBytes()); Result file_get = db_util.doGet(confArg.get("db_name_files"), file_id_get); if (!file_get.isEmpty()) { boolean found = hasFile( db_util, hdfs, confArg.get("db_name_files"), confArg.get("file_id"), getFullPath(confArg)); if (confArg.get("source").equals("fullfile")) { found = false; } String filenames_put = getFileNames( db_util, confArg.get("db_name_files"), confArg.get("file_id"), getFullPath(confArg)); // Filename not found in file database if (!found && type_move == toFrom.REMOTE2LOCAL) { if (!confArg.get("source").equals("local")) { // Generate intermediate file if (getFile(hdfs, confArg, db_util) == null) { unlock(lockName); return 1; } // Put generated file into file database if (!confArg.get("format").equals("fullfile")) { putFileEntry( db_util, hdfs, confArg.get("db_name_files"), confArg.get("file_id"), confArg.get("full_file_name"), confArg.get("source")); } } else { logger.warn("Remote file not found, and cannot be generated! 
File: " + confArg); unlock(lockName); return 1; } } } else { if (type_move == toFrom.REMOTE2LOCAL) { logger.warn("Remote file not found, and cannot be generated."); unlock(lockName); return 1; } } /* * Copy file * Update tables */ if (type_move == toFrom.LOCAL2REMOTE) { if (!confArg.get("format").equals("fullfile")) { putFileEntry( db_util, hdfs, confArg.get("db_name_files"), confArg.get("file_id"), getFullPath(confArg), confArg.get("source")); } putRunEntry( db_util, confArg.get("db_name_runs"), confArg.get("run_id"), confArg.get("file_id"), confArg.get("type"), confArg.get("timestamp_real"), confArg.get("timestamp_stop"), getFullPath(confArg), confArg.get("delimiter")); hdfs.copyFromLocalFile(new Path(confArg.get("local_path")), new Path(getFullPath(confArg))); } else if (type_move == toFrom.REMOTE2LOCAL) { FileStatus[] files = hdfs.globStatus(new Path(getFullPath(confArg) + "*")); putRunEntry( db_util, confArg.get("db_name_runs"), confArg.get("run_id"), confArg.get("file_id"), confArg.get("type"), confArg.get("timestamp_real"), confArg.get("timestamp_stop"), getFullPath(confArg), confArg.get("delimiter")); unlock(lockName); for (FileStatus file : files) { Path cur_file = file.getPath(); Path cur_local_path = new Path(new String(confArg.get("local_path") + confArg.get("file_id"))); String suffix = getSuffix(getFileName(confArg), cur_file.getName()); if (suffix.length() > 0) { cur_local_path = cur_local_path.suffix(new String("." + suffix)); } if (confArg.get("copy").equals("true")) { String crc = hdfs.getFileChecksum(cur_file).toString(); if (checksumLocalTest(cur_local_path, crc)) { continue; } else { hdfs.copyToLocalFile(cur_file, cur_local_path); writeChecksum(cur_local_path, crc); } } else { System.out.println(cur_local_path + "\t" + cur_file); } } } unlock(lockName); return 0; }
// Information needed to get a single file: // BASE_PATH, FILE_ID, TIMESTAMP_START, TIMESTAMP_STOP, SOURCE, FILESYSTEM private static Vector<Path> getFile( FileSystem fs, Hashtable<String, String> config, dbutil db_util) throws Exception { Long latestVersion = latestVersion(config, db_util); try { config.put("timestamp_start", config.get("timestamp_start")); config.put("timestamp_real", latestVersion.toString()); config.put("timestamp_stop", latestVersion.toString()); } catch (Exception E) { logger.error("Tryign to get file that is impossible to generate: " + getFullPath(config)); return null; } if (Integer.parseInt(config.get("timestamp_start")) > Integer.parseInt(config.get("timestamp_stop"))) { return null; } logger.debug( "Getting DB for timestamp " + config.get("timestamp_start") + " to " + config.get("timestamp_stop")); String final_result = getFullPath(config); String temp_path_base = config.get("local_temp_path") + "_" + config.get("task_id") + "_" + config.get("run_id") + "/"; Path newPath = new Path(final_result + "*"); Vector<Path> ret_path = new Vector<Path>(); String lockName = lock(final_result.replaceAll("/", "_")); if (fs.globStatus(newPath).length != 0) { ret_path.add(newPath); unlock(lockName); config.put("full_file_name", final_result); return ret_path; } else { if (!config.get("source").equals("local")) { config.put("temp_path_base", temp_path_base); config.put("timestamp_start", config.get("timestamp_start")); config.put("timestamp_real", latestVersion.toString()); config.put("timestamp_stop", latestVersion.toString()); Class<?> sourceClass = Class.forName("org.gestore.plugin.source." 
+ config.get("source") + "Source"); Method process_data = sourceClass.getMethod("process", Hashtable.class, FileSystem.class); Object processor = sourceClass.newInstance(); Object retVal; try { retVal = process_data.invoke(processor, config, fs); } catch (InvocationTargetException E) { Throwable exception = E.getTargetException(); logger.error("Unable to call method in child class: " + exception.toString()); exception.printStackTrace(System.out); unlock(lockName); return null; } FileStatus[] files = (FileStatus[]) retVal; if (files == null) { logger.error("Error getting files, no files returned"); return null; } for (FileStatus file : files) { Path cur_file = file.getPath(); Path cur_local_path = new Path(temp_path_base + config.get("file_id")); String suffix = getSuffix(config.get("file_id"), cur_file.getName()); cur_local_path = cur_local_path.suffix(suffix); Path res_path = new Path(new String(final_result + suffix)); logger.debug("Moving file" + cur_file.toString() + " to " + res_path.toString()); if (config.get("copy").equals("true")) { fs.moveFromLocalFile(cur_file, res_path); } else { fs.rename(cur_file, res_path); } } config.put("full_file_name", final_result); } } unlock(lockName); return ret_path; }
/**
 * Adds an archive to the classpath entries and to the cache, resolving the file system from the
 * path itself. Intended to be used by user code.
 *
 * @deprecated Please use {@link #addArchiveToClassPath(Path, Configuration, FileSystem)} instead.
 *     The {@code FileSystem} should be obtained within an appropriate {@code doAs}.
 * @param archive Path of the archive to be added
 * @param conf Configuration that contains the classpath setting
 * @throws IOException if the file system for {@code archive} cannot be obtained
 */
@Deprecated
public static void addArchiveToClassPath(Path archive, Configuration conf) throws IOException {
  FileSystem owningFs = archive.getFileSystem(conf);
  addArchiveToClassPath(archive, conf, owningFs);
}