/**
 * Builds the {@link FileStatus} exposed for an archive entry by merging what the index records
 * about the entry with the status of the underlying file that backs it.
 *
 * <p>Length and the directory flag come from the index entry; replication, block size, access
 * time, permission, owner and group come from the underlying status. The modification time
 * depends on the archive version: below 3 it is taken from the underlying status, at exactly 3 it
 * is taken from the index entry, and for any other version it is left at 0.
 *
 * @param h index entry describing the archived file or directory
 * @param cache optional cache of underlying statuses keyed by part name; may be {@code null}, in
 *     which case the underlying status is always fetched
 * @return the merged file status
 * @throws IOException if the underlying file status cannot be obtained
 */
private FileStatus toFileStatus(HarStatus h, Map<String, FileStatus> cache) throws IOException {
  FileStatus backing = (cache == null) ? null : cache.get(h.partName);
  if (backing == null) {
    // Directories are backed by the archive directory itself, files by their part file.
    final Path backingPath;
    if (h.isDir) {
      backingPath = archivePath;
    } else {
      backingPath = new Path(archivePath, h.partName);
    }
    backing = fs.getFileStatus(backingPath);
    if (cache != null) {
      cache.put(h.partName, backing);
    }
  }

  final int version = metadata.getVersion();
  final long modTime;
  if (version < 3) {
    modTime = backing.getModificationTime();
  } else if (version == 3) {
    modTime = h.getModificationTime();
  } else {
    // No modification-time source is defined for other versions.
    modTime = 0;
  }

  // Directories always report a length of zero.
  final long length = h.isDir() ? 0L : h.getLength();

  return new FileStatus(
      length,
      h.isDir(),
      backing.getReplication(),
      backing.getBlockSize(),
      modTime,
      backing.getAccessTime(),
      backing.getPermission(),
      backing.getOwner(),
      backing.getGroup(),
      makeRelative(this.uri.getPath(), new Path(h.name)));
}
// get the version of the filesystem from the masterindex file // the version is currently not useful since its the first version // of archives public int getHarVersion() throws IOException { if (metadata != null) { return metadata.getVersion(); } else { throw new IOException("Invalid meta data for the Har Filesystem"); } }
/**
 * Decodes a file name according to the archive version.
 *
 * <p>For archive versions 2 and 3 the name is passed through {@code decodeString}; for every
 * other version it is returned unchanged.
 *
 * @param fname the file name to decode
 * @return the decoded name, or {@code fname} itself when the version needs no decoding
 * @throws UnsupportedEncodingException if decoding fails because the encoding is unsupported
 */
private String decodeFileName(String fname) throws UnsupportedEncodingException {
  switch (metadata.getVersion()) {
    case 2:
    case 3:
      return decodeString(fname);
    default:
      return fname;
  }
}
/** * Initialize a Har filesystem per har archive. The archive home directory is the top level * directory in the filesystem that contains the HAR archive. Be careful with this method, you do * not want to go on creating new Filesystem instances per call to path.getFileSystem(). the uri * of Har is har://underlyingfsscheme-host:port/archivepath. or har:///archivepath. This assumes * the underlying filesystem to be used in case not specified. */ @Override public void initialize(URI name, Configuration conf) throws IOException { // initialize the metadata cache, if needed initializeMetadataCache(conf); // decode the name URI underLyingURI = decodeHarURI(name, conf); // we got the right har Path- now check if this is // truly a har filesystem Path harPath = archivePath(new Path(name.getScheme(), name.getAuthority(), name.getPath())); if (harPath == null) { throw new IOException("Invalid path for the Har Filesystem. " + name.toString()); } if (fs == null) { fs = FileSystem.get(underLyingURI, conf); } uri = harPath.toUri(); archivePath = new Path(uri.getPath()); harAuth = getHarAuth(underLyingURI); // check for the underlying fs containing // the index file Path masterIndexPath = new Path(archivePath, "_masterindex"); Path archiveIndexPath = new Path(archivePath, "_index"); if (!fs.exists(masterIndexPath) || !fs.exists(archiveIndexPath)) { throw new IOException( "Invalid path for the Har Filesystem. 
" + "No index file in " + harPath); } metadata = harMetaCache.get(uri); if (metadata != null) { FileStatus mStat = fs.getFileStatus(masterIndexPath); FileStatus aStat = fs.getFileStatus(archiveIndexPath); if (mStat.getModificationTime() != metadata.getMasterIndexTimestamp() || aStat.getModificationTime() != metadata.getArchiveIndexTimestamp()) { // the archive has been overwritten since we last read it // remove the entry from the meta data cache metadata = null; harMetaCache.remove(uri); } } if (metadata == null) { metadata = new HarMetaData(fs, masterIndexPath, archiveIndexPath); metadata.parseMetaData(); harMetaCache.put(uri, metadata); } }
/** * Get block locations from the underlying fs and fix their offsets and lengths. * * @param file the input file status to get block locations * @param start the start of the desired range in the contained file * @param len the length of the desired range * @return block locations for this segment of file * @throws IOException */ @Override public BlockLocation[] getFileBlockLocations(FileStatus file, long start, long len) throws IOException { HarStatus hstatus = getFileHarStatus(file.getPath()); Path partPath = new Path(archivePath, hstatus.getPartName()); FileStatus partStatus = metadata.getPartFileStatus(partPath); // get all part blocks that overlap with the desired file blocks BlockLocation[] locations = fs.getFileBlockLocations(partStatus, hstatus.getStartIndex() + start, len); return fixBlockLocations(locations, start, len, hstatus.getStartIndex()); }