public static LinkedHashSet<String> findJars(LogicalPlan dag, Class<?>[] defaultClasses) { List<Class<?>> jarClasses = new ArrayList<Class<?>>(); for (String className : dag.getClassNames()) { try { Class<?> clazz = Thread.currentThread().getContextClassLoader().loadClass(className); jarClasses.add(clazz); } catch (ClassNotFoundException e) { throw new IllegalArgumentException("Failed to load class " + className, e); } } for (Class<?> clazz : Lists.newArrayList(jarClasses)) { // process class and super classes (super does not require deploy annotation) for (Class<?> c = clazz; c != Object.class && c != null; c = c.getSuperclass()) { // LOG.debug("checking " + c); jarClasses.add(c); jarClasses.addAll(Arrays.asList(c.getInterfaces())); } } jarClasses.addAll(Arrays.asList(defaultClasses)); if (dag.isDebug()) { LOG.debug("Deploy dependencies: {}", jarClasses); } LinkedHashSet<String> localJarFiles = new LinkedHashSet<String>(); // avoid duplicates HashMap<String, String> sourceToJar = new HashMap<String, String>(); for (Class<?> jarClass : jarClasses) { if (jarClass.getProtectionDomain().getCodeSource() == null) { // system class continue; } String sourceLocation = jarClass.getProtectionDomain().getCodeSource().getLocation().toString(); String jar = sourceToJar.get(sourceLocation); if (jar == null) { // don't create jar file from folders multiple times jar = JarFinder.getJar(jarClass); sourceToJar.put(sourceLocation, jar); LOG.debug("added sourceLocation {} as {}", sourceLocation, jar); } if (jar == null) { throw new AssertionError("Cannot resolve jar file for " + jarClass); } localJarFiles.add(jar); } String libJarsPath = dag.getValue(LogicalPlan.LIBRARY_JARS); if (!StringUtils.isEmpty(libJarsPath)) { String[] libJars = StringUtils.splitByWholeSeparator(libJarsPath, LIB_JARS_SEP); localJarFiles.addAll(Arrays.asList(libJars)); } LOG.info("Local jar file dependencies: " + localJarFiles); return localJarFiles; }
public int run(String[] args) throws Exception { Configuration argConf = getConf(); // JobConf conf = new JobConf(diffdb.class); Configuration config = HBaseConfiguration.create(); HBaseAdmin hbAdmin = new HBaseAdmin(config); dbutil db_util = new dbutil(config); HTable runTable = new HTable(config, "gestore_runs"); Get runGet = new Get(argConf.get("id").getBytes()); Result pipeline = runTable.get(runGet); NavigableMap<byte[], byte[]> pipeMap = pipeline.getFamilyMap("d".getBytes()); Map.Entry<byte[], byte[]> results = pipeMap.pollFirstEntry(); HashMap<String, HashMap<String, String>> resultMap = new HashMap<String, HashMap<String, String>>(); while (results != null) { String resultKey = new String(results.getKey()); String resultValue = new String(results.getValue()); String field = "type"; HashMap<String, String> tempMap = new HashMap<String, String>(); String entry = resultKey; if (resultKey.endsWith("_db_timestamp")) { field = "db_timestamp"; entry = resultKey.substring(0, resultKey.lastIndexOf("_db_timestamp")); } else if (resultKey.endsWith("_filename")) { field = "filename"; entry = resultKey.substring(0, resultKey.lastIndexOf("_filename")); } else if (resultKey.endsWith("_regex")) { field = "regex"; entry = resultKey.substring(0, resultKey.lastIndexOf("_regex")); } if (resultMap.containsKey(entry)) { tempMap = resultMap.get(entry); } tempMap.put(field, resultValue); resultMap.put(entry, tempMap); // System.out.println("Key: " + resultKey + " Value: " + resultValue); results = pipeMap.pollFirstEntry(); } for (String key : resultMap.keySet()) { System.out.println("File ID: " + key); for (String subKey : resultMap.get(key).keySet()) { // System.out.println("\t " + subKey + "\t\t" + resultMap.get(key).get(subKey)); System.out.format(" %1$-20s %2$s\n", subKey, resultMap.get(key).get(subKey)); } } return 0; }
/** Remove the temporary block file (if any) */ public synchronized void unfinalizeBlock(Block b) throws IOException { // remove the block from in-memory data structure ActiveFile activefile = ongoingCreates.remove(b); if (activefile == null) { return; } volumeMap.remove(b); // delete the on-disk temp file if (delBlockFromDisk(activefile.file, getMetaFile(activefile.file, b), b)) { DataNode.LOG.warn("Block " + b + " unfinalized and removed. "); } }
/** Turn the block identifier into a filename. */ public synchronized File getFile(Block b) { DatanodeBlockInfo info = volumeMap.get(b); if (info != null) { return info.getFile(); } return null; }
/** * Make a copy of the block if this block is linked to an existing snapshot. This ensures that * modifying this block does not modify data in any existing snapshots. * * @param block Block * @param numLinks Detach if the number of links exceed this value * @throws IOException * @return - true if the specified block was detached */ public boolean detachBlock(Block block, int numLinks) throws IOException { DatanodeBlockInfo info = null; synchronized (this) { info = volumeMap.get(block); } return info.detachBlock(block, numLinks); }
synchronized File createTmpFile(FSVolume vol, Block blk) throws IOException { if (vol == null) { vol = volumeMap.get(blk).getVolume(); if (vol == null) { throw new IOException("Could not find volume for block " + blk); } } return vol.createTmpFile(blk); }
/** Complete the block write! */ public synchronized void finalizeBlock(Block b) throws IOException { ActiveFile activeFile = ongoingCreates.get(b); if (activeFile == null) { throw new IOException("Block " + b + " is already finalized."); } File f = activeFile.file; if (f == null || !f.exists()) { throw new IOException("No temporary file " + f + " for block " + b); } FSVolume v = volumeMap.get(b).getVolume(); if (v == null) { throw new IOException("No volume for temporary file " + f + " for block " + b); } File dest = null; dest = v.addBlock(b, f); volumeMap.put(b, new DatanodeBlockInfo(v, dest)); ongoingCreates.remove(b); }
public int compare(Object path1, Object path2) { long date1 = hmTimestamps.get((Path) path1).longValue(); long date2 = hmTimestamps.get((Path) path2).longValue(); if (date1 > date2) { return 1; } else if (date1 < date2) { return -1; } else { return 0; } }
/** {@inheritDoc} */ public void validateBlockMetadata(Block b) throws IOException { DatanodeBlockInfo info = volumeMap.get(b); if (info == null) { throw new IOException("Block " + b + " does not exist in volumeMap."); } FSVolume v = info.getVolume(); File tmp = v.getTmpFile(b); File f = getFile(b); if (f == null) { f = tmp; } if (f == null) { throw new IOException("Block " + b + " does not exist on disk."); } if (!f.exists()) { throw new IOException("Block " + b + " block file " + f + " does not exist on disk."); } if (b.getNumBytes() != f.length()) { throw new IOException( "Block " + b + " length is " + b.getNumBytes() + " does not match block file length " + f.length()); } File meta = getMetaFile(f, b); if (meta == null) { throw new IOException("Block " + b + " metafile does not exist."); } if (!meta.exists()) { throw new IOException("Block " + b + " metafile " + meta + " does not exist on disk."); } if (meta.length() == 0) { throw new IOException("Block " + b + " metafile " + meta + " is empty."); } long stamp = parseGenerationStamp(f, meta); if (stamp != b.getGenerationStamp()) { throw new IOException( "Block " + b + " genstamp is " + b.getGenerationStamp() + " does not match meta file stamp " + stamp); } }
void getVolumeMap(HashMap<Block, DatanodeBlockInfo> volumeMap, FSVolume volume) { if (children != null) { for (int i = 0; i < children.length; i++) { children[i].getVolumeMap(volumeMap, volume); } } File blockFiles[] = dir.listFiles(); for (int i = 0; i < blockFiles.length; i++) { if (Block.isBlockFilename(blockFiles[i])) { long genStamp = getGenerationStampFromFile(blockFiles, blockFiles[i]); volumeMap.put( new Block(blockFiles[i], blockFiles[i].length(), genStamp), new DatanodeBlockInfo(volume, blockFiles[i])); } } }
/** Returns handles to the block file and its metadata file */ public synchronized BlockInputStreams getTmpInputStreams(Block b, long blkOffset, long ckoff) throws IOException { DatanodeBlockInfo info = volumeMap.get(b); if (info == null) { throw new IOException("Block " + b + " does not exist in volumeMap."); } FSVolume v = info.getVolume(); File blockFile = v.getTmpFile(b); RandomAccessFile blockInFile = new RandomAccessFile(blockFile, "r"); if (blkOffset > 0) { blockInFile.seek(blkOffset); } File metaFile = getMetaFile(blockFile, b); RandomAccessFile metaInFile = new RandomAccessFile(metaFile, "r"); if (ckoff > 0) { metaInFile.seek(ckoff); } return new BlockInputStreams( new FileInputStream(blockInFile.getFD()), new FileInputStream(metaInFile.getFD())); }
/** Sets the offset in the block to which the the next write will write data to. */ public void setChannelPosition(Block b, BlockWriteStreams streams, long dataOffset, long ckOffset) throws IOException { long size = 0; synchronized (this) { FSVolume vol = volumeMap.get(b).getVolume(); size = vol.getTmpFile(b).length(); } if (size < dataOffset) { String msg = "Trying to change block file offset of block " + b + " to " + dataOffset + " but actual size of file is " + size; throw new IOException(msg); } FileOutputStream file = (FileOutputStream) streams.dataOut; file.getChannel().position(dataOffset); file = (FileOutputStream) streams.checksumOut; file.getChannel().position(ckOffset); }
/** * Start writing to a block file If isRecovery is true and the block pre-exists, then we kill all * volumeMap.put(b, v); volumeMap.put(b, v); other threads that might be writing to this block, * and then reopen the file. */ public BlockWriteStreams writeToBlock(Block b, boolean isRecovery) throws IOException { // // Make sure the block isn't a valid one - we're still creating it! // if (isValidBlock(b)) { if (!isRecovery) { throw new BlockAlreadyExistsException( "Block " + b + " is valid, and cannot be written to."); } // If the block was successfully finalized because all packets // were successfully processed at the Datanode but the ack for // some of the packets were not received by the client. The client // re-opens the connection and retries sending those packets. // The other reason is that an "append" is occurring to this block. detachBlock(b, 1); } long blockSize = b.getNumBytes(); // // Serialize access to /tmp, and check if file already there. // File f = null; List<Thread> threads = null; synchronized (this) { // // Is it already in the create process? // ActiveFile activeFile = ongoingCreates.get(b); if (activeFile != null) { f = activeFile.file; threads = activeFile.threads; if (!isRecovery) { throw new BlockAlreadyExistsException( "Block " + b + " has already been started (though not completed), and thus cannot be created."); } else { for (Thread thread : threads) { thread.interrupt(); } } ongoingCreates.remove(b); } FSVolume v = null; if (!isRecovery) { v = volumes.getNextVolume(blockSize); // create temporary file to hold block in the designated volume f = createTmpFile(v, b); volumeMap.put(b, new DatanodeBlockInfo(v)); } else if (f != null) { DataNode.LOG.info("Reopen already-open Block for append " + b); // create or reuse temporary file to hold block in the // designated volume v = volumeMap.get(b).getVolume(); volumeMap.put(b, new DatanodeBlockInfo(v)); } else { // reopening block for appending to it. DataNode.LOG.info("Reopen Block for append " + b); v = volumeMap.get(b).getVolume(); f = createTmpFile(v, b); File blkfile = getBlockFile(b); File oldmeta = getMetaFile(b); File newmeta = getMetaFile(f, b); // rename meta file to tmp directory DataNode.LOG.debug("Renaming " + oldmeta + " to " + newmeta); if (!oldmeta.renameTo(newmeta)) { throw new IOException( "Block " + b + " reopen failed. " + " Unable to move meta file " + oldmeta + " to tmp dir " + newmeta); } // rename block file to tmp directory DataNode.LOG.debug("Renaming " + blkfile + " to " + f); if (!blkfile.renameTo(f)) { if (!f.delete()) { throw new IOException( "Block " + b + " reopen failed. " + " Unable to remove file " + f); } if (!blkfile.renameTo(f)) { throw new IOException( "Block " + b + " reopen failed. " + " Unable to move block file " + blkfile + " to tmp dir " + f); } } volumeMap.put(b, new DatanodeBlockInfo(v)); } if (f == null) { DataNode.LOG.warn("Block " + b + " reopen failed " + " Unable to locate tmp file."); throw new IOException("Block " + b + " reopen failed " + " Unable to locate tmp file."); } ongoingCreates.put(b, new ActiveFile(f, threads)); } try { if (threads != null) { for (Thread thread : threads) { thread.join(); } } } catch (InterruptedException e) { throw new IOException("Recovery waiting for thread interrupted."); } // // Finally, allow a writer to the block file // REMIND - mjc - make this a filter stream that enforces a max // block size, so clients can't go crazy // File metafile = getMetaFile(f, b); DataNode.LOG.debug("writeTo blockfile is " + f + " of size " + f.length()); DataNode.LOG.debug("writeTo metafile is " + metafile + " of size " + metafile.length()); return createBlockWriteStreams(f, metafile); }
/** * Try to update an old block to a new block. If there are ongoing create threads running for the * old block, the threads will be returned without updating the block. * * @return ongoing create threads if there is any. Otherwise, return null. */ private synchronized List<Thread> tryUpdateBlock(Block oldblock, Block newblock) throws IOException { // check ongoing create threads final ActiveFile activefile = ongoingCreates.get(oldblock); if (activefile != null && !activefile.threads.isEmpty()) { // remove dead threads for (Iterator<Thread> i = activefile.threads.iterator(); i.hasNext(); ) { final Thread t = i.next(); if (!t.isAlive()) { i.remove(); } } // return living threads if (!activefile.threads.isEmpty()) { return new ArrayList<Thread>(activefile.threads); } } // No ongoing create threads is alive. Update block. File blockFile = findBlockFile(oldblock.getBlockId()); if (blockFile == null) { throw new IOException("Block " + oldblock + " does not exist."); } File oldMetaFile = findMetaFile(blockFile); long oldgs = parseGenerationStamp(blockFile, oldMetaFile); // rename meta file to a tmp file File tmpMetaFile = new File( oldMetaFile.getParent(), oldMetaFile.getName() + "_tmp" + newblock.getGenerationStamp()); if (!oldMetaFile.renameTo(tmpMetaFile)) { throw new IOException("Cannot rename block meta file to " + tmpMetaFile); } // update generation stamp if (oldgs > newblock.getGenerationStamp()) { throw new IOException( "Cannot update block (id=" + newblock.getBlockId() + ") generation stamp from " + oldgs + " to " + newblock.getGenerationStamp()); } // update length if (newblock.getNumBytes() > oldblock.getNumBytes()) { throw new IOException( "Cannot update block file (=" + blockFile + ") length from " + oldblock.getNumBytes() + " to " + newblock.getNumBytes()); } if (newblock.getNumBytes() < oldblock.getNumBytes()) { truncateBlock(blockFile, tmpMetaFile, oldblock.getNumBytes(), newblock.getNumBytes()); } // rename the tmp file to the new meta file (with new generation stamp) File newMetaFile = getMetaFile(blockFile, newblock); if (!tmpMetaFile.renameTo(newMetaFile)) { throw new IOException("Cannot rename tmp meta file to " + newMetaFile); } updateBlockMap(ongoingCreates, oldblock, newblock); updateBlockMap(volumeMap, oldblock, newblock); // paranoia! verify that the contents of the stored block // matches the block file on disk. validateBlockMetadata(newblock); return null; }
/** * We're informed that a block is no longer valid. We could lazily garbage-collect the block, but * why bother? just get rid of it. */ public void invalidate(Block invalidBlks[]) throws IOException { boolean error = false; for (int i = 0; i < invalidBlks.length; i++) { File f = null; FSVolume v; synchronized (this) { f = getFile(invalidBlks[i]); DatanodeBlockInfo dinfo = volumeMap.get(invalidBlks[i]); if (dinfo == null) { DataNode.LOG.warn( "Unexpected error trying to delete block " + invalidBlks[i] + ". BlockInfo not found in volumeMap."); error = true; continue; } v = dinfo.getVolume(); if (f == null) { DataNode.LOG.warn( "Unexpected error trying to delete block " + invalidBlks[i] + ". Block not found in blockMap." + ((v == null) ? " " : " Block found in volumeMap.")); error = true; continue; } if (v == null) { DataNode.LOG.warn( "Unexpected error trying to delete block " + invalidBlks[i] + ". No volume for this block." + " Block found in blockMap. " + f + "."); error = true; continue; } File parent = f.getParentFile(); if (parent == null) { DataNode.LOG.warn( "Unexpected error trying to delete block " + invalidBlks[i] + ". Parent not found for file " + f + "."); error = true; continue; } v.clearPath(parent); volumeMap.remove(invalidBlks[i]); } File metaFile = getMetaFile(f, invalidBlks[i]); long blockSize = f.length() + metaFile.length(); if (!f.delete() || (!metaFile.delete() && metaFile.exists())) { DataNode.LOG.warn( "Unexpected error trying to delete block " + invalidBlks[i] + " at file " + f); error = true; continue; } v.decDfsUsed(blockSize); DataNode.LOG.info("Deleting block " + invalidBlks[i] + " file " + f); if (f.exists()) { // // This is a temporary check especially for hadoop-1220. // This will go away in the future. // DataNode.LOG.info("File " + f + " was deleted but still exists!"); } } if (error) { throw new IOException("Error in deleting blocks."); } }
/** * Method to go though the HDFS filesystem in a DFS to find all files * * <p>fs:FileSystem object from HDFS minDate: Oldest date for files to be backed up maxDate:Newest * date for files to be backed up p:Path in HDFS to look for files pathList:Will be filled with * all files in p hmTimestamps: hashmap of timestamps for later sorting */ public void checkDir( FileSystem fs, long minDate, long maxDate, Path p, ArrayList<Path> pathList, HashMap<Path, Long> hmTimestamps) { long tmpDate; FileStatus[] fStat; try { String sPath = p.toUri().getPath(); // If this is a directory if (fs.getFileStatus(p).isDir()) { // ignore certain directories if ("dfstmp".equals(p.getName()) || "tmp".equals(p.getName()) || "jobtracker".equals(p.getName()) || sPath.startsWith("/mapred") || "ops".equals(p.getName()) || p.getName().startsWith("_distcp_logs")) { return; } // dump the mkdir and chmod commands for this // directory -- skip root directory only { FileStatus stat = fs.getFileStatus(p); if (!sPath.equals("/")) { m_wrMkdirs.println("hadoop fs -mkdir " + sPath); } m_wrChmods.println( "hadoop fs -chown " + stat.getOwner() + ":" + stat.getGroup() + " " + sPath); Short sh = new Short(stat.getPermission().toShort()); m_wrChmods.println( "hadoop fs -chmod " + Long.toOctalString(sh.longValue()) + " " + sPath); } fStat = fs.listStatus(p); // Do a recursive call to all elements for (int i = 0; i < fStat.length; i++) { checkDir(fs, minDate, maxDate, fStat[i].getPath(), pathList, hmTimestamps); } } else { // If not a directory then we've found a file // ignore crc files if (p.getName().endsWith(".crc")) { return; } // ignore other files if (sPath.startsWith("/user/oozie/etl/workflows/")) { return; } // try to get the table name from the path. There are // various types of tables, from those replicated from // another database to regular hive tables to // partitioned hive tables. We use table names to // both exclude some from the backup, and for the rest // to dump out the schema and partition name. if (m_ignoreTables != null && m_ignoreTables.doIgnoreFile(sPath)) { m_nIgnoredTables++; if (m_nIgnoredTables < 5) { System.out.println("Skipping ignore-table file: " + sPath); } else if (m_nIgnoredTables == 5) { System.out.println("(...not showing other skipped tables...)"); } return; } FileStatus stat = fs.getFileStatus(p); tmpDate = stat.getModificationTime() / 1000; // store the chmods/chowns for all files m_wrChmods.println( "hadoop fs -chown " + stat.getOwner() + ":" + stat.getGroup() + " " + sPath); m_wrChmods.println("hadoop fs -chmod " + stat.getPermission().toShort() + " " + sPath); // check dates. is it too young? if (tmpDate < minDate) { return; } // is the file too recent? if (tmpDate > maxDate) { // System.out.println("file too recent: " + sPath); return; } // file timestamp is ok pathList.add(p); hmTimestamps.put(p, new Long(tmpDate)); // store info about total bytes neeed to backup m_nTotalBytes += fs.getContentSummary(p).getLength(); } } catch (IOException e) { System.err.println("ERROR: could not open " + p + ": " + e); // System.exit(1) ; } }