/** {@inheritDoc} */ public void updateBlock(Block oldblock, Block newblock) throws IOException { if (oldblock.getBlockId() != newblock.getBlockId()) { throw new IOException( "Cannot update oldblock (=" + oldblock + ") to newblock (=" + newblock + ")."); } for (; ; ) { final List<Thread> threads = tryUpdateBlock(oldblock, newblock); if (threads == null) { return; } // interrupt and wait for all ongoing create threads for (Thread t : threads) { t.interrupt(); } for (Thread t : threads) { try { t.join(); } catch (InterruptedException e) { DataNode.LOG.warn("interruptOngoingCreates: t=" + t, e); } } } }
/**
 * Periodic retirement loop. While {@code shouldRun} is set, sleeps for
 * {@code RETIRE_JOB_CHECK_INTERVAL} and then drops every job that is neither running nor in
 * preparation and whose finish time is more than {@code RETIRE_JOB_INTERVAL} in the past. A
 * retired job is removed from {@code jobs}, {@code jobInitQueue} and {@code jobsByArrival}.
 */
public void run() {
  while (shouldRun) {
    try {
      Thread.sleep(RETIRE_JOB_CHECK_INTERVAL);
    } catch (InterruptedException ie) {
      // woken early: fall through and do a retirement pass right away
    }

    // All three structures are locked together, in this fixed order, for the whole pass.
    synchronized (jobs) {
      synchronized (jobInitQueue) {
        synchronized (jobsByArrival) {
          Iterator it = jobs.keySet().iterator();
          while (it.hasNext()) {
            String jobid = (String) it.next();
            JobInProgress job = (JobInProgress) jobs.get(jobid);

            if (job.getStatus().getRunState() == JobStatus.RUNNING) {
              continue; // still running
            }
            if (job.getStatus().getRunState() == JobStatus.PREP) {
              continue; // not started yet
            }
            if (job.getFinishTime() + RETIRE_JOB_INTERVAL >= System.currentTimeMillis()) {
              continue; // finished too recently to retire
            }

            // Remove through the iterator so the key set can keep being walked.
            it.remove();
            jobInitQueue.remove(job);
            jobsByArrival.remove(job);
          }
        }
      }
    }
  }
}
/**
 * Creates an entry for a file that is being written.
 *
 * @param f the file being written
 * @param list threads already associated with the file; may be {@code null}, in which case only
 *     the calling thread is recorded
 */
ActiveFile(File f, List<Thread> list) {
  this.file = f;
  // Carry over any previously registered threads first ...
  if (list != null) {
    this.threads.addAll(list);
  }
  // ... then register the thread constructing this entry.
  this.threads.add(Thread.currentThread());
}
private static void deleteCache(Configuration conf, MRAsyncDiskService asyncDiskService) throws IOException { List<CacheStatus> deleteSet = new LinkedList<CacheStatus>(); // try deleting cache Status with refcount of zero synchronized (cachedArchives) { for (Iterator<String> it = cachedArchives.keySet().iterator(); it.hasNext(); ) { String cacheId = (String) it.next(); CacheStatus lcacheStatus = cachedArchives.get(cacheId); if (lcacheStatus.refcount == 0) { // delete this cache entry from the global list // and mark the localized file for deletion deleteSet.add(lcacheStatus); it.remove(); } } } // do the deletion asynchronously, after releasing the global lock Thread cacheFileCleaner = new Thread(new CacheFileCleanTask(asyncDiskService, FileSystem.getLocal(conf), deleteSet)); cacheFileCleaner.start(); }
/** * The run method lives for the life of the JobTracker, and removes TaskTrackers that have not * checked in for some time. */ public void run() { while (shouldRun) { // // Thread runs periodically to check whether trackers should be expired. // The sleep interval must be no more than half the maximum expiry time // for a task tracker. // try { Thread.sleep(TASKTRACKER_EXPIRY_INTERVAL / 3); } catch (InterruptedException ie) { } // // Loop through all expired items in the queue // synchronized (taskTrackers) { synchronized (trackerExpiryQueue) { long now = System.currentTimeMillis(); TaskTrackerStatus leastRecent = null; while ((trackerExpiryQueue.size() > 0) && ((leastRecent = (TaskTrackerStatus) trackerExpiryQueue.first()) != null) && (now - leastRecent.getLastSeen() > TASKTRACKER_EXPIRY_INTERVAL)) { // Remove profile from head of queue trackerExpiryQueue.remove(leastRecent); String trackerName = leastRecent.getTrackerName(); // Figure out if last-seen time should be updated, or if tracker is dead TaskTrackerStatus newProfile = (TaskTrackerStatus) taskTrackers.get(leastRecent.getTrackerName()); // Items might leave the taskTracker set through other means; the // status stored in 'taskTrackers' might be null, which means the // tracker has already been destroyed. if (newProfile != null) { if (now - newProfile.getLastSeen() > TASKTRACKER_EXPIRY_INTERVAL) { // Remove completely updateTaskTrackerStatus(trackerName, null); lostTaskTracker(leastRecent.getTrackerName()); } else { // Update time by inserting latest profile trackerExpiryQueue.add(newProfile); } } } } } } }
/**
 * Creates the singleton {@code JobTracker} and starts offering service. Construction is retried
 * once a second, logging each failure, until it succeeds.
 *
 * @param conf configuration passed to the {@code JobTracker} constructor
 * @throws IOException if a tracker has already been created
 */
public static void startTracker(Configuration conf) throws IOException {
  if (tracker != null) {
    throw new IOException("JobTracker already running.");
  }

  boolean started = false;
  while (!started) {
    try {
      tracker = new JobTracker(conf);
      started = true;
    } catch (IOException e) {
      LOG.log(Level.WARNING, "Starting tracker", e);
      // back off briefly before the next attempt
      try {
        Thread.sleep(1000);
      } catch (InterruptedException ie) {
        // retry straight away
      }
    }
  }

  tracker.offerService();
}
private static String lock(String lock) { String realPath = ""; String parent = "/lock"; String lockName = parent + "/" + lock; logger.debug("Getting lock " + lockName); try { if (zkInstance.exists(parent, false) == null) zkInstance.create(parent, new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.fromFlag(0)); } catch (Exception E) { logger.error("Error creating lock node: " + E.toString()); return null; } List<String> children = new LinkedList<String>(); try { // List <ACL> ACLList = zkInstance.getACL(lockName, zkInstance.exists(lock, false)); realPath = zkInstance.create( lockName, new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL_SEQUENTIAL); // children = zkInstance.getChildren(realPath, false); checkLock: while (true) { children = zkInstance.getChildren(parent, false); for (String curChild : children) { String child = parent + "/" + curChild; // System.out.println(child + " " + realPath + " " + // Integer.toString(child.compareTo(realPath))); if (child.compareTo(realPath) < 0 && child.length() == realPath.length() && curChild.startsWith(lock)) { // System.out.println(child + " cmp to " + realPath); Thread.sleep(300); continue checkLock; } } logger.info("Got lock " + lockName); return realPath; } } catch (Exception E) { logger.error("Exception while trying to get lock " + lockName + " :" + E.toString()); E.printStackTrace(); return null; } }
/** * Get a proxy connection to a remote server * * @param protocol protocol class * @param clientVersion client version * @param addr remote address * @param conf configuration to use * @param rpcTimeout timeout for each RPC * @param timeout time in milliseconds before giving up * @return the proxy * @throws IOException if the far end through a RemoteException */ static <T extends VersionedProtocol> ProtocolProxy<T> waitForProtocolProxy( Class<T> protocol, long clientVersion, InetSocketAddress addr, Configuration conf, long timeout, int rpcTimeout) throws IOException { long startTime = System.currentTimeMillis(); UserGroupInformation ugi = null; try { ugi = UserGroupInformation.login(conf); } catch (LoginException le) { throw new RuntimeException("Couldn't login!"); } IOException ioe; while (true) { try { return getProtocolProxy( protocol, clientVersion, addr, ugi, conf, NetUtils.getDefaultSocketFactory(conf), rpcTimeout); } catch (ConnectException se) { // namenode has not been started LOG.info("Server at " + addr + " not available yet, Zzzzz..."); ioe = se; } catch (SocketTimeoutException te) { // namenode is busy LOG.info("Problem connecting to server: " + addr); ioe = te; } // check if timed out if (System.currentTimeMillis() - timeout >= startTime) { throw ioe; } // wait for retry try { Thread.sleep(1000); } catch (InterruptedException ie) { // IGNORE } } }
/** * Get a protocol proxy that contains a proxy connection to a remote server and a set of methods * that are supported by the server * * @param protocol protocol class * @param clientVersion client version * @param addr remote address * @param conf configuration to use * @param rpcTimeout timeout for each RPC * @param timeout time in milliseconds before giving up * @return the proxy * @throws IOException if the far end through a RemoteException */ public static <T> ProtocolProxy<T> waitForProtocolProxy( Class<T> protocol, long clientVersion, InetSocketAddress addr, Configuration conf, int rpcTimeout, RetryPolicy connectionRetryPolicy, long timeout) throws IOException { long startTime = Time.now(); IOException ioe; while (true) { try { return getProtocolProxy( protocol, clientVersion, addr, UserGroupInformation.getCurrentUser(), conf, NetUtils.getDefaultSocketFactory(conf), rpcTimeout, connectionRetryPolicy); } catch (ConnectException se) { // namenode has not been started LOG.info("Server at " + addr + " not available yet, Zzzzz..."); ioe = se; } catch (SocketTimeoutException te) { // namenode is busy LOG.info("Problem connecting to server: " + addr); ioe = te; } catch (NoRouteToHostException nrthe) { // perhaps a VIP is failing over LOG.info("No route to host for server: " + addr); ioe = nrthe; } // check if timed out if (Time.now() - timeout >= startTime) { throw ioe; } // wait for retry try { Thread.sleep(1000); } catch (InterruptedException ie) { // IGNORE } } }
// // The main work loop // public void run() { // // Poll the Namenode (once every 5 minutes) to find the size of the // pending edit log. // long period = 5 * 60; // 5 minutes long lastCheckpointTime = 0; if (checkpointPeriod < period) { period = checkpointPeriod; } while (shouldRun) { try { Thread.sleep(1000 * period); } catch (InterruptedException ie) { // do nothing } if (!shouldRun) { break; } try { long now = System.currentTimeMillis(); long size = namenode.getEditLogSize(); if (size >= checkpointSize || now >= lastCheckpointTime + 1000 * checkpointPeriod) { doCheckpoint(); lastCheckpointTime = now; } } catch (IOException e) { LOG.error("Exception in doCheckpoint: "); LOG.error(StringUtils.stringifyException(e)); e.printStackTrace(); checkpointImage.imageDigest = null; } catch (Throwable e) { LOG.error("Throwable Exception in doCheckpoint: "); LOG.error(StringUtils.stringifyException(e)); e.printStackTrace(); Runtime.getRuntime().exit(-1); } } }
/**
 * Start writing to a block file. If isRecovery is true and the block pre-exists, then we
 * interrupt all other threads that might be writing to this block, wait for them to finish, and
 * then reopen the file.
 *
 * @param b the block to write
 * @param isRecovery whether an existing (finalized or in-progress) block may be reopened
 * @return the streams created by {@code createBlockWriteStreams} for the block file and its
 *     meta file
 * @throws BlockAlreadyExistsException if {@code isRecovery} is false and the block is already
 *     valid or already being created
 * @throws IOException if a file needed for reopening cannot be moved, removed or located, or if
 *     this thread is interrupted while waiting for the previous writer threads
 */
public BlockWriteStreams writeToBlock(Block b, boolean isRecovery) throws IOException {
  //
  // Make sure the block isn't a valid one - we're still creating it!
  //
  if (isValidBlock(b)) {
    if (!isRecovery) {
      throw new BlockAlreadyExistsException(
          "Block " + b + " is valid, and cannot be written to.");
    }
    // If the block was successfully finalized because all packets
    // were successfully processed at the Datanode but the ack for
    // some of the packets were not received by the client. The client
    // re-opens the connection and retries sending those packets.
    // The other reason is that an "append" is occurring to this block.
    detachBlock(b, 1);
  }
  long blockSize = b.getNumBytes();

  //
  // Serialize access to /tmp, and check if file already there.
  //
  File f = null;
  List<Thread> threads = null;
  synchronized (this) {
    //
    // Is it already in the create process?
    //
    ActiveFile activeFile = ongoingCreates.get(b);
    if (activeFile != null) {
      f = activeFile.file;
      threads = activeFile.threads;

      if (!isRecovery) {
        throw new BlockAlreadyExistsException(
            "Block " + b + " has already been started (though not completed), and thus cannot be created.");
      } else {
        // Recovery: ask the previous writers to stop; they are joined after the lock is released.
        for (Thread thread : threads) {
          thread.interrupt();
        }
      }
      ongoingCreates.remove(b);
    }
    FSVolume v = null;
    if (!isRecovery) {
      // Fresh block: pick a volume and create its temporary file.
      v = volumes.getNextVolume(blockSize);
      // create temporary file to hold block in the designated volume
      f = createTmpFile(v, b);
      volumeMap.put(b, new DatanodeBlockInfo(v));
    } else if (f != null) {
      // Recovery of a block that was still being created: reuse the file taken from
      // the previous ActiveFile entry.
      DataNode.LOG.info("Reopen already-open Block for append " + b);
      // create or reuse temporary file to hold block in the
      // designated volume
      v = volumeMap.get(b).getVolume();
      volumeMap.put(b, new DatanodeBlockInfo(v));
    } else {
      // reopening block for appending to it.
      DataNode.LOG.info("Reopen Block for append " + b);
      v = volumeMap.get(b).getVolume();
      f = createTmpFile(v, b);
      File blkfile = getBlockFile(b);
      File oldmeta = getMetaFile(b);
      File newmeta = getMetaFile(f, b);

      // rename meta file to tmp directory
      DataNode.LOG.debug("Renaming " + oldmeta + " to " + newmeta);
      if (!oldmeta.renameTo(newmeta)) {
        throw new IOException(
            "Block " + b + " reopen failed. " + " Unable to move meta file " + oldmeta + " to tmp dir " + newmeta);
      }

      // rename block file to tmp directory
      DataNode.LOG.debug("Renaming " + blkfile + " to " + f);
      if (!blkfile.renameTo(f)) {
        // The first rename failed; delete the file at the destination and try once more.
        if (!f.delete()) {
          throw new IOException(
              "Block " + b + " reopen failed. " + " Unable to remove file " + f);
        }
        if (!blkfile.renameTo(f)) {
          throw new IOException(
              "Block " + b + " reopen failed. " + " Unable to move block file " + blkfile + " to tmp dir " + f);
        }
      }
      volumeMap.put(b, new DatanodeBlockInfo(v));
    }
    if (f == null) {
      DataNode.LOG.warn("Block " + b + " reopen failed " + " Unable to locate tmp file.");
      throw new IOException("Block " + b + " reopen failed " + " Unable to locate tmp file.");
    }
    // Register the new writer; the ActiveFile constructor also records the current thread.
    ongoingCreates.put(b, new ActiveFile(f, threads));
  }

  // Outside the lock: wait for the interrupted writers (if any) to finish.
  try {
    if (threads != null) {
      for (Thread thread : threads) {
        thread.join();
      }
    }
  } catch (InterruptedException e) {
    throw new IOException("Recovery waiting for thread interrupted.");
  }

  //
  // Finally, allow a writer to the block file
  // REMIND - mjc - make this a filter stream that enforces a max
  // block size, so clients can't go crazy
  //
  File metafile = getMetaFile(f, b);
  DataNode.LOG.debug("writeTo blockfile is " + f + " of size " + f.length());
  DataNode.LOG.debug("writeTo metafile is " + metafile + " of size " + metafile.length());
  return createBlockWriteStreams(f, metafile);
}
/** * Try to update an old block to a new block. If there are ongoing create threads running for the * old block, the threads will be returned without updating the block. * * @return ongoing create threads if there is any. Otherwise, return null. */ private synchronized List<Thread> tryUpdateBlock(Block oldblock, Block newblock) throws IOException { // check ongoing create threads final ActiveFile activefile = ongoingCreates.get(oldblock); if (activefile != null && !activefile.threads.isEmpty()) { // remove dead threads for (Iterator<Thread> i = activefile.threads.iterator(); i.hasNext(); ) { final Thread t = i.next(); if (!t.isAlive()) { i.remove(); } } // return living threads if (!activefile.threads.isEmpty()) { return new ArrayList<Thread>(activefile.threads); } } // No ongoing create threads is alive. Update block. File blockFile = findBlockFile(oldblock.getBlockId()); if (blockFile == null) { throw new IOException("Block " + oldblock + " does not exist."); } File oldMetaFile = findMetaFile(blockFile); long oldgs = parseGenerationStamp(blockFile, oldMetaFile); // rename meta file to a tmp file File tmpMetaFile = new File( oldMetaFile.getParent(), oldMetaFile.getName() + "_tmp" + newblock.getGenerationStamp()); if (!oldMetaFile.renameTo(tmpMetaFile)) { throw new IOException("Cannot rename block meta file to " + tmpMetaFile); } // update generation stamp if (oldgs > newblock.getGenerationStamp()) { throw new IOException( "Cannot update block (id=" + newblock.getBlockId() + ") generation stamp from " + oldgs + " to " + newblock.getGenerationStamp()); } // update length if (newblock.getNumBytes() > oldblock.getNumBytes()) { throw new IOException( "Cannot update block file (=" + blockFile + ") length from " + oldblock.getNumBytes() + " to " + newblock.getNumBytes()); } if (newblock.getNumBytes() < oldblock.getNumBytes()) { truncateBlock(blockFile, tmpMetaFile, oldblock.getNumBytes(), newblock.getNumBytes()); } // rename the tmp file to the new meta file (with new 
generation stamp) File newMetaFile = getMetaFile(blockFile, newblock); if (!tmpMetaFile.renameTo(newMetaFile)) { throw new IOException("Cannot rename tmp meta file to " + newMetaFile); } updateBlockMap(ongoingCreates, oldblock, newblock); updateBlockMap(volumeMap, oldblock, newblock); // paranoia! verify that the contents of the stored block // matches the block file on disk. validateBlockMetadata(newblock); return null; }