예제 #1
2
  /**
   * Drops {@code taskid} from the three lookup tables: taskid-to-tracker, tracker-to-taskids and
   * taskid-to-TIP.
   *
   * @param taskid id of the task whose bookkeeping entries are removed
   */
  void removeTaskEntry(String taskid) {
    // Forget which tracker held the task, keeping the name for the reverse lookup below.
    final String owningTracker = (String) taskidToTrackerMap.remove(taskid);

    // Strike the task from that tracker's task set, when such a set is registered.
    final TreeSet tasksOnTracker = (TreeSet) trackerToTaskMap.get(owningTracker);
    if (null != tasksOnTracker) {
      tasksOnTracker.remove(taskid);
    }

    // Finally drop the taskid --> TIP association.
    taskidToTIPMap.remove(taskid);
  }
예제 #2
1
  /**
   * Handles the loss of a task tracker by failing every task that was recorded against it.
   *
   * <p>Callers are expected to have updated the task-tracker structures already; this method only
   * processes the contained tasks and the jobs they belong to. The tracker's entry is removed from
   * {@code trackerToTaskMap} and each task id it held is reported to its job via
   * {@code failedTask}. A task id that no longer resolves to a {@link TaskInProgress} is logged
   * and skipped so that the remaining tasks are still processed.
   *
   * @param trackerName name of the tracker that was lost
   */
  void lostTaskTracker(String trackerName) {
    LOG.info("Lost tracker '" + trackerName + "'");
    // remove() returns the previous mapping, so a separate get() is unnecessary.
    TreeSet lostTasks = (TreeSet) trackerToTaskMap.remove(trackerName);
    if (lostTasks == null) {
      return;
    }

    for (Iterator it = lostTasks.iterator(); it.hasNext(); ) {
      String taskId = (String) it.next();
      TaskInProgress tip = (TaskInProgress) taskidToTIPMap.get(taskId);
      if (tip == null) {
        // Without this guard a single stale id would throw and leave the later tasks un-failed.
        LOG.info(
            "Serious problem.  While handling lost tracker '"
                + trackerName
                + "', cannot find taskid "
                + taskId);
        continue;
      }

      // Tell the job to fail the relevant task
      JobInProgress job = tip.getJob();
      job.failedTask(tip, taskId, trackerName);
    }
  }
예제 #3
1
 /**
  * Registers a newly submitted job with the JobTracker.
  *
  * <p>A {@code JobInProgress} is built from the job file and then recorded in {@code jobs},
  * {@code jobsByArrival} and {@code jobInitQueue} while holding all three monitors, acquired in
  * that order. Threads waiting on {@code jobInitQueue} are woken once the job has been queued.
  *
  * @param jobFile job file handed to the {@code JobInProgress} constructor
  * @return the status object of the freshly created job
  * @throws IOException declared by this method; presumably raised while constructing the
  *     {@code JobInProgress} (TODO confirm)
  */
 public synchronized JobStatus submitJob(String jobFile) throws IOException {
   totalSubmissions++;
   final JobInProgress submitted = new JobInProgress(jobFile, this, this.conf);

   // Lock order is jobs -> jobsByArrival -> jobInitQueue; do not reorder.
   synchronized (jobs) {
     synchronized (jobsByArrival) {
       synchronized (jobInitQueue) {
         // Index by job id, remember arrival order, then queue for initialization.
         jobs.put(submitted.getProfile().getJobId(), submitted);
         jobsByArrival.add(submitted);
         jobInitQueue.add(submitted);
         // Wake whoever is waiting on the init queue's monitor.
         jobInitQueue.notifyAll();
       }
     }
   }

   return submitted.getStatus();
 }
예제 #4
0
 /**
  * Empties the cache registry and deletes the localized path behind every entry. Intended only
  * for server re-initialization, because users lose their cached files.
  *
  * <p>A failure to delete one entry is logged at debug level and does not stop the purge.
  *
  * @param conf configuration used to obtain the local file system
  * @param service disk service passed through to {@code deleteLocalPath}
  * @throws IOException declared by this method; presumably from obtaining the local file system
  *     (TODO confirm)
  */
 public static void purgeCache(Configuration conf, MRAsyncDiskService service) throws IOException {
   synchronized (cachedArchives) {
     final LocalFileSystem local = FileSystem.getLocal(conf);
     for (CacheStatus entry : cachedArchives.values()) {
       try {
         deleteLocalPath(service, local, entry.localizedLoadPath);
       } catch (IOException cleanupFailure) {
         // Best effort: keep purging the remaining entries.
         LOG.debug("Error cleaning up cache", cleanupFailure);
       }
     }
     cachedArchives.clear();
   }
 }
예제 #5
0
 /**
  * Replaces the recorded status of a task tracker and keeps the {@code totalMaps} /
  * {@code totalReduces} counters in step. The caller is assumed to hold the lock on
  * {@code taskTrackers}.
  *
  * <p>Passing a {@code null} status removes the tracker's record (if one existed).
  *
  * @author Owen O'Malley
  * @param trackerName The name of the tracker
  * @param status The new status for the task tracker, or {@code null} to forget the tracker
  * @return {@code true} if a previous status was recorded for the tracker
  */
 private boolean updateTaskTrackerStatus(String trackerName, TaskTrackerStatus status) {
   final TaskTrackerStatus previous = (TaskTrackerStatus) taskTrackers.get(trackerName);
   final boolean wasKnown = previous != null;

   // Back out the contribution of the previously recorded status.
   if (wasKnown) {
     totalMaps -= previous.countMapTasks();
     totalReduces -= previous.countReduceTasks();
   }

   if (status == null) {
     // No replacement: drop the stale record, if there was one.
     if (wasKnown) {
       taskTrackers.remove(trackerName);
     }
     return wasKnown;
   }

   // Account for the new status and store it.
   totalMaps += status.countMapTasks();
   totalReduces += status.countReduceTasks();
   taskTrackers.put(trackerName, status);
   return wasKnown;
 }
예제 #6
0
 /**
  * Looks up the status of a job.
  *
  * @param jobid id of the job
  * @return the job's status, or {@code null} when no job is registered under {@code jobid}
  */
 public synchronized JobStatus getJobStatus(String jobid) {
   final JobInProgress found = (JobInProgress) jobs.get(jobid);
   return (found == null) ? null : found.getStatus();
 }
예제 #7
0
 /**
  * Looks up the profile of a job.
  *
  * @param jobid id of the job
  * @return the job's profile, or {@code null} when no job is registered under {@code jobid}
  */
 public synchronized JobProfile getJobProfile(String jobid) {
   final JobInProgress found = (JobInProgress) jobs.get(jobid);
   return (found == null) ? null : found.getProfile();
 }
예제 #8
0
 /**
  * Answers a tracker asking whether any of its tasks should be closed because the owning job has
  * closed (whether successfully or not).
  *
  * @param taskTracker name of the polling tracker
  * @return the first task id of that tracker, in set order, whose TIP reports
  *     {@code shouldCloseForClosedJob}; {@code null} if the tracker has no recorded tasks or
  *     none qualifies
  */
 public synchronized String pollForTaskWithClosedJob(String taskTracker) {
   final TreeSet assigned = (TreeSet) trackerToTaskMap.get(taskTracker);
   if (assigned == null) {
     return null;
   }

   for (Object element : assigned) {
     final String candidate = (String) element;
     final TaskInProgress owner = (TaskInProgress) taskidToTIPMap.get(candidate);
     if (owner.shouldCloseForClosedJob(candidate)) {
       // This is how the JobTracker ends a task at the TaskTracker. It may be
       // successfully completed, or may be killed in mid-execution.
       return candidate;
     }
   }
   return null;
 }
예제 #9
0
 /**
  * Adjusts the {@code baseDirSize} and {@code baseDirNumberSubDir} bookkeeping when a cache entry
  * is deleted: the entry's size is subtracted and the sub-directory count is decremented for the
  * entry's base directory. Base directories without a recorded value are left untouched.
  *
  * @param cacheStatus status of the cache entry being deleted
  */
 private static void deleteCacheInfoUpdate(CacheStatus cacheStatus) {
   // An entry that was never initialized contributed nothing, so there is nothing to undo.
   if (cacheStatus.isInited()) {
     synchronized (baseDirSize) {
       final Long recordedSize = baseDirSize.get(cacheStatus.getBaseDir());
       if (recordedSize != null) {
         baseDirSize.put(cacheStatus.getBaseDir(), recordedSize - cacheStatus.size);
       }
     }
     synchronized (baseDirNumberSubDir) {
       final Integer recordedCount = baseDirNumberSubDir.get(cacheStatus.getBaseDir());
       if (recordedCount != null) {
         baseDirNumberSubDir.put(cacheStatus.getBaseDir(), recordedCount - 1);
       }
     }
   }
 }
예제 #10
0
 /**
  * Collects the jobs whose run state is {@code JobStatus.SUCCEEDED}.
  *
  * @return a new vector holding the matching {@code JobInProgress} objects, in the iteration
  *     order of {@code jobs.values()}
  */
 public Vector completedJobs() {
   final Vector succeeded = new Vector();
   for (Object entry : jobs.values()) {
     final JobInProgress candidate = (JobInProgress) entry;
     if (candidate.getStatus().getRunState() == JobStatus.SUCCEEDED) {
       succeeded.add(candidate);
     }
   }
   return succeeded;
 }
예제 #11
0
 /**
  * Collects the jobs whose run state is {@code JobStatus.RUNNING}.
  *
  * @return a new vector holding the matching {@code JobInProgress} objects, in the iteration
  *     order of {@code jobs.values()}
  */
 public Vector runningJobs() {
   final Vector active = new Vector();
   for (Object entry : jobs.values()) {
     final JobInProgress candidate = (JobInProgress) entry;
     if (candidate.getStatus().getRunState() == JobStatus.RUNNING) {
       active.add(candidate);
     }
   }
   return active;
 }
예제 #12
0
 /**
  * Adjusts the {@code baseDirSize} and {@code baseDirNumberSubDir} bookkeeping when a cache entry
  * is added: the entry's size is added and the sub-directory count is incremented for the entry's
  * base directory. A base directory seen for the first time starts at the entry's size and a
  * count of one.
  *
  * @param cacheStatus status of the cache entry being added
  */
 private static void addCacheInfoUpdate(CacheStatus cacheStatus) {
   // Read the size once, before taking any lock.
   final long addedSize = cacheStatus.size;

   synchronized (baseDirSize) {
     final Long previousSize = baseDirSize.get(cacheStatus.getBaseDir());
     long updatedSize = addedSize;
     if (previousSize != null) {
       updatedSize += previousSize;
     }
     baseDirSize.put(cacheStatus.getBaseDir(), updatedSize);
   }

   synchronized (baseDirNumberSubDir) {
     final Integer previousCount = baseDirNumberSubDir.get(cacheStatus.getBaseDir());
     final int updatedCount = (previousCount == null) ? 1 : previousCount + 1;
     baseDirNumberSubDir.put(cacheStatus.getBaseDir(), updatedCount);
   }
 }
예제 #13
0
 /**
  * Evicts every cache entry whose reference count is zero.
  *
  * <p>The entries are unlinked from {@code cachedArchives} under its lock; the file clean-up
  * itself is handed to a freshly started thread running a {@code CacheFileCleanTask}, after the
  * lock has been released.
  *
  * @param conf configuration used to obtain the local file system
  * @param asyncDiskService disk service passed through to the clean-up task
  * @throws IOException declared by this method; presumably from obtaining the local file system
  *     (TODO confirm)
  */
 private static void deleteCache(Configuration conf, MRAsyncDiskService asyncDiskService)
     throws IOException {
   final List<CacheStatus> unreferenced = new LinkedList<CacheStatus>();

   synchronized (cachedArchives) {
     final Iterator<CacheStatus> statuses = cachedArchives.values().iterator();
     while (statuses.hasNext()) {
       final CacheStatus status = statuses.next();
       if (status.refcount == 0) {
         // Remember it for file clean-up and unlink it from the global registry.
         unreferenced.add(status);
         statuses.remove();
       }
     }
   }

   // The global lock is no longer held here; deletion proceeds on its own thread.
   new Thread(new CacheFileCleanTask(asyncDiskService, FileSystem.getLocal(conf), unreferenced))
       .start();
 }
예제 #14
0
  /**
   * A TaskTracker wants to know the physical locations of completed, but not yet closed, tasks.
   * This exists so the reduce task thread can locate map task outputs.
   *
   * <p>Each row of {@code mapTasksNeeded} lists candidate task ids; at most one location is
   * produced per row, taken from the first candidate that is complete and whose tracker is still
   * registered in {@code taskTrackers}. Candidates whose tracker cannot be resolved are skipped
   * rather than failing the whole call.
   *
   * @param taskId id of the requesting task (not used by the lookup itself)
   * @param mapTasksNeeded candidate map task ids, one row per needed output
   * @return the locations found, in random order; possibly shorter than {@code mapTasksNeeded}
   */
  public synchronized MapOutputLocation[] locateMapOutputs(
      String taskId, String[][] mapTasksNeeded) {
    ArrayList v = new ArrayList();
    for (int i = 0; i < mapTasksNeeded.length; i++) {
      for (int j = 0; j < mapTasksNeeded[i].length; j++) {
        String mapTaskId = mapTasksNeeded[i][j];
        TaskInProgress tip = (TaskInProgress) taskidToTIPMap.get(mapTaskId);
        if (tip == null || !tip.isComplete(mapTaskId)) {
          continue;
        }

        String trackerId = (String) taskidToTrackerMap.get(mapTaskId);
        if (trackerId == null) {
          // No tracker recorded for this id; try the next candidate.
          continue;
        }
        TaskTrackerStatus tracker;
        synchronized (taskTrackers) {
          tracker = (TaskTrackerStatus) taskTrackers.get(trackerId);
        }
        if (tracker == null) {
          // The tracker is no longer registered, so its host/port cannot be reported.
          continue;
        }

        v.add(new MapOutputLocation(mapTaskId, tracker.getHost(), tracker.getPort()));
        break;
      }
    }
    // randomly shuffle results to load-balance map output requests
    Collections.shuffle(v);

    return (MapOutputLocation[]) v.toArray(new MapOutputLocation[v.size()]);
  }
예제 #15
0
  /**
   * Releases one reference on a localized cache; the counterpart of obtaining the local cache.
   * Call it once the cache is no longer in use.
   *
   * <p>If no entry exists for the computed key, a warning is logged and nothing else happens.
   *
   * @param cache The cache URI to be released
   * @param conf configuration which contains the filesystem the cache is contained in.
   * @param timeStamp timestamp that, together with {@code cache} and {@code conf}, forms the key
   * @throws IOException declared by this method; presumably from computing the key (TODO confirm)
   */
  public static void releaseCache(URI cache, Configuration conf, long timeStamp)
      throws IOException {
    final String key = getKey(cache, conf, timeStamp);

    synchronized (cachedArchives) {
      final CacheStatus status = cachedArchives.get(key);
      if (status != null) {
        status.refcount--;
      } else {
        LOG.warn(
            "Cannot find localized cache: " + cache + " (key: " + key + ") in releaseCache!");
      }
    }
  }
예제 #16
0
  ///////////////////////////////////////////////////////
  // Maintain lookup tables; called by JobInProgress
  // and TaskInProgress
  ///////////////////////////////////////////////////////
  /**
   * Records a task in the three lookup tables: taskid-to-tracker, tracker-to-taskids and
   * taskid-to-TIP. The tracker's task set is created on first use.
   *
   * @param taskid id of the task being recorded
   * @param taskTracker name of the tracker the task is associated with
   * @param tip the {@code TaskInProgress} the task belongs to
   */
  void createTaskEntry(String taskid, String taskTracker, TaskInProgress tip) {
    LOG.info(
        "Adding task '" + taskid + "' to tip " + tip.getTIPId()
            + ", for tracker '" + taskTracker + "'");

    // Forward mapping: which tracker holds this task.
    taskidToTrackerMap.put(taskid, taskTracker);

    // Reverse mapping: the set of tasks held by that tracker.
    TreeSet tasksOnTracker = (TreeSet) trackerToTaskMap.get(taskTracker);
    if (null == tasksOnTracker) {
      tasksOnTracker = new TreeSet();
      trackerToTaskMap.put(taskTracker, tasksOnTracker);
    }
    tasksOnTracker.add(taskid);

    // And the owning TIP.
    taskidToTIPMap.put(taskid, tip);
  }
예제 #17
0
 /**
  * Builds one report per task returned by {@code reportTasksInProgress(false, ...)} for a job.
  *
  * <p>Tasks obtained with the second flag {@code true} are reported first, followed by those
  * obtained with {@code false} (the original code named these the complete and incomplete
  * reduce tasks respectively).
  *
  * @param jobid id of the job
  * @return the reports in that order; an empty array when the job is unknown
  */
 public synchronized TaskReport[] getReduceTaskReports(String jobid) {
   final JobInProgress job = (JobInProgress) jobs.get(jobid);
   if (job == null) {
     return new TaskReport[0];
   }

   final Vector collected = new Vector();
   for (boolean completeFlag : new boolean[] {true, false}) {
     for (Object entry : job.reportTasksInProgress(false, completeFlag)) {
       collected.add(((TaskInProgress) entry).generateSingleReport());
     }
   }
   return (TaskReport[]) collected.toArray(new TaskReport[collected.size()]);
 }
예제 #18
0
  /**
   * Applies the task reports carried by a tracker status to the jobs that own those tasks.
   *
   * <p>Every report is forwarded to its job via {@code updateTaskStatus}. In addition, a report
   * in state {@code TaskStatus.SUCCEEDED} marks the task completed, and one in state
   * {@code TaskStatus.FAILED} fails it on the reporting tracker. Reports whose task id has no
   * known TIP are logged and skipped.
   *
   * @param status tracker status whose task reports are processed
   */
  void updateTaskStatuses(TaskTrackerStatus status) {
    final Iterator reports = status.taskReports();
    while (reports.hasNext()) {
      final TaskStatus report = (TaskStatus) reports.next();
      final TaskInProgress tip = (TaskInProgress) taskidToTIPMap.get(report.getTaskId());
      if (tip == null) {
        LOG.info(
            "Serious problem.  While updating status, cannot find taskid " + report.getTaskId());
        continue;
      }

      final JobInProgress owner = tip.getJob();
      owner.updateTaskStatus(tip, report);

      if (report.getRunState() == TaskStatus.SUCCEEDED) {
        owner.completedTask(tip, report.getTaskId());
      } else if (report.getRunState() == TaskStatus.FAILED) {
        // Tell the job to fail the relevant task
        owner.failedTask(tip, report.getTaskId(), status.getTrackerName());
      }
    }
  }
예제 #19
0
 /**
  * Kills the job registered under {@code jobid}. An id with no registered job is ignored, in
  * line with the sibling lookups that return {@code null} for unknown ids instead of throwing.
  *
  * @param jobid id of the job to kill
  */
 public synchronized void killJob(String jobid) {
   JobInProgress job = (JobInProgress) jobs.get(jobid);
   if (job == null) {
     // Unknown job id: nothing to kill.
     return;
   }
   job.kill();
 }
예제 #20
0
 /**
  * Returns the currently recorded task tracker statuses.
  *
  * <p>The result is a snapshot copied while holding the {@code taskTrackers} lock. Handing out
  * the live {@code values()} view would let callers iterate it after the lock is released,
  * racing with concurrent updates of the map.
  *
  * @return a new collection holding the statuses present at the time of the call
  */
 public Collection taskTrackers() {
   synchronized (taskTrackers) {
     return new java.util.ArrayList(taskTrackers.values());
   }
 }
예제 #21
0
  /**
   * Get the locally cached file or archive; it could either be previously cached (and valid) or
   * copy it from the {@link FileSystem} now.
   *
   * <p>The entry for the cache is looked up (or created) in {@code cachedArchives} and its
   * reference count is incremented. If this method exits before reaching its return statement,
   * that reference is released again. After localization, the recorded size and sub-directory
   * count of the entry's base directory are compared with the configured limits and, when either
   * is exceeded, {@code deleteCache} is invoked.
   *
   * @param cache the cache to be localized, this should be specified as new
   *     URI(hdfs://hostname:port/absolute_path_to_file#LINKNAME). If no schema or hostname:port is
   *     provided the file is assumed to be in the filesystem being used in the Configuration
   * @param conf The Configuration file which contains the filesystem
   * @param subDir The sub cache Dir where you want to localize the files/archives
   * @param fileStatus The file status on the dfs; when {@code null}, an already initialized entry
   *     is used without a validity check
   * @param isArchive if the cache is an archive or a file. In case it is an archive with a .zip or
   *     .jar or .tar or .tgz or .tar.gz extension it will be unzipped/unjarred/untarred
   *     automatically and the directory where the archive is unzipped/unjarred/untarred is returned
   *     as the Path. In case of a file, the path to the file is returned
   * @param confFileStamp this is the hdfs file modification timestamp to verify that the file to be
   *     cached hasn't changed since the job started
   * @param fileLength this is the length of the cache file
   * @param currentWorkDir this is the directory where you would want to create symlinks for the
   *     locally cached files/archives
   * @param honorSymLinkConf if this is false, then the symlinks are not created even if conf says
   *     so (this is required for an optimization in task launches)
   * @param asyncDiskService disk service handed to {@code deleteCache} when limits are exceeded
   * @param lDirAllocator LocalDirAllocator of the tracker
   * @return the path to directory where the archives are unjarred in case of archives, the path to
   *     the file where the file is copied locally
   * @throws IOException
   */
  private static Path getLocalCache(
      URI cache,
      Configuration conf,
      Path subDir,
      FileStatus fileStatus,
      boolean isArchive,
      long confFileStamp,
      long fileLength,
      Path currentWorkDir,
      boolean honorSymLinkConf,
      MRAsyncDiskService asyncDiskService,
      LocalDirAllocator lDirAllocator)
      throws IOException {
    String key = getKey(cache, conf, confFileStamp);

    CacheStatus lcacheStatus;
    Path localizedPath;
    // Find or register the entry and take a reference, all under the registry lock.
    synchronized (cachedArchives) {
      lcacheStatus = cachedArchives.get(key);
      if (lcacheStatus == null) {
        // was never localized
        // A random parent directory name keeps this localization apart from others.
        Path uniqueParentDir = new Path(subDir, String.valueOf(random.nextLong()));
        String cachePath = new Path(uniqueParentDir, makeRelative(cache, conf)).toString();
        Path localPath = lDirAllocator.getLocalPathForWrite(cachePath, fileLength, conf);
        // First constructor argument: the allocated path with the relative cachePath removed.
        lcacheStatus =
            new CacheStatus(
                new Path(localPath.toString().replace(cachePath, "")), localPath, uniqueParentDir);
        cachedArchives.put(key, lcacheStatus);
      }
      lcacheStatus.refcount++;
    }
    // Stays false until the return statement is reached; drives the rollback in finally.
    boolean initSuccessful = false;
    try {
      // Work on the entry itself is serialized on the entry's own monitor.
      synchronized (lcacheStatus) {
        if (!lcacheStatus.isInited()) {
          // First user of this entry: localize it and mark it initialized.
          localizedPath = localizeCache(conf, cache, confFileStamp, lcacheStatus, isArchive);
          lcacheStatus.initComplete();
        } else {
          if (fileStatus != null) {
            localizedPath =
                checkCacheStatusValidity(
                    conf, cache, confFileStamp, lcacheStatus, fileStatus, isArchive);
          } else {
            // if fileStatus is null, then the md5 must be correct
            // so there is no need to check for cache validity
            localizedPath = lcacheStatus.localizedLoadPath;
          }
        }
        createSymlink(conf, cache, lcacheStatus, isArchive, currentWorkDir, honorSymLinkConf);
      }

      // try deleting stuff if you can
      // Read the current totals for this entry's base directory; both default to 0 when the
      // base directory has no recorded value.
      long size = 0;
      int numberSubDir = 0;
      synchronized (lcacheStatus) {
        synchronized (baseDirSize) {
          Long get = baseDirSize.get(lcacheStatus.getBaseDir());
          if (get != null) {
            size = get.longValue();
          } else {
            LOG.warn("Cannot find size of baseDir: " + lcacheStatus.getBaseDir());
          }
        }
        synchronized (baseDirNumberSubDir) {
          Integer get = baseDirNumberSubDir.get(lcacheStatus.getBaseDir());
          if (get != null) {
            numberSubDir = get.intValue();
          } else {
            LOG.warn("Cannot find subdirectories limit of baseDir: " + lcacheStatus.getBaseDir());
          }
        }
      }
      // setting the cache size to a default of 10GB
      // (the actual fallback is DEFAULT_CACHE_SIZE, whose value is not visible here)
      long allowedSize = conf.getLong("local.cache.size", DEFAULT_CACHE_SIZE);
      long allowedNumberSubDir =
          conf.getLong("local.cache.numbersubdir", DEFAULT_CACHE_SUBDIR_LIMIT);
      if (allowedSize < size || allowedNumberSubDir < numberSubDir) {
        // try some cache deletions
        LOG.debug(
            "Start deleting released cache because"
                + " [size, allowedSize, numberSubDir, allowedNumberSubDir] ="
                + " ["
                + size
                + ", "
                + allowedSize
                + ", "
                + numberSubDir
                + ", "
                + allowedNumberSubDir
                + "]");
        deleteCache(conf, asyncDiskService);
      }
      initSuccessful = true;
      return localizedPath;
    } finally {
      // Anything thrown above leaves initSuccessful false: give back the reference taken earlier.
      if (!initSuccessful) {
        synchronized (cachedArchives) {
          lcacheStatus.refcount--;
        }
      }
    }
  }
예제 #22
0
 /**
  * Looks up the recorded status of a task tracker while holding the {@code taskTrackers} lock.
  *
  * @param trackerID identifier the tracker is registered under
  * @return the recorded status, or {@code null} if none is registered
  */
 public TaskTrackerStatus getTaskTracker(String trackerID) {
   final TaskTrackerStatus found;
   synchronized (taskTrackers) {
     found = (TaskTrackerStatus) taskTrackers.get(trackerID);
   }
   return found;
 }
예제 #23
0
  /**
   * A tracker wants to know if there's a Task to run. Returns a task we'd like the TaskTracker to
   * execute right now.
   *
   * <p>The tracker is offered a map task first and, failing that, a reduce task. For each kind,
   * the tracker is only considered when its current count is below {@code maxCurrentTasks} and
   * no more than {@code TASK_ALLOC_EPSILON} above the pool average. Jobs are consulted in arrival
   * order, skipping those that are not running, and the scan stops once the accumulated demand
   * (plus padding) reaches the pool's total capacity.
   *
   * @param taskTracker name of the polling tracker
   * @return a task for the tracker to run, or {@code null} if the tracker is unknown or nothing
   *     is handed out
   */
  public synchronized Task pollForNewTask(String taskTracker) {
    //
    // Compute average map and reduce task numbers across pool
    //
    int avgMaps = 0;
    int avgReduces = 0;
    int numTaskTrackers;
    TaskTrackerStatus tts;
    // Read the pool size and this tracker's status together under the taskTrackers lock.
    synchronized (taskTrackers) {
      numTaskTrackers = taskTrackers.size();
      tts = (TaskTrackerStatus) taskTrackers.get(taskTracker);
    }
    // Integer division; the averages stay 0 when no tracker is registered.
    if (numTaskTrackers > 0) {
      avgMaps = totalMaps / numTaskTrackers;
      avgReduces = totalReduces / numTaskTrackers;
    }
    int totalCapacity = numTaskTrackers * maxCurrentTasks;
    //
    // Get map + reduce counts for the current tracker.
    //
    if (tts == null) {
      LOG.warning("Unknown task tracker polling; ignoring: " + taskTracker);
      return null;
    }

    int numMaps = tts.countMapTasks();
    int numReduces = tts.countReduceTasks();

    //
    // In the below steps, we allocate first a map task (if appropriate),
    // and then a reduce task if appropriate.  We go through all jobs
    // in order of job arrival; jobs only get serviced if their
    // predecessors are serviced, too.
    //

    //
    // We hand a task to the current taskTracker if the given machine
    // has a workload that's equal to or less than the averageMaps
    // +/- TASK_ALLOC_EPSILON.  (That epsilon is in place in case
    // there is an odd machine that is failing for some reason but
    // has not yet been removed from the pool, making capacity seem
    // larger than it really is.)
    //
    synchronized (jobsByArrival) {
      if ((numMaps < maxCurrentTasks) && (numMaps <= (avgMaps + TASK_ALLOC_EPSILON))) {

        // Running total of map demand over the jobs visited so far.
        int totalNeededMaps = 0;
        for (Iterator it = jobsByArrival.iterator(); it.hasNext(); ) {
          JobInProgress job = (JobInProgress) it.next();
          if (job.getStatus().getRunState() != JobStatus.RUNNING) {
            continue;
          }

          Task t = job.obtainNewMapTask(taskTracker, tts);
          if (t != null) {
            return t;
          }

          //
          // Beyond the highest-priority task, reserve a little
          // room for failures and speculative executions; don't
          // schedule tasks to the hilt.
          //
          totalNeededMaps += job.desiredMaps();
          double padding = 0;
          // Padding is only applied when the pool has more than MIN_SLOTS_FOR_PADDING slots.
          if (totalCapacity > MIN_SLOTS_FOR_PADDING) {
            padding = Math.min(maxCurrentTasks, totalNeededMaps * PAD_FRACTION);
          }
          // Demand so far already fills the pool: later-arriving jobs are not consulted.
          if (totalNeededMaps + padding >= totalCapacity) {
            break;
          }
        }
      }

      //
      // Same thing, but for reduce tasks
      //
      if ((numReduces < maxCurrentTasks) && (numReduces <= (avgReduces + TASK_ALLOC_EPSILON))) {

        // Running total of reduce demand over the jobs visited so far.
        int totalNeededReduces = 0;
        for (Iterator it = jobsByArrival.iterator(); it.hasNext(); ) {
          JobInProgress job = (JobInProgress) it.next();
          if (job.getStatus().getRunState() != JobStatus.RUNNING) {
            continue;
          }

          Task t = job.obtainNewReduceTask(taskTracker, tts);
          if (t != null) {
            return t;
          }

          //
          // Beyond the highest-priority task, reserve a little
          // room for failures and speculative executions; don't
          // schedule tasks to the hilt.
          //
          totalNeededReduces += job.desiredReduces();
          double padding = 0;
          if (totalCapacity > MIN_SLOTS_FOR_PADDING) {
            padding = Math.min(maxCurrentTasks, totalNeededReduces * PAD_FRACTION);
          }
          if (totalNeededReduces + padding >= totalCapacity) {
            break;
          }
        }
      }
    }
    // Neither a map nor a reduce task was handed out.
    return null;
  }
예제 #24
0
 /**
  * Builds a {@code ClusterStatus} from the number of registered trackers, the {@code totalMaps}
  * and {@code totalReduces} counters, and {@code maxCurrentTasks}. The values are read while
  * holding the {@code taskTrackers} lock.
  *
  * @return a newly constructed cluster status
  */
 public synchronized ClusterStatus getClusterStatus() {
   final ClusterStatus snapshot;
   synchronized (taskTrackers) {
     final int trackerCount = taskTrackers.size();
     snapshot = new ClusterStatus(trackerCount, totalMaps, totalReduces, maxCurrentTasks);
   }
   return snapshot;
 }
예제 #25
0
 ///////////////////////////////////////////////////////////////
 // JobTracker methods
 ///////////////////////////////////////////////////////////////
 /**
  * Looks up the in-progress record of a job.
  *
  * @param jobid id of the job
  * @return the registered {@code JobInProgress}, or {@code null} if none is registered
  */
 public JobInProgress getJob(String jobid) {
   final Object registered = jobs.get(jobid);
   return (JobInProgress) registered;
 }