Пример #1
0
 /**
  * Builds the client-facing {@link BlockInfo} for a given {@link MasterBlockInfo}, resolving each
  * block location to the address of the worker that holds it.
  *
  * <p>The list returned by {@link MasterBlockInfo#getBlockLocations()} is sorted in ascending order
  * of the ordinal of each location's tier alias in the global storage tier association. A location
  * whose worker id is not found in {@link #mWorkers} is left out of the result.
  *
  * <p>NOTE: {@link #mWorkers} should already be locked before calling this method.
  *
  * @param masterBlockInfo the {@link MasterBlockInfo} to convert
  * @return a {@link BlockInfo} carrying the block id, the length and the resolved worker locations
  */
 private BlockInfo generateBlockInfo(MasterBlockInfo masterBlockInfo) {
   List<BlockLocation> resolved = new ArrayList<BlockLocation>();
   List<MasterBlockLocation> masterLocations = masterBlockInfo.getBlockLocations();

   // Compare two locations by the ordinal of their tier alias in the global tier association.
   Comparator<MasterBlockLocation> byTierOrdinal =
       new Comparator<MasterBlockLocation>() {
         @Override
         public int compare(MasterBlockLocation left, MasterBlockLocation right) {
           int leftOrdinal = mGlobalStorageTierAssoc.getOrdinal(left.getTierAlias());
           int rightOrdinal = mGlobalStorageTierAssoc.getOrdinal(right.getTierAlias());
           return leftOrdinal - rightOrdinal;
         }
       };
   Collections.sort(masterLocations, byTierOrdinal);

   // "Join" each location with the worker set to obtain the worker's address.
   for (MasterBlockLocation masterLocation : masterLocations) {
     MasterWorkerInfo holder =
         mWorkers.getFirstByField(mIdIndex, masterLocation.getWorkerId());
     if (holder == null) {
       // No worker with this id is currently tracked in mWorkers; skip the location.
       continue;
     }
     BlockLocation location =
         new BlockLocation()
             .setWorkerId(masterLocation.getWorkerId())
             .setWorkerAddress(holder.getWorkerAddress())
             .setTierAlias(masterLocation.getTierAlias());
     resolved.add(location);
   }

   BlockInfo result =
       new BlockInfo()
           .setBlockId(masterBlockInfo.getBlockId())
           .setLength(masterBlockInfo.getLength())
           .setLocations(resolved);
   return result;
 }
Пример #2
0
  /**
   * Processes a periodic heartbeat sent by a worker to the master.
   *
   * <p>If the worker id is not present in the worker set, a {@code Register} command with an empty
   * block list is returned. Otherwise the removed and added blocks are applied, the worker's used
   * bytes and last-updated time are refreshed, and the reply is either a {@code Free} command
   * carrying the worker's to-remove blocks or a {@code Nothing} command when there are none.
   *
   * @param workerId the worker id
   * @param usedBytesOnTiers a mapping from tier alias to the used bytes
   * @param removedBlockIds a list of block ids removed from this worker
   * @param addedBlocksOnTiers a mapping from tier alias to the added blocks
   * @return the command for the worker to execute
   */
  public Command workerHeartbeat(
      long workerId,
      Map<String, Long> usedBytesOnTiers,
      List<Long> removedBlockIds,
      Map<String, List<Long>> addedBlocksOnTiers) {
    synchronized (mBlocks) {
      synchronized (mWorkers) {
        boolean knownWorker = mWorkers.contains(mIdIndex, workerId);
        if (!knownWorker) {
          LOG.warn("Could not find worker id: {} for heartbeat.", workerId);
          return new Command(CommandType.Register, new ArrayList<Long>());
        }

        MasterWorkerInfo heartbeatingWorker = mWorkers.getFirstByField(mIdIndex, workerId);

        // Apply the block changes reported by the worker: removals first, then additions.
        processWorkerRemovedBlocks(heartbeatingWorker, removedBlockIds);
        processWorkerAddedBlocks(heartbeatingWorker, addedBlocksOnTiers);

        // Refresh the worker's usage and liveness bookkeeping.
        heartbeatingWorker.updateUsedBytes(usedBytesOnTiers);
        heartbeatingWorker.updateLastUpdatedTimeMs();

        List<Long> pendingRemovals = heartbeatingWorker.getToRemoveBlocks();
        return pendingRemovals.isEmpty()
            ? new Command(CommandType.Nothing, new ArrayList<Long>())
            : new Command(CommandType.Free, pendingRemovals);
      }
    }
  }
Пример #3
0
  /**
   * Returns a worker id for the given worker address.
   *
   * <p>An address already present among the active workers yields its existing id. An address
   * found among the lost workers is moved back to the active workers (after its last-updated time
   * is refreshed) and keeps its old id. Any other address is assigned a newly generated id and
   * added to the active workers.
   *
   * @param workerNetAddress the worker {@link WorkerNetAddress}
   * @return the worker id for this worker
   */
  public long getWorkerId(WorkerNetAddress workerNetAddress) {
    // TODO(gene): Determine whether thrift re-uses the WorkerNetAddress object; the address is
    // stored here without being cloned.
    synchronized (mWorkers) {
      boolean alreadyActive = mWorkers.contains(mAddressIndex, workerNetAddress);
      if (alreadyActive) {
        // The address is already mapped to a worker id; hand that id back.
        long existingId = mWorkers.getFirstByField(mAddressIndex, workerNetAddress).getId();
        LOG.warn("The worker {} already exists as id {}.", workerNetAddress, existingId);
        return existingId;
      }

      boolean previouslyLost = mLostWorkers.contains(mAddressIndex, workerNetAddress);
      if (previouslyLost) {
        final MasterWorkerInfo returningWorker =
            mLostWorkers.getFirstByField(mAddressIndex, workerNetAddress);
        final long returningId = returningWorker.getId();
        LOG.warn("A lost worker {} has requested its old id {}.", workerNetAddress, returningId);

        // Refresh the timestamp first so the worker is current when it becomes active again.
        returningWorker.updateLastUpdatedTimeMs();
        mWorkers.add(returningWorker);
        mLostWorkers.remove(returningWorker);
        return returningId;
      }

      // Unknown address: allocate a fresh id and start tracking the worker.
      long freshId = mNextWorkerId.getAndIncrement();
      MasterWorkerInfo newWorker = new MasterWorkerInfo(freshId, workerNetAddress);
      mWorkers.add(newWorker);

      LOG.info("getWorkerId(): WorkerNetAddress: {} id: {}", workerNetAddress, freshId);
      return freshId;
    }
  }
Пример #4
0
  /**
   * Updates metadata when a worker registers with the master.
   *
   * <p>The worker's last-updated time is refreshed, the worker is registered with its tier
   * information and the full set of blocks it reports, and the blocks returned by that
   * registration are processed as removed before the reported blocks are processed as added.
   *
   * @param workerId the worker id of the worker registering
   * @param storageTiers a list of storage tier aliases in order of their position in the worker's
   *     hierarchy
   * @param totalBytesOnTiers a mapping from storage tier alias to total bytes
   * @param usedBytesOnTiers a mapping from storage tier alias to the used bytes
   * @param currentBlocksOnTiers a mapping from storage tier alias to a list of blocks
   * @throws NoWorkerException if workerId cannot be found
   */
  public void workerRegister(
      long workerId,
      List<String> storageTiers,
      Map<String, Long> totalBytesOnTiers,
      Map<String, Long> usedBytesOnTiers,
      Map<String, List<Long>> currentBlocksOnTiers)
      throws NoWorkerException {
    synchronized (mBlocks) {
      synchronized (mWorkers) {
        boolean knownWorker = mWorkers.contains(mIdIndex, workerId);
        if (!knownWorker) {
          throw new NoWorkerException("Could not find worker id: " + workerId + " to register.");
        }
        MasterWorkerInfo registeringWorker = mWorkers.getFirstByField(mIdIndex, workerId);
        registeringWorker.updateLastUpdatedTimeMs();

        // Flatten the per-tier block lists into one set of all blocks on this worker.
        HashSet<Long> reportedBlocks = new HashSet<Long>();
        for (Map.Entry<String, List<Long>> tierBlocks : currentBlocksOnTiers.entrySet()) {
          reportedBlocks.addAll(tierBlocks.getValue());
        }

        // Registration returns the blocks to be treated as removed from this worker.
        Set<Long> noLongerPresent =
            registeringWorker.register(
                mGlobalStorageTierAssoc,
                storageTiers,
                totalBytesOnTiers,
                usedBytesOnTiers,
                reportedBlocks);

        processWorkerRemovedBlocks(registeringWorker, noLongerPresent);
        processWorkerAddedBlocks(registeringWorker, currentBlocksOnTiers);
        LOG.info("registerWorker(): {}", registeringWorker);
      }
    }
  }
Пример #5
0
  /**
   * Marks a block as committed on a specific worker.
   *
   * <p>Updates the worker's block set, its used bytes on the given tier and its last-updated time.
   * If the master has no metadata for the block yet, a new {@link MasterBlockInfo} is created,
   * written to the journal, and the journal is flushed. Finally the worker is recorded as a
   * location of the block and the block is removed from the lost blocks.
   *
   * @param workerId the worker id committing the block
   * @param usedBytesOnTier the updated used bytes on the tier of the worker
   * @param tierAlias the alias of the storage tier where the worker is committing the block to
   * @param blockId the committing block id
   * @param length the length of the block
   * @throws IllegalArgumentException if the worker id is not found among the active workers
   */
  public void commitBlock(
      long workerId, long usedBytesOnTier, String tierAlias, long blockId, long length) {
    LOG.debug(
        "Commit block from worker: {}",
        FormatUtils.parametersToString(workerId, usedBytesOnTier, blockId, length));
    synchronized (mBlocks) {
      synchronized (mWorkers) {
        MasterWorkerInfo workerInfo = mWorkers.getFirstByField(mIdIndex, workerId);
        if (workerInfo == null) {
          // The lookup returns null for an id that is not in mWorkers (e.g. a worker that was
          // moved to the lost workers). Fail with a descriptive error before touching any state
          // instead of an anonymous NullPointerException.
          throw new IllegalArgumentException(
              "Could not find worker id: " + workerId + " to commit block " + blockId + ".");
        }
        workerInfo.addBlock(blockId);
        workerInfo.updateUsedBytes(tierAlias, usedBytesOnTier);
        workerInfo.updateLastUpdatedTimeMs();

        MasterBlockInfo masterBlockInfo = mBlocks.get(blockId);
        if (masterBlockInfo == null) {
          // First time the master sees this block: create its metadata and journal it.
          masterBlockInfo = new MasterBlockInfo(blockId, length);
          mBlocks.put(blockId, masterBlockInfo);
          BlockInfoEntry blockInfo =
              BlockInfoEntry.newBuilder()
                  .setBlockId(masterBlockInfo.getBlockId())
                  .setLength(masterBlockInfo.getLength())
                  .build();
          writeJournalEntry(JournalEntry.newBuilder().setBlockInfo(blockInfo).build());
          flushJournal();
        }
        masterBlockInfo.addWorker(workerId, tierAlias);
        // The block now has a location, so it is no longer lost.
        mLostBlocks.remove(blockId);
      }
    }
  }
Пример #6
0
 /**
  * Collects the client-facing info of every worker currently tracked as lost.
  *
  * <p>The lost workers are read while holding the lock on {@code mWorkers}.
  *
  * @return a set of worker info, one entry generated per lost worker
  */
 public Set<WorkerInfo> getLostWorkersInfo() {
   synchronized (mWorkers) {
     int expectedSize = mLostWorkers.size();
     Set<WorkerInfo> lostInfos = new HashSet<WorkerInfo>(expectedSize);
     for (MasterWorkerInfo lostWorker : mLostWorkers) {
       WorkerInfo clientInfo = lostWorker.generateClientWorkerInfo();
       lostInfos.add(clientInfo);
     }
     return lostInfos;
   }
 }
Пример #7
0
 /**
  * Sums the used bytes reported by every worker in the worker set.
  *
  * @return the total used bytes on all tiers, on all workers of Tachyon
  */
 public long getUsedBytes() {
   synchronized (mWorkers) {
     long totalUsed = 0;
     for (MasterWorkerInfo info : mWorkers) {
       totalUsed += info.getUsedBytes();
     }
     return totalUsed;
   }
 }
Пример #8
0
 /**
  * Generates the client-facing info for every worker in the worker set.
  *
  * @return a list of {@link WorkerInfo} objects representing the workers in Tachyon
  */
 public List<WorkerInfo> getWorkerInfoList() {
   synchronized (mWorkers) {
     int workerCount = mWorkers.size();
     List<WorkerInfo> infos = new ArrayList<WorkerInfo>(workerCount);
     for (MasterWorkerInfo worker : mWorkers) {
       WorkerInfo clientInfo = worker.generateClientWorkerInfo();
       infos.add(clientInfo);
     }
     return infos;
   }
 }
Пример #9
0
 /**
  * Aggregates, per storage tier alias, the used bytes reported by every worker.
  *
  * @return the used bytes on each storage tier
  */
 public Map<String, Long> getUsedBytesOnTiers() {
   Map<String, Long> totals = new HashMap<String, Long>();
   synchronized (mWorkers) {
     for (MasterWorkerInfo info : mWorkers) {
       Map<String, Long> perTier = info.getUsedBytesOnTiers();
       for (Map.Entry<String, Long> tierUsage : perTier.entrySet()) {
         String tierAlias = tierUsage.getKey();
         // Start from the running total for this tier, or zero the first time it is seen.
         long accumulated = 0L;
         Long previous = totals.get(tierAlias);
         if (previous != null) {
           accumulated = previous;
         }
         totals.put(tierAlias, accumulated + tierUsage.getValue());
       }
     }
   }
   return totals;
 }
Пример #10
0
 /**
  * Records, in both the worker metadata and the block metadata, the blocks added to a worker.
  *
  * <p>A block id with no entry in {@link #mBlocks} is only logged as a warning. Every other block
  * is added to the worker, gains the worker as a location on the entry's tier alias, and is
  * removed from the lost blocks.
  *
  * <p>NOTE: {@link #mBlocks} should already be locked before calling this method.
  *
  * @param workerInfo The worker metadata object
  * @param addedBlockIds A mapping from storage tier alias to a list of block ids added
  */
 private void processWorkerAddedBlocks(
     MasterWorkerInfo workerInfo, Map<String, List<Long>> addedBlockIds) {
   for (Map.Entry<String, List<Long>> tierEntry : addedBlockIds.entrySet()) {
     for (long addedId : tierEntry.getValue()) {
       MasterBlockInfo knownBlock = mBlocks.get(addedId);
       if (knownBlock == null) {
         LOG.warn("Failed to register workerId: {} to blockId: {}", workerInfo.getId(), addedId);
         continue;
       }
       workerInfo.addBlock(addedId);
       knownBlock.addWorker(workerInfo.getId(), tierEntry.getKey());
       mLostBlocks.remove(addedId);
     }
   }
 }
Пример #11
0
 /**
  * Records, in both the worker metadata and the block metadata, the blocks removed from a worker.
  *
  * <p>A block id with no entry in {@link #mBlocks} is logged as a warning and skipped. For every
  * other block the worker/block association is dropped on both sides, and the block is added to
  * the lost blocks when it has no remaining locations.
  *
  * <p>NOTE: {@link #mBlocks} should already be locked before calling this method.
  *
  * @param workerInfo The worker metadata object
  * @param removedBlockIds A list of block ids removed from the worker
  */
 private void processWorkerRemovedBlocks(
     MasterWorkerInfo workerInfo, Collection<Long> removedBlockIds) {
   for (long goneId : removedBlockIds) {
     MasterBlockInfo knownBlock = mBlocks.get(goneId);
     if (knownBlock != null) {
       LOG.info("Block {} is removed on worker {}.", goneId, workerInfo.getId());
       workerInfo.removeBlock(knownBlock.getBlockId());
       knownBlock.removeWorker(workerInfo.getId());
       boolean noLocationsLeft = knownBlock.getNumLocations() == 0;
       if (noLocationsLeft) {
         mLostBlocks.add(goneId);
       }
     } else {
       // Unknown block: report it and carry on with the remaining ids.
       LOG.warn(
           "Worker {} removed block {} but block does not exist.",
           workerInfo.getId(),
           goneId);
     }
   }
 }
Пример #12
0
 /**
  * Removes blocks from workers.
  *
  * <p>For each block id that has metadata, every worker is detached from the block; each such
  * worker still found in the worker set is told to mark the block for removal. The block is then
  * removed from the lost blocks. Block ids without metadata are ignored.
  *
  * @param blockIds a list of block ids to remove from Tachyon space
  */
 public void removeBlocks(List<Long> blockIds) {
   synchronized (mBlocks) {
     synchronized (mWorkers) {
       for (long targetId : blockIds) {
         MasterBlockInfo knownBlock = mBlocks.get(targetId);
         if (knownBlock != null) {
           // Iterate over a copy of the worker ids because removeWorker is called in the loop.
           List<Long> holderIds = new ArrayList<Long>(knownBlock.getWorkers());
           for (long holderId : holderIds) {
             knownBlock.removeWorker(holderId);
             MasterWorkerInfo holder = mWorkers.getFirstByField(mIdIndex, holderId);
             if (holder != null) {
               holder.updateToRemovedBlock(true, targetId);
             }
           }
           mLostBlocks.remove(targetId);
         }
       }
     }
   }
 }
Пример #13
0
    /**
     * Checks every active worker against the configured master-worker timeout.
     *
     * <p>A worker whose last update is older than the timeout is added to the lost workers,
     * removed from the active workers, and then handed to {@code processLostWorker}.
     */
    @Override
    public void heartbeat() {
      LOG.debug("System status checking.");
      TachyonConf tachyonConf = MasterContext.getConf();
      int timeoutMs = tachyonConf.getInt(Constants.MASTER_WORKER_TIMEOUT_MS);

      synchronized (mBlocks) {
        synchronized (mWorkers) {
          for (Iterator<MasterWorkerInfo> it = mWorkers.iterator(); it.hasNext(); ) {
            MasterWorkerInfo candidate = it.next();
            final long sinceLastUpdateMs =
                CommonUtils.getCurrentMs() - candidate.getLastUpdatedTimeMs();
            if (sinceLastUpdateMs <= timeoutMs) {
              // Still within the timeout; nothing to do for this worker.
              continue;
            }
            LOG.error("The worker {} got timed out!", candidate);
            mLostWorkers.add(candidate);
            // Remove through the iterator, since the worker set is being iterated.
            it.remove();
            processLostWorker(candidate);
          }
        }
      }
    }
Пример #14
0
 /**
  * Processes all blocks of a lost worker as removed from that worker.
  *
  * <p>NOTE: the caller is expected to already hold the lock on {@code mBlocks}.
  *
  * @param latest the metadata of the worker that was lost
  */
 private void processLostWorker(MasterWorkerInfo latest) {
   processWorkerRemovedBlocks(latest, latest.getBlocks());
 }
Пример #15
0
 /**
  * Returns the worker address of the given worker metadata as the value of this field.
  *
  * @param o the worker metadata object to read from
  * @return the result of {@code o.getWorkerAddress()}
  */
 @Override
 public Object getFieldValue(MasterWorkerInfo o) {
   final Object address = o.getWorkerAddress();
   return address;
 }