예제 #1
0
  /**
   * Fetches the file attributes of the task's file from the name space and, once they arrive,
   * continues with read pool selection.
   *
   * <p>The request asks for the attributes already defined on the task plus those returned by
   * {@link PoolMgrSelectReadPoolMsg#getRequiredAttributes()}. The reply is handled
   * asynchronously by the callback passed to {@code _pnfsStub}.
   *
   * @param task the pin task whose file attributes are refreshed
   * @throws CacheException if the expiration time of the pin cannot be refreshed
   */
  private void rereadNameSpaceEntry(final PinTask task) throws CacheException {
    /* Ensure that task is still valid and stays valid for the
     * duration of the name space lookup.
     */
    refreshTimeout(task, getExpirationTimeForNameSpaceLookup());

    /* We allow the set of provided attributes to be incomplete
     * and thus add attributes required by pool manager.
     */
    Set<FileAttribute> attributes = EnumSet.noneOf(FileAttribute.class);
    attributes.addAll(task.getFileAttributes().getDefinedAttributes());
    attributes.addAll(PoolMgrSelectReadPoolMsg.getRequiredAttributes());

    _pnfsStub.send(
        new PnfsGetFileAttributes(task.getPnfsId(), attributes),
        PnfsGetFileAttributes.class,
        new AbstractMessageCallback<PnfsGetFileAttributes>() {
          @Override
          public void success(PnfsGetFileAttributes msg) {
            try {
              task.setFileAttributes(msg.getFileAttributes());

              /* Ensure that task is still valid
               * and stays valid for the duration
               * of the pool selection.
               */
              refreshTimeout(task, getExpirationTimeForPoolSelection());
              selectReadPool(task);
            } catch (CacheException e) {
              fail(task, e.getRc(), e.getMessage());
            } catch (RuntimeException e) {
              fail(task, CacheException.UNEXPECTED_SYSTEM_EXCEPTION, e.toString());
            }
          }

          @Override
          public void failure(int rc, Object error) {
            /* String.valueOf tolerates a null error object; calling
             * error.toString() directly would throw out of the
             * callback and leave the task neither failed nor
             * cleaned up.
             */
            fail(task, rc, String.valueOf(error));
          }

          @Override
          public void noroute(CellPath path) {
            /* PnfsManager is unreachable. We
             * expect this to be a transient
             * problem and retry in a moment.
             */
            retry(task, RETRY_DELAY);
          }

          @Override
          public void timeout(CellPath path) {
            /* PnfsManager did not respond. We
             * expect this to be a transient
             * problem and retry in a moment.
             */
            retry(task, SMALL_DELAY);
          }
        });
  }
예제 #2
0
/**
 * Processes pin requests.
 *
 * <p>A pin request goes through several steps to pin a file on a pool:
 *
 * <ul>
 *   <li>Create DB entry in state PINNING
 *   <li>Optionally read the name space entry
 *   <li>Select a read pool (which may involve staging)
 *   <li>Update DB entry with the pool name
 *   <li>Create sticky flag on pool
 *   <li>Update DB entry to state PINNED
 * </ul>
 *
 * <p>If during any step the entry is no longer in PINNING then the operation is aborted.
 *
 * <p>If a DB error occurs it is considered fatal and the pinning operation is not completed. The DB
 * entry will stay in PINNING until either explicitly unpinned or it expires.
 *
 * <p>Database operations are blocking. Communication with PoolManager and pools is asynchronous.
 */
public class PinRequestProcessor implements CellMessageReceiver {
  private static final Logger _log = LoggerFactory.getLogger(PinRequestProcessor.class);

  /** The delay we use after a pin request failed and before retrying the request. */
  private static final long RETRY_DELAY = SECONDS.toMillis(30);

  /**
   * The delay we use after transient failures that should be retried immediately. The small delay
   * prevents tight retry loops.
   */
  private static final long SMALL_DELAY = MILLISECONDS.toMillis(10);

  /** Safety margin added to the lifetime of the sticky bit to account for clock drift. */
  private static final long CLOCK_DRIFT_MARGIN = MINUTES.toMillis(30);

  /** Attributes that must be defined on a request before a read pool can be selected. */
  private static final Set<FileAttribute> REQUIRED_ATTRIBUTES =
      PoolMgrSelectReadPoolMsg.getRequiredAttributes();

  private ScheduledExecutorService _executor;
  private PinDao _dao;
  private CellStub _poolStub;
  private CellStub _pnfsStub;
  private CellStub _poolManagerStub;
  private CheckStagePermission _checkStagePermission;
  private long _maxLifetime;
  private TimeUnit _maxLifetimeUnit;

  private PoolMonitor _poolMonitor;

  @Required
  public void setExecutor(ScheduledExecutorService executor) {
    _executor = executor;
  }

  @Required
  public void setDao(PinDao dao) {
    _dao = dao;
  }

  @Required
  public void setPoolStub(CellStub stub) {
    _poolStub = stub;
  }

  @Required
  public void setPnfsStub(CellStub stub) {
    _pnfsStub = stub;
  }

  @Required
  public void setPoolManagerStub(CellStub stub) {
    _poolManagerStub = stub;
  }

  @Required
  public void setStagePermission(CheckStagePermission checker) {
    _checkStagePermission = checker;
  }

  @Required
  public void setMaxLifetime(long maxLifetime) {
    _maxLifetime = maxLifetime;
  }

  @Required
  public void setPoolMonitor(PoolMonitor poolMonitor) {
    _poolMonitor = poolMonitor;
  }

  public long getMaxLifetime() {
    return _maxLifetime;
  }

  @Required
  public void setMaxLifetimeUnit(TimeUnit unit) {
    _maxLifetimeUnit = unit;
  }

  public TimeUnit getMaxLifetimeUnit() {
    return _maxLifetimeUnit;
  }

  /**
   * Caps the lifetime requested by {@code message} at the configured maximum lifetime.
   *
   * <p>The limit is only applied when the configured maximum is greater than -1. A requested
   * lifetime of -1 is replaced by the maximum; any other value is reduced to the maximum if it
   * exceeds it.
   *
   * @param message the pin request whose lifetime is adjusted in place
   */
  private void enforceLifetimeLimit(PinManagerPinMessage message) {
    if (_maxLifetime > -1) {
      long millis = _maxLifetimeUnit.toMillis(_maxLifetime);
      long requestedLifetime = message.getLifetime();
      if (requestedLifetime == -1) {
        message.setLifetime(millis);
      } else {
        message.setLifetime(Math.min(millis, requestedLifetime));
      }
    }
  }

  /**
   * Entry point for pin requests.
   *
   * <p>Creates a pin task for the request and starts processing it: if the request does not carry
   * all {@link #REQUIRED_ATTRIBUTES} the name space entry is read first, otherwise pool selection
   * starts right away. No processing is started when {@link #createTask} returns {@code null}.
   *
   * @param message the pin request
   * @return the reply object through which the outcome of the request is delivered
   * @throws CacheException if starting the name space lookup or the pool selection fails
   */
  public MessageReply<PinManagerPinMessage> messageArrived(PinManagerPinMessage message)
      throws CacheException {
    MessageReply<PinManagerPinMessage> reply = new MessageReply<>();

    enforceLifetimeLimit(message);

    PinTask task = createTask(message, reply);
    if (task != null) {
      if (!task.getFileAttributes().isDefined(REQUIRED_ATTRIBUTES)) {
        rereadNameSpaceEntry(task);
      } else {
        selectReadPool(task);
      }
    }

    return reply;
  }

  /**
   * Determines which pool manager request states are allowed for the task.
   *
   * @param task the pin task providing the subject and storage info to check
   * @return {@code RequestContainerV5.allStates} if the stage permission check succeeds, {@code
   *     RequestContainerV5.allStatesExceptStage} if it is denied or the check itself fails
   */
  protected EnumSet<RequestContainerV5.RequestState> checkStaging(PinTask task) {
    try {
      Subject subject = task.getSubject();
      StorageInfo info = task.getFileAttributes().getStorageInfo();
      return _checkStagePermission.canPerformStaging(subject, info)
          ? RequestContainerV5.allStates
          : RequestContainerV5.allStatesExceptStage;
    } catch (PatternSyntaxException | IOException ex) {
      _log.error("Failed to check stage permission: {}", ex.toString());
    }
    /* The permission check could not be performed, so staging is not allowed. */
    return RequestContainerV5.allStatesExceptStage;
  }

  /**
   * Schedules another attempt at processing the task, starting again from the name space lookup.
   *
   * <p>If the task is no longer valid at the time the retry would run, the task is failed with
   * {@code CacheException.TIMEOUT} instead.
   *
   * @param task the pin task to retry
   * @param delay the delay in milliseconds before the retry is executed
   */
  private void retry(final PinTask task, long delay) {
    if (!task.isValidIn(delay)) {
      fail(task, CacheException.TIMEOUT, "Pin request TTL exceeded");
    } else {
      _executor.schedule(
          new Runnable() {
            @Override
            public void run() {
              try {
                rereadNameSpaceEntry(task);
              } catch (CacheException e) {
                fail(task, e.getRc(), e.getMessage());
              } catch (RuntimeException e) {
                fail(task, CacheException.UNEXPECTED_SYSTEM_EXCEPTION, e.toString());
              }
            }
          },
          delay,
          MILLISECONDS);
    }
  }

  /**
   * Fails the task with the given return code and error message and cleans up its pin record.
   *
   * <p>Runtime exceptions raised while doing so are logged and not propagated.
   *
   * @param task the pin task to fail
   * @param rc the return code reported to the requester
   * @param error the error message reported to the requester
   */
  private void fail(PinTask task, int rc, String error) {
    try {
      task.fail(rc, error);
      clearPin(task);
    } catch (RuntimeException e) {
      _log.error(e.toString());
    }
  }

  /**
   * Fetches the file attributes of the task's file from the name space and, once they arrive,
   * continues with read pool selection.
   *
   * <p>The request asks for the attributes already defined on the task plus those returned by
   * {@link PoolMgrSelectReadPoolMsg#getRequiredAttributes()}. The reply is handled
   * asynchronously by the callback passed to the name space stub.
   *
   * @param task the pin task whose file attributes are refreshed
   * @throws CacheException if the pin is no longer in PINNING
   */
  private void rereadNameSpaceEntry(final PinTask task) throws CacheException {
    /* Ensure that task is still valid and stays valid for the
     * duration of the name space lookup.
     */
    refreshTimeout(task, getExpirationTimeForNameSpaceLookup());

    /* We allow the set of provided attributes to be incomplete
     * and thus add attributes required by pool manager.
     */
    Set<FileAttribute> attributes = EnumSet.noneOf(FileAttribute.class);
    attributes.addAll(task.getFileAttributes().getDefinedAttributes());
    attributes.addAll(PoolMgrSelectReadPoolMsg.getRequiredAttributes());

    _pnfsStub.send(
        new PnfsGetFileAttributes(task.getPnfsId(), attributes),
        PnfsGetFileAttributes.class,
        new AbstractMessageCallback<PnfsGetFileAttributes>() {
          @Override
          public void success(PnfsGetFileAttributes msg) {
            try {
              task.setFileAttributes(msg.getFileAttributes());

              /* Ensure that task is still valid
               * and stays valid for the duration
               * of the pool selection.
               */
              refreshTimeout(task, getExpirationTimeForPoolSelection());
              selectReadPool(task);
            } catch (CacheException e) {
              fail(task, e.getRc(), e.getMessage());
            } catch (RuntimeException e) {
              fail(task, CacheException.UNEXPECTED_SYSTEM_EXCEPTION, e.toString());
            }
          }

          @Override
          public void failure(int rc, Object error) {
            /* String.valueOf tolerates a null error object; calling
             * error.toString() directly would throw out of the
             * callback and leave the task neither failed nor
             * cleaned up.
             */
            fail(task, rc, String.valueOf(error));
          }

          @Override
          public void noroute(CellPath path) {
            /* PnfsManager is unreachable. We
             * expect this to be a transient
             * problem and retry in a moment.
             */
            retry(task, RETRY_DELAY);
          }

          @Override
          public void timeout(CellPath path) {
            /* PnfsManager did not respond. We
             * expect this to be a transient
             * problem and retry in a moment.
             */
            retry(task, SMALL_DELAY);
          }
        });
  }

  /**
   * Selects a pool to pin the file on using the local pool monitor and proceeds to set the sticky
   * flag on it. If the pool selector reports the file as not online, the decision is delegated to
   * pool manager instead.
   *
   * @param task the pin task to select a pool for
   * @throws CacheException if pool selection fails for a reason other than the file not being
   *     online, or if the pin is no longer in PINNING
   */
  private void selectReadPool(final PinTask task) throws CacheException {
    try {
      PoolSelector poolSelector =
          _poolMonitor.getPoolSelector(task.getFileAttributes(), task.getProtocolInfo(), null);

      PoolInfo pool = poolSelector.selectPinPool();
      setPool(task, pool.getName());
      setStickyFlag(task, pool.getName(), pool.getAddress());
    } catch (FileNotOnlineCacheException e) {
      askPoolManager(task);
    }
  }

  /**
   * Asks pool manager to select a read pool for the task and, on success, records the pool and
   * proceeds to set the sticky flag on it.
   *
   * <p>The reply is handled asynchronously. Depending on the kind of failure the task is either
   * retried after a delay or failed.
   *
   * @param task the pin task to select a pool for
   */
  private void askPoolManager(final PinTask task) {
    PoolMgrSelectReadPoolMsg msg =
        new PoolMgrSelectReadPoolMsg(
            task.getFileAttributes(),
            task.getProtocolInfo(),
            task.getReadPoolSelectionContext(),
            checkStaging(task));
    msg.setSubject(task.getSubject());
    msg.setSkipCostUpdate(true);
    _poolManagerStub.send(
        msg,
        PoolMgrSelectReadPoolMsg.class,
        new AbstractMessageCallback<PoolMgrSelectReadPoolMsg>() {
          @Override
          public void success(PoolMgrSelectReadPoolMsg msg) {
            try {
              /* Pool manager expects us
               * to keep some state
               * between retries.
               */
              task.setReadPoolSelectionContext(msg.getContext());

              /* Store the pool name in
               * the DB so we know what to
               * clean up if something
               * fails.
               */
              String poolName = msg.getPoolName();
              CellAddressCore poolAddress = msg.getPoolAddress();
              task.getFileAttributes().getLocations().add(poolName);
              setPool(task, poolName);

              setStickyFlag(task, poolName, poolAddress);
            } catch (CacheException e) {
              fail(task, e.getRc(), e.getMessage());
            } catch (RuntimeException e) {
              fail(task, CacheException.UNEXPECTED_SYSTEM_EXCEPTION, e.toString());
            }
          }

          @Override
          public void failure(int rc, Object error) {
            /* Pool manager expects us to
             * keep some state between
             * retries.
             */
            task.setReadPoolSelectionContext(getReply().getContext());
            switch (rc) {
              case CacheException.OUT_OF_DATE:
                /* Pool manager asked for a
                 * refresh of the request.
                 * Retry right away.
                 */
                retry(task, 0);
                break;
              case CacheException.FILE_NOT_IN_REPOSITORY:
              case CacheException.PERMISSION_DENIED:
                /* String.valueOf tolerates a null error object. */
                fail(task, rc, String.valueOf(error));
                break;
              default:
                /* Ideally we would delegate the retry to the door,
                 * but for the time being the retry is dealt with
                 * by pin manager.
                 */
                retry(task, RETRY_DELAY);
                break;
            }
          }

          @Override
          public void noroute(CellPath path) {
            /* Pool manager is
             * unreachable. We expect this
             * to be transient and retry in
             * a moment.
             */
            retry(task, RETRY_DELAY);
          }

          @Override
          public void timeout(CellPath path) {
            /* Pool manager did not
             * respond. We expect this to be
             * transient and retry in a
             * moment.
             */
            retry(task, SMALL_DELAY);
          }
        });
  }

  /**
   * Asks the given pool to set the sticky flag for the task's file and, on success, marks the pin
   * as PINNED and completes the task.
   *
   * <p>The reply is handled asynchronously. Depending on the kind of failure the task is either
   * retried after a delay or failed.
   *
   * @param task the pin task being processed
   * @param poolName the name of the pool to set the sticky flag on
   * @param poolAddress the address the request is sent to
   */
  private void setStickyFlag(
      final PinTask task, final String poolName, CellAddressCore poolAddress) {
    /* The pin lifetime should be from the moment the file is
     * actually pinned. Due to staging and pool to pool transfers
     * this may be much later than when the pin was requested.
     */
    Date pinExpiration = task.freezeExpirationTime();

    /* To allow for some drift in clocks we add a safety margin to
     * the lifetime of the sticky bit.
     */
    long poolExpiration =
        (pinExpiration == null) ? -1 : pinExpiration.getTime() + CLOCK_DRIFT_MARGIN;

    PoolSetStickyMessage msg =
        new PoolSetStickyMessage(
            poolName, task.getPnfsId(), true, task.getSticky(), poolExpiration);
    _poolStub.send(
        new CellPath(poolAddress),
        msg,
        PoolSetStickyMessage.class,
        new AbstractMessageCallback<PoolSetStickyMessage>() {
          @Override
          public void success(PoolSetStickyMessage msg) {
            try {
              setToPinned(task);
              task.success();
            } catch (CacheException e) {
              fail(task, e.getRc(), e.getMessage());
            } catch (RuntimeException e) {
              fail(task, CacheException.UNEXPECTED_SYSTEM_EXCEPTION, e.toString());
            }
          }

          @Override
          public void failure(int rc, Object error) {
            switch (rc) {
              case CacheException.POOL_DISABLED:
                /* Pool manager had outdated
                 * information about the pool. Give
                 * it a chance to be updated and
                 * then retry.
                 */
                retry(task, RETRY_DELAY);
                break;
              case CacheException.FILE_NOT_IN_REPOSITORY:
                /* Pnfs manager had stale location
                 * information. The pool clears
                 * this information as a result of
                 * this error, so we retry in a
                 * moment.
                 */
                retry(task, SMALL_DELAY);
                break;
              default:
                /* String.valueOf tolerates a null error object. */
                fail(task, rc, String.valueOf(error));
                break;
            }
          }

          @Override
          public void noroute(CellPath path) {
            /* The pool must have gone down. Give
             * pool manager a moment to notice this
             * and then retry.
             */
            retry(task, RETRY_DELAY);
          }

          @Override
          public void timeout(CellPath path) {
            /* No response from pool. Typically this is
             * because the pool is overloaded.
             */
            fail(task, CacheException.TIMEOUT, "No reply from " + path);
          }
        });
  }

  /**
   * Returns the expiration time to use for a pin while the name space lookup is in progress:
   * now plus twice the sum of the name space stub timeout and the retry delay.
   */
  private Date getExpirationTimeForNameSpaceLookup() {
    long now = System.currentTimeMillis();
    long timeout = _pnfsStub.getTimeoutInMillis();
    return new Date(now + 2 * (timeout + RETRY_DELAY));
  }

  /**
   * Returns the expiration time to use for a pin while pool selection is in progress: now plus
   * twice the sum of the pool manager stub timeout and the retry delay.
   */
  private Date getExpirationTimeForPoolSelection() {
    long now = System.currentTimeMillis();
    long timeout = _poolManagerStub.getTimeoutInMillis();
    return new Date(now + 2 * (timeout + RETRY_DELAY));
  }

  /**
   * Returns the expiration time to use for a pin while the sticky flag is being set: now plus
   * twice the pool stub timeout.
   */
  private Date getExpirationTimeForSettingFlag() {
    long now = System.currentTimeMillis();
    long timeout = _poolStub.getTimeoutInMillis();
    return new Date(now + 2 * timeout);
  }

  /**
   * Creates the pin record and the task for a pin request.
   *
   * <p>If the request carries a request id for which a pin already exists, the request is treated
   * as a resubmission: a pin in state PINNED is returned to the requester right away, any other
   * pin is moved to UNPINNING and detached from the request id before a new pin is created.
   *
   * @param message the pin request
   * @param reply the reply object used to answer the request
   * @return the new task, or {@code null} if the request was already answered from an existing
   *     pin
   */
  @Transactional
  protected PinTask createTask(
      PinManagerPinMessage message, MessageReply<PinManagerPinMessage> reply) {
    PnfsId pnfsId = message.getFileAttributes().getPnfsId();

    if (message.getRequestId() != null) {
      Pin pin = _dao.getPin(pnfsId, message.getRequestId());
      if (pin != null) {
        /* In this case the request is a resubmission. If the
         * previous pin completed then use it. Otherwise abort the
         * previous pin and create a new one.
         */
        if (pin.getState() == PINNED) {
          message.setPin(pin);
          reply.reply(message);
          return null;
        }

        pin.setState(UNPINNING);
        pin.setRequestId(null);
        _dao.storePin(pin);
      }
    }

    Pin pin = new Pin(message.getSubject(), pnfsId);
    pin.setRequestId(message.getRequestId());
    pin.setSticky("PinManager-" + UUID.randomUUID().toString());
    pin.setExpirationTime(getExpirationTimeForPoolSelection());

    return new PinTask(message, reply, _dao.storePin(pin));
  }

  /**
   * Load the pin belonging to the PinTask.
   *
   * @param task the pin task whose pin record is loaded
   * @return the pin record of the task
   * @throws CacheException if the pin no longer exists or is no longer in PINNING.
   */
  protected Pin loadPinBelongingTo(PinTask task) throws CacheException {
    Pin pin = _dao.getPin(task.getPinId(), task.getSticky(), PINNING);
    if (pin == null) {
      throw new CacheException("Operation was aborted");
    }
    return pin;
  }

  /**
   * Sets the expiration time of the task's pin record to {@code date} and updates the task with
   * the stored record.
   *
   * @param task the pin task whose pin is updated
   * @param date the new expiration time
   * @throws CacheException if the pin no longer exists or is no longer in PINNING
   */
  @Transactional(isolation = REPEATABLE_READ)
  protected void refreshTimeout(PinTask task, Date date) throws CacheException {
    Pin pin = loadPinBelongingTo(task);
    pin.setExpirationTime(date);
    task.setPin(_dao.storePin(pin));
  }

  /**
   * Records the selected pool in the task's pin record and extends the expiration time to cover
   * setting the sticky flag.
   *
   * @param task the pin task whose pin is updated
   * @param pool the name of the selected pool
   * @throws CacheException if the pin no longer exists or is no longer in PINNING
   */
  @Transactional(isolation = REPEATABLE_READ)
  protected void setPool(PinTask task, String pool) throws CacheException {
    Pin pin = loadPinBelongingTo(task);
    pin.setExpirationTime(getExpirationTimeForSettingFlag());
    pin.setPool(pool);
    task.setPin(_dao.storePin(pin));
  }

  /**
   * Moves the task's pin record to state PINNED using the expiration time of the task.
   *
   * @param task the pin task whose pin is updated
   * @throws CacheException if the pin no longer exists or is no longer in PINNING
   */
  @Transactional(isolation = REPEATABLE_READ)
  protected void setToPinned(PinTask task) throws CacheException {
    Pin pin = loadPinBelongingTo(task);
    pin.setExpirationTime(task.getExpirationTime());
    pin.setState(PINNED);
    task.setPin(_dao.storePin(pin));
  }

  /**
   * Removes the pin record of a failed task. If a pool was already recorded for the task, a fresh
   * record in state UNPINNING is stored in its place.
   *
   * @param task the pin task whose pin record is cleaned up
   */
  @Transactional
  protected void clearPin(PinTask task) {
    if (task.getPool() != null) {
      /* If the pin record expired or the pin was explicitly
       * unpinned, then the unpin processor may already have
       * submitted a request to the pool to clear the sticky
       * flag. Although out of order delivery of messages is
       * unlikely, if it would happen then we have a race
       * between the set sticky and clear sticky messages. To
       * cover this case we delete the old record and create a
       * fresh one in UNPINNING.
       */
      _dao.deletePin(task.getPin());
      Pin pin = new Pin(task.getSubject(), task.getPnfsId());
      pin.setState(UNPINNING);
      _dao.storePin(pin);
    } else {
      /* We didn't create a sticky flag yet, so there is no
       * reason to keep the record. It will expire by itself,
       * but we delete the record now to avoid that we get
       * tickets from admins wondering why they have records
       * staying in PINNING.
       */
      _dao.deletePin(task.getPin());
    }
  }
}
예제 #3
0
  /**
   * Asks pool manager to select a read pool for the task and, on success, records the pool and
   * proceeds to set the sticky flag on it.
   *
   * <p>The reply is handled asynchronously. Depending on the kind of failure the task is either
   * retried after a delay or failed.
   *
   * @param task the pin task to select a pool for
   */
  private void askPoolManager(final PinTask task) {
    PoolMgrSelectReadPoolMsg msg =
        new PoolMgrSelectReadPoolMsg(
            task.getFileAttributes(),
            task.getProtocolInfo(),
            task.getReadPoolSelectionContext(),
            checkStaging(task));
    msg.setSubject(task.getSubject());
    msg.setSkipCostUpdate(true);
    _poolManagerStub.send(
        msg,
        PoolMgrSelectReadPoolMsg.class,
        new AbstractMessageCallback<PoolMgrSelectReadPoolMsg>() {
          @Override
          public void success(PoolMgrSelectReadPoolMsg msg) {
            try {
              /* Pool manager expects us
               * to keep some state
               * between retries.
               */
              task.setReadPoolSelectionContext(msg.getContext());

              /* Store the pool name in
               * the DB so we know what to
               * clean up if something
               * fails.
               */
              String poolName = msg.getPoolName();
              CellAddressCore poolAddress = msg.getPoolAddress();
              task.getFileAttributes().getLocations().add(poolName);
              setPool(task, poolName);

              setStickyFlag(task, poolName, poolAddress);
            } catch (CacheException e) {
              fail(task, e.getRc(), e.getMessage());
            } catch (RuntimeException e) {
              fail(task, CacheException.UNEXPECTED_SYSTEM_EXCEPTION, e.toString());
            }
          }

          @Override
          public void failure(int rc, Object error) {
            /* Pool manager expects us to
             * keep some state between
             * retries.
             */
            task.setReadPoolSelectionContext(getReply().getContext());
            switch (rc) {
              case CacheException.OUT_OF_DATE:
                /* Pool manager asked for a
                 * refresh of the request.
                 * Retry right away.
                 */
                retry(task, 0);
                break;
              case CacheException.FILE_NOT_IN_REPOSITORY:
              case CacheException.PERMISSION_DENIED:
                /* String.valueOf tolerates a null error object; calling
                 * error.toString() directly would throw out of the
                 * callback and leave the task neither failed nor
                 * cleaned up.
                 */
                fail(task, rc, String.valueOf(error));
                break;
              default:
                /* Ideally we would delegate the retry to the door,
                 * but for the time being the retry is dealt with
                 * by pin manager.
                 */
                retry(task, RETRY_DELAY);
                break;
            }
          }

          @Override
          public void noroute(CellPath path) {
            /* Pool manager is
             * unreachable. We expect this
             * to be transient and retry in
             * a moment.
             */
            retry(task, RETRY_DELAY);
          }

          @Override
          public void timeout(CellPath path) {
            /* Pool manager did not
             * respond. We expect this to be
             * transient and retry in a
             * moment.
             */
            retry(task, SMALL_DELAY);
          }
        });
  }