Пример #1
0
  /**
   * Schedules asynchronous loading of one feature matrix (X when {@code isX} is true, Y otherwise)
   * into the given generation.
   *
   * <p>One task is submitted to {@code executor} for every entry listed under the matrix prefix,
   * and each resulting future is appended to {@code futures}. A task reads its input line by line;
   * every line must look like {@code id<TAB>featureVector}. Each parsed vector is stored in the
   * matrix, and its ID recorded, while holding the write lock of that matrix.
   *
   * <p>The returned set is filled in by the submitted tasks, so it is only complete once all of
   * the futures added by this call have finished.
   *
   * @param generationPrefix prefix under which this generation's data is stored
   * @param modelDescription supplies the relative X and Y paths
   * @param isX {@code true} to load X, {@code false} to load Y
   * @param generation generation whose matrix and lock are used
   * @param futures collection that receives one future per submitted task
   * @param executor executor that runs the load tasks
   * @return the set that the tasks populate with every ID they load
   * @throws IOException propagated from the {@code Store} calls made while listing inputs
   */
  private static LongSet loadXOrY(
      String generationPrefix,
      ALSModelDescription modelDescription,
      boolean isX,
      Generation generation,
      Collection<Future<Object>> futures,
      ExecutorService executor)
      throws IOException {

    final LongSet loadedIDs = new LongSet();

    // Pick the path, lock and matrix that belong to the requested side of the factorization.
    String matrixPath;
    final Lock matrixWriteLock;
    final LongObjectMap<float[]> matrix;
    if (isX) {
      matrixPath = modelDescription.getXPath();
      matrixWriteLock = generation.getXLock().writeLock();
      matrix = generation.getX();
    } else {
      matrixPath = modelDescription.getYPath();
      matrixWriteLock = generation.getYLock().writeLock();
      matrix = generation.getY();
    }
    String matrixPrefix = generationPrefix + matrixPath;

    for (final String fileKey : Store.get().list(matrixPrefix, true)) {
      Callable<Object> loadTask =
          new Callable<Object>() {
            @Override
            public Void call() throws IOException {
              for (String row : new FileLineIterable(Store.get().readFrom(fileKey))) {
                int separatorAt = row.indexOf('\t');
                Preconditions.checkArgument(
                    separatorAt >= 0, "Bad input line in %s: %s", fileKey, row);

                long id = Long.parseLong(row.substring(0, separatorAt));
                float[] vector = DataUtils.readFeatureVector(row.substring(separatorAt + 1));

                // The same lock guards both the matrix and the shared set of loaded IDs.
                matrixWriteLock.lock();
                try {
                  matrix.put(id, vector);
                  loadedIDs.add(id);
                } finally {
                  matrixWriteLock.unlock();
                }
              }
              log.info("Loaded feature vectors from {}", fileKey);
              return null;
            }
          };
      futures.add(executor.submit(loadTask));
    }

    return loadedIDs;
  }
Пример #2
0
 /**
  * Schedules asynchronous loading of the string-ID to numeric-ID mapping of a generation.
  *
  * <p>One task is submitted to {@code executor} for every entry listed under the ID-mapping
  * prefix, and each resulting future is appended to {@code futures}. A task decodes every input
  * line as comma-delimited columns {@code numericID,stringID} and registers the pair with the
  * generation's ID mapping while holding a write lock.
  *
  * <p>A line with fewer than two columns is rejected with an {@link IllegalArgumentException}
  * that names the offending file and line, matching the validation done by the other loaders.
  *
  * @param generationPrefix prefix under which this generation's data is stored
  * @param modelDescription supplies the relative ID-mapping path
  * @param generation generation whose ID mapping is updated
  * @param futures collection that receives one future per submitted task
  * @param executor executor that runs the load tasks
  * @throws IOException propagated from the {@code Store} calls made while listing inputs
  */
 private static void loadIDMapping(
     String generationPrefix,
     ALSModelDescription modelDescription,
     final Generation generation,
     Collection<Future<Object>> futures,
     ExecutorService executor)
     throws IOException {
   String idMappingPrefix = generationPrefix + modelDescription.getIDMappingPath();
   for (final String prefix : Store.get().list(idMappingPrefix, true)) {
     futures.add(
         executor.submit(
             new Callable<Object>() {
               @Override
               public Void call() throws IOException {
                 // Looked up once per task rather than once per line.
                 // NOTE(review): the ID mapping is guarded by the known-item lock here;
                 // confirm that this is the intended lock for this structure.
                 Lock writeLock = generation.getKnownItemLock().writeLock();
                 StringLongMapping idMapping = generation.getIDMapping();
                 for (CharSequence line : new FileLineIterable(Store.get().readFrom(prefix))) {
                   String[] columns = DelimitedDataUtils.decode(line, ',');
                   // Fail with file/line context instead of a bare index error.
                   Preconditions.checkArgument(
                       columns.length >= 2, "Bad input line in %s: %s", prefix, line);
                   long numericID = Long.parseLong(columns[0]);
                   String id = columns[1];
                   writeLock.lock();
                   try {
                     idMapping.addMapping(id, numericID);
                   } finally {
                     writeLock.unlock();
                   }
                 }
                 log.info("Loaded ID mapping from {}", prefix);
                 return null;
               }
             }));
   }
 }
Пример #3
0
 /**
  * Schedules asynchronous loading of the per-user known-item sets of a generation.
  *
  * <p>One task is submitted to {@code executor} for every entry listed under the known-items
  * prefix, and each resulting future is appended to {@code futures}. Every input line must look
  * like {@code userID<TAB>itemIDs}; the part after the tab is parsed by {@code stringToSet}. The
  * parsed set is stored in the generation's known-item map, and the user ID recorded, while
  * holding the known-item write lock.
  *
  * <p>The returned set is filled in by the submitted tasks, so it is only complete once all of
  * the futures added by this call have finished.
  *
  * @param generationPrefix prefix under which this generation's data is stored
  * @param modelDescription supplies the relative known-items path
  * @param generation generation whose known-item map and lock are used
  * @param futures collection that receives one future per submitted task
  * @param executor executor that runs the load tasks
  * @return the set that the tasks populate with every user ID they load
  * @throws IOException propagated from the {@code Store} calls made while listing inputs
  */
 private static LongSet loadKnownItemIDs(
     String generationPrefix,
     ALSModelDescription modelDescription,
     final Generation generation,
     Collection<Future<Object>> futures,
     ExecutorService executor)
     throws IOException {
   final LongSet loadedIDs = new LongSet();
   String listingPrefix = generationPrefix + modelDescription.getKnownItemsPath();

   for (final String fileKey : Store.get().list(listingPrefix, true)) {
     Callable<Object> loadTask =
         new Callable<Object>() {
           @Override
           public Void call() throws IOException {
             for (String row : new FileLineIterable(Store.get().readFrom(fileKey))) {
               int separatorAt = row.indexOf('\t');
               Preconditions.checkArgument(
                   separatorAt >= 0, "Bad input line in %s: %s", fileKey, row);

               long userID = Long.parseLong(row.substring(0, separatorAt));
               LongSet itemIDs = stringToSet(row.substring(separatorAt + 1));

               Lock knownItemWriteLock = generation.getKnownItemLock().writeLock();
               LongObjectMap<LongSet> knownItemsByUser = generation.getKnownItemIDs();

               // The same lock guards both the known-item map and the set of loaded IDs.
               knownItemWriteLock.lock();
               try {
                 knownItemsByUser.put(userID, itemIDs);
                 loadedIDs.add(userID);
               } finally {
                 knownItemWriteLock.unlock();
               }
             }
             log.info("Loaded known items from {}", fileKey);
             return null;
           }
         };
     futures.add(executor.submit(loadTask));
   }

   return loadedIDs;
 }
Пример #4
0
  /**
   * Loads the model of the given generation into {@code currentGeneration}.
   *
   * <p>The method proceeds in four phases:
   *
   * <ol>
   *   <li>Downloads {@code model.pmml.gz} for the generation into a temporary file and parses it
   *       into an {@link ALSModelDescription}. The temporary file is removed afterwards, whether
   *       or not the download or parse succeeded.
   *   <li>Schedules the X, Y, known-item (only when the generation has a known-item map) and
   *       ID-mapping loaders on a two-thread executor, then collects their results via
   *       {@code ExecutorUtils.getResults}. The executor is always shut down.
   *   <li>While holding {@code lockForRecent}, calls {@code removeNotUpdated} for X, Y and the
   *       known-item map with the loaded ID sets and the recently-active sets (presumably to drop
   *       entries that were neither reloaded nor recently active; confirm against that helper),
   *       then clears the recently-active sets.
   *   <li>Asks the generation to recompute its derived state.
   * </ol>
   *
   * @param generationID ID of the generation to load
   * @param currentGeneration generation object that receives the loaded data
   * @throws IOException if the model description cannot be downloaded, read or cleaned up, or if
   *     scheduling the loaders fails
   */
  void loadModel(int generationID, Generation currentGeneration) throws IOException {

    File modelPMMLFile = File.createTempFile("oryx-model", ".pmml.gz");
    modelPMMLFile.deleteOnExit();
    // Remove the placeholder created by createTempFile before the download writes to that path.
    IOUtils.delete(modelPMMLFile);

    String generationPrefix = Namespaces.getInstanceGenerationPrefix(instanceDir, generationID);
    String modelPMMLKey = generationPrefix + "model.pmml.gz";

    ALSModelDescription modelDescription;
    try {
      Store.get().download(modelPMMLKey, modelPMMLFile);
      log.info("Loading model description from {}", modelPMMLKey);
      modelDescription = ALSModelDescription.read(modelPMMLFile);
    } finally {
      // Clean up even when download/read fails, so failed loads do not leave temp files behind
      // until JVM exit. The file may not exist if the download failed before creating it.
      if (modelPMMLFile.exists()) {
        IOUtils.delete(modelPMMLFile);
      }
    }

    Collection<Future<Object>> futures = new ArrayList<>();
    // Limit this fairly sharply to 2 so as to not saturate the network link
    ExecutorService executor = ExecutorUtils.buildExecutor("LoadModel", 2);

    LongSet loadedUserIDs;
    LongSet loadedItemIDs;
    LongSet loadedUserIDsForKnownItems;
    try {
      loadedUserIDs =
          loadXOrY(generationPrefix, modelDescription, true, currentGeneration, futures, executor);
      loadedItemIDs =
          loadXOrY(generationPrefix, modelDescription, false, currentGeneration, futures, executor);

      // Known items are only loaded when the generation keeps a known-item map at all.
      if (currentGeneration.getKnownItemIDs() == null) {
        loadedUserIDsForKnownItems = null;
      } else {
        loadedUserIDsForKnownItems =
            loadKnownItemIDs(
                generationPrefix, modelDescription, currentGeneration, futures, executor);
      }

      loadIDMapping(generationPrefix, modelDescription, currentGeneration, futures, executor);

      // The loaded-ID sets are filled by the tasks, so they must finish before pruning below.
      ExecutorUtils.getResults(futures);
      log.info("Finished all load tasks");

    } finally {
      ExecutorUtils.shutdownNowAndAwait(executor);
    }

    log.info("Pruning old entries...");
    synchronized (lockForRecent) {
      removeNotUpdated(
          currentGeneration.getX().keySetIterator(),
          loadedUserIDs,
          recentlyActiveUsers,
          currentGeneration.getXLock().writeLock());
      removeNotUpdated(
          currentGeneration.getY().keySetIterator(),
          loadedItemIDs,
          recentlyActiveItems,
          currentGeneration.getYLock().writeLock());
      if (loadedUserIDsForKnownItems != null && currentGeneration.getKnownItemIDs() != null) {
        removeNotUpdated(
            currentGeneration.getKnownItemIDs().keySetIterator(),
            loadedUserIDsForKnownItems,
            recentlyActiveUsers,
            currentGeneration.getKnownItemLock().writeLock());
      }
      // Cleared only after all three prunes, since users are consulted twice above.
      this.recentlyActiveItems.clear();
      this.recentlyActiveUsers.clear();
    }

    log.info("Recomputing generation state...");
    currentGeneration.recomputeState();

    log.info(
        "All model elements loaded, {} users and {} items",
        currentGeneration.getNumUsers(),
        currentGeneration.getNumItems());
  }