/**
 * Schedules asynchronous loading of one feature matrix (X when {@code isX} is true, Y otherwise)
 * for a generation.
 *
 * <p>One task is submitted to {@code executor} per file listed under the matrix prefix. Each task
 * reads its file line by line, expects {@code id<TAB>vector} on every line, and stores the parsed
 * vector in the generation's matrix while holding that matrix's write lock.
 *
 * <p>The returned set is filled in by the submitted tasks, so it is only complete once all of the
 * futures added to {@code futures} have finished.
 *
 * @param generationPrefix storage prefix of the generation being loaded
 * @param modelDescription supplies the X and Y paths relative to {@code generationPrefix}
 * @param isX true to load the X matrix, false to load the Y matrix
 * @param generation generation whose matrix and lock are used
 * @param futures collection that receives the future of every submitted task
 * @param executor executor that runs the load tasks
 * @return set of all IDs put into the matrix by the submitted tasks
 * @throws IOException if listing the files under the matrix prefix fails
 */
private static LongSet loadXOrY(
    String generationPrefix,
    ALSModelDescription modelDescription,
    boolean isX,
    Generation generation,
    Collection<Future<Object>> futures,
    ExecutorService executor)
    throws IOException {
  // Resolve everything that depends on which matrix is being loaded in one place.
  final Lock matrixWriteLock;
  final LongObjectMap<float[]> matrix;
  String matrixPath;
  if (isX) {
    matrixPath = modelDescription.getXPath();
    matrixWriteLock = generation.getXLock().writeLock();
    matrix = generation.getX();
  } else {
    matrixPath = modelDescription.getYPath();
    matrixWriteLock = generation.getYLock().writeLock();
    matrix = generation.getY();
  }

  final LongSet idsSeen = new LongSet();
  for (final String fileKey : Store.get().list(generationPrefix + matrixPath, true)) {
    Callable<Object> loadTask =
        new Callable<Object>() {
          @Override
          public Void call() throws IOException {
            for (String row : new FileLineIterable(Store.get().readFrom(fileKey))) {
              int separator = row.indexOf('\t');
              Preconditions.checkArgument(
                  separator >= 0, "Bad input line in %s: %s", fileKey, row);
              String idText = row.substring(0, separator);
              String vectorText = row.substring(separator + 1);
              long id = Long.parseLong(idText);
              float[] vector = DataUtils.readFeatureVector(vectorText);
              // The same lock guards both the matrix and the shared set of seen IDs.
              matrixWriteLock.lock();
              try {
                matrix.put(id, vector);
                idsSeen.add(id);
              } finally {
                matrixWriteLock.unlock();
              }
            }
            log.info("Loaded feature vectors from {}", fileKey);
            return null;
          }
        };
    futures.add(executor.submit(loadTask));
  }
  return idsSeen;
}
/**
 * Schedules asynchronous loading of the string-to-numeric ID mapping for a generation.
 *
 * <p>One task is submitted to {@code executor} per file listed under the ID mapping prefix. Each
 * task decodes every line as comma-delimited columns, where column 0 is the numeric ID and
 * column 1 is the string ID, and registers the pair with the generation's ID mapping.
 *
 * @param generationPrefix storage prefix of the generation being loaded
 * @param modelDescription supplies the ID mapping path relative to {@code generationPrefix}
 * @param generation generation whose ID mapping is updated
 * @param futures collection that receives the future of every submitted task
 * @param executor executor that runs the load tasks
 * @throws IOException if listing the files under the ID mapping prefix fails
 */
private static void loadIDMapping(
    String generationPrefix,
    ALSModelDescription modelDescription,
    final Generation generation,
    Collection<Future<Object>> futures,
    ExecutorService executor)
    throws IOException {
  String idMappingPrefix = generationPrefix + modelDescription.getIDMappingPath();
  for (final String prefix : Store.get().list(idMappingPrefix, true)) {
    futures.add(
        executor.submit(
            new Callable<Object>() {
              @Override
              public Void call() throws IOException {
                // Looked up once per file rather than once per line.
                // NOTE(review): the ID mapping is guarded by the known-item lock here;
                // confirm that sharing this lock is intentional.
                Lock writeLock = generation.getKnownItemLock().writeLock();
                StringLongMapping idMapping = generation.getIDMapping();
                for (CharSequence line : new FileLineIterable(Store.get().readFrom(prefix))) {
                  String[] columns = DelimitedDataUtils.decode(line, ',');
                  // Fail with the file and line in the message instead of a bare
                  // ArrayIndexOutOfBoundsException, matching the other loaders.
                  Preconditions.checkArgument(
                      columns.length >= 2, "Bad input line in %s: %s", prefix, line);
                  long numericID = Long.parseLong(columns[0]);
                  String id = columns[1];
                  writeLock.lock();
                  try {
                    idMapping.addMapping(id, numericID);
                  } finally {
                    writeLock.unlock();
                  }
                }
                log.info("Loaded ID mappings from {}", prefix);
                return null;
              }
            }));
  }
}
/**
 * Schedules asynchronous loading of the known item IDs of each user for a generation.
 *
 * <p>One task is submitted to {@code executor} per file listed under the known-items prefix.
 * Each task expects {@code userID<TAB>items} on every line, converts the part after the tab to a
 * set of item IDs, and stores it in the generation's known-items map while holding the
 * known-item write lock.
 *
 * <p>The returned set is filled in by the submitted tasks, so it is only complete once all of the
 * futures added to {@code futures} have finished.
 *
 * @param generationPrefix storage prefix of the generation being loaded
 * @param modelDescription supplies the known-items path relative to {@code generationPrefix}
 * @param generation generation whose known-items map and lock are used
 * @param futures collection that receives the future of every submitted task
 * @param executor executor that runs the load tasks
 * @return set of all user IDs put into the known-items map by the submitted tasks
 * @throws IOException if listing the files under the known-items prefix fails
 */
private static LongSet loadKnownItemIDs(
    String generationPrefix,
    ALSModelDescription modelDescription,
    final Generation generation,
    Collection<Future<Object>> futures,
    ExecutorService executor)
    throws IOException {
  final LongSet usersSeen = new LongSet();
  String listingPrefix = generationPrefix + modelDescription.getKnownItemsPath();
  for (final String fileKey : Store.get().list(listingPrefix, true)) {
    Callable<Object> loadTask =
        new Callable<Object>() {
          @Override
          public Void call() throws IOException {
            for (String row : new FileLineIterable(Store.get().readFrom(fileKey))) {
              int separator = row.indexOf('\t');
              Preconditions.checkArgument(
                  separator >= 0, "Bad input line in %s: %s", fileKey, row);
              String userText = row.substring(0, separator);
              String itemsText = row.substring(separator + 1);
              long userID = Long.parseLong(userText);
              LongSet itemIDs = stringToSet(itemsText);
              Lock knownItemsWriteLock = generation.getKnownItemLock().writeLock();
              LongObjectMap<LongSet> knownItemsByUser = generation.getKnownItemIDs();
              // The same lock guards both the map and the shared set of seen users.
              knownItemsWriteLock.lock();
              try {
                knownItemsByUser.put(userID, itemIDs);
                usersSeen.add(userID);
              } finally {
                knownItemsWriteLock.unlock();
              }
            }
            log.info("Loaded known items from {}", fileKey);
            return null;
          }
        };
    futures.add(executor.submit(loadTask));
  }
  return usersSeen;
}
void loadModel(int generationID, Generation currentGeneration) throws IOException { File modelPMMLFile = File.createTempFile("oryx-model", ".pmml.gz"); modelPMMLFile.deleteOnExit(); IOUtils.delete(modelPMMLFile); String generationPrefix = Namespaces.getInstanceGenerationPrefix(instanceDir, generationID); String modelPMMLKey = generationPrefix + "model.pmml.gz"; Store.get().download(modelPMMLKey, modelPMMLFile); log.info("Loading model description from {}", modelPMMLKey); ALSModelDescription modelDescription = ALSModelDescription.read(modelPMMLFile); IOUtils.delete(modelPMMLFile); Collection<Future<Object>> futures = new ArrayList<>(); // Limit this fairly sharply to 2 so as to not saturate the network link ExecutorService executor = ExecutorUtils.buildExecutor("LoadModel", 2); LongSet loadedUserIDs; LongSet loadedItemIDs; LongSet loadedUserIDsForKnownItems; try { loadedUserIDs = loadXOrY(generationPrefix, modelDescription, true, currentGeneration, futures, executor); loadedItemIDs = loadXOrY(generationPrefix, modelDescription, false, currentGeneration, futures, executor); if (currentGeneration.getKnownItemIDs() == null) { loadedUserIDsForKnownItems = null; } else { loadedUserIDsForKnownItems = loadKnownItemIDs( generationPrefix, modelDescription, currentGeneration, futures, executor); } loadIDMapping(generationPrefix, modelDescription, currentGeneration, futures, executor); ExecutorUtils.getResults(futures); log.info("Finished all load tasks"); } finally { ExecutorUtils.shutdownNowAndAwait(executor); } log.info("Pruning old entries..."); synchronized (lockForRecent) { removeNotUpdated( currentGeneration.getX().keySetIterator(), loadedUserIDs, recentlyActiveUsers, currentGeneration.getXLock().writeLock()); removeNotUpdated( currentGeneration.getY().keySetIterator(), loadedItemIDs, recentlyActiveItems, currentGeneration.getYLock().writeLock()); if (loadedUserIDsForKnownItems != null && currentGeneration.getKnownItemIDs() != null) { removeNotUpdated( 
currentGeneration.getKnownItemIDs().keySetIterator(), loadedUserIDsForKnownItems, recentlyActiveUsers, currentGeneration.getKnownItemLock().writeLock()); } this.recentlyActiveItems.clear(); this.recentlyActiveUsers.clear(); } log.info("Recomputing generation state..."); currentGeneration.recomputeState(); log.info( "All model elements loaded, {} users and {} items", currentGeneration.getNumUsers(), currentGeneration.getNumItems()); }