/**
 * Creates the files containing all paths for all concepts up to a certain depth.
 *
 * <p>The method bails out with a warning when the output directory is not empty. Otherwise it
 * collects the concepts to index, splits them into chunks, hands each chunk to a
 * {@code ConceptIndexer} worker running on a fixed thread pool, and blocks until the workers
 * have counted the shared latch down to zero.
 *
 * @param kb the knowledge base supplying the concept iterator; also passed to every worker
 * @param concepts restricts indexing to these concepts; when empty, every concept returned by
 *     the knowledge base iterator is indexed
 * @throws IOException declared for callers; presumably raised by the configuration or
 *     directory-manager helpers used here (not visible in this block)
 */
public static void createIndexFiles(KnowledgeBase kb, Collection<String> concepts)
        throws IOException {
    Iterator<String> conceptIterator = kb.getConceptIterator();
    String outputDir =
            KnowledgeConfiguration.getInstance().getKnowledgeBasePathIndexDir(kb) + TXT_FILE_DIR;
    if (!Files.isDirectoryEmpty(new File(outputDir))) {
        log.warn(outputDir + " IS NOT EMPTY, BAILING OUT ...");
        return;
    }

    int maxSearchDepth = getMaxSearchDepth();
    DirectoryFileManager dirManager = new DirectoryFileManager(outputDir, 1000);
    log.info("CREATING THE INDEXABLE PATH FILES FOR: " + kb.name() + " INTO " + outputDir);

    // Select the concepts to index: everything when no filter is given, else only the
    // concepts named in the filter.
    int idx = 0;
    List<String> goodConcepts = new ArrayList<String>();
    while (conceptIterator.hasNext()) {
        String concept = conceptIterator.next();
        if (concepts.isEmpty() || concepts.contains(concept)) {
            goodConcepts.add(concept);
        }
        idx++;
        if ((idx % 100000) == 0) {
            log.info("ITERATED THROUGH " + idx + " CONCEPTS SO FAR ...");
        }
    }

    // Nothing selected: a fixed thread pool cannot be created with zero threads.
    if (goodConcepts.isEmpty()) {
        log.info("NO CONCEPTS TO INDEX FOR: " + kb.name() + ", NOTHING TO DO ...");
        return;
    }

    // Integer division yields 0 when there are fewer concepts than NSPLITS; clamp to 1 so the
    // split helper is never asked for zero-sized chunks.
    int chunkSize = Math.max(1, goodConcepts.size() / NSPLITS);
    List<List<String>> splitConcepts = Collections.split(goodConcepts, chunkSize);
    int nThreads = splitConcepts.size();
    if (nThreads == 0) {
        log.info("NO CONCEPTS TO INDEX FOR: " + kb.name() + ", NOTHING TO DO ...");
        return;
    }

    ExecutorService threadExecutor = Executors.newFixedThreadPool(nThreads);
    CountDownLatch doneSignal = new CountDownLatch(nThreads);
    Timer timer = new Timer();
    try {
        for (int i = 0; i < nThreads; i++) {
            ConceptIndexer worker =
                    new ConceptIndexer(
                            splitConcepts.get(i), i, dirManager, kb, maxSearchDepth, doneSignal);
            threadExecutor.execute(worker);
        }
        doneSignal.await();
    } catch (InterruptedException ie) {
        // Restore the interrupt flag so callers further up the stack can observe it.
        Thread.currentThread().interrupt();
        log.warn("INTERRUPTED WHILE WAITING FOR THE INDEXING WORKERS: " + ie);
    } finally {
        // Always release the pool; shutdown() lets already submitted workers finish.
        threadExecutor.shutdown();
    }
    timer.tick("TO CREATE INDEXABLE FILES");
}
public void getConcepts() throws IOException { int counter = 0; Timer timer = new Timer(); for (String concept : concepts) { Set<List<String>> paths = kb.getAllPathsFrom(concept, maxSearchDepth); FileWriter writerFrom = dirManager.getFileWriter(concept + ".txt"); // saves the paths starting from the concept for (List<String> path : paths) writerFrom.write(Strings.join(path) + "\n"); writerFrom.close(); if ((counter % 100) == 0) timer.tick( "\t[WORKER] " + id + " TO CREATE INDEXABLE FILES FOR " + counter + " CONCEPTS SO FAR ... "); counter++; } }