// marginalise over all corpus files using threads public synchronized void count() { Helper.report("[ContextCounter] Counting over all corpus files..."); File corpusFolder = new File(DepNeighbourhoodSpace.getProjectFolder(), Corpus.getFolderName()); String[] corpusFilenames = corpusFolder.list(); Arrays.sort(corpusFilenames); // run each dep marginaliser thread for (String corpusFilename : corpusFilenames) { DepContextCounterThread ccThread = new DepContextCounterThread( this, corpusFilename, new File(corpusFolder, corpusFilename), amountOfSentences); threads.add(ccThread); (new Thread(ccThread)).start(); } // wait for all threads to finish try { while (!threads.isEmpty()) { wait(); } } catch (InterruptedException e) { } Helper.report("[ContextCounter] ...Finished counting over all corpus files..."); }
/**
 * Writes all context counts to the given writer in an XML-like layout.
 *
 * <p>The output is a {@code <contextcounts>} element carrying the name from
 * {@code DepNeighbourhoodSpace.getName()}, containing one
 * {@code <deprelation>} element per key of {@code depRelationWordCountMap}.
 * Inside each relation, one line per entry is written as
 * {@code count<TAB>word}, taken from the set returned by
 * {@code getSortedMap(depRelation)} from its last element to its first.
 * That set is emptied in the process.
 *
 * @param out the writer to write to; it is neither flushed nor closed here
 * @throws IOException if writing to {@code out} fails
 */
public void saveToWriter(BufferedWriter out) throws IOException {
    String corpusName = DepNeighbourhoodSpace.getName();
    out.write("<contextcounts corpus=\"" + corpusName + "\">\n");

    for (String depRelation : depRelationWordCountMap.keySet()) {
        Helper.report(
                "[ContextCounts] Writing counts for dep relation \"" + depRelation + "\" to file");
        out.write("<deprelation name=\"" + depRelation + "\">\n");

        // drain the sorted entries from the back, one line per entry
        TreeSet<Entry<String, Long>> remaining = getSortedMap(depRelation);
        while (!remaining.isEmpty()) {
            Entry<String, Long> current = remaining.pollLast();
            Long count = current.getValue();
            String word = current.getKey();
            out.write(count + "\t" + word + "\n");
        }

        out.write("</deprelation>\n");
    }

    out.write("</contextcounts>\n");
}