Beispiel #1
0
  /**
   * Counts over all corpus files, starting one worker thread per file, and blocks until the
   * {@code threads} collection is empty again.
   *
   * <p>The corpus folder is resolved from {@code DepNeighbourhoodSpace.getProjectFolder()} and
   * {@code Corpus.getFolderName()}; its entries are processed in sorted filename order.
   *
   * <p>This method holds the monitor of {@code this} while it starts the workers and only
   * releases it inside {@link Object#wait()}. NOTE(review): workers presumably remove themselves
   * from {@code threads} and call {@code notify}/{@code notifyAll} on this object when done; that
   * code is not visible here — confirm.
   *
   * <p>If the waiting thread is interrupted, its interrupt status is restored and the method
   * returns early; worker threads that were already started are not stopped.
   *
   * @throws IllegalStateException if the corpus folder cannot be listed (it does not exist, is
   *     not a directory, or an I/O error occurred)
   */
  public synchronized void count() {
    Helper.report("[ContextCounter] Counting over all corpus files...");

    File corpusFolder = new File(DepNeighbourhoodSpace.getProjectFolder(), Corpus.getFolderName());
    String[] corpusFilenames = corpusFolder.list();
    if (corpusFilenames == null) {
      // File.list() returns null instead of throwing when the path is not a readable directory;
      // fail with a message that names the folder rather than a bare NullPointerException.
      throw new IllegalStateException(
          "Cannot list corpus folder: " + corpusFolder.getAbsolutePath());
    }
    Arrays.sort(corpusFilenames);

    // start one counter thread per corpus file
    for (String corpusFilename : corpusFilenames) {
      DepContextCounterThread ccThread =
          new DepContextCounterThread(
              this, corpusFilename, new File(corpusFolder, corpusFilename), amountOfSentences);
      // register the worker before starting it so the wait loop below sees it
      threads.add(ccThread);
      (new Thread(ccThread)).start();
    }

    // wait for all threads to finish; loop guards against spurious wake-ups
    try {
      while (!threads.isEmpty()) {
        wait();
      }
    } catch (InterruptedException e) {
      // Restore the interrupt status so callers can observe the interruption, and do not claim
      // that counting finished when it may not have.
      Thread.currentThread().interrupt();
      Helper.report("[ContextCounter] Interrupted while waiting for counter threads to finish");
      return;
    }

    Helper.report("[ContextCounter] ...Finished counting over all corpus files...");
  }
Beispiel #2
0
 /**
  * Writes the word counts of every dependency relation to the given writer.
  *
  * <p>The output starts with a {@code <contextcounts corpus="...">} line, followed by one
  * {@code <deprelation name="...">} section per key of {@code depRelationWordCountMap}, and ends
  * with {@code </contextcounts>}. Inside a section each line is {@code value<TAB>key}, emitted
  * from the last (greatest) element of the set returned by {@code getSortedMap} down to the
  * first. That set is emptied in the process.
  *
  * <p>The writer is neither flushed nor closed here.
  *
  * @param out destination for the serialised counts
  * @throws IOException if writing to {@code out} fails
  */
 public void saveToWriter(BufferedWriter out) throws IOException {
   String corpusName = DepNeighbourhoodSpace.getName();
   out.write("<contextcounts corpus=\"" + corpusName + "\">\n");

   for (String relation : depRelationWordCountMap.keySet()) {
     String progressMessage =
         "[ContextCounts] Writing counts for dep relation \"" + relation + "\" to file";
     Helper.report(progressMessage);

     String openingTag = "<deprelation name=\"" + relation + "\">\n";
     out.write(openingTag);

     // Drain the set from its greatest element downwards, one output line per entry.
     TreeSet<Entry<String, Long>> remaining = getSortedMap(relation);
     for (; !remaining.isEmpty(); ) {
       Entry<String, Long> highest = remaining.pollLast();
       StringBuilder line = new StringBuilder();
       line.append(highest.getValue()).append("\t").append(highest.getKey()).append("\n");
       out.write(line.toString());
     }

     out.write("</deprelation>\n");
   }

   out.write("</contextcounts>\n");
 }