Exemple #1
0
  /**
   * Builds a {@link DepContextCounts} from the serialised form supplied by {@code in}.
   *
   * <p>Lines are consumed one at a time until the reader is exhausted or a line equal to
   * {@code </contextcounts>} is seen. A line starting with {@code <contextcounts} that matches
   * {@code contextCountsPattern} sets the corpus name from the first capture group. A line
   * starting with {@code <deprelation} that matches {@code depRelationPattern} hands the reader
   * to {@link #importDepRelationCounts} together with the first capture group. Every other line
   * is ignored.
   *
   * @param in reader positioned at the start of the serialised counts; not closed here
   * @return the populated counts object
   * @throws IOException if reading from {@code in} fails
   */
  public static DepContextCounts importFromReader(BufferedReader in) throws IOException {
    Helper.report("[ContextCounts] Importing context word counts...");
    DepContextCounts counts = new DepContextCounts();

    for (String current = in.readLine(); current != null; current = in.readLine()) {
      if (current.startsWith("<contextcounts")) {
        // Header line: the first capture group carries the corpus name.
        Matcher header = contextCountsPattern.matcher(current);
        if (header.find()) {
          Corpus.setName(header.group(1));
        }
        continue;
      }

      if (current.startsWith("<deprelation")) {
        // Section start: the first capture group names the dependency relation.
        Matcher section = depRelationPattern.matcher(current);
        if (section.find()) {
          importDepRelationCounts(in, counts, section.group(1));
        }
        continue;
      }

      if (current.equals("</contextcounts>")) {
        break;
      }
    }

    Helper.report("[ContextCounts] ...Finished importing context word counts.");
    return counts;
  }
Exemple #2
0
  /**
   * Counts over all corpus files, starting one {@link DepContextCounterThread} per file.
   *
   * <p>The corpus folder is resolved from the project folder and the corpus folder name. Its
   * entries are processed in sorted filename order. After starting the workers this method waits
   * on this object's monitor until {@code threads} is empty.
   *
   * <p>NOTE(review): this relies on each worker removing itself from {@code threads} and
   * notifying this monitor when it is done; that code is not visible here, so confirm.
   *
   * <p>If the waiting thread is interrupted, waiting stops early and the interrupt status is
   * restored so that callers can observe it.
   *
   * @throws IllegalStateException if the corpus folder cannot be listed (it does not exist, is
   *     not a directory, or an I/O error occurred)
   */
  public synchronized void count() {
    Helper.report("[ContextCounter] Counting over all corpus files...");

    File corpusFolder = new File(DepNeighbourhoodSpace.getProjectFolder(), Corpus.getFolderName());
    String[] corpusFilenames = corpusFolder.list();
    if (corpusFilenames == null) {
      // File.list() returns null instead of throwing; fail with a message that names the folder
      // rather than with a bare NullPointerException from Arrays.sort.
      throw new IllegalStateException(
          "Cannot list corpus folder (missing, not a directory, or unreadable): " + corpusFolder);
    }
    Arrays.sort(corpusFilenames);

    // run each dep marginaliser thread
    for (String corpusFilename : corpusFilenames) {
      DepContextCounterThread ccThread =
          new DepContextCounterThread(
              this, corpusFilename, new File(corpusFolder, corpusFilename), amountOfSentences);
      threads.add(ccThread);
      (new Thread(ccThread)).start();
    }

    // wait for all threads to finish; wait() releases the monitor while blocked
    try {
      while (!threads.isEmpty()) {
        wait();
      }
    } catch (InterruptedException e) {
      // Stop waiting, but keep the interrupt visible to the caller instead of swallowing it.
      Thread.currentThread().interrupt();
    }

    Helper.report("[ContextCounter] ...Finished counting over all corpus files...");
  }
Exemple #3
0
 /**
  * Writes the counts to {@code contextCountsFile} via {@link #saveToWriter}.
  *
  * <p>The writer is always closed, even when writing fails part-way. An {@link IOException}
  * is not propagated: its stack trace is printed and the method returns normally.
  *
  * @param contextCountsFile destination file
  */
 public void saveToFile(File contextCountsFile) {
   try {
     Helper.report("[ContextCounts] Saving counts to \"" + contextCountsFile + "\"...");
     BufferedWriter out = Helper.getFileWriter(contextCountsFile);
     try {
       saveToWriter(out);
     } finally {
       // Close on every path so a failed write does not leak the file handle.
       out.close();
     }
     Helper.report(
         "[ContextCounts] ...Finished saving counts to \"" + contextCountsFile + "\"...");
   } catch (IOException e) {
     e.printStackTrace();
   }
 }
Exemple #4
0
 /**
  * Reads a {@link DepContextCounts} from {@code file} via {@link #importFromReader}.
  *
  * <p>The reader is always closed, even when the import fails part-way. An
  * {@link IOException} is not propagated: its stack trace is printed and {@code null} is
  * returned.
  *
  * @param file file to read the serialised counts from
  * @return the imported counts, or {@code null} if an {@link IOException} occurred
  */
 public static DepContextCounts importFromFile(File file) {
   DepContextCounts dcc = null;
   try {
     BufferedReader in = Helper.getFileReader(file);
     try {
       dcc = importFromReader(in);
     } finally {
       // Close on every path so a failed import does not leak the file handle.
       in.close();
     }
   } catch (IOException e) {
     e.printStackTrace();
   }
   return dcc;
 }
Exemple #5
0
 /**
  * Serialises all counts to {@code out}.
  *
  * <p>The output is wrapped in a {@code <contextcounts>} element. Each key of
  * {@code depRelationWordCountMap} becomes a {@code <deprelation>} section holding one
  * {@code count<TAB>word} line per entry. Entries are taken from the end of the set returned
  * by {@code getSortedMap}, so they appear in descending order of that set's ordering.
  *
  * @param out destination writer; it is neither flushed nor closed here
  * @throws IOException if writing to {@code out} fails
  */
 public void saveToWriter(BufferedWriter out) throws IOException {
   String openingTag = "<contextcounts corpus=\"" + DepNeighbourhoodSpace.getName() + "\">\n";
   out.write(openingTag);

   for (String relationName : depRelationWordCountMap.keySet()) {
     Helper.report(
         "[ContextCounts] Writing counts for dep relation \"" + relationName + "\" to file");

     String sectionStart = "<deprelation name=\"" + relationName + "\">\n";
     out.write(sectionStart);

     // Drain the sorted set from its last element towards its first.
     TreeSet<Entry<String, Long>> remaining = getSortedMap(relationName);
     while (!remaining.isEmpty()) {
       Entry<String, Long> entry = remaining.pollLast();
       Long count = entry.getValue();
       String word = entry.getKey();
       out.write(count + "\t" + word + "\n");
     }

     out.write("</deprelation>\n");
   }

   out.write("</contextcounts>\n");
 }
Exemple #6
0
 /**
  * Reads the {@code count<TAB>word} lines of one dependency-relation section and stores them
  * in {@code dmc}.
  *
  * <p>Reading stops at the first line equal to {@code </deprelation>} or at end of input.
  * Blank lines are skipped. Each remaining line is split at its first tab only, so a context
  * word that itself contains a tab is kept intact.
  *
  * @param in reader positioned just after the opening {@code <deprelation ...>} line
  * @param dmc counts object that receives the imported entries
  * @param depRelationString name of the dependency relation the entries belong to
  * @throws IOException if reading fails, or if a line has no tab separator or a count that is
  *     not a valid {@code long}
  */
 public static void importDepRelationCounts(
     BufferedReader in, DepContextCounts dmc, String depRelationString) throws IOException {
   Helper.report(
       "[ContextCounts] Importing context word counts for dep relation \""
           + depRelationString
           + "\"...");
   String line;
   while ((line = in.readLine()) != null) {
     if (line.equals("</deprelation>")) {
       break;
     }
     if (line.trim().isEmpty()) {
       // Tolerate stray blank lines instead of failing on entries[1].
       continue;
     }

     // Limit 2: everything after the first tab is the context word.
     String[] entries = line.split("\t", 2);
     if (entries.length < 2) {
       throw new IOException(
           "Malformed count line for dep relation \""
               + depRelationString
               + "\" (expected \"<count>\\t<word>\"): \""
               + line
               + "\"");
     }

     long count;
     try {
       count = Long.parseLong(entries[0]);
     } catch (NumberFormatException e) {
       throw new IOException(
           "Invalid count \""
               + entries[0]
               + "\" for dep relation \""
               + depRelationString
               + "\" in line: \""
               + line
               + "\"",
           e);
     }

     dmc.setCount(depRelationString, entries[1], count);
   }
 }