public static DepContextCounts importFromReader(BufferedReader in) throws IOException { Helper.report("[ContextCounts] Importing context word counts..."); DepContextCounts dmc = new DepContextCounts(); String line; while ((line = in.readLine()) != null) { if (line.startsWith("<contextcounts")) { Matcher matcher = contextCountsPattern.matcher(line); if (matcher.find()) { // ignore first entry: corpus name Corpus.setName(matcher.group(1)); } } else if (line.startsWith("<deprelation")) { Matcher matcher = depRelationPattern.matcher(line); if (matcher.find()) { // ignore first entry: corpus name String depRelationString = matcher.group(1); importDepRelationCounts(in, dmc, depRelationString); } } else if (line.equals("</contextcounts>")) { break; } } Helper.report("[ContextCounts] ...Finished importing context word counts."); return dmc; }
// marginalise over all corpus files using threads public synchronized void count() { Helper.report("[ContextCounter] Counting over all corpus files..."); File corpusFolder = new File(DepNeighbourhoodSpace.getProjectFolder(), Corpus.getFolderName()); String[] corpusFilenames = corpusFolder.list(); Arrays.sort(corpusFilenames); // run each dep marginaliser thread for (String corpusFilename : corpusFilenames) { DepContextCounterThread ccThread = new DepContextCounterThread( this, corpusFilename, new File(corpusFolder, corpusFilename), amountOfSentences); threads.add(ccThread); (new Thread(ccThread)).start(); } // wait for all threads to finish try { while (!threads.isEmpty()) { wait(); } } catch (InterruptedException e) { } Helper.report("[ContextCounter] ...Finished counting over all corpus files..."); }
/**
 * Saves the counts to the given file.
 *
 * <p>Opens a writer via {@code Helper.getFileWriter}, delegates the actual serialisation to
 * {@link #saveToWriter(BufferedWriter)} and closes the writer afterwards, whether or not
 * writing succeeded. An {@link IOException} is not propagated: its stack trace is printed
 * and the method returns normally.
 *
 * @param contextCountsFile destination file
 */
public void saveToFile(File contextCountsFile) {
    try {
        Helper.report("[ContextCounts] Saving counts to \"" + contextCountsFile + "\"...");
        BufferedWriter out = Helper.getFileWriter(contextCountsFile);
        try {
            saveToWriter(out);
        } finally {
            // Always release the file handle, even if writing failed part-way through.
            out.close();
        }
        Helper.report(
                "[ContextCounts] ...Finished saving counts to \"" + contextCountsFile + "\"...");
    } catch (IOException e) {
        e.printStackTrace();
    }
}
/**
 * Loads context word counts from the given file.
 *
 * <p>Opens a reader via {@code Helper.getFileReader}, delegates parsing to
 * {@link #importFromReader(BufferedReader)} and closes the reader afterwards, whether or not
 * parsing succeeded. An {@link IOException} is not propagated: its stack trace is printed.
 *
 * @param file file to read the counts from
 * @return the imported counts, or {@code null} if the file could not be opened or read
 */
public static DepContextCounts importFromFile(File file) {
    DepContextCounts dcc = null;
    try {
        BufferedReader in = Helper.getFileReader(file);
        try {
            dcc = importFromReader(in);
        } finally {
            // Always release the file handle, also when parsing fails with an exception.
            in.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return dcc;
}
/**
 * Writes all counts to the given writer in a tagged, line-based text format.
 *
 * <p>Output layout:
 * <pre>
 * &lt;contextcounts corpus="NAME"&gt;
 * &lt;deprelation name="RELATION"&gt;
 * COUNT&lt;TAB&gt;WORD
 * ...
 * &lt;/deprelation&gt;
 * ...
 * &lt;/contextcounts&gt;
 * </pre>
 *
 * <p>For every key of {@code depRelationWordCountMap}, the set returned by
 * {@code getSortedMap} is drained from its last element to its first, so entries appear in
 * descending order of that set's ordering (presumably by count; the comparator is not
 * visible here).
 *
 * <p>NOTE(review): the header records {@code DepNeighbourhoodSpace.getName()}, whereas
 * {@code importFromReader} passes the parsed header value to {@code Corpus.setName}; confirm
 * both refer to the same name.
 *
 * <p>The writer is neither flushed nor closed by this method.
 *
 * @param out writer that receives the serialised counts
 * @throws IOException if writing to {@code out} fails
 */
public void saveToWriter(BufferedWriter out) throws IOException {
    String openingTag = "<contextcounts corpus=\"" + DepNeighbourhoodSpace.getName() + "\">\n";
    out.write(openingTag);

    for (String relationName : depRelationWordCountMap.keySet()) {
        Helper.report(
                "[ContextCounts] Writing counts for dep relation \"" + relationName + "\" to file");
        out.write("<deprelation name=\"" + relationName + "\">\n");

        // Drain the sorted set from the top so the highest-ranked entry is written first.
        for (TreeSet<Entry<String, Long>> pending = getSortedMap(relationName);
                !pending.isEmpty(); ) {
            Entry<String, Long> highest = pending.pollLast();
            Long count = highest.getValue();
            String word = highest.getKey();
            out.write(count + "\t" + word + "\n");
        }

        out.write("</deprelation>\n");
    }

    out.write("</contextcounts>\n");
}
/**
 * Reads the count lines of one dependency relation and stores them in {@code dmc}.
 *
 * <p>Lines are consumed from {@code in} until end of input or a line equal to
 * {@code </deprelation>}. Every other line must have the form {@code COUNT<TAB>WORD}: the
 * text before the first tab is parsed as a {@code long}, and everything after it, including
 * an empty string or further tabs, is taken verbatim as the context word.
 *
 * @param in reader positioned just after the relation's opening tag
 * @param dmc container that receives the counts via {@code setCount}
 * @param depRelationString name of the dependency relation the counts belong to
 * @throws IOException if reading from {@code in} fails
 * @throws NumberFormatException if the text before the first tab is not a valid {@code long}
 * @throws ArrayIndexOutOfBoundsException if a line contains no tab at all
 */
public static void importDepRelationCounts(
        BufferedReader in, DepContextCounts dmc, String depRelationString) throws IOException {
    Helper.report(
            "[ContextCounts] Importing context word counts for dep relation \""
                    + depRelationString + "\"...");
    String line;
    while ((line = in.readLine()) != null) {
        if (line.equals("</deprelation>")) {
            break;
        }
        // Split on the first tab only. With the default limit, split() drops trailing empty
        // fields, so a line with an empty word ("5\t") would lose its second field, and a
        // word that itself contains a tab would be cut short.
        String[] entries = line.split("\t", 2);
        long count = Long.parseLong(entries[0]);
        String contextWord = entries[1];
        dmc.setCount(depRelationString, contextWord, count);
    }
}