/** Writes out data from this Object to the Writer w. */ @Override public void writeData(PrintWriter out) throws IOException { // all lines have one rule per line for (IntDependency dependency : argCounter.keySet()) { if (dependency.head != wildTW && dependency.arg != wildTW && dependency.head.word != -1 && dependency.arg.word != -1) { double count = argCounter.getCount(dependency); out.println(dependency.toString(wordIndex, tagIndex) + " " + count); } } out.println("BEGIN_STOP"); for (IntDependency dependency : stopCounter.keySet()) { if (dependency.head.word != -1) { double count = stopCounter.getCount(dependency); out.println(dependency.toString(wordIndex, tagIndex) + " " + count); } } out.flush(); }
private void readObject(ObjectInputStream stream) throws IOException, ClassNotFoundException { stream.defaultReadObject(); // System.err.println("Before decompression:"); // System.err.println("arg size: " + argCounter.size() + " total: " + // argCounter.totalCount()); // System.err.println("stop size: " + stopCounter.size() + " total: " + // stopCounter.totalCount()); ClassicCounter<IntDependency> compressedArgC = argCounter; argCounter = new ClassicCounter<IntDependency>(); ClassicCounter<IntDependency> compressedStopC = stopCounter; stopCounter = new ClassicCounter<IntDependency>(); for (IntDependency d : compressedArgC.keySet()) { double count = compressedArgC.getCount(d); expandArg(d, d.distance, count); } for (IntDependency d : compressedStopC.keySet()) { double count = compressedStopC.getCount(d); expandStop(d, d.distance, count, false); } // System.err.println("After decompression:"); // System.err.println("arg size: " + argCounter.size() + " total: " + // argCounter.totalCount()); // System.err.println("stop size: " + stopCounter.size() + " total: " + // stopCounter.totalCount()); expandDependencyMap = null; }
/**
 * Prints every key of {@code c} followed by a space and its count, one key per line, in the
 * order given by {@code Counters.toComparatorDescending(c)}.
 *
 * @param c counter to print
 * @param pw writer receiving the lines
 */
private static <T> void display(ClassicCounter<T> c, PrintWriter pw) {
  List<T> ordered = new ArrayList<>(c.keySet());
  Collections.sort(ordered, Counters.toComparatorDescending(c));
  for (T key : ordered) {
    String line = key + " " + c.getCount(key);
    pw.println(line);
  }
}
/**
 * Builds a {@code BasicDatum} for {@code label} from parallel feature and count arrays.
 *
 * <p>Each feature is recorded in a temporary counter with the count at the same index. The
 * datum itself is constructed from that counter's key set only, so the count values are not
 * carried into the result.
 *
 * @param label label of the datum
 * @param features feature array
 * @param counts counts, read at the same indices as {@code features}
 * @return a datum over the counter's key set with the given label
 */
private static <L, F> BasicDatum<L, F> newDatum(L label, F[] features, Double[] counts) {
  ClassicCounter<F> weighted = new ClassicCounter<F>();
  int idx = 0;
  for (F feature : features) {
    weighted.setCount(feature, counts[idx++]);
  }
  return new BasicDatum<L, F>(weighted.keySet(), label);
}
/**
 * Prints at most {@code num} keys of {@code c}, each followed by a space and its count, one per
 * line, in the order given by {@code Counters.toComparatorDescending(c)}.
 *
 * @param c counter to print
 * @param num maximum number of entries to print; values larger than the number of keys are
 *     clamped, and non-positive values print nothing
 * @param pw writer receiving the lines
 */
private static <T> void display(ClassicCounter<T> c, int num, PrintWriter pw) {
  List<T> ranked = new ArrayList<>(c.keySet());
  Collections.sort(ranked, Counters.toComparatorDescending(c));
  int limit = Math.min(num, ranked.size());
  for (int i = 0; i < limit; i++) {
    T key = ranked.get(i);
    pw.println(key + " " + c.getCount(key));
  }
}
/**
 * Collapses feature values that share a name into a single entry per name.
 *
 * <p>Every input value is accumulated into a counter via {@code incrementCount(name, value)}.
 * The result contains one {@code FeatureValue} per distinct name, carrying the counter's total
 * for that name, in the iteration order of a {@code TreeSet} built from the names (natural
 * ordering, so the names must be mutually comparable).
 *
 * @param featureValues feature values to merge
 * @param <T> type of the feature names
 * @return a new list with one entry per distinct feature name
 */
public static <T> List<FeatureValue<T>> combine(Collection<FeatureValue<T>> featureValues) {
  ClassicCounter<T> totals = new ClassicCounter<T>();
  for (FeatureValue<T> fv : featureValues) {
    totals.incrementCount(fv.name, fv.value);
  }

  Set<T> sortedNames = new TreeSet<T>(totals.keySet());
  List<FeatureValue<T>> merged = new ArrayList<FeatureValue<T>>(sortedNames.size());
  for (T name : sortedNames) {
    merged.add(new FeatureValue<T>(name, totals.getCount(name)));
  }
  return merged;
}
private void writeObject(ObjectOutputStream stream) throws IOException { // System.err.println("\nBefore compression:"); // System.err.println("arg size: " + argCounter.size() + " total: " + // argCounter.totalCount()); // System.err.println("stop size: " + stopCounter.size() + " total: " + // stopCounter.totalCount()); ClassicCounter<IntDependency> fullArgCounter = argCounter; argCounter = new ClassicCounter<IntDependency>(); for (IntDependency dependency : fullArgCounter.keySet()) { if (dependency.head != wildTW && dependency.arg != wildTW && dependency.head.word != -1 && dependency.arg.word != -1) { argCounter.incrementCount(dependency, fullArgCounter.getCount(dependency)); } } ClassicCounter<IntDependency> fullStopCounter = stopCounter; stopCounter = new ClassicCounter<IntDependency>(); for (IntDependency dependency : fullStopCounter.keySet()) { if (dependency.head.word != -1) { stopCounter.incrementCount(dependency, fullStopCounter.getCount(dependency)); } } // System.err.println("After compression:"); // System.err.println("arg size: " + argCounter.size() + " total: " + // argCounter.totalCount()); // System.err.println("stop size: " + stopCounter.size() + " total: " + // stopCounter.totalCount()); stream.defaultWriteObject(); argCounter = fullArgCounter; stopCounter = fullStopCounter; }
/**
 * Dumps the lexicon counts and smoothing values to {@code w} as text, one item per line.
 *
 * <p>Three sections are written in order:
 *
 * <ol>
 *   <li>for each key of {@code seenCounter}: its {@code toLexicalEntry(wordIndex, tagIndex)}
 *       form, then {@code " SEEN "}, then its count;
 *   <li>for each key of the unknown word model's unseen counter: the same form with
 *       {@code " UNSEEN "} and that counter's count;
 *   <li>for each index of {@code smooth}: a line {@code smooth[i] = value}.
 * </ol>
 *
 * <p>The writer is wrapped in a {@code PrintWriter} that is flushed but not closed.
 *
 * @param w destination writer
 * @throws IOException per the declared signature
 */
public void writeData(Writer w) throws IOException {
  PrintWriter printer = new PrintWriter(w);

  for (IntTaggedWord seen : seenCounter.keySet()) {
    printer.println(
        seen.toLexicalEntry(wordIndex, tagIndex) + " SEEN " + seenCounter.getCount(seen));
  }

  for (IntTaggedWord unseen : getUnknownWordModel().unSeenCounter().keySet()) {
    printer.println(
        unseen.toLexicalEntry(wordIndex, tagIndex)
            + " UNSEEN "
            + getUnknownWordModel().unSeenCounter().getCount(unseen));
  }

  for (int idx = 0; idx < smooth.length; idx++) {
    printer.println("smooth[" + idx + "] = " + smooth[idx]);
  }

  printer.flush();
}
/**
 * Creates an {@code MLEDependencyGrammar} and adds one rule per entry of
 * {@code dependencyCounter}, using the entry's count.
 *
 * @return the populated grammar
 */
@Override
public DependencyGrammar formResult() {
  // Return value intentionally ignored; the call is made before the grammar is constructed
  // (the `true` flag presumably registers the unknown-word entry if missing — confirm).
  wordIndex.indexOf(Lexicon.UNKNOWN_WORD, true);

  MLEDependencyGrammar grammar =
      new MLEDependencyGrammar(
          tlpParams,
          directional,
          useDistance,
          useCoarseDistance,
          basicCategoryTagsInDependencyGrammar,
          op,
          wordIndex,
          tagIndex);

  for (IntDependency dep : dependencyCounter.keySet()) {
    double depCount = dependencyCounter.getCount(dep);
    grammar.addRule(dep, depCount);
  }
  return grammar;
}
protected void initRulesWithWord() { if (testOptions.verbose || DEBUG_LEXICON) { System.err.print("\nInitializing lexicon scores ... "); } // int numWords = words.size()+sigs.size()+1; int unkWord = wordIndex.indexOf(UNKNOWN_WORD, true); int numWords = wordIndex.size(); rulesWithWord = new List[numWords]; for (int w = 0; w < numWords; w++) { rulesWithWord[w] = new ArrayList<IntTaggedWord>(1); // most have 1 or 2 // items in them } // for (Iterator ruleI = rules.iterator(); ruleI.hasNext();) { tags = new HashSet<IntTaggedWord>(); for (IntTaggedWord iTW : seenCounter.keySet()) { if (iTW.word() == nullWord && iTW.tag() != nullTag) { tags.add(iTW); } } // tags for unknown words if (DEBUG_LEXICON) { System.err.println( "Lexicon initializing tags for UNKNOWN WORD (" + Lexicon.UNKNOWN_WORD + ", " + unkWord + ')'); } if (DEBUG_LEXICON) System.err.println("unSeenCounter is: " + uwModel.unSeenCounter()); if (DEBUG_LEXICON) System.err.println( "Train.openClassTypesThreshold is " + trainOptions.openClassTypesThreshold); for (IntTaggedWord iT : tags) { if (DEBUG_LEXICON) System.err.println("Entry for " + iT + " is " + uwModel.unSeenCounter().getCount(iT)); double types = uwModel.unSeenCounter().getCount(iT); if (types > trainOptions.openClassTypesThreshold) { // Number of types before it's treated as open class IntTaggedWord iTW = new IntTaggedWord(unkWord, iT.tag); rulesWithWord[iTW.word].add(iTW); } } if (testOptions.verbose || DEBUG_LEXICON) { System.err.print("The " + rulesWithWord[unkWord].size() + " open class tags are: ["); for (IntTaggedWord item : rulesWithWord[unkWord]) { System.err.print(" " + tagIndex.get(item.tag())); if (DEBUG_LEXICON) { IntTaggedWord iTprint = new IntTaggedWord(nullWord, item.tag); System.err.print( " (tag " + item.tag() + ", type count is " + uwModel.unSeenCounter().getCount(iTprint) + ')'); } } System.err.println(" ] "); } for (IntTaggedWord iTW : seenCounter.keySet()) { if (iTW.tag() != nullTag && iTW.word() != nullWord) { 
rulesWithWord[iTW.word].add(iTW); } } }