Exemplo n.º 1
0
  /**
   * Writes the argument and stop dependency counts to {@code out}, one entry per line.
   *
   * <p>Argument entries are written first, followed by a line reading {@code BEGIN_STOP}, followed
   * by the stop entries. Every entry is the dependency's string form, a single space, and its
   * count. Argument entries whose head or arg is the {@code wildTW} instance, or whose head or arg
   * word is -1, are skipped; stop entries whose head word is -1 are skipped. The writer is flushed
   * at the end but not closed.
   *
   * @param out destination for the textual dump
   * @throws IOException as declared by the method signature
   */
  @Override
  public void writeData(PrintWriter out) throws IOException {
    for (IntDependency dep : argCounter.keySet()) {
      // Skip wildcard and unlexicalized entries; only fully specified ones are written.
      if (dep.head == wildTW || dep.arg == wildTW || dep.head.word == -1 || dep.arg.word == -1) {
        continue;
      }
      double argCount = argCounter.getCount(dep);
      out.println(dep.toString(wordIndex, tagIndex) + " " + argCount);
    }

    // Marker line separating the argument section from the stop section.
    out.println("BEGIN_STOP");

    for (IntDependency dep : stopCounter.keySet()) {
      if (dep.head.word == -1) {
        continue;
      }
      double stopCount = stopCounter.getCount(dep);
      out.println(dep.toString(wordIndex, tagIndex) + " " + stopCount);
    }

    out.flush();
  }
Exemplo n.º 2
0
  /**
   * Custom deserialization hook: reads the default serialized form, then rebuilds {@code
   * argCounter} and {@code stopCounter} from the deserialized (compressed) counters.
   *
   * <p>Each deserialized entry is handed to {@code expandArg} / {@code expandStop} together with
   * its own {@code distance} and count; those helpers are presumably what repopulate the fresh
   * counters (their bodies are not visible here). {@code expandDependencyMap} is cleared at the
   * end.
   *
   * @param stream the stream being deserialized from
   * @throws IOException if reading the default fields fails
   * @throws ClassNotFoundException if a class in the serialized form cannot be resolved
   */
  private void readObject(ObjectInputStream stream) throws IOException, ClassNotFoundException {
    stream.defaultReadObject();

    // Set the deserialized counters aside and install empty ones before expanding.
    ClassicCounter<IntDependency> storedArgs = argCounter;
    argCounter = new ClassicCounter<>();
    ClassicCounter<IntDependency> storedStops = stopCounter;
    stopCounter = new ClassicCounter<>();

    for (IntDependency dep : storedArgs.keySet()) {
      expandArg(dep, dep.distance, storedArgs.getCount(dep));
    }

    for (IntDependency dep : storedStops.keySet()) {
      expandStop(dep, dep.distance, storedStops.getCount(dep), false);
    }

    expandDependencyMap = null;
  }
Exemplo n.º 3
0
 /**
  * Prints every key of {@code c} followed by a space and its count, one pair per line, in the
  * order given by {@code Counters.toComparatorDescending(c)}.
  *
  * @param c counter whose entries are printed
  * @param pw destination writer; this method neither flushes nor closes it
  */
 private static <T> void display(ClassicCounter<T> c, PrintWriter pw) {
   List<T> ordered = new ArrayList<>(c.keySet());
   Collections.sort(ordered, Counters.toComparatorDescending(c));
   for (int i = 0, total = ordered.size(); i < total; i++) {
     T key = ordered.get(i);
     pw.println(key + " " + c.getCount(key));
   }
 }
 /**
  * Builds a {@code BasicDatum} for {@code label} from parallel arrays of features and counts.
  *
  * <p>The pairs are first loaded into a temporary counter; only that counter's key set is passed
  * to the datum, so the count values themselves are not forwarded.
  *
  * @param label label of the datum
  * @param features feature names; every index must also be valid in {@code counts}
  * @param counts count for the feature at the same index
  * @return a datum holding the distinct features and the label
  */
 private static <L, F> BasicDatum<L, F> newDatum(L label, F[] features, Double[] counts) {
   ClassicCounter<F> featureCounts = new ClassicCounter<>();
   int idx = 0;
   while (idx < features.length) {
     featureCounts.setCount(features[idx], counts[idx]);
     idx++;
   }
   return new BasicDatum<>(featureCounts.keySet(), label);
 }
Exemplo n.º 5
0
 /**
  * Prints the first {@code num} keys of {@code c}, ordered by {@code
  * Counters.toComparatorDescending(c)}, each followed by a space and its count.
  *
  * <p>If {@code num} exceeds the number of keys, all keys are printed; if it is zero or negative,
  * nothing is printed.
  *
  * @param c counter whose entries are printed
  * @param num maximum number of entries to print
  * @param pw destination writer; this method neither flushes nor closes it
  */
 private static <T> void display(ClassicCounter<T> c, int num, PrintWriter pw) {
   List<T> ranked = new ArrayList<>(c.keySet());
   Collections.sort(ranked, Counters.toComparatorDescending(c));
   int limit = Math.min(num, ranked.size());
   for (int pos = 0; pos < limit; pos++) {
     T key = ranked.get(pos);
     pw.println(key + " " + c.getCount(key));
   }
 }
Exemplo n.º 6
0
 /**
  * Merges feature values that share a name by accumulating their values in a counter.
  *
  * <p>The result holds one entry per distinct name, in the iteration order of a {@code TreeSet}
  * built from those names, so the names must be mutually comparable.
  *
  * @param <T> type of the feature name
  * @param featureValues the feature values to merge
  * @return a new list with one {@code FeatureValue} per distinct name and its accumulated value
  */
 public static <T> List<FeatureValue<T>> combine(Collection<FeatureValue<T>> featureValues) {
   ClassicCounter<T> totals = new ClassicCounter<>();
   for (FeatureValue<T> featureValue : featureValues) {
     totals.incrementCount(featureValue.name, featureValue.value);
   }

   Set<T> sortedNames = new TreeSet<>(totals.keySet());
   List<FeatureValue<T>> merged = new ArrayList<>(sortedNames.size());
   for (T name : sortedNames) {
     double total = totals.getCount(name);
     merged.add(new FeatureValue<>(name, total));
   }
   return merged;
 }
Exemplo n.º 7
0
  /**
   * Custom serialization hook: writes the object with compressed versions of {@code argCounter}
   * and {@code stopCounter}, then puts the full counters back.
   *
   * <p>The compressed argument counter keeps only entries whose head and arg are not the {@code
   * wildTW} instance and whose head and arg words are not -1; the compressed stop counter keeps
   * only entries whose head word is not -1. The full counters are restored in a {@code finally}
   * block, so a failed write cannot leave this object holding the compressed counters.
   *
   * @param stream the stream being serialized to
   * @throws IOException if writing the default fields fails
   */
  private void writeObject(ObjectOutputStream stream) throws IOException {
    ClassicCounter<IntDependency> fullArgCounter = argCounter;
    ClassicCounter<IntDependency> fullStopCounter = stopCounter;

    // Build both compressed counters in locals so the fields are only swapped once they are ready.
    ClassicCounter<IntDependency> compressedArgCounter = new ClassicCounter<IntDependency>();
    for (IntDependency dependency : fullArgCounter.keySet()) {
      if (dependency.head != wildTW
          && dependency.arg != wildTW
          && dependency.head.word != -1
          && dependency.arg.word != -1) {
        compressedArgCounter.incrementCount(dependency, fullArgCounter.getCount(dependency));
      }
    }

    ClassicCounter<IntDependency> compressedStopCounter = new ClassicCounter<IntDependency>();
    for (IntDependency dependency : fullStopCounter.keySet()) {
      if (dependency.head.word != -1) {
        compressedStopCounter.incrementCount(dependency, fullStopCounter.getCount(dependency));
      }
    }

    // defaultWriteObject serializes the fields, so they must point at the compressed counters
    // for the duration of the write.
    argCounter = compressedArgCounter;
    stopCounter = compressedStopCounter;
    try {
      stream.defaultWriteObject();
    } finally {
      // Restore the full counters even when the write throws.
      argCounter = fullArgCounter;
      stopCounter = fullStopCounter;
    }
  }
Exemplo n.º 8
0
  /**
   * Writes the lexicon's counts and smoothing parameters to {@code w} as text, one item per line.
   *
   * <p>Three sections are written in order: each key of {@code seenCounter} as its lexical entry
   * followed by {@code " SEEN "} and its count; each key of the unknown word model's unseen
   * counter as its lexical entry followed by {@code " UNSEEN "} and its count; and each element
   * of {@code smooth} as {@code smooth[i] = value}. The writer is flushed but not closed.
   *
   * @param w destination writer
   * @throws IOException as declared by the method signature
   */
  public void writeData(Writer w) throws IOException {
    PrintWriter printer = new PrintWriter(w);

    for (IntTaggedWord seenWord : seenCounter.keySet()) {
      String entry = seenWord.toLexicalEntry(wordIndex, tagIndex);
      printer.println(entry + " SEEN " + seenCounter.getCount(seenWord));
    }

    for (IntTaggedWord unseenWord : getUnknownWordModel().unSeenCounter().keySet()) {
      String entry = unseenWord.toLexicalEntry(wordIndex, tagIndex);
      printer.println(
          entry + " UNSEEN " + getUnknownWordModel().unSeenCounter().getCount(unseenWord));
    }

    for (int idx = 0; idx < smooth.length; idx++) {
      printer.println("smooth[" + idx + "] = " + smooth[idx]);
    }

    printer.flush();
  }
 /**
  * Builds an {@code MLEDependencyGrammar} from the configured options and indices, and adds one
  * rule per key of {@code dependencyCounter} with that key's count.
  *
  * @return the populated dependency grammar
  */
 @Override
 public DependencyGrammar formResult() {
   // The return value is discarded; presumably the call (with the flag set to true) registers
   // the unknown word in the index before the grammar is built. Confirm against indexOf.
   wordIndex.indexOf(Lexicon.UNKNOWN_WORD, true);

   MLEDependencyGrammar grammar =
       new MLEDependencyGrammar(
           tlpParams,
           directional,
           useDistance,
           useCoarseDistance,
           basicCategoryTagsInDependencyGrammar,
           op,
           wordIndex,
           tagIndex);

   for (IntDependency dep : dependencyCounter.keySet()) {
     double count = dependencyCounter.getCount(dep);
     grammar.addRule(dep, count);
   }
   return grammar;
 }
Exemplo n.º 10
0
  /**
   * Rebuilds {@code rulesWithWord} and {@code tags} from {@code seenCounter} and the unknown word
   * model's unseen counter.
   *
   * <p>Steps, in order:
   *
   * <ol>
   *   <li>Look up the unknown word in {@code wordIndex} and allocate one empty list per word
   *       index.
   *   <li>Collect into {@code tags} every seen entry whose word is {@code nullWord} and whose tag
   *       is not {@code nullTag}.
   *   <li>For each collected tag whose unseen count exceeds {@code
   *       trainOptions.openClassTypesThreshold}, add an (unknown word, tag) entry to the unknown
   *       word's list.
   *   <li>Add every seen entry with a real word and a real tag to that word's list.
   * </ol>
   *
   * <p>Progress and diagnostics go to {@code System.err} when {@code testOptions.verbose} or
   * {@code DEBUG_LEXICON} is set.
   */
  protected void initRulesWithWord() {
    if (testOptions.verbose || DEBUG_LEXICON) {
      System.err.print("\nInitializing lexicon scores ... ");
    }

    // Look the unknown word up before sizing the array; presumably the `true` flag adds it to
    // the index when absent, so that it gets a slot below. Confirm against indexOf.
    int unkWord = wordIndex.indexOf(UNKNOWN_WORD, true);
    int numWords = wordIndex.size();
    rulesWithWord = new List[numWords];
    for (int slot = 0; slot < numWords; slot++) {
      // Initial capacity 1: most words have only 1 or 2 entries.
      rulesWithWord[slot] = new ArrayList<IntTaggedWord>(1);
    }

    // Tag-only entries: word is nullWord, tag is a real tag.
    tags = new HashSet<IntTaggedWord>();
    for (IntTaggedWord seen : seenCounter.keySet()) {
      if (seen.word() != nullWord || seen.tag() == nullTag) {
        continue;
      }
      tags.add(seen);
    }

    // Tags for unknown words.
    if (DEBUG_LEXICON) {
      System.err.println(
          "Lexicon initializing tags for UNKNOWN WORD ("
              + Lexicon.UNKNOWN_WORD
              + ", "
              + unkWord
              + ')');
      System.err.println("unSeenCounter is: " + uwModel.unSeenCounter());
      System.err.println(
          "Train.openClassTypesThreshold is " + trainOptions.openClassTypesThreshold);
    }

    for (IntTaggedWord tagOnly : tags) {
      if (DEBUG_LEXICON) {
        System.err.println(
            "Entry for " + tagOnly + " is " + uwModel.unSeenCounter().getCount(tagOnly));
      }
      double typeCount = uwModel.unSeenCounter().getCount(tagOnly);
      // A tag counts as open class once its type count exceeds the threshold.
      if (typeCount > trainOptions.openClassTypesThreshold) {
        IntTaggedWord unknownEntry = new IntTaggedWord(unkWord, tagOnly.tag);
        rulesWithWord[unknownEntry.word].add(unknownEntry);
      }
    }

    if (testOptions.verbose || DEBUG_LEXICON) {
      System.err.print("The " + rulesWithWord[unkWord].size() + " open class tags are: [");
      for (IntTaggedWord openTag : rulesWithWord[unkWord]) {
        System.err.print(" " + tagIndex.get(openTag.tag()));
        if (DEBUG_LEXICON) {
          IntTaggedWord tagKey = new IntTaggedWord(nullWord, openTag.tag);
          System.err.print(
              " (tag "
                  + openTag.tag()
                  + ", type count is "
                  + uwModel.unSeenCounter().getCount(tagKey)
                  + ')');
        }
      }
      System.err.println(" ] ");
    }

    // Finally, register every fully specified (word, tag) pair under its word.
    for (IntTaggedWord seen : seenCounter.keySet()) {
      if (seen.tag() == nullTag || seen.word() == nullWord) {
        continue;
      }
      rulesWithWord[seen.word].add(seen);
    }
  }