Beispiel #1
0
 protected Rule specifyRule(Rule rule, List history, int childDepth) {
   Rule r;
   String topHistoryStr = historyToString(history.subList(1, history.size()));
   String bottomHistoryStr = historyToString(history.subList(0, childDepth));
   if (rule instanceof UnaryRule) {
     UnaryRule ur = new UnaryRule();
     UnaryRule urule = (UnaryRule) rule;
     ur.parent = stateNumberer.number(stateNumberer.object(urule.parent) + topHistoryStr);
     if (isSynthetic(urule.child)) {
       ur.child = stateNumberer.number(stateNumberer.object(urule.child) + topHistoryStr);
     } else if (isTag(urule.child)) {
       ur.child = urule.child;
     } else {
       ur.child = stateNumberer.number(stateNumberer.object(urule.child) + bottomHistoryStr);
     }
     r = ur;
   } else {
     BinaryRule br = new BinaryRule();
     BinaryRule brule = (BinaryRule) rule;
     br.parent = stateNumberer.number(stateNumberer.object(brule.parent) + topHistoryStr);
     if (isSynthetic(brule.leftChild)) {
       br.leftChild = stateNumberer.number(stateNumberer.object(brule.leftChild) + topHistoryStr);
     } else if (isTag(brule.leftChild)) {
       br.leftChild = brule.leftChild;
     } else {
       br.leftChild =
           stateNumberer.number(stateNumberer.object(brule.leftChild) + bottomHistoryStr);
     }
     if (isSynthetic(brule.rightChild)) {
       br.rightChild =
           stateNumberer.number(stateNumberer.object(brule.rightChild) + topHistoryStr);
     } else if (isTag(brule.rightChild)) {
       br.rightChild = brule.rightChild;
     } else {
       br.rightChild =
           stateNumberer.number(stateNumberer.object(brule.rightChild) + bottomHistoryStr);
     }
     r = br;
   }
   return r;
 }
Beispiel #2
0
  public Object formResult() {
    Set brs = new HashSet();
    Set urs = new HashSet();
    // scan each rule / history pair
    int ruleCount = 0;
    for (Iterator pairI = rulePairs.keySet().iterator(); pairI.hasNext(); ) {
      if (ruleCount % 100 == 0) {
        System.err.println("Rules multiplied: " + ruleCount);
      }
      ruleCount++;
      Pair rulePair = (Pair) pairI.next();
      Rule baseRule = (Rule) rulePair.first;
      String baseLabel = (String) ruleToLabel.get(baseRule);
      List history = (List) rulePair.second;
      double totalProb = 0;
      for (int depth = 1; depth <= HISTORY_DEPTH() && depth <= history.size(); depth++) {
        List subHistory = history.subList(0, depth);
        double c_label = labelPairs.getCount(new Pair(baseLabel, subHistory));
        double c_rule = rulePairs.getCount(new Pair(baseRule, subHistory));
        // System.out.println("Multiplying out "+baseRule+" with history "+subHistory);
        // System.out.println("Count of "+baseLabel+" with "+subHistory+" is "+c_label);
        // System.out.println("Count of "+baseRule+" with "+subHistory+" is "+c_rule );

        double prob = (1.0 / HISTORY_DEPTH()) * (c_rule) / (c_label);
        totalProb += prob;
        for (int childDepth = 0; childDepth <= Math.min(HISTORY_DEPTH() - 1, depth); childDepth++) {
          Rule rule = specifyRule(baseRule, subHistory, childDepth);
          rule.score = (float) Math.log(totalProb);
          // System.out.println("Created  "+rule+" with score "+rule.score);
          if (rule instanceof UnaryRule) {
            urs.add(rule);
          } else {
            brs.add(rule);
          }
        }
      }
    }
    System.out.println("Total states: " + stateNumberer.total());
    BinaryGrammar bg = new BinaryGrammar(stateNumberer.total());
    UnaryGrammar ug = new UnaryGrammar(stateNumberer.total());
    for (Iterator brI = brs.iterator(); brI.hasNext(); ) {
      BinaryRule br = (BinaryRule) brI.next();
      bg.addRule(br);
    }
    for (Iterator urI = urs.iterator(); urI.hasNext(); ) {
      UnaryRule ur = (UnaryRule) urI.next();
      ug.addRule(ur);
    }
    return new Pair(ug, bg);
  }
Beispiel #3
0
 protected void tallyInternalNode(Tree lt, List parents) {
   // form base rule
   String label = lt.label().value();
   Rule baseR = ltToRule(lt);
   ruleToLabel.put(baseR, label);
   // act on each history depth
   for (int depth = 0, maxDepth = Math.min(HISTORY_DEPTH(), parents.size());
       depth <= maxDepth;
       depth++) {
     List history = new ArrayList(parents.subList(0, depth));
     // tally each history level / rewrite pair
     rulePairs.incrementCount(new Pair(baseR, history), 1);
     labelPairs.incrementCount(new Pair(label, history), 1);
   }
 }