Esempio n. 1
0
 // look up and apply coarts for w to each sign in result
 @SuppressWarnings("unchecked")
 private void applyCoarts(Word w, SignHash result) throws LexException {
   List<Sign> inputSigns = new ArrayList<Sign>(result.asSignSet());
   result.clear();
   List<Sign> outputSigns = new ArrayList<Sign>(inputSigns.size());
   // for each surface attr, lookup coarts and apply to input signs, storing results in output
   // signs
   for (Iterator<Pair<String, String>> it = w.getSurfaceAttrValPairs(); it.hasNext(); ) {
     Pair<String, String> p = it.next();
     String attr = (String) p.a;
     if (!_indexedCoartAttrs.contains(attr)) continue;
     String val = (String) p.b;
     Word coartWord = Word.createWord(attr, val);
     SignHash coartResult = getSignsFromWord(coartWord, null, null, null);
     for (Iterator<Sign> it2 = coartResult.iterator(); it2.hasNext(); ) {
       Sign coartSign = it2.next();
       // apply to each input
       for (int j = 0; j < inputSigns.size(); j++) {
         Sign sign = inputSigns.get(j);
         grammar.rules.applyCoart(sign, coartSign, outputSigns);
       }
     }
     // switch output to input for next iteration
     inputSigns.clear();
     inputSigns.addAll(outputSigns);
     outputSigns.clear();
   }
   // add results back
   result.addAll(inputSigns);
 }
Esempio n. 2
0
  // get signs using an additional arg for a target rel
  private Collection<Sign> getSignsFromPredAndTargetRel(String pred, String targetRel) {

    Collection<Word> words = (Collection<Word>) _predToWords.get(pred);
    String specialTokenConst = null;

    // for robustness, when using supertagger, add words for pred sans sense index
    int dotIndex = -1;
    if (_supertagger != null
        && !Character.isDigit(pred.charAt(0))
        && // skip numbers
        (dotIndex = pred.lastIndexOf('.')) > 0
        && pred.length() > dotIndex + 1
        && pred.charAt(dotIndex + 1) != '_') // skip titles, eg Mr._Smith
    {
      String barePred = pred.substring(0, dotIndex);
      Collection<Word> barePredWords = (Collection<Word>) _predToWords.get(barePred);
      if (words == null) words = barePredWords;
      else if (barePredWords != null) {
        Set<Word> unionWords = new HashSet<Word>(words);
        unionWords.addAll(barePredWords);
        words = unionWords;
      }
    }

    if (words == null) {
      specialTokenConst = tokenizer.getSpecialTokenConstant(tokenizer.isSpecialToken(pred));
      if (specialTokenConst == null) return null;
      // lookup words with pred = special token const
      Collection<Word> specialTokenWords = (Collection<Word>) _predToWords.get(specialTokenConst);
      // replace special token const with pred
      if (specialTokenWords == null) return null;
      words = new ArrayList<Word>(specialTokenWords.size());
      for (Iterator<Word> it = specialTokenWords.iterator(); it.hasNext(); ) {
        Word stw = it.next();
        Word w = Word.createSurfaceWord(stw, pred);
        words.add(w);
      }
    }

    List<Sign> retval = new ArrayList<Sign>();
    for (Iterator<Word> it = words.iterator(); it.hasNext(); ) {
      Word w = it.next();
      try {
        SignHash signs = getSignsFromWord(w, specialTokenConst, pred, targetRel);
        retval.addAll(signs.asSignSet());
      }
      // shouldn't happen
      catch (LexException exc) {
        System.err.println("Unexpected lex exception for word " + w + ": " + exc);
      }
    }
    return retval;
  }