Пример #1
0
 // look up and apply coarts for w to each sign in result
 @SuppressWarnings("unchecked")
 private void applyCoarts(Word w, SignHash result) throws LexException {
   List<Sign> inputSigns = new ArrayList<Sign>(result.asSignSet());
   result.clear();
   List<Sign> outputSigns = new ArrayList<Sign>(inputSigns.size());
   // for each surface attr, lookup coarts and apply to input signs, storing results in output
   // signs
   for (Iterator<Pair<String, String>> it = w.getSurfaceAttrValPairs(); it.hasNext(); ) {
     Pair<String, String> p = it.next();
     String attr = (String) p.a;
     if (!_indexedCoartAttrs.contains(attr)) continue;
     String val = (String) p.b;
     Word coartWord = Word.createWord(attr, val);
     SignHash coartResult = getSignsFromWord(coartWord, null, null, null);
     for (Iterator<Sign> it2 = coartResult.iterator(); it2.hasNext(); ) {
       Sign coartSign = it2.next();
       // apply to each input
       for (int j = 0; j < inputSigns.size(); j++) {
         Sign sign = inputSigns.get(j);
         grammar.rules.applyCoart(sign, coartSign, outputSigns);
       }
     }
     // switch output to input for next iteration
     inputSigns.clear();
     inputSigns.addAll(outputSigns);
     outputSigns.clear();
   }
   // add results back
   result.addAll(inputSigns);
 }
Пример #2
0
  // get signs using an additional arg for a target rel
  private Collection<Sign> getSignsFromPredAndTargetRel(String pred, String targetRel) {

    Collection<Word> words = (Collection<Word>) _predToWords.get(pred);
    String specialTokenConst = null;

    // for robustness, when using supertagger, add words for pred sans sense index
    int dotIndex = -1;
    if (_supertagger != null
        && !Character.isDigit(pred.charAt(0))
        && // skip numbers
        (dotIndex = pred.lastIndexOf('.')) > 0
        && pred.length() > dotIndex + 1
        && pred.charAt(dotIndex + 1) != '_') // skip titles, eg Mr._Smith
    {
      String barePred = pred.substring(0, dotIndex);
      Collection<Word> barePredWords = (Collection<Word>) _predToWords.get(barePred);
      if (words == null) words = barePredWords;
      else if (barePredWords != null) {
        Set<Word> unionWords = new HashSet<Word>(words);
        unionWords.addAll(barePredWords);
        words = unionWords;
      }
    }

    if (words == null) {
      specialTokenConst = tokenizer.getSpecialTokenConstant(tokenizer.isSpecialToken(pred));
      if (specialTokenConst == null) return null;
      // lookup words with pred = special token const
      Collection<Word> specialTokenWords = (Collection<Word>) _predToWords.get(specialTokenConst);
      // replace special token const with pred
      if (specialTokenWords == null) return null;
      words = new ArrayList<Word>(specialTokenWords.size());
      for (Iterator<Word> it = specialTokenWords.iterator(); it.hasNext(); ) {
        Word stw = it.next();
        Word w = Word.createSurfaceWord(stw, pred);
        words.add(w);
      }
    }

    List<Sign> retval = new ArrayList<Sign>();
    for (Iterator<Word> it = words.iterator(); it.hasNext(); ) {
      Word w = it.next();
      try {
        SignHash signs = getSignsFromWord(w, specialTokenConst, pred, targetRel);
        retval.addAll(signs.asSignSet());
      }
      // shouldn't happen
      catch (LexException exc) {
        System.err.println("Unexpected lex exception for word " + w + ": " + exc);
      }
    }
    return retval;
  }
Пример #3
0
 /**
  * For a string of 1 or more surface words, return all of the lexical entries for each word as a
  * list of sign hashes. Tokenization is performed using the configured tokenizer.
  *
  * @param w the words in string format
  * @return a list of sign hashes
  * @exception LexException thrown if word not found
  */
 public List<SignHash> getEntriesFromWords(String s) throws LexException {
   List<SignHash> entries = new ArrayList<SignHash>();
   List<Word> words = tokenizer.tokenize(s);
   for (Iterator<Word> it = words.iterator(); it.hasNext(); ) {
     Word w = it.next();
     SignHash signs = getSignsFromWord(w);
     if (signs.size() == 0) {
       throw new LexException("Word not in lexicon: \"" + w + "\"");
     }
     entries.add(signs);
   }
   return entries;
 }
Пример #4
0
 /**
  * For a given word, return all of its surface word's lexical entries. If the word is not listed
  * in the lexicon, the tokenizer is consulted to see if it is a special token (date, time, etc.);
  * otherwise an exception is thrown. If the word has coarticulations, all applicable
  * coarticulation entries are applied to the base word, in an arbitrary order.
  *
  * @param w the word
  * @return a sign hash
  * @exception LexException thrown if word not found
  */
 public SignHash getSignsFromWord(Word w) throws LexException {
   // reduce word to its core, removing coart attrs if any
   Word surfaceWord = Word.createSurfaceWord(w);
   Word coreWord =
       (surfaceWord.attrsIntersect(_coartAttrs))
           ? Word.createCoreSurfaceWord(surfaceWord, _coartAttrs)
           : surfaceWord;
   // lookup core word
   SignHash result = getSignsFromWord(coreWord, null, null, null);
   if (result.size() == 0) {
     throw new LexException(coreWord + " not found in lexicon");
   }
   // return signs if no coart attrs
   if (coreWord == surfaceWord) return result;
   // otherwise apply coarts for word
   applyCoarts(surfaceWord, result);
   return result;
 }
Пример #5
0
  // given EntriesItem
  private void getWithEntriesItem(
      Word w,
      MorphItem mi,
      String stem,
      String pred,
      String targetPred,
      String targetRel,
      EntriesItem item,
      MacroAdder macAdder,
      Map<String, Double> supertags,
      Set<String> supertagsFound,
      SignHash result) {
    // ensure apropos
    if (targetPred != null && !targetPred.equals(pred)) return;
    if (targetRel != null
        && !targetRel.equals(item.getIndexRel())
        && !targetRel.equals(item.getCoartRel())) return;
    if (!item.getActive().booleanValue()) return;
    if (mi.excluded(item)) return;

    try {
      // copy and add macros
      Category cat = item.getCat().copy();
      macAdder.addMacros(cat);

      // replace DEFAULT_VAL with pred, after first
      // unifying type of associated nom var(s) with sem class
      unifySemClass(cat, mi.getWord().getSemClass());
      REPLACEMENT = pred;
      cat.deepMap(defaultReplacer);

      // check supertag
      // TODO: think about earlier checks for efficiency, for grammars where macros and preds don't
      // matter
      // Double lexprob = null; // nb: skipping lex log probs, don't seem to be helpful
      if (supertags != null) {
        // skip if not found
        String stag = cat.getSupertag();
        if (!supertags.containsKey(stag)) return;
        // otherwise update found supertags
        supertagsFound.add(stag);
        // get lex prob
        // lexprob = supertags.get(stag);
      }

      // propagate types of nom vars
      propagateTypes(cat);

      // handle distrib attrs and inherits-from
      propagateDistributiveAttrs(cat);
      expandInheritsFrom(cat);

      // merge stem, pos, sem class from morph item, plus supertag from cat
      Word word = Word.createFullWord(w, mi.getWord(), cat.getSupertag());

      // set origin and lexprob
      Sign sign = new Sign(word, cat);
      sign.setOrigin();
      // if (lexprob != null) {
      //	sign.addData(new SupertaggerAdapter.LexLogProb((float) Math.log10(lexprob)));
      // }
      // return sign
      result.insert(sign);
    } catch (RuntimeException exc) {
      System.err.println(
          "Warning: ignoring entry: "
              + item.getName()
              + " of family: "
              + item.getFamilyName()
              + " for stem: "
              + stem
              + " b/c: "
              + exc.toString());
    }
  }