コード例 #1
0
ファイル: Lexicon.java プロジェクト: kevinkissi/openccg
  // Collects the signs for a word. A non-null specialTokenConst skips the plain lookup and
  // goes straight to the special-token key; targetPred and targetRel are handed on to
  // getWithMorphItem for every morph item found.
  private SignHash getSignsFromWord(
      Word w, String specialTokenConst, String targetPred, String targetRel) throws LexException {

    Collection<MorphItem> morphItems = null;

    if (specialTokenConst == null) {
      // plain lookup under the word itself
      morphItems = (Collection<MorphItem>) _words.get(w);
      if (morphItems == null) {
        // not found: ask the tokenizer whether the form is a special token; the form
        // then also becomes the target pred
        specialTokenConst =
            tokenizer.getSpecialTokenConstant(tokenizer.isSpecialToken(w.getForm()));
        targetPred = w.getForm();
      }
    }

    if (morphItems == null && specialTokenConst != null) {
      // retry under a surface word built from w and the special token const
      Word key = Word.createSurfaceWord(w, specialTokenConst);
      morphItems = (Collection<MorphItem>) _words.get(key);
    }

    // neither lookup produced anything
    if (morphItems == null) {
      throw new LexException(w + " not in lexicon");
    }

    SignHash result = new SignHash();
    for (MorphItem item : morphItems) {
      getWithMorphItem(w, item, targetPred, targetRel, result);
    }
    return result;
  }
コード例 #2
0
 /**
  * Derives a tool macro file name from macro source bytes.
  *
  * <p>The bytes are decoded with the platform default charset and tokenized. The string
  * constant that directly follows the first MACRO token which has one is taken as the
  * macro name. That name is cut off right after its first occurrence of "Tool", spaces
  * are replaced with underscores, and ".ijm" is appended.
  *
  * @param data the macro source
  * @return the derived file name, or null if no MACRO token is followed by a string
  *     constant or the macro name does not contain "Tool"
  */
 private String getToolName(byte[] data) {
   String text = new String(data);
   Tokenizer tok = new Tokenizer();
   Program pgm = tok.tokenize(text);
   int[] code = pgm.getCode();
   Symbol[] symbolTable = pgm.getSymbolTable();
   String name = null;
   // Every MACRO token is inspected together with its successor, so stop one short of
   // the end; a MACRO token in the last slot would otherwise index past the array.
   for (int i = 0; i + 1 < code.length; i++) {
     int token = code[i] & MacroConstants.TOK_MASK;
     if (token != MacroConstants.MACRO) {
       continue;
     }
     int nextToken = code[i + 1] & MacroConstants.TOK_MASK;
     if (nextToken == MacroConstants.STRING_CONSTANT) {
       // the bits above the token code index into the symbol table
       int address = code[i + 1] >> MacroConstants.TOK_SHIFT;
       name = symbolTable[address].str;
       break;
     }
   }
   if (name == null) return null;
   int index = name.indexOf("Tool");
   if (index == -1) return null;
   // keep everything up to and including "Tool" (4 characters)
   name = name.substring(0, index + 4);
   // literal character replacement; no regex needed
   name = name.replace(' ', '_');
   return name + ".ijm";
 }
コード例 #3
0
ファイル: Lexicon.java プロジェクト: kevinkissi/openccg
  // Gathers the signs of every word registered for a pred, handing the target rel on to
  // the per-word lookup. Returns null when the pred has no words and is not a special
  // token with registered words.
  private Collection<Sign> getSignsFromPredAndTargetRel(String pred, String targetRel) {

    Collection<Word> words = (Collection<Word>) _predToWords.get(pred);

    // for robustness, when using supertagger, add words for pred sans sense index
    if (_supertagger != null && !Character.isDigit(pred.charAt(0))) { // skip numbers
      int dotIndex = pred.lastIndexOf('.');
      boolean hasTextAfterDot = dotIndex > 0 && pred.length() > dotIndex + 1;
      if (hasTextAfterDot && pred.charAt(dotIndex + 1) != '_') { // skip titles, eg Mr._Smith
        String barePred = pred.substring(0, dotIndex);
        Collection<Word> barePredWords = (Collection<Word>) _predToWords.get(barePred);
        if (words == null) {
          words = barePredWords;
        } else if (barePredWords != null) {
          // both lookups hit: use the union of the two word collections
          Set<Word> merged = new HashSet<Word>(words);
          merged.addAll(barePredWords);
          words = merged;
        }
      }
    }

    String specialTokenConst = null;
    if (words == null) {
      specialTokenConst = tokenizer.getSpecialTokenConstant(tokenizer.isSpecialToken(pred));
      if (specialTokenConst == null) {
        return null;
      }
      // lookup words with pred = special token const
      Collection<Word> specialTokenWords = (Collection<Word>) _predToWords.get(specialTokenConst);
      if (specialTokenWords == null) {
        return null;
      }
      // replace special token const with pred
      words = new ArrayList<Word>(specialTokenWords.size());
      for (Word specialTokenWord : specialTokenWords) {
        words.add(Word.createSurfaceWord(specialTokenWord, pred));
      }
    }

    List<Sign> retval = new ArrayList<Sign>();
    for (Word w : words) {
      try {
        retval.addAll(getSignsFromWord(w, specialTokenConst, pred, targetRel).asSignSet());
      } catch (LexException exc) {
        // shouldn't happen
        System.err.println("Unexpected lex exception for word " + w + ": " + exc);
      }
    }
    return retval;
  }
コード例 #4
0
ファイル: Lexicon.java プロジェクト: kevinkissi/openccg
 /**
  * Tokenizes the given string with the configured tokenizer and looks up the lexical
  * entries of each resulting word, producing one sign hash per word in token order.
  *
  * @param s one or more surface words as a single string
  * @return a list holding one sign hash per word
  * @exception LexException thrown if a word yields no signs
  */
 public List<SignHash> getEntriesFromWords(String s) throws LexException {
   List<Word> tokens = tokenizer.tokenize(s);
   List<SignHash> entries = new ArrayList<SignHash>();
   for (Word word : tokens) {
     SignHash signs = getSignsFromWord(word);
     // an empty sign hash counts as "not found"
     if (signs.size() == 0) {
       throw new LexException("Word not in lexicon: \"" + word + "\"");
     }
     entries.add(signs);
   }
   return entries;
 }
コード例 #5
0
ファイル: ARecord.java プロジェクト: z33ky/xabber-android
 /**
  * Reads an IPv4 address from the tokenizer and stores the result of passing its byte
  * form through fromArray in addr. The origin argument is not used here.
  *
  * @param st the tokenizer to read the address from
  * @param origin unused by this method
  * @throws IOException declared by this method; presumably raised by the tokenizer on
  *     bad input (not visible from here)
  */
 void rdataFromString(Tokenizer st, Name origin) throws IOException {
   addr = fromArray(st.getAddress(Address.IPv4).getAddress());
 }
コード例 #6
0
 /**
  * Reads the bytes of an IPv6 address from the tokenizer and stores them in address.
  * The origin argument is not used here.
  *
  * @param st the tokenizer to read the address from
  * @param origin unused by this method
  * @throws IOException declared by this method; presumably raised by the tokenizer on
  *     bad input (not visible from here)
  */
 void rdataFromString(Tokenizer st, Name origin) throws IOException {
   this.address = st.getAddressBytes(Address.IPv6);
 }