コード例 #1
0
  /**
   * Feeds one word into the pattern matcher {@code ms}, one symbol at a time.
   *
   * <p>The word is bracketed by a {@code '^'} step before and a {@code '$'} step after. Inside
   * the word:
   *
   * <ul>
   *   <li>a backslash escapes the following character, which is stepped lower-cased together
   *       with {@code any_char}; a dangling backslash at the very end is ignored;
   *   <li>a {@code '/'} ends processing of the remaining characters;
   *   <li>a {@code <...>} tag is resolved through {@code alphabet.cast}: a non-zero symbol is
   *       stepped together with {@code any_tag}, otherwise only {@code any_tag} is stepped; a
   *       {@code '<'} with no closing {@code '>'} produces no step;
   *   <li>every other character is stepped lower-cased together with {@code any_char}.
   * </ul>
   *
   * @param word_str the word content to run through the matcher
   */
  void applyWord(String word_str) {
    if (DO_TIMING) timing.log("other");
    ms.step('^');
    for (int i = 0, limit = word_str.length(); i < limit; i++) {
      switch (word_str.charAt(i)) {
        case '\\':
          i++;
          // A backslash that is the last character has nothing to escape; reading charAt(i)
          // there would throw StringIndexOutOfBoundsException, so it is skipped instead.
          if (i < limit) {
            ms.step(Character.toLowerCase(word_str.charAt(i)), any_char);
          }
          break;

        case '/':
          // Everything after the first unescaped slash is ignored.
          i = limit;
          break;

        case '<': {
          int close = word_str.indexOf('>', i + 1);
          if (close != -1) {
            int symbol = alphabet.cast(word_str.substring(i, close + 1));
            if (symbol != 0) {
              ms.step(symbol, any_tag);
            } else {
              ms.step(any_tag);
            }
            // Resume after the closing '>' (the loop increment moves past it).
            i = close;
          }
          break;
        }

        default:
          ms.step(Character.toLowerCase(word_str.charAt(i)), any_char);
          break;
      }
    }
    ms.step('$');
    if (DO_TIMING) timing.log("applyWord");
  }
コード例 #2
0
  /**
   * Invokes a matched rule method reflectively on {@code transferObject}.
   *
   * <p>The argument array passed to the rule is laid out as {@code output, word0, blank0, word1,
   * blank1, ..., wordN-1}: the writer first, then each word wrapped in a {@code TransferWord},
   * with the blank preceding every word but the first placed in front of it.
   *
   * <p>The target-language side of each {@code TransferWord} is obtained in one of three ways:
   * via {@code fstp.biltransWithQueue} when {@code useBilingual} is set and {@code preBilingual}
   * is not; from the second {@code '/'}-separated field of the word when {@code preBilingual} is
   * set (empty if there is none); otherwise the word itself is used unchanged.
   *
   * @param output writer handed to the rule as its first argument
   * @param rule the rule method to invoke
   * @param words the matched words
   * @param blanks the blanks between the matched words; {@code blanks.get(i - 1)} precedes
   *     {@code words.get(i)}
   * @throws Exception whatever the translation lookup or the rule invocation throws; invocation
   *     failures are logged to stderr and rethrown
   */
  private void applyRule(
      Writer output, Method rule, ArrayList<String> words, ArrayList<String> blanks)
      throws Exception {
    // Diagnostic trace only: guarded like the other debug output in this method.
    if (DEBUG) System.err.println("applyRule(" + rule + ", " + words + ", " + blanks);
    if (DO_TIMING) timing.log("other1");

    int limit = words.size(); // number of words

    // number of arguments out:1, words:limit, blanks:limit-1
    Object[] args = new Object[1 + limit + limit - 1];
    int argn = 0;
    args[argn++] = output;

    for (int i = 0; i != limit; i++) {
      if (i > 0) args[argn++] = blanks.get(i - 1);

      String word = words.get(i);
      Pair<String, Integer> tr;
      if (useBilingual && !preBilingual) {
        if (DO_TIMING) timing.log("applyRule 1");
        tr = fstp.biltransWithQueue(word, false);
        if (DO_TIMING) timing.log("applyRule/fstp.biltransWithQueue ");
      } else if (preBilingual) {
        // Input looks like ^source/target1/target2/target3$. Only one translation can be
        // carried through transfer, so the first target field is taken (empty if absent).
        String[] splits = word.split("/");
        String tl = splits.length > 1 ? splits[1] : "";
        tr = new Pair<String, Integer>(tl, 0);
      } else {
        // If no bilingual dictionary is used (i.e. for apertium-transfer -n, for
        // apertium-interchunk and for apertium-postchunk), then the sl and tl values will be the
        // same.
        tr = new Pair<String, Integer>(word, 0);
      }

      args[argn++] = new TransferWord(word, tr.first, tr.second);
    }

    // here was in C++: processRule(lastrule) to interpret XML, but we use Java bytecode via Java
    // Method Invocation
    if (DEBUG) System.err.println("#args = " + args.length);
    if (DEBUG) System.err.println("processRule:" + rule.getName() + "(" + Arrays.toString(args));
    try {
      if (DO_TIMING) timing.log("applyRule 1");
      rule.invoke(transferObject, args);
      if (DO_TIMING) timing.log("rule invoke");
    } catch (Exception e) {
      // Always report the failing rule and its arguments before propagating the failure.
      System.err.println("Error during invokation of " + rule);
      System.err.println("#args = " + args.length);
      System.err.println("processRule:" + rule.getName() + "(" + Arrays.toString(args));
      throw e;
    }
    if (DEBUG) output.flush();

    if (DO_TIMING) timing.log("applyRule 1");
  }
コード例 #3
0
  /**
   * Main transfer loop: reads tokens from {@code in}, runs them through the pattern matcher
   * {@code ms}, and writes the result to {@code output}.
   *
   * <p>Each iteration:
   *
   * <ol>
   *   <li>If the matcher has no live states ({@code ms.size() == 0}): apply the last matched
   *       rule if there is one and rewind the input buffer to the position recorded for that
   *       match; otherwise emit the first pending word on its own (translated as configured by
   *       {@code useBilingual} / {@code preBilingual} / {@code isExtended}) or the first pending
   *       blank, and restart matching.
   *   <li>Ask the matcher whether a rule is currently matched; if so remember the rule, the
   *       input position and copies of the pending words and blanks, so a longer match can
   *       still replace it.
   *   <li>Read the next token and feed it to the matcher. On end of input with no pending
   *       words, the trailing content is written and the method returns.
   * </ol>
   *
   * @param in the reader tokens are read from
   * @param output the writer receiving the transferred text
   * @throws Exception propagated from token reading, translation lookup, rule application or
   *     writing
   */
  public void transfer(Reader in, Writer output) throws Exception {
    if (getNullFlush()) {
      // NOTE(review): there is no return here, so the main loop below still runs after the
      // null-flush wrapper finishes. Left unchanged; confirm this is intended.
      transfer_wrapper_null_flush(in, output);
    }

    Method lastMatchedRule = null;
    ArrayList<String> tmpword = new ArrayList<String>();
    ArrayList<String> tmpblank = new ArrayList<String>();
    ArrayList<String> matchedWords = new ArrayList<String>();
    ArrayList<String> matchedBlanks = new ArrayList<String>();

    int lastPos = 0;
    ms.init(me.getInitial());
    if (DO_TIMING) timing = new Timing("Transfer");
    while (true) {
      if (ms.size() == 0) {
        if (lastMatchedRule != null) {
          // there was a rule match
          applyRule(output, lastMatchedRule, matchedWords, matchedBlanks);
          lastMatchedRule = null;
          tmpword.clear();
          tmpblank.clear();
          ms.init(me.getInitial());
          input_buffer.setPos(lastPos);
        } else {
          if (tmpword.size() != 0) {
            // no rule match. then default is to just output the stuff word by word
            String firstWord = tmpword.get(0);
            Pair<String, Integer> tr;
            if (useBilingual && !preBilingual) {
              // startsWith is used instead of charAt(0) so an empty word or an empty
              // translation cannot raise StringIndexOutOfBoundsException.
              if (isExtended && firstWord.startsWith("*")) {
                tr = extended.biltransWithQueue(firstWord.substring(1), false);
                if (tr.first.startsWith("@")) {
                  tr.first = '*' + tr.first.substring(1);
                } else {
                  tr.first = "%" + tr.first;
                }
              } else {
                if (DO_TIMING) timing.log("transfer");
                tr = fstp.biltransWithQueue(firstWord, false);
                if (DO_TIMING) timing.log("transfer/fstp.biltransWithQueue ");
              }
            } else if (preBilingual) {
              // Input looks like ^source/target1/target2/target3$. Only one translation can be
              // carried through transfer, so the first target field is taken (empty if absent).
              String[] splits = firstWord.split("/");
              String tl = splits.length > 1 ? splits[1] : "";
              tr = new Pair<String, Integer>(tl, 0);
            } else {
              tr = new Pair<String, Integer>(firstWord, 0);
            }

            if (tr.first.length() != 0) {
              if (!transferObject.isOutputChunked()) {
                fputwc_unlocked('^', output);
                fputws_unlocked(tr.first, output);
                fputwc_unlocked('$', output);
              } else {
                // Chunked output: wrap the word in an "unknown" or "default" chunk.
                if (tr.first.charAt(0) == '*') {
                  fputws_unlocked("^unknown<unknown>{^", output);
                } else {
                  fputws_unlocked("^default<default>{^", output);
                }
                fputws_unlocked(tr.first, output);
                fputws_unlocked("$}$", output);
              }
            }
            // Rewind to the last recorded position, consume exactly one token, and restart
            // matching from there.
            tmpword.clear();
            input_buffer.setPos(lastPos);
            input_buffer.next();
            lastPos = input_buffer.getPos();
            ms.init(me.getInitial());
          } else if (tmpblank.size() != 0) {
            fputws_unlocked(tmpblank.get(0), output);
            tmpblank.clear();
            lastPos = input_buffer.getPos();
            ms.init(me.getInitial());
          }
        }
      }

      // Trace markers are diagnostics: emit them only in DEBUG mode and on stderr, like the
      // other debug output here, so they never end up on standard output.
      if (DEBUG) System.err.print("arink2");
      if (DO_TIMING) timing.log("transfer");
      int val = ms.classifyFinals();
      if (DO_TIMING) timing.log("transfer/ms.classifyFinals");
      if (val != -1) {
        // a rule match was found. This might not be the longest match, though.
        // so, we store the stuff to invoke applyRule() later
        lastMatchedRule = rule_map[(val - 1)];
        if (DEBUG) System.err.print("arink13" + rule_map[(val - 1)] + "=>" + val);
        lastPos = input_buffer.getPos();

        if (DEBUG) System.err.println("lastrule = " + (val - 1) + " " + lastMatchedRule.getName());
        if (DEBUG)
          System.err.println("tmpword = " + tmpword.size() + "  tmpblank = " + tmpblank.size());
        if (DEBUG) System.err.println("tmpword = " + tmpword + "  tmpblank = " + tmpblank);
        matchedWords.clear();
        matchedBlanks.clear();
        matchedWords.addAll(tmpword);
        matchedBlanks.addAll(tmpblank);
      }

      if (DO_TIMING) timing.log("transfer");
      TransferToken current = readToken(in);
      if (DO_TIMING) timing.log("readToken");

      switch (current.type) {
        case tt_word:
          applyWord(current.content);
          tmpword.add(current.content);
          break;

        case tt_blank:
          ms.step(' ');
          tmpblank.add(current.content);
          break;

        case tt_eof:
          if (tmpword.size() != 0) {
            // Words are still pending: keep the trailing content as a blank and clear the
            // matcher so the next iteration flushes what is pending.
            tmpblank.add(current.content);
            ms.clear();
          } else {
            fputws_unlocked(current.content, output);
            if (DO_TIMING) {
              timing.log("transfer");
              timing.report();
            }
            return;
          }
          break;

        default:
          System.err.println("Error: Unknown input token.");
          return;
      }
      if (DEBUG) System.err.print("arink3");
    }
  }