/**
 * Feeds one lexical unit into the matcher {@code ms}, framed by {@code '^'} and {@code '$'}.
 *
 * <p>The word is scanned left to right:
 *
 * <ul>
 *   <li>a backslash escapes the following character, which is stepped (lower-cased) like any
 *       other character;
 *   <li>a {@code '/'} ends the scan, so nothing after it is fed to the matcher;
 *   <li>a {@code '<'} that has a closing {@code '>'} is treated as one tag: the whole
 *       {@code <...>} text is looked up with {@code alphabet.cast}; a non-zero symbol is stepped
 *       together with {@code any_tag}, otherwise only {@code any_tag} is stepped. A {@code '<'}
 *       without a closing {@code '>'} is skipped;
 *   <li>every other character is lower-cased and stepped together with {@code any_char}.
 * </ul>
 *
 * @param word_str the word content to feed to the matcher
 */
void applyWord(String word_str) {
  if (DO_TIMING) {
    timing.log("other");
  }
  ms.step('^');

  final int length = word_str.length();
  int pos = 0;
  while (pos < length) {
    final char ch = word_str.charAt(pos);
    if (ch == '/') {
      // Only the part before the first unescaped '/' takes part in matching.
      break;
    }
    if (ch == '<') {
      final int close = word_str.indexOf('>', pos + 1);
      if (close != -1) {
        final int symbol = alphabet.cast(word_str.substring(pos, close + 1));
        if (symbol == 0) {
          ms.step(any_tag);
        } else {
          ms.step(symbol, any_tag);
        }
        // Continue after the closing '>'.
        pos = close;
      }
    } else {
      if (ch == '\\') {
        // Escaped character: step the character that follows the backslash.
        pos++;
      }
      ms.step(Character.toLowerCase(word_str.charAt(pos)), any_char);
    }
    pos++;
  }

  ms.step('$');
  if (DO_TIMING) {
    timing.log("applyWord");
  }
}
/**
 * Invokes a compiled transfer rule on the words and blanks it matched.
 *
 * <p>The reflective argument list is laid out as {@code output, word0, blank0, word1, blank1, ...,
 * wordN}: the writer first, then each word wrapped in a {@link TransferWord}, with
 * {@code blanks.get(i - 1)} placed immediately before word {@code i}.
 *
 * <p>The target-language side of each word is obtained in one of three ways:
 *
 * <ul>
 *   <li>{@code useBilingual} and not {@code preBilingual}: looked up with
 *       {@code fstp.biltransWithQueue};
 *   <li>{@code preBilingual}: taken from the word itself, as the text between the first and second
 *       {@code '/'} (empty if there is none);
 *   <li>otherwise: the word itself.
 * </ul>
 *
 * @param output writer passed to the rule as its first argument
 * @param rule rule method, invoked on {@code transferObject}
 * @param words matched words; an empty list yields a zero-length argument array, so storing
 *     {@code output} fails with {@link ArrayIndexOutOfBoundsException}
 * @param blanks blanks separating the words; needs at least {@code words.size() - 1} elements
 * @throws Exception anything thrown by the dictionary lookup or by the rule invocation; invocation
 *     failures are reported on stderr before being rethrown unchanged
 */
private void applyRule(
    Writer output, Method rule, ArrayList<String> words, ArrayList<String> blanks)
    throws Exception {
  // Tracing is gated by DEBUG like every other diagnostic in this method.
  if (DEBUG) {
    System.err.println("applyRule(" + rule + ", " + words + ", " + blanks);
  }
  if (DO_TIMING) {
    timing.log("other1");
  }

  int limit = words.size(); // number of words
  // number of arguments: out:1, words:limit, blanks:limit-1
  Object[] args = new Object[1 + limit + limit - 1];
  int argn = 0;
  args[argn++] = output;

  for (int i = 0; i != limit; i++) {
    if (i > 0) {
      args[argn++] = blanks.get(i - 1);
    }

    Pair<String, Integer> tr;
    if (useBilingual && !preBilingual) {
      if (DO_TIMING) {
        timing.log("applyRule 1");
      }
      tr = fstp.biltransWithQueue(words.get(i), false);
      if (DO_TIMING) {
        timing.log("applyRule/fstp.biltransWithQueue ");
      }
    } else if (preBilingual) {
      // Input looks like ^esperanto/english1/english2/english3$. Transfer can only use one
      // translation, so only the first one (english1) is kept as the target side. The entry in
      // words is left untouched. See
      // http://freedict.svn.sourceforge.net/viewvc/apertium/trunk/apertium/apertium/transfer.cc?r1=35560&r2=35639
      String[] splits = words.get(i).split("/");
      String tl = splits.length > 1 ? splits[1] : "";
      tr = new Pair<String, Integer>(tl, 0);
    } else {
      // If no bilingual dictionary is used (i.e. for apertium-transfer -n, for
      // apertium-interchunk and for apertium-postchunk), then the sl and tl values will be the
      // same.
      tr = new Pair<String, Integer>(words.get(i), 0);
    }

    args[argn++] = new TransferWord(words.get(i), tr.first, tr.second);
  }

  // here was in C++: processRule(lastrule) to interpret XML, but we use Java bytecode via Java
  // Method Invocation
  if (DEBUG) {
    System.err.println("#args = " + args.length);
    System.err.println("processRule:" + rule.getName() + "(" + Arrays.toString(args));
  }

  try {
    if (DO_TIMING) {
      timing.log("applyRule 1");
    }
    rule.invoke(transferObject, args);
    if (DO_TIMING) {
      timing.log("rule invoke");
    }
  } catch (Exception e) {
    // Report which rule and arguments failed, then let the caller see the original exception.
    System.err.println("Error during invokation of " + rule);
    System.err.println("#args = " + args.length);
    System.err.println("processRule:" + rule.getName() + "(" + Arrays.toString(args));
    throw e;
  }

  if (DEBUG) {
    output.flush();
  }
  if (DO_TIMING) {
    timing.log("applyRule 1");
  }
}
public void transfer(Reader in, Writer output) throws Exception { if (getNullFlush()) { transfer_wrapper_null_flush(in, output); } Method lastMatchedRule = null; ArrayList<String> tmpword = new ArrayList<String>(); ArrayList<String> tmpblank = new ArrayList<String>(); ArrayList<String> matchedWords = new ArrayList<String>(); ArrayList<String> matchedBlanks = new ArrayList<String>(); int lastPos = 0; ms.init(me.getInitial()); if (DO_TIMING) timing = new Timing("Transfer"); while (true) { if (ms.size() == 0) { if (lastMatchedRule != null) { // there was a rule match applyRule(output, lastMatchedRule, matchedWords, matchedBlanks); lastMatchedRule = null; tmpword.clear(); tmpblank.clear(); ms.init(me.getInitial()); input_buffer.setPos(lastPos); } else { if (tmpword.size() != 0) { // no rule match. then default is to just output the stuff word by word Pair<String, Integer> tr; if (useBilingual && preBilingual == false) { if (isExtended && (tmpword.get(0)).charAt(0) == '*') { tr = extended.biltransWithQueue((tmpword.get(0)).substring(1), false); if (tr.first.charAt(0) == '@') { tr.first = '*' + tr.first.substring(1); } else { tr.first = "%" + tr.first; } } else { if (DO_TIMING) timing.log("transfer"); tr = fstp.biltransWithQueue(tmpword.get(0), false); if (DO_TIMING) timing.log("transfer/fstp.biltransWithQueue "); } } else if (preBilingual) { /* input = ^esperanto/english1/english2/english3$ <spectei> we can only have one translation in transfer <spectei> so we want ^esperanto/english1 BREAK */ String[] splits = tmpword.get(0).split("/"); String sl = splits[0]; String tl = splits.length > 1 ? 
splits[1] : ""; // http://freedict.svn.sourceforge.net/viewvc/apertium/trunk/apertium/apertium/transfer.cc?r1=35560&r2=35639 // tmpword.set(0, sl); tr = new Pair<String, Integer>(tl, 0); } else { tr = new Pair<String, Integer>(tmpword.get(0), 0); } if (tr.first.length() != 0) { if (!transferObject.isOutputChunked()) { fputwc_unlocked('^', output); fputws_unlocked(tr.first, output); fputwc_unlocked('$', output); } else { if (tr.first.charAt(0) == '*') { fputws_unlocked("^unknown<unknown>{^", output); } else { fputws_unlocked("^default<default>{^", output); } fputws_unlocked(tr.first, output); fputws_unlocked("$}$", output); } } tmpword.clear(); input_buffer.setPos(lastPos); input_buffer.next(); lastPos = input_buffer.getPos(); ms.init(me.getInitial()); } else if (tmpblank.size() != 0) { fputws_unlocked(tmpblank.get(0), output); tmpblank.clear(); lastPos = input_buffer.getPos(); ms.init(me.getInitial()); } } } System.out.print("arink2"); if (DO_TIMING) timing.log("transfer"); int val = ms.classifyFinals(); if (DO_TIMING) timing.log("transfer/ms.classifyFinals"); if (val != -1) { // a rule match was found. This might not be the longest match, though. 
// so, we store the stuff to invoke applyRule() later lastMatchedRule = rule_map[(val - 1)]; System.out.print("arink13" + rule_map[(val - 1)] + "=>" + val); lastPos = input_buffer.getPos(); if (DEBUG) System.err.println("lastrule = " + (val - 1) + " " + lastMatchedRule.getName()); if (DEBUG) System.err.println("tmpword = " + tmpword.size() + " tmpblank = " + tmpblank.size()); if (DEBUG) System.err.println("tmpword = " + tmpword + " tmpblank = " + tmpblank); matchedWords.clear(); matchedBlanks.clear(); matchedWords.addAll(tmpword); matchedBlanks.addAll(tmpblank); } if (DO_TIMING) timing.log("transfer"); TransferToken current = readToken(in); if (DO_TIMING) timing.log("readToken"); switch (current.type) { case tt_word: applyWord(current.content); tmpword.add(current.content); break; case tt_blank: ms.step(' '); tmpblank.add(current.content); break; case tt_eof: if (tmpword.size() != 0) { tmpblank.add(current.content); ms.clear(); } else { fputws_unlocked(current.content, output); if (DO_TIMING) { timing.log("transfer"); timing.report(); } return; } break; default: System.err.println("Error: Unknown input token."); return; } System.out.print("arink3"); } }