/**
 * Feeds one lexical unit into the pattern matcher {@code ms}.
 *
 * <p>The matcher is stepped with {@code '^'} first and {@code '$'} last. In
 * between, the word is scanned left to right:
 * <ul>
 *   <li>a backslash escapes the following character, which is lower-cased and
 *       stepped together with {@code any_char};</li>
 *   <li>a slash stops the scan (the rest of the word is ignored);</li>
 *   <li>{@code <...>} is looked up with {@code alphabet.cast}; a non-zero
 *       result is stepped together with {@code any_tag}, a zero result steps
 *       {@code any_tag} alone;</li>
 *   <li>any other character is lower-cased and stepped together with
 *       {@code any_char}.</li>
 * </ul>
 *
 * <p>A backslash that is the very last character of the word has nothing to
 * escape; it is stepped as a literal backslash instead of reading past the
 * end of the string.
 *
 * @param word_str the text of the word to apply to the matcher
 */
void applyWord(String word_str) {
  if (DO_TIMING) timing.log("other");
  ms.step('^');
  for (int i = 0, limit = word_str.length(); i < limit; i++) {
    switch (word_str.charAt(i)) {
      case '\\':
        i++;
        if (i < limit) {
          ms.step(Character.toLowerCase(word_str.charAt(i)), any_char);
        } else {
          // Dangling escape at end of word: charAt(i) would throw, so the
          // backslash itself is matched as an ordinary character.
          ms.step('\\', any_char);
        }
        break;

      case '/':
        // Everything after the first slash is skipped.
        i = limit;
        break;

      case '<':
        // Search for the closing '>'; if none exists no step is taken for
        // this '<' and the scan simply continues with the next character.
        for (int j = i + 1; j != limit; j++) {
          if (word_str.charAt(j) == '>') {
            int symbol = alphabet.cast(word_str.substring(i, j + 1));
            if (symbol != 0) {
              ms.step(symbol, any_tag);
            } else {
              ms.step(any_tag);
            }
            i = j;
            break;
          }
        }
        break;

      default:
        ms.step(Character.toLowerCase(word_str.charAt(i)), any_char);
        break;
    }
  }
  ms.step('$');
  if (DO_TIMING) timing.log("applyWord");
}
public void transfer(Reader in, Writer output) throws Exception { if (getNullFlush()) { transfer_wrapper_null_flush(in, output); } Method lastMatchedRule = null; ArrayList<String> tmpword = new ArrayList<String>(); ArrayList<String> tmpblank = new ArrayList<String>(); ArrayList<String> matchedWords = new ArrayList<String>(); ArrayList<String> matchedBlanks = new ArrayList<String>(); int lastPos = 0; ms.init(me.getInitial()); if (DO_TIMING) timing = new Timing("Transfer"); while (true) { if (ms.size() == 0) { if (lastMatchedRule != null) { // there was a rule match applyRule(output, lastMatchedRule, matchedWords, matchedBlanks); lastMatchedRule = null; tmpword.clear(); tmpblank.clear(); ms.init(me.getInitial()); input_buffer.setPos(lastPos); } else { if (tmpword.size() != 0) { // no rule match. then default is to just output the stuff word by word Pair<String, Integer> tr; if (useBilingual && preBilingual == false) { if (isExtended && (tmpword.get(0)).charAt(0) == '*') { tr = extended.biltransWithQueue((tmpword.get(0)).substring(1), false); if (tr.first.charAt(0) == '@') { tr.first = '*' + tr.first.substring(1); } else { tr.first = "%" + tr.first; } } else { if (DO_TIMING) timing.log("transfer"); tr = fstp.biltransWithQueue(tmpword.get(0), false); if (DO_TIMING) timing.log("transfer/fstp.biltransWithQueue "); } } else if (preBilingual) { /* input = ^esperanto/english1/english2/english3$ <spectei> we can only have one translation in transfer <spectei> so we want ^esperanto/english1 BREAK */ String[] splits = tmpword.get(0).split("/"); String sl = splits[0]; String tl = splits.length > 1 ? 
splits[1] : ""; // http://freedict.svn.sourceforge.net/viewvc/apertium/trunk/apertium/apertium/transfer.cc?r1=35560&r2=35639 // tmpword.set(0, sl); tr = new Pair<String, Integer>(tl, 0); } else { tr = new Pair<String, Integer>(tmpword.get(0), 0); } if (tr.first.length() != 0) { if (!transferObject.isOutputChunked()) { fputwc_unlocked('^', output); fputws_unlocked(tr.first, output); fputwc_unlocked('$', output); } else { if (tr.first.charAt(0) == '*') { fputws_unlocked("^unknown<unknown>{^", output); } else { fputws_unlocked("^default<default>{^", output); } fputws_unlocked(tr.first, output); fputws_unlocked("$}$", output); } } tmpword.clear(); input_buffer.setPos(lastPos); input_buffer.next(); lastPos = input_buffer.getPos(); ms.init(me.getInitial()); } else if (tmpblank.size() != 0) { fputws_unlocked(tmpblank.get(0), output); tmpblank.clear(); lastPos = input_buffer.getPos(); ms.init(me.getInitial()); } } } System.out.print("arink2"); if (DO_TIMING) timing.log("transfer"); int val = ms.classifyFinals(); if (DO_TIMING) timing.log("transfer/ms.classifyFinals"); if (val != -1) { // a rule match was found. This might not be the longest match, though. 
// so, we store the stuff to invoke applyRule() later lastMatchedRule = rule_map[(val - 1)]; System.out.print("arink13" + rule_map[(val - 1)] + "=>" + val); lastPos = input_buffer.getPos(); if (DEBUG) System.err.println("lastrule = " + (val - 1) + " " + lastMatchedRule.getName()); if (DEBUG) System.err.println("tmpword = " + tmpword.size() + " tmpblank = " + tmpblank.size()); if (DEBUG) System.err.println("tmpword = " + tmpword + " tmpblank = " + tmpblank); matchedWords.clear(); matchedBlanks.clear(); matchedWords.addAll(tmpword); matchedBlanks.addAll(tmpblank); } if (DO_TIMING) timing.log("transfer"); TransferToken current = readToken(in); if (DO_TIMING) timing.log("readToken"); switch (current.type) { case tt_word: applyWord(current.content); tmpword.add(current.content); break; case tt_blank: ms.step(' '); tmpblank.add(current.content); break; case tt_eof: if (tmpword.size() != 0) { tmpblank.add(current.content); ms.clear(); } else { fputws_unlocked(current.content, output); if (DO_TIMING) { timing.log("transfer"); timing.report(); } return; } break; default: System.err.println("Error: Unknown input token."); return; } System.out.print("arink3"); } }