/**
 * Returns the next token of the transfer input.
 *
 * <p>If {@code input_buffer} reports that it is not empty, the token it hands back from
 * {@code next()} is returned and nothing is read from {@code in}. Otherwise characters are
 * consumed from {@code in} and accumulated until one of the following happens:
 * <ul>
 *   <li>{@code '$'} is read: the accumulated text becomes a {@code tt_word} token;</li>
 *   <li>{@code '^'} is read: the accumulated text becomes a {@code tt_blank} token;</li>
 *   <li>end of stream (or a NUL character while {@code internal_null_flush} is set) is read:
 *       the accumulated text becomes a {@code tt_eof} token.</li>
 * </ul>
 * A backslash and the character following it are copied verbatim, and a {@code [...]} section
 * is copied verbatim up to and including its closing {@code ']'}, so delimiters inside either
 * are not interpreted. Every newly built token is stored through {@code input_buffer.add}
 * and the value returned by that call is what this method returns.
 *
 * <p>Truncated input (end of stream right after a backslash, or inside an unterminated
 * {@code [...]} section) ends the token as {@code tt_eof} with whatever was read so far,
 * instead of appending the {@code -1} sentinel as a character or looping forever.
 *
 * @param in the character stream to read from
 * @return the next token
 * @throws IOException if reading from {@code in} fails
 */
TransferToken readToken(Reader in) throws IOException {
  if (!input_buffer.isEmpty()) {
    return input_buffer.next();
  }

  StringBuilder content = new StringBuilder();
  while (true) {
    int val = in.read();
    if (val == -1 || (val == 0 && internal_null_flush)) {
      return input_buffer.add(
          new TransferToken(content.toString(), TransferToken.TransferTokenType.tt_eof));
    }

    if (val == '\\') {
      // Escaped character: keep the backslash and the character it protects.
      content.append('\\');
      int escaped = in.read();
      if (escaped == -1) {
        // Dangling backslash at end of stream: stop here rather than append (char) -1.
        return input_buffer.add(
            new TransferToken(content.toString(), TransferToken.TransferTokenType.tt_eof));
      }
      content.append((char) escaped);
    } else if (val == '[') {
      // Bracketed section: copy everything through the matching ']' untouched.
      content.append('[');
      while (true) {
        int val2 = in.read();
        if (val2 == -1) {
          // Unterminated '[': read() keeps returning -1, so without this check the loop
          // would never end.
          return input_buffer.add(
              new TransferToken(content.toString(), TransferToken.TransferTokenType.tt_eof));
        }
        if (val2 == '\\') {
          content.append('\\');
          int escaped = in.read();
          if (escaped == -1) {
            return input_buffer.add(
                new TransferToken(content.toString(), TransferToken.TransferTokenType.tt_eof));
          }
          content.append((char) escaped);
        } else if (val2 == ']') {
          content.append(']');
          break;
        } else {
          content.append((char) val2);
        }
      }
    } else if (val == '$') {
      return input_buffer.add(
          new TransferToken(content.toString(), TransferToken.TransferTokenType.tt_word));
    } else if (val == '^') {
      return input_buffer.add(
          new TransferToken(content.toString(), TransferToken.TransferTokenType.tt_blank));
    } else {
      content.append((char) val);
    }
  }
}
public void transfer(Reader in, Writer output) throws Exception { if (getNullFlush()) { transfer_wrapper_null_flush(in, output); } Method lastMatchedRule = null; ArrayList<String> tmpword = new ArrayList<String>(); ArrayList<String> tmpblank = new ArrayList<String>(); ArrayList<String> matchedWords = new ArrayList<String>(); ArrayList<String> matchedBlanks = new ArrayList<String>(); int lastPos = 0; ms.init(me.getInitial()); if (DO_TIMING) timing = new Timing("Transfer"); while (true) { if (ms.size() == 0) { if (lastMatchedRule != null) { // there was a rule match applyRule(output, lastMatchedRule, matchedWords, matchedBlanks); lastMatchedRule = null; tmpword.clear(); tmpblank.clear(); ms.init(me.getInitial()); input_buffer.setPos(lastPos); } else { if (tmpword.size() != 0) { // no rule match. then default is to just output the stuff word by word Pair<String, Integer> tr; if (useBilingual && preBilingual == false) { if (isExtended && (tmpword.get(0)).charAt(0) == '*') { tr = extended.biltransWithQueue((tmpword.get(0)).substring(1), false); if (tr.first.charAt(0) == '@') { tr.first = '*' + tr.first.substring(1); } else { tr.first = "%" + tr.first; } } else { if (DO_TIMING) timing.log("transfer"); tr = fstp.biltransWithQueue(tmpword.get(0), false); if (DO_TIMING) timing.log("transfer/fstp.biltransWithQueue "); } } else if (preBilingual) { /* input = ^esperanto/english1/english2/english3$ <spectei> we can only have one translation in transfer <spectei> so we want ^esperanto/english1 BREAK */ String[] splits = tmpword.get(0).split("/"); String sl = splits[0]; String tl = splits.length > 1 ? 
splits[1] : ""; // http://freedict.svn.sourceforge.net/viewvc/apertium/trunk/apertium/apertium/transfer.cc?r1=35560&r2=35639 // tmpword.set(0, sl); tr = new Pair<String, Integer>(tl, 0); } else { tr = new Pair<String, Integer>(tmpword.get(0), 0); } if (tr.first.length() != 0) { if (!transferObject.isOutputChunked()) { fputwc_unlocked('^', output); fputws_unlocked(tr.first, output); fputwc_unlocked('$', output); } else { if (tr.first.charAt(0) == '*') { fputws_unlocked("^unknown<unknown>{^", output); } else { fputws_unlocked("^default<default>{^", output); } fputws_unlocked(tr.first, output); fputws_unlocked("$}$", output); } } tmpword.clear(); input_buffer.setPos(lastPos); input_buffer.next(); lastPos = input_buffer.getPos(); ms.init(me.getInitial()); } else if (tmpblank.size() != 0) { fputws_unlocked(tmpblank.get(0), output); tmpblank.clear(); lastPos = input_buffer.getPos(); ms.init(me.getInitial()); } } } System.out.print("arink2"); if (DO_TIMING) timing.log("transfer"); int val = ms.classifyFinals(); if (DO_TIMING) timing.log("transfer/ms.classifyFinals"); if (val != -1) { // a rule match was found. This might not be the longest match, though. 
// so, we store the stuff to invoke applyRule() later lastMatchedRule = rule_map[(val - 1)]; System.out.print("arink13" + rule_map[(val - 1)] + "=>" + val); lastPos = input_buffer.getPos(); if (DEBUG) System.err.println("lastrule = " + (val - 1) + " " + lastMatchedRule.getName()); if (DEBUG) System.err.println("tmpword = " + tmpword.size() + " tmpblank = " + tmpblank.size()); if (DEBUG) System.err.println("tmpword = " + tmpword + " tmpblank = " + tmpblank); matchedWords.clear(); matchedBlanks.clear(); matchedWords.addAll(tmpword); matchedBlanks.addAll(tmpblank); } if (DO_TIMING) timing.log("transfer"); TransferToken current = readToken(in); if (DO_TIMING) timing.log("readToken"); switch (current.type) { case tt_word: applyWord(current.content); tmpword.add(current.content); break; case tt_blank: ms.step(' '); tmpblank.add(current.content); break; case tt_eof: if (tmpword.size() != 0) { tmpblank.add(current.content); ms.clear(); } else { fputws_unlocked(current.content, output); if (DO_TIMING) { timing.log("transfer"); timing.report(); } return; } break; default: System.err.println("Error: Unknown input token."); return; } System.out.print("arink3"); } }