public void TestDefaultRuleBasedSentenceIteration() { logln("Testing the RBBI for sentence iteration using default rules"); RuleBasedBreakIterator rbbi = (RuleBasedBreakIterator) BreakIterator.getSentenceInstance(); // fetch the rules used to create the above RuleBasedBreakIterator String defaultRules = rbbi.toString(); RuleBasedBreakIterator sentIterDefault = null; try { sentIterDefault = new RuleBasedBreakIterator(defaultRules); } catch (IllegalArgumentException iae) { errln( "ERROR: failed construction in TestDefaultRuleBasedSentenceIteration()" + iae.toString()); } List<String> sentdata = new ArrayList<String>(); sentdata.add("(This is it.) "); sentdata.add("Testing the sentence iterator. "); sentdata.add("\"This isn\'t it.\" "); sentdata.add("Hi! "); sentdata.add("This is a simple sample sentence. "); sentdata.add("(This is it.) "); sentdata.add("This is a simple sample sentence. "); sentdata.add("\"This isn\'t it.\" "); sentdata.add("Hi! "); sentdata.add("This is a simple sample sentence. "); sentdata.add("It does not have to make any sense as you can see. "); sentdata.add("Nel mezzo del cammin di nostra vita, mi ritrovai in una selva oscura. "); sentdata.add("Che la dritta via aveo smarrita. "); generalIteratorTest(sentIterDefault, sentdata); }
/*
 * Walks the iterator backward from last() using previous(), collecting the text fragment between
 * each pair of adjacent boundaries. Fragments are returned in text order. Reports an error when
 * last() is not the text length, when previous() does not move backward, or when iteration ends
 * before reaching offset 0.
 */
private List<String> _testLastAndPrevious(RuleBasedBreakIterator rbbi, String text) {
  int upper = rbbi.last();
  List<String> fragments = new ArrayList<String>();

  if (upper != text.length()) {
    errln("last() returned " + upper + " instead of " + text.length());
  }

  while (upper != RuleBasedBreakIterator.DONE) {
    int lower = rbbi.previous();
    if (lower == RuleBasedBreakIterator.DONE) {
      if (upper != 0) {
        errln("previous() returned DONE prematurely: offset was " + upper + " instead of 0");
      }
    } else {
      if (lower >= upper) {
        errln(
            "previous() failed to move backward: previous() on position "
                + upper
                + " yielded "
                + lower);
      }
      // Prepend so the list ends up in forward (text) order.
      fragments.add(0, text.substring(lower, upper));
    }
    upper = lower;
  }
  return fragments;
}
/*
 * Walks the iterator forward from first() using next(), collecting the text fragment between
 * each pair of adjacent boundaries. Reports an error when first() is not 0, when next() does not
 * move forward, or when iteration ends before reaching the end of the text.
 */
private List<String> _testFirstAndNext(RuleBasedBreakIterator rbbi, String text) {
  int lower = rbbi.first();
  List<String> fragments = new ArrayList<String>();

  if (lower != 0) {
    errln("first() returned " + lower + " instead of 0");
  }

  while (lower != RuleBasedBreakIterator.DONE) {
    int upper = rbbi.next();
    if (upper == RuleBasedBreakIterator.DONE) {
      if (lower != text.length()) {
        errln(
            "next() returned DONE prematurely: offset was "
                + lower
                + " instead of "
                + text.length());
      }
    } else {
      if (upper <= lower) {
        errln("next() failed to move forward: next() on position " + lower + " yielded " + upper);
      }
      fragments.add(text.substring(lower, upper));
    }
    lower = upper;
  }
  return fragments;
}
/**
 * Re-points the working iterator at the full range of the current text, resets the working
 * offset to zero, and returns the first boundary reported by the underlying rules.
 *
 * @return {@link BreakIterator#DONE} if the rules report it, otherwise the first boundary
 *     adjusted by the working offset.
 */
@Override
public int first() {
  working.setText(this.text.getText(), this.text.getStart(), this.text.getLength());
  rules.setText(working);
  workingOffset = 0;

  final int boundary = rules.first();
  if (boundary == BreakIterator.DONE) {
    return BreakIterator.DONE;
  }
  return workingOffset + boundary;
}
/* * Tests the method public int following(int offset) */ public void TestFollowing() { RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator(".;"); // Tests when "else if (offset < fText.getBeginIndex())" is true rbbi.setText("dummy"); if (rbbi.following(-1) != 0) { errln( "RuleBasedBreakIterator.following(-1) was suppose to return " + "0 when the object has a fText of dummy."); } }
/**
 * Creates a copy of this iterator. The two rule-based iterators are always cloned; each of the
 * three text holders is cloned only when it is non-null.
 *
 * @return The clone.
 */
@Override
public LaoBreakIterator clone() {
  final LaoBreakIterator copy = (LaoBreakIterator) super.clone();
  copy.rules = (RuleBasedBreakIterator) rules.clone();
  copy.verify = (RuleBasedBreakIterator) verify.clone();

  if (text != null) {
    copy.text = text.clone();
  }
  if (working != null) {
    copy.working = working.clone();
  }
  if (verifyText != null) {
    copy.verifyText = verifyText.clone();
  }
  return copy;
}
/* * Tests the method public int last() */ public void TestLast() { RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator(".;"); // Tests when "if (fText == null)" is true rbbi.setText((CharacterIterator) null); if (rbbi.last() != BreakIterator.DONE) { errln( "RuleBasedBreakIterator.last() was suppose to return " + "BreakIterator.DONE when the object has a null fText."); } }
/*
 * Tests the method public Object clone()
 *
 * Sets a null text on the iterator, clones it, and reports an error if the clone exposes a
 * non-null text or if any exception is thrown along the way.
 */
public void TestClone() {
  RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator(".;");
  try {
    rbbi.setText((CharacterIterator) null);
    RuleBasedBreakIterator copy = (RuleBasedBreakIterator) rbbi.clone();
    if (copy.getText() != null) {
      errln(
          "RuleBasedBreakIterator.clone() was suppose to return "
              + "the same object because fText is set to null.");
    }
  } catch (Exception e) {
    // Include the exception so a failure report identifies what was actually thrown.
    errln(
        "RuleBasedBreakIterator.clone() was not suppose to return " + "an exception: " + e);
  }
}
public void TestDefaultRuleBasedWordIteration() { logln("Testing the RBBI for word iteration using default rules"); RuleBasedBreakIterator rbbi = (RuleBasedBreakIterator) BreakIterator.getWordInstance(); // fetch the rules used to create the above RuleBasedBreakIterator String defaultRules = rbbi.toString(); RuleBasedBreakIterator wordIterDefault = null; try { wordIterDefault = new RuleBasedBreakIterator(defaultRules); } catch (IllegalArgumentException iae) { errln( "ERROR: failed construction in TestDefaultRuleBasedWordIteration() -- custom rules" + iae.toString()); } List<String> worddata = new ArrayList<String>(); worddata.add("Write"); worddata.add(" "); worddata.add("wordrules"); worddata.add("."); worddata.add(" "); // worddata.add("alpha-beta-gamma"); worddata.add(" "); worddata.add("\u092f\u0939"); worddata.add(" "); worddata.add("\u0939\u093f" + halfNA + "\u0926\u0940"); worddata.add(" "); worddata.add("\u0939\u0948"); // worddata.add("\u0964"); //danda followed by a space worddata.add(" "); worddata.add("\u0905\u093e\u092a"); worddata.add(" "); worddata.add("\u0938\u093f\u0916\u094b\u0917\u0947"); worddata.add("?"); worddata.add(" "); worddata.add("\r"); worddata.add("It's"); worddata.add(" "); // worddata.add("$30.10"); worddata.add(" "); worddata.add(" "); worddata.add("Badges"); worddata.add("?"); worddata.add(" "); worddata.add("BADGES"); worddata.add("!"); worddata.add("1000,233,456.000"); worddata.add(" "); generalIteratorTest(wordIterDefault, worddata); }
private void doMultipleSelectionTest(RuleBasedBreakIterator iterator, String testText) { logln("Multiple selection test..."); RuleBasedBreakIterator testIterator = (RuleBasedBreakIterator) iterator.clone(); int offset = iterator.first(); int testOffset; int count = 0; do { testOffset = testIterator.first(); testOffset = testIterator.next(count); logln("next(" + count + ") -> " + testOffset); if (offset != testOffset) errln( "next(n) and next() not returning consistent results: for step " + count + ", next(n) returned " + testOffset + " and next() had " + offset); if (offset != RuleBasedBreakIterator.DONE) { count++; offset = iterator.next(); } } while (offset != RuleBasedBreakIterator.DONE); // now do it backwards... offset = iterator.last(); count = 0; do { testOffset = testIterator.last(); testOffset = testIterator.next(count); logln("next(" + count + ") -> " + testOffset); if (offset != testOffset) errln( "next(n) and next() not returning consistent results: for step " + count + ", next(n) returned " + testOffset + " and next() had " + offset); if (offset != RuleBasedBreakIterator.DONE) { count--; offset = iterator.previous(); } } while (offset != RuleBasedBreakIterator.DONE); }
@Override public void lexer_open(String text, LocaleId language, Tokens tokens) { if (Util.isEmpty(text)) { cancel(); return; } this.text = text; if (iterators.containsKey(language)) { iterator = iterators.get(language); } else { iterator = (RuleBasedBreakIterator) BreakIterator.getWordInstance(ULocale.createCanonical(language.toString())); String defaultRules = iterator.toString(); // Collect rules for the language, combine with defaultRules String newRules = defaultRules; for (LexerRule rule : getRules()) { boolean isInternal = Util.isEmpty(rule.getPattern()); if (checkRule(rule, language) && !isInternal) { newRules = formatRule( newRules, rule.getName(), rule.getDescription(), rule.getPattern(), rule.getLexemId()); } } // Recreate iterator for the language(with new rules), store for future reuse iterator = new RuleBasedBreakIterator(newRules); iterators.put(language, iterator); } if (iterator == null) return; iterator.setText(text); // Sets the current iteration position to the beginning of the text start = iterator.first(); end = start; }
@Override public Lexem lexer_next() { end = iterator.next(); if (end == BreakIterator.DONE) return null; if (start >= end) return null; int lexemId = iterator.getRuleStatus(); Lexem lexem = new Lexem(lexemId, text.substring(start, end), start, end); // System.out.println(lexem.toString()); start = end; // Prepare for the next iteration return lexem; }
/* Tests the method public int current() */ public void TestCurrent() { RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator(".;"); // Tests when "(fText != null) ? fText.getIndex() : BreakIterator.DONE" is true and false rbbi.setText((CharacterIterator) null); if (rbbi.current() != BreakIterator.DONE) { errln( "RuleBasedBreakIterator.current() was suppose to return " + "BreakIterator.DONE when the object has a fText of null."); } rbbi.setText("dummy"); if (rbbi.current() != 0) { errln( "RuleBasedBreakIterator.current() was suppose to return " + "0 when the object has a fText of dummy."); } }
public void TestDefaultRuleBasedLineIteration() { logln("Testing the RBBI for line iteration using default rules"); RuleBasedBreakIterator rbbi = (RuleBasedBreakIterator) RuleBasedBreakIterator.getLineInstance(); // fetch the rules used to create the above RuleBasedBreakIterator String defaultRules = rbbi.toString(); RuleBasedBreakIterator lineIterDefault = null; try { lineIterDefault = new RuleBasedBreakIterator(defaultRules); } catch (IllegalArgumentException iae) { errln("ERROR: failed construction in TestDefaultRuleBasedLineIteration()" + iae.toString()); } List<String> linedata = new ArrayList<String>(); linedata.add("Multi-"); linedata.add("Level "); linedata.add("example "); linedata.add("of "); linedata.add("a "); linedata.add("semi-"); linedata.add("idiotic "); linedata.add("non-"); linedata.add("sensical "); linedata.add("(non-"); linedata.add("important) "); linedata.add("sentence. "); linedata.add("Hi "); linedata.add("Hello "); linedata.add("How\n"); linedata.add("are\r"); linedata.add("you" + kLineSeparator); linedata.add("fine.\t"); linedata.add("good. "); linedata.add("Now\r"); linedata.add("is\n"); linedata.add("the\r\n"); linedata.add("time\n"); linedata.add("\r"); linedata.add("for\r"); linedata.add("\r"); linedata.add("all"); generalIteratorTest(lineIterDefault, linedata); }
/* * Tests the method public int preceding(int offset) */ public void TestPreceding() { RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator(".;"); // Tests when "if (fText == null || offset > fText.getEndIndex())" is true rbbi.setText((CharacterIterator) null); if (rbbi.preceding(-1) != BreakIterator.DONE) { errln( "RuleBasedBreakIterator.preceding(-1) was suppose to return " + "0 when the object has a fText of null."); } // Tests when "else if (offset < fText.getBeginIndex())" is true rbbi.setText("dummy"); if (rbbi.preceding(-1) != 0) { errln( "RuleBasedBreakIterator.preceding(-1) was suppose to return " + "0 when the object has a fText of dummy."); } }
/**
 * Attaches this iterator to the given text. Only {@code CharArrayIterator} input is accepted.
 * The text range is passed through {@code ccReorder}, then the working iterator is pointed at
 * the full range, handed to the rules, and the working offset is reset to zero.
 *
 * @throws UnsupportedOperationException if {@code text} is not a {@code CharArrayIterator}
 */
@Override
public void setText(CharacterIterator text) {
  if (text instanceof CharArrayIterator) {
    this.text = (CharArrayIterator) text;
    ccReorder(this.text.getText(), this.text.getStart(), this.text.getLength());
    working.setText(this.text.getText(), this.text.getStart(), this.text.getLength());
    rules.setText(working);
    workingOffset = 0;
    return;
  }
  throw new UnsupportedOperationException("unsupported CharacterIterator");
}
@Override public int next() { int current = current(); int next = rules.next(); if (next == BreakIterator.DONE) return next; else next += workingOffset; char c = working.current(); int following = rules.next(); // lookahead if (following != BreakIterator.DONE) { following += workingOffset; if (rules.getRuleStatus() == 0 && laoSet.contains(c) && verifyPushBack(current, next)) { workingOffset = next - 1; working.setText( text.getText(), text.getStart() + workingOffset, text.getLength() - workingOffset); return next - 1; } rules.previous(); // undo the lookahead } return next; }
/*
 * Calls isBoundary() for every offset from 0 through text.length() and compares the answer with
 * the expected boundary table. Scanning of the table starts at index 1 and advances each time an
 * expected boundary offset is reached.
 */
private void _testIsBoundary(RuleBasedBreakIterator rbbi, String text, int[] boundaries) {
  logln("testIsBoundary():");
  int cursor = 1;
  for (int pos = 0; pos <= text.length(); pos++) {
    boolean actual = rbbi.isBoundary(pos);
    logln("rbbi.isBoundary(" + pos + ") -> " + actual);

    boolean expected = (pos == boundaries[cursor]);
    if (expected) {
      if (!actual) {
        errln("Wrong result from isBoundary() for " + pos + ": expected true, got false");
      }
      ++cursor;
    } else if (actual) {
      errln("Wrong result from isBoundary() for " + pos + ": expected false, got true");
    }
  }
}
/* * Tests the method public int first() */ public void TestFirst() { RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator(".;"); // Tests when "if (fText == null)" is true rbbi.setText((CharacterIterator) null); assertEquals("RuleBasedBreakIterator.first()", BreakIterator.DONE, rbbi.first()); rbbi.setText("abc"); assertEquals("RuleBasedBreakIterator.first()", 0, rbbi.first()); assertEquals("RuleBasedBreakIterator.next()", 1, rbbi.next()); }
/* * Tests the method public boolean equals(Object that) */ public void TestEquals() { RuleBasedBreakIterator rbbi = new RuleBasedBreakIterator(".;"); RuleBasedBreakIterator rbbi1 = new RuleBasedBreakIterator(".;"); // TODO: Tests when "if (fRData != other.fRData && (fRData == null || other.fRData == null))" is // true // Tests when "if (fText == null || other.fText == null)" is true rbbi.setText((CharacterIterator) null); if (rbbi.equals(rbbi1)) { errln( "RuleBasedBreakIterator.equals(Object) was not suppose to return " + "true when the other object has a null fText."); } // Tests when "if (fText == null && other.fText == null)" is true rbbi1.setText((CharacterIterator) null); if (!rbbi.equals(rbbi1)) { errln( "RuleBasedBreakIterator.equals(Object) was not suppose to return " + "false when both objects has a null fText."); } // Tests when an exception occurs if (rbbi.equals(0)) { errln( "RuleBasedBreakIterator.equals(Object) was suppose to return " + "false when comparing to integer 0."); } if (rbbi.equals(0.0)) { errln( "RuleBasedBreakIterator.equals(Object) was suppose to return " + "false when comparing to float 0.0."); } if (rbbi.equals("0")) { errln( "RuleBasedBreakIterator.equals(Object) was suppose to return " + "false when comparing to string '0'."); } }
/*
 * Calls preceding() for every offset from 0 through text.length() and compares each result with
 * the expected boundary table. The table index advances once the offset reaches the next entry.
 */
private void _testPreceding(RuleBasedBreakIterator rbbi, String text, int[] boundaries) {
  logln("testPreceding():");
  int cursor = 0;
  for (int pos = 0; pos <= text.length(); pos++) {
    int actual = rbbi.preceding(pos);
    logln("rbbi.preceding(" + pos + ") -> " + actual);

    int expected = boundaries[cursor];
    if (actual != expected) {
      errln(
          "Wrong result from preceding() for "
              + pos
              + ": expected "
              + expected
              + ", got "
              + actual);
    }

    if (pos == boundaries[cursor + 1]) {
      ++cursor;
    }
  }
}
/**
 * Decides whether the boundary at {@code next} may be pushed back by one character. Two checks
 * run on the verify iterator: the span from {@code current} shortened by one character must
 * break exactly at its end with a non-zero rule status, and the text starting one character
 * before {@code next} must yield a boundary with a non-zero rule status.
 */
private boolean verifyPushBack(int current, int next) {
  final int shortenedLength = next - current - 1;

  // Check 1: the shortened span must be consumed in a single step with non-zero status.
  verifyText.setText(text.getText(), text.getStart() + current, shortenedLength);
  verify.setText(verifyText);
  boolean shortenedOk = verify.next() == shortenedLength && verify.getRuleStatus() != 0;
  if (!shortenedOk) {
    return false;
  }

  // Check 2: the remainder, starting at the pushed-back position, must also break with
  // non-zero status.
  verifyText.setText(text.getText(), text.getStart() + next - 1, text.getLength() - next + 1);
  verify.setText(verifyText);
  if (verify.next() == BreakIterator.DONE) {
    return false;
  }
  return verify.getRuleStatus() != 0;
}
/*
 * Shared driver for the iterator tests. Concatenates the expected fragments into the test text,
 * iterates forward and backward, compares the two traversals with each other and (when they
 * agree) with the expected fragments, then runs the following/preceding/isBoundary checks and
 * the multiple-selection check against a boundary table derived from the fragment lengths.
 */
private void generalIteratorTest(RuleBasedBreakIterator rbbi, List<String> expectedResult) {
  StringBuilder joined = new StringBuilder();
  for (String fragment : expectedResult) {
    joined.append(fragment);
  }
  String text = joined.toString();

  if (rbbi == null) {
    errln("null iterator, test skipped.");
    return;
  }

  rbbi.setText(text);

  List<String> nextResults = _testFirstAndNext(rbbi, text);
  List<String> previousResults = _testLastAndPrevious(rbbi, text);

  logln("comparing forward and backward...");
  int errorsBefore = getErrorCount();
  compareFragmentLists("forward iteration", "backward iteration", nextResults, previousResults);
  // Only compare against the expectation when forward and backward agree.
  if (getErrorCount() == errorsBefore) {
    logln("comparing expected and actual...");
    compareFragmentLists("expected result", "actual result", expectedResult, nextResults);
  }

  // Boundary table layout: DONE, 0, cumulative fragment end offsets..., DONE.
  int[] boundaries = new int[expectedResult.size() + 3];
  boundaries[0] = RuleBasedBreakIterator.DONE;
  int offset = 0;
  int slot = 1;
  boundaries[slot++] = offset;
  for (String fragment : expectedResult) {
    offset += fragment.length();
    boundaries[slot++] = offset;
  }
  boundaries[boundaries.length - 1] = RuleBasedBreakIterator.DONE;

  _testFollowing(rbbi, text, boundaries);
  _testPreceding(rbbi, text, boundaries);
  _testIsBoundary(rbbi, text, boundaries);

  doMultipleSelectionTest(rbbi, text);
}
/**
 * Returns the current position of the underlying rules adjusted by the working offset.
 *
 * @return {@link BreakIterator#DONE} if the rules report it, otherwise the adjusted position.
 */
@Override
public int current() {
  final int position = rules.current();
  if (position == BreakIterator.DONE) {
    return BreakIterator.DONE;
  }
  return workingOffset + position;
}
/** * Creates a new iterator, performing the backtracking verification across the provided <code> * rules</code>. */ public LaoBreakIterator(RuleBasedBreakIterator rules) { this.rules = (RuleBasedBreakIterator) rules.clone(); this.verify = (RuleBasedBreakIterator) rules.clone(); }
// tests default rules based character iteration. // Builds a new iterator from the source rules in the default (prebuilt) iterator. // public void TestDefaultRuleBasedCharacterIteration() { RuleBasedBreakIterator rbbi = (RuleBasedBreakIterator) BreakIterator.getCharacterInstance(); logln("Testing the RBBI for character iteration by using default rules"); // fetch the rules used to create the above RuleBasedBreakIterator String defaultRules = rbbi.toString(); RuleBasedBreakIterator charIterDefault = null; try { charIterDefault = new RuleBasedBreakIterator(defaultRules); } catch (IllegalArgumentException iae) { errln( "ERROR: failed construction in TestDefaultRuleBasedCharacterIteration()" + iae.toString()); } List<String> chardata = new ArrayList<String>(); chardata.add("H"); chardata.add("e"); chardata.add("l"); chardata.add("l"); chardata.add("o"); chardata.add("e\u0301"); // acuteE chardata.add("&"); chardata.add("e\u0303"); // tildaE // devanagiri characters for Hindi support chardata.add("\u0906"); // devanagiri AA // chardata.add("\u093e\u0901"); //devanagiri vowelsign AA+ chandrabindhu chardata.add("\u0916\u0947"); // devanagiri KHA+vowelsign E chardata.add("\u0938\u0941\u0902"); // devanagiri SA+vowelsign U + anusvara(bindu) chardata.add("\u0926"); // devanagiri consonant DA chardata.add("\u0930"); // devanagiri consonant RA // chardata.add("\u0939\u094c"); //devanagiri HA+vowel sign AI chardata.add("\u0964"); // devanagiri danda // end hindi characters chardata.add("A\u0302"); // circumflexA chardata.add("i\u0301"); // acuteBelowI // conjoining jamo... 
chardata.add("\u1109\u1161\u11bc"); chardata.add("\u1112\u1161\u11bc"); chardata.add("\n"); chardata.add("\r\n"); // keep CRLF sequences together chardata.add("S\u0300"); // graveS chardata.add("i\u0301"); // acuteBelowI chardata.add("!"); // What follows is a string of Korean characters (I found it in the Yellow Pages // ad for the Korean Presbyterian Church of San Francisco, and I hope I transcribed // it correctly), first as precomposed syllables, and then as conjoining jamo. // Both sequences should be semantically identical and break the same way. // precomposed syllables... chardata.add("\uc0c1"); chardata.add("\ud56d"); chardata.add(" "); chardata.add("\ud55c"); chardata.add("\uc778"); chardata.add(" "); chardata.add("\uc5f0"); chardata.add("\ud569"); chardata.add(" "); chardata.add("\uc7a5"); chardata.add("\ub85c"); chardata.add("\uad50"); chardata.add("\ud68c"); chardata.add(" "); // conjoining jamo... chardata.add("\u1109\u1161\u11bc"); chardata.add("\u1112\u1161\u11bc"); chardata.add(" "); chardata.add("\u1112\u1161\u11ab"); chardata.add("\u110b\u1175\u11ab"); chardata.add(" "); chardata.add("\u110b\u1167\u11ab"); chardata.add("\u1112\u1161\u11b8"); chardata.add(" "); chardata.add("\u110c\u1161\u11bc"); chardata.add("\u1105\u1169"); chardata.add("\u1100\u116d"); chardata.add("\u1112\u116c"); generalIteratorTest(charIterDefault, chardata); }