public String abbreviate(String firstString, String secondString) { if (firstString.equals(secondString)) { return firstString; } buffer.setLength(0); breaker1.setText(firstString); breaker2.setText(secondString); // find common initial section // we use two offset variables, in case we want to have some kind of equivalence later. int start1 = breaker1.first(); int start2 = breaker2.first(); while (true) { breaker1.next(); final int current1 = breaker1.current(); if (current1 == BreakIterator.DONE) { break; } breaker2.next(); final int current2 = breaker2.current(); if (current2 == BreakIterator.DONE) { break; } if (!firstString.regionMatches(start1, secondString, start2, current1 - start1)) { break; } start1 = current1; start2 = current2; } // find common initial section int end1 = breaker1.last(); while (true) { breaker1.previous(); final int current1 = breaker1.current(); if (current1 == BreakIterator.DONE) { break; } breaker2.previous(); final int current2 = breaker2.current(); if (current2 == BreakIterator.DONE) { break; } if (!firstString.regionMatches(current1, secondString, current2, end1 - current1)) { break; } end1 = current1; } return buffer .append(firstString.substring(0, end1)) .append(separator) .append(secondString.substring(start2)) .toString(); }
public void doTest() { BreakIterator brkIter; switch (type) { case BreakIterator.KIND_CHARACTER: brkIter = BreakIterator.getCharacterInstance(locale); break; case BreakIterator.KIND_WORD: brkIter = BreakIterator.getWordInstance(locale); break; case BreakIterator.KIND_LINE: brkIter = BreakIterator.getLineInstance(locale); break; case BreakIterator.KIND_SENTENCE: brkIter = BreakIterator.getSentenceInstance(locale); break; default: errln("Unsupported break iterator type " + type); return; } brkIter.setText(text); int[] foundOffsets = new int[maxOffsetCount]; int offset, foundOffsetsCount = 0; // do forwards iteration test while (foundOffsetsCount < maxOffsetCount && (offset = brkIter.next()) != BreakIterator.DONE) { foundOffsets[foundOffsetsCount++] = offset; } if (!offsetsMatchExpected(foundOffsets, foundOffsetsCount)) { // log error for forwards test String textToDisplay = (text.length() <= 16) ? text : text.substring(0, 16); errln( "For type " + type + " " + locale + ", text \"" + textToDisplay + "...\"" + "; expect " + expectOffsets.length + " offsets:" + formatOffsets(expectOffsets, expectOffsets.length) + "; found " + foundOffsetsCount + " offsets fwd:" + formatOffsets(foundOffsets, foundOffsetsCount)); } else { // do backwards iteration test --foundOffsetsCount; // back off one from the end offset while (foundOffsetsCount > 0) { offset = brkIter.previous(); if (offset != foundOffsets[--foundOffsetsCount]) { // log error for backwards test String textToDisplay = (text.length() <= 16) ? text : text.substring(0, 16); errln( "For type " + type + " " + locale + ", text \"" + textToDisplay + "...\"" + "; expect " + expectOffsets.length + " offsets:" + formatOffsets(expectOffsets, expectOffsets.length) + "; found rev offset " + offset + " where expect " + foundOffsets[foundOffsetsCount]); break; } } } }
public void TestThaiDictionaryBreakIterator() { int position; int index; int result[] = {1, 2, 5, 10, 11, 12, 11, 10, 5, 2, 1, 0}; char ctext[] = { 0x0041, 0x0020, 0x0E01, 0x0E32, 0x0E23, 0x0E17, 0x0E14, 0x0E25, 0x0E2D, 0x0E07, 0x0020, 0x0041 }; String text = new String(ctext); ULocale locale = ULocale.createCanonical("th"); BreakIterator b = BreakIterator.getWordInstance(locale); b.setText(text); index = 0; // Test forward iteration while ((position = b.next()) != BreakIterator.DONE) { if (position != result[index++]) { errln( "Error with ThaiDictionaryBreakIterator forward iteration test at " + position + ".\nShould have been " + result[index - 1]); } } // Test backward iteration while ((position = b.previous()) != BreakIterator.DONE) { if (position != result[index++]) { errln( "Error with ThaiDictionaryBreakIterator backward iteration test at " + position + ".\nShould have been " + result[index - 1]); } } // Test invalid sequence and spaces char text2[] = { 0x0E01, 0x0E39, 0x0020, 0x0E01, 0x0E34, 0x0E19, 0x0E01, 0x0E38, 0x0E49, 0x0E07, 0x0020, 0x0E1B, 0x0E34, 0x0E49, 0x0E48, 0x0E07, 0x0E2D, 0x0E22, 0x0E39, 0x0E48, 0x0E43, 0x0E19, 0x0E16, 0x0E49, 0x0E33 }; int expectedWordResult[] = {2, 3, 6, 10, 11, 15, 17, 20, 22}; int expectedLineResult[] = {3, 6, 11, 15, 17, 20, 22}; BreakIterator brk = BreakIterator.getWordInstance(new ULocale("th")); brk.setText(new String(text2)); position = index = 0; while ((position = brk.next()) != BreakIterator.DONE && position < text2.length) { if (position != expectedWordResult[index++]) { errln( "Incorrect break given by thai word break iterator. Expected: " + expectedWordResult[index - 1] + " Got: " + position); } } brk = BreakIterator.getLineInstance(new ULocale("th")); brk.setText(new String(text2)); position = index = 0; while ((position = brk.next()) != BreakIterator.DONE && position < text2.length) { if (position != expectedLineResult[index++]) { errln( "Incorrect break given by thai line break iterator. Expected: " + expectedLineResult[index - 1] + " Got: " + position); } } // Improve code coverage if (brk.preceding(expectedLineResult[1]) != expectedLineResult[0]) { errln("Incorrect preceding position."); } if (brk.following(expectedLineResult[1]) != expectedLineResult[2]) { errln("Incorrect following position."); } int[] fillInArray = new int[2]; if (((RuleBasedBreakIterator) brk).getRuleStatusVec(fillInArray) != 1 || fillInArray[0] != 0) { errln( "Error: Since getRuleStatusVec is not supported in DictionaryBasedBreakIterator, it should return 1 and fillInArray[0] == 0."); } }