Example #1
0
  /**
   * Joins two strings into one abbreviated range, dropping the leading segments of {@code
   * secondString} that it shares with {@code firstString} and the trailing segments of {@code
   * firstString} that it shares with {@code secondString}.
   *
   * <p>Segments are whatever {@code breaker1} and {@code breaker2} report as boundaries. For
   * example, assuming word-level breakers and a separator of "–", "Jan 5, 2020" and "Jan 10,
   * 2020" would yield "Jan 5–10, 2020".
   *
   * <p>This method resets and reuses the shared {@code buffer}, {@code breaker1} and {@code
   * breaker2}.
   *
   * @param firstString the start of the range
   * @param secondString the end of the range
   * @return {@code firstString} unchanged if both strings are equal; otherwise the retained part
   *     of {@code firstString}, the separator, and the retained part of {@code secondString}
   */
  public String abbreviate(String firstString, String secondString) {
    if (firstString.equals(secondString)) {
      return firstString;
    }
    buffer.setLength(0);
    breaker1.setText(firstString);
    breaker2.setText(secondString);

    // Find the common initial section.
    // We use two offset variables, in case we want to have some kind of equivalence later.
    int start1 = breaker1.first();
    int start2 = breaker2.first();
    while (true) {
      // Test the value returned by next(): current() never reports DONE, it stays on the last
      // boundary, which would make this loop spin forever on zero-length matches.
      final int current1 = breaker1.next();
      final int current2 = breaker2.next();
      if (current1 == BreakIterator.DONE || current2 == BreakIterator.DONE) {
        break;
      }
      final int length = current1 - start1;
      // Segments of different lengths cannot be equal ("ab" must not match the start of "abc").
      if (length != current2 - start2
          || !firstString.regionMatches(start1, secondString, start2, length)) {
        break;
      }
      start1 = current1;
      start2 = current2;
    }

    // Find the common final section, walking both strings backwards from their ends.
    int end1 = breaker1.last();
    int end2 = breaker2.last();
    while (true) {
      final int current1 = breaker1.previous();
      final int current2 = breaker2.previous();
      if (current1 == BreakIterator.DONE || current2 == BreakIterator.DONE) {
        break;
      }
      final int length = end1 - current1;
      if (length != end2 - current2
          || !firstString.regionMatches(current1, secondString, current2, length)) {
        break;
      }
      end1 = current1;
      end2 = current2;
    }

    // Keep the first string up to its shared tail, and the second string after the shared head.
    return buffer
        .append(firstString.substring(0, end1))
        .append(separator)
        .append(secondString.substring(start2))
        .toString();
  }
Example #2
0
 /**
  * Iterates over {@code text} with a break iterator of the configured {@code type} and {@code
  * locale}, first forwards and then backwards, reporting any mismatch through {@code errln}.
  *
  * <p>The forward pass collects at most {@code maxOffsetCount} boundaries and checks them with
  * {@code offsetsMatchExpected}. Only if that succeeds does the backward pass run, verifying that
  * {@code previous()} revisits the collected boundaries in reverse order.
  */
 public void doTest() {
   final BreakIterator iterator;
   if (type == BreakIterator.KIND_CHARACTER) {
     iterator = BreakIterator.getCharacterInstance(locale);
   } else if (type == BreakIterator.KIND_WORD) {
     iterator = BreakIterator.getWordInstance(locale);
   } else if (type == BreakIterator.KIND_LINE) {
     iterator = BreakIterator.getLineInstance(locale);
   } else if (type == BreakIterator.KIND_SENTENCE) {
     iterator = BreakIterator.getSentenceInstance(locale);
   } else {
     errln("Unsupported break iterator type " + type);
     return;
   }
   iterator.setText(text);

   // Forward pass: record boundaries until the iterator is exhausted or the array is full.
   final int[] seen = new int[maxOffsetCount];
   int seenCount = 0;
   while (seenCount < maxOffsetCount) {
     final int boundary = iterator.next();
     if (boundary == BreakIterator.DONE) {
       break;
     }
     seen[seenCount++] = boundary;
   }

   if (!offsetsMatchExpected(seen, seenCount)) {
     final String shown;
     if (text.length() <= 16) {
       shown = text;
     } else {
       shown = text.substring(0, 16);
     }
     final String header =
         "For type "
             + type
             + " "
             + locale
             + ", text \""
             + shown
             + "...\""
             + "; expect "
             + expectOffsets.length
             + " offsets:"
             + formatOffsets(expectOffsets, expectOffsets.length);
     errln(header + "; found " + seenCount + " offsets fwd:" + formatOffsets(seen, seenCount));
     return;
   }

   // Backward pass: the iterator rests on the last recorded boundary, so the first call to
   // previous() should land on the second-to-last one.
   for (int i = seenCount - 2; i >= 0; i--) {
     final int boundary = iterator.previous();
     if (boundary == seen[i]) {
       continue;
     }
     final String shown;
     if (text.length() <= 16) {
       shown = text;
     } else {
       shown = text.substring(0, 16);
     }
     final String header =
         "For type "
             + type
             + " "
             + locale
             + ", text \""
             + shown
             + "...\""
             + "; expect "
             + expectOffsets.length
             + " offsets:"
             + formatOffsets(expectOffsets, expectOffsets.length);
     errln(header + "; found rev offset " + boundary + " where expect " + seen[i]);
     break;
   }
 }
Example #3
0
  /**
   * Exercises the Thai word and line break iterators.
   *
   * <p>First walks a short mixed Latin/Thai string forwards and then backwards with a word
   * iterator, comparing each boundary against one combined expectation table. Then checks word and
   * line boundaries on a longer Thai string containing spaces, and finally calls {@code
   * preceding}, {@code following} and {@code getRuleStatusVec} on the line iterator.
   */
  public void TestThaiDictionaryBreakIterator() {
    // Expected boundaries for the forward walk followed by the backward walk.
    final int[] roundTripExpected = {1, 2, 5, 10, 11, 12, 11, 10, 5, 2, 1, 0};
    final char[] mixedChars = {
      0x0041, 0x0020, 0x0E01, 0x0E32, 0x0E23, 0x0E17, 0x0E14, 0x0E25, 0x0E2D, 0x0E07, 0x0020, 0x0041
    };
    final String mixedText = new String(mixedChars);

    final ULocale thai = ULocale.createCanonical("th");
    final BreakIterator roundTrip = BreakIterator.getWordInstance(thai);
    roundTrip.setText(mixedText);

    int cursor = 0;

    // Forward iteration
    for (int at = roundTrip.next(); at != BreakIterator.DONE; at = roundTrip.next()) {
      final int want = roundTripExpected[cursor++];
      if (at != want) {
        errln(
            "Error with ThaiDictionaryBreakIterator forward iteration test at "
                + at
                + ".\nShould have been "
                + want);
      }
    }

    // Backward iteration continues through the same expectation table.
    for (int at = roundTrip.previous(); at != BreakIterator.DONE; at = roundTrip.previous()) {
      final int want = roundTripExpected[cursor++];
      if (at != want) {
        errln(
            "Error with ThaiDictionaryBreakIterator backward iteration test at "
                + at
                + ".\nShould have been "
                + want);
      }
    }

    // Invalid sequences and spaces
    final char[] thaiChars = {
      0x0E01, 0x0E39, 0x0020, 0x0E01, 0x0E34, 0x0E19, 0x0E01, 0x0E38, 0x0E49, 0x0E07, 0x0020,
      0x0E1B, 0x0E34, 0x0E49, 0x0E48, 0x0E07, 0x0E2D, 0x0E22, 0x0E39, 0x0E48, 0x0E43, 0x0E19,
      0x0E16, 0x0E49, 0x0E33
    };
    final int[] wordExpected = {2, 3, 6, 10, 11, 15, 17, 20, 22};
    final int[] lineExpected = {3, 6, 11, 15, 17, 20, 22};

    final BreakIterator words = BreakIterator.getWordInstance(new ULocale("th"));
    words.setText(new String(thaiChars));
    cursor = 0;
    // Only boundaries strictly inside the text are compared.
    for (int at = words.next();
        at != BreakIterator.DONE && at < thaiChars.length;
        at = words.next()) {
      final int want = wordExpected[cursor++];
      if (at != want) {
        errln(
            "Incorrect break given by thai word break iterator. Expected: "
                + want
                + " Got: "
                + at);
      }
    }

    final BreakIterator lines = BreakIterator.getLineInstance(new ULocale("th"));
    lines.setText(new String(thaiChars));
    cursor = 0;
    for (int at = lines.next();
        at != BreakIterator.DONE && at < thaiChars.length;
        at = lines.next()) {
      final int want = lineExpected[cursor++];
      if (at != want) {
        errln(
            "Incorrect break given by thai line break iterator. Expected: "
                + want
                + " Got: "
                + at);
      }
    }

    // Improve code coverage
    final int before = lines.preceding(lineExpected[1]);
    if (before != lineExpected[0]) {
      errln("Incorrect preceding position.");
    }
    final int after = lines.following(lineExpected[1]);
    if (after != lineExpected[2]) {
      errln("Incorrect following position.");
    }
    final int[] statusOut = new int[2];
    final int statusCount = ((RuleBasedBreakIterator) lines).getRuleStatusVec(statusOut);
    if (statusCount != 1 || statusOut[0] != 0) {
      errln(
          "Error: Since getRuleStatusVec is not supported in DictionaryBasedBreakIterator, it should return 1 and fillInArray[0] == 0.");
    }
  }