Beispiel #1
0
  /**
   * Combines two strings into one, dropping the break-iterator segments they share.
   *
   * <p>The result is {@code firstString} without the trailing segments it has in common with
   * {@code secondString}, followed by {@code separator}, followed by {@code secondString} without
   * the leading segments it has in common with {@code firstString}. Segment boundaries are those
   * reported by {@code breaker1} / {@code breaker2}.
   *
   * @param firstString the first string of the pair
   * @param secondString the second string of the pair
   * @return {@code firstString} itself when both strings are equal, otherwise the combined string
   */
  public String abbreviate(String firstString, String secondString) {
    if (firstString.equals(secondString)) {
      return firstString;
    }
    buffer.setLength(0);
    breaker1.setText(firstString);
    breaker2.setText(secondString);

    // Find the common initial section.
    // We use two offset variables, in case we want to have some kind of equivalence later.
    // The value returned by next() is tested directly: once iteration is exhausted next()
    // returns DONE, whereas current() keeps reporting the last valid boundary.
    int start1 = breaker1.first();
    int start2 = breaker2.first();
    while (true) {
      final int current1 = breaker1.next();
      if (current1 == BreakIterator.DONE) {
        break;
      }
      final int current2 = breaker2.next();
      if (current2 == BreakIterator.DONE) {
        break;
      }
      final int length = current1 - start1;
      // Both segments must have the same length, otherwise a segment of the first string could
      // "match" a mere prefix of a longer segment in the second string.
      if (length != current2 - start2
          || !firstString.regionMatches(start1, secondString, start2, length)) {
        break;
      }
      start1 = current1;
      start2 = current2;
    }

    // Find the common final section. Both iterators are repositioned to the end of their text
    // first, so the backward scan compares the actual tails of the two strings.
    int end1 = breaker1.last();
    int end2 = breaker2.last();
    while (true) {
      final int current1 = breaker1.previous();
      if (current1 == BreakIterator.DONE) {
        break;
      }
      final int current2 = breaker2.previous();
      if (current2 == BreakIterator.DONE) {
        break;
      }
      final int length = end1 - current1;
      if (length != end2 - current2
          || !firstString.regionMatches(current1, secondString, current2, length)) {
        break;
      }
      end1 = current1;
      end2 = current2;
    }
    return buffer
        .append(firstString.substring(0, end1))
        .append(separator)
        .append(secondString.substring(start2))
        .toString();
  }
  /**
   * Returns the next piece of text, or {@code null} once the underlying reader has no more lines.
   *
   * <p>A line whose measured extent is below the maximum width is returned unchanged. A wider line
   * is remembered and handed out piece by piece on successive calls, split at the offsets reported
   * by {@code findNextBreakOffset}.
   *
   * @return the next line or line fragment, or {@code null} at end of input
   * @throws IOException if reading from the underlying reader fails
   */
  public String readLine() throws IOException {
    if (fLine == null) {
      final String next = fReader.readLine();
      if (next == null) {
        return null;
      }
      if (fGC.textExtent(next).x < fMaxWidth) {
        // Fits as is; no wrapping state needs to be kept.
        return next;
      }
      fLine = next;
      fLineBreakIterator.setText(next);
      fOffset = 0;
    }

    final int breakOffset = findNextBreakOffset(fOffset);
    if (breakOffset == BreakIterator.DONE) {
      // No further break: hand out the remainder and forget the pending line.
      final String remainder = fLine.substring(fOffset);
      fLine = null;
      return remainder;
    }

    final String fragment = fLine.substring(fOffset, breakOffset);
    fOffset = findWordBegin(breakOffset);
    if (fOffset == fLine.length()) {
      // Nothing left after the break; the next call reads a fresh line.
      fLine = null;
    }
    return fragment;
  }
  /**
   * Sets the text to split and positions the splitter on its first segment.
   *
   * <p>The text is handed to {@code sentenceExtractor}; {@code start} and {@code end} are then set
   * to the first boundary and the boundary that follows it.
   *
   * @param text Text to split.
   */
  public void setText(String text) {
    this.text = text;
    sentenceExtractor.setText(text);

    // Prime the [start, end) window with the first two boundaries.
    start = sentenceExtractor.first();
    end = sentenceExtractor.next();
  }
 /*
  * Accepts only character iterators that are also CharSequences; any other
  * iterator is rejected with an UnsupportedOperationException.
  *
  * @see java.text.BreakIterator#setText(java.text.CharacterIterator)
  */
 public void setText(CharacterIterator newText) {
   if (!(newText instanceof CharSequence)) {
     throw new UnsupportedOperationException("CharacterIterator not supported"); // $NON-NLS-1$
   }
   fText = (CharSequence) newText;
   fIterator.setText(newText);
   first();
 }
Beispiel #5
0
 /**
  * Title-cases the first segment of {@code value}, as delimited by {@code breaker}, and leaves
  * the rest of the string untouched.
  *
  * <p>NOTE(review): the {@code locale} parameter is never read; the casing call uses
  * {@code uLocale} instead. Confirm whether that is intentional.
  *
  * @param locale currently unused
  * @param value the text to adjust; returned as is when empty
  * @return {@code value} with its first segment title-cased
  */
 private String TitleCaseFirst(ULocale locale, String value) {
   if (value.length() == 0) {
     return value;
   }
   breaker.setText(value);
   breaker.first();
   final int firstSegmentEnd = breaker.next();
   final String head = value.substring(0, firstSegmentEnd);
   final String tail = value.substring(firstSegmentEnd);
   return UCharacter.toTitleCase(uLocale, head, breaker) + tail;
 }
Beispiel #6
0
 /**
  * Tells whether a line break is allowed directly after the given character.
  *
  * @param c the character to examine
  * @return {@code true} for whitespace and {@code '-'}, {@code false} for the letters
  *     {@code 'a'} to {@code 'z'}, and otherwise whatever {@code LINE_BREAK} reports for a
  *     boundary between {@code c} and a following {@code 'a'}
  */
 static boolean canBreakAfter(char c) {
   if (Character.isWhitespace(c) || c == '-') {
     return true;
   }
   if (c >= 'a' && c <= 'z') {
     return false;
   }
   // Chinese characters and such end up here.
   // LINE_BREAK is used because INTERNAL_LINE_BREAK might be in use.
   LINE_BREAK.setText(c + "a"); // $NON-NLS-1$
   return LINE_BREAK.isBoundary(1);
 }
 /**
  * Returns the first sentence of the Javadoc text of the given element.
  *
  * @param elem the element to inspect; only {@code IMember} instances yield text
  * @return the text up to the first sentence boundary, or the empty string when the element is
  *     not a member, has no content reader, or yields no text
  * @throws JavaModelException declared for the content lookup
  */
 private String retrieveText(IJavaElement elem) throws JavaModelException {
   if (!(elem instanceof IMember)) {
     return ""; //$NON-NLS-1$
   }
   Reader reader = JavadocContentAccess.getHTMLContentReader((IMember) elem, true, true);
   if (reader == null) {
     return ""; //$NON-NLS-1$
   }
   String str = getString(new HTML2TextReader(reader, null));
   // NOTE(review): getString is not visible here; guard against a null result defensively.
   if (str == null || str.length() == 0) {
     return ""; //$NON-NLS-1$
   }
   BreakIterator breakIterator = BreakIterator.getSentenceInstance();
   breakIterator.setText(str);
   int firstSentenceEnd = breakIterator.next();
   // next() reports DONE (-1) when there is no further boundary; substring(0, -1) would throw.
   if (firstSentenceEnd == BreakIterator.DONE) {
     return str;
   }
   return str.substring(0, firstSentenceEnd);
 }
 /**
  * Sets the text to iterate over from a char sequence.
  *
  * <p>The sequence is stored, wrapped in a {@code SequenceCharacterIterator} for the underlying
  * iterator, and the position is reset via {@code first()}.
  *
  * @param newText the new text
  */
 public void setText(CharSequence newText) {
   fText = newText;
   final SequenceCharacterIterator wrapped = new SequenceCharacterIterator(newText);
   fIterator.setText(wrapped);
   first();
 }
Beispiel #9
0
 /**
  * Runs one break-iterator case: iterates forwards over {@code text} collecting boundaries,
  * compares them with the expected offsets, and if they match walks backwards checking that
  * each boundary is revisited in reverse order. Failures are reported through {@code errln}.
  */
 public void doTest() {
   final BreakIterator iter;
   switch (type) {
     case BreakIterator.KIND_CHARACTER:
       iter = BreakIterator.getCharacterInstance(locale);
       break;
     case BreakIterator.KIND_WORD:
       iter = BreakIterator.getWordInstance(locale);
       break;
     case BreakIterator.KIND_LINE:
       iter = BreakIterator.getLineInstance(locale);
       break;
     case BreakIterator.KIND_SENTENCE:
       iter = BreakIterator.getSentenceInstance(locale);
       break;
     default:
       errln("Unsupported break iterator type " + type);
       return;
   }
   iter.setText(text);

   // Forward pass: collect at most maxOffsetCount boundaries.
   final int[] found = new int[maxOffsetCount];
   int foundCount = 0;
   while (foundCount < maxOffsetCount) {
     final int boundary = iter.next();
     if (boundary == BreakIterator.DONE) {
       break;
     }
     found[foundCount++] = boundary;
   }

   if (!offsetsMatchExpected(found, foundCount)) {
     // Forward pass failed; report it and skip the backward pass.
     final String shown = (text.length() > 16) ? text.substring(0, 16) : text;
     errln(
         "For type "
             + type
             + " "
             + locale
             + ", text \""
             + shown
             + "...\"; expect "
             + expectOffsets.length
             + " offsets:"
             + formatOffsets(expectOffsets, expectOffsets.length)
             + "; found "
             + foundCount
             + " offsets fwd:"
             + formatOffsets(found, foundCount));
     return;
   }

   // Backward pass: starting one before the last collected boundary, each previous() call
   // must return the preceding collected boundary.
   for (int idx = foundCount - 2; idx >= 0; idx--) {
     final int boundary = iter.previous();
     if (boundary != found[idx]) {
       final String shown = (text.length() > 16) ? text.substring(0, 16) : text;
       errln(
           "For type "
               + type
               + " "
               + locale
               + ", text \""
               + shown
               + "...\"; expect "
               + expectOffsets.length
               + " offsets:"
               + formatOffsets(expectOffsets, expectOffsets.length)
               + "; found rev offset "
               + boundary
               + " where expect "
               + found[idx]);
       break;
     }
   }
 }
Beispiel #10
0
  /**
   * Exercises the Thai word and line break iterators: a forward-then-backward round trip over a
   * mixed Latin/Thai string, forward iteration over a string with invalid sequences and spaces,
   * and a few extra calls ({@code preceding}, {@code following}, {@code getRuleStatusVec}) for
   * code coverage. Mismatches are reported through {@code errln}.
   */
  public void TestThaiDictionaryBreakIterator() {
    // Expected boundaries going forwards to the end and then backwards to the start.
    final int[] roundTrip = {1, 2, 5, 10, 11, 12, 11, 10, 5, 2, 1, 0};
    final char[] mixedChars = {
      0x0041, 0x0020, 0x0E01, 0x0E32, 0x0E23, 0x0E17, 0x0E14, 0x0E25, 0x0E2D, 0x0E07, 0x0020, 0x0041
    };
    final String mixedText = new String(mixedChars);

    final ULocale thai = ULocale.createCanonical("th");
    final BreakIterator roundTripIter = BreakIterator.getWordInstance(thai);
    roundTripIter.setText(mixedText);

    int index = 0;
    int position;

    // Test forward iteration
    for (position = roundTripIter.next();
        position != BreakIterator.DONE;
        position = roundTripIter.next()) {
      final int expected = roundTrip[index++];
      if (position != expected) {
        errln(
            "Error with ThaiDictionaryBreakIterator forward iteration test at "
                + position
                + ".\nShould have been "
                + expected);
      }
    }

    // Test backward iteration
    for (position = roundTripIter.previous();
        position != BreakIterator.DONE;
        position = roundTripIter.previous()) {
      final int expected = roundTrip[index++];
      if (position != expected) {
        errln(
            "Error with ThaiDictionaryBreakIterator backward iteration test at "
                + position
                + ".\nShould have been "
                + expected);
      }
    }

    // Test invalid sequence and spaces
    final char[] thaiChars = {
      0x0E01, 0x0E39, 0x0020, 0x0E01, 0x0E34, 0x0E19, 0x0E01, 0x0E38, 0x0E49, 0x0E07, 0x0020,
      0x0E1B, 0x0E34, 0x0E49, 0x0E48, 0x0E07, 0x0E2D, 0x0E22, 0x0E39, 0x0E48, 0x0E43, 0x0E19,
      0x0E16, 0x0E49, 0x0E33
    };
    final String thaiText = new String(thaiChars);
    final int[] expectedWordBreaks = {2, 3, 6, 10, 11, 15, 17, 20, 22};
    final int[] expectedLineBreaks = {3, 6, 11, 15, 17, 20, 22};

    BreakIterator brk = BreakIterator.getWordInstance(new ULocale("th"));
    brk.setText(thaiText);
    index = 0;
    for (position = brk.next();
        position != BreakIterator.DONE && position < thaiChars.length;
        position = brk.next()) {
      final int expected = expectedWordBreaks[index++];
      if (position != expected) {
        errln(
            "Incorrect break given by thai word break iterator. Expected: "
                + expected
                + " Got: "
                + position);
      }
    }

    brk = BreakIterator.getLineInstance(new ULocale("th"));
    brk.setText(thaiText);
    index = 0;
    for (position = brk.next();
        position != BreakIterator.DONE && position < thaiChars.length;
        position = brk.next()) {
      final int expected = expectedLineBreaks[index++];
      if (position != expected) {
        errln(
            "Incorrect break given by thai line break iterator. Expected: "
                + expected
                + " Got: "
                + position);
      }
    }

    // Improve code coverage
    if (brk.preceding(expectedLineBreaks[1]) != expectedLineBreaks[0]) {
      errln("Incorrect preceding position.");
    }
    if (brk.following(expectedLineBreaks[1]) != expectedLineBreaks[2]) {
      errln("Incorrect following position.");
    }
    final int[] statusVec = new int[2];
    final RuleBasedBreakIterator ruleBased = (RuleBasedBreakIterator) brk;
    if (ruleBased.getRuleStatusVec(statusVec) != 1 || statusVec[0] != 0) {
      errln(
          "Error: Since getRuleStatusVec is not supported in DictionaryBasedBreakIterator, it should return 1 and fillInArray[0] == 0.");
    }
  }