Beispiel #1
0
  /**
   * Converts a line of text into an array of lower case words using a word-instance {@link
   * BreakIterator}.
   *
   * <p>Every segment between two word boundaries is trimmed, stripped of the characters {@code +},
   * {@code /}, backslash, {@code #}, {@code *}, {@code )}, {@code (} and {@code &}, and converted
   * to lower case. Segments that are empty after this clean-up (whitespace, lone punctuation from
   * the list above) are dropped.
   *
   * <p>This method is under the Jive Open Source Software License and was written by Mark Imbriaco.
   *
   * @param text a String of text to convert into an array of words
   * @return the lower-cased words of {@code text} in their original order; an empty array if
   *     {@code text} is {@code null} or empty
   */
  public static String[] toLowerCaseWordArray(String text) {
    if (text == null || text.length() == 0) {
      return new String[0];
    }

    // Characters that are removed from every segment before it is stored.
    final String ignoredCharacters = "+/\\#*)(&";

    List<String> wordList = new ArrayList<String>();
    BreakIterator boundary = BreakIterator.getWordInstance();
    boundary.setText(text);
    int start = 0;

    for (int end = boundary.next(); end != BreakIterator.DONE; start = end, end = boundary.next()) {
      String segment = text.substring(start, end).trim();
      StringBuilder word = new StringBuilder(segment.length());
      for (int i = 0; i < segment.length(); i++) {
        char c = segment.charAt(i);
        if (ignoredCharacters.indexOf(c) < 0) {
          word.append(c);
        }
      }
      if (word.length() > 0) {
        // The method name and contract promise lower case; use the default locale, the same
        // locale the BreakIterator above was created for.
        wordList.add(word.toString().toLowerCase());
      }
    }
    return wordList.toArray(new String[wordList.size()]);
  }
 /**
  * Replaces the current word token with {@code newWord} and advances to the word that follows
  * the replacement.
  *
  * <p>Does nothing when there is no current word ({@code currentWordPos == -1}). When the
  * document is a styled document, the character attributes found at the start of the old word
  * are applied to the inserted text; otherwise the text is inserted without attributes.
  *
  * @param newWord the replacement text for the current word
  * @throws RuntimeException if the document rejects the edit; the original {@code
  *     BadLocationException} is attached as the cause
  */
 public void replaceWord(String newWord) {
   if (currentWordPos != -1) {
     try {
       // Howard's version for Ekit: keep the formatting of the word being replaced. Only styled
       // documents expose character elements, so plain documents fall back to no attributes
       // instead of failing on a cast.
       AttributeSet attribs = null;
       if (document instanceof javax.swing.text.StyledDocument) {
         Element element =
             ((javax.swing.text.StyledDocument) document).getCharacterElement(currentWordPos);
         attribs = element.getAttributes();
       }
       document.remove(currentWordPos, currentWordEnd - currentWordPos);
       document.insertString(currentWordPos, newWord, attribs);
       // Need to reset the segment
       document.getText(0, document.getLength(), text);
     } catch (BadLocationException ex) {
       // Keep the original exception as the cause so the failing offset is not lost.
       throw new RuntimeException(ex.getMessage(), ex);
     }
     // Position after the newly replaced word(s)
     first = true;
     currentWordPos = getNextWordStart(text, currentWordPos + newWord.length());
     if (currentWordPos != -1) {
       currentWordEnd = getNextWordEnd(text, currentWordPos);
       nextWordPos = getNextWordStart(text, currentWordEnd);
       sentanceIterator.setText(text);
       sentanceIterator.following(currentWordPos);
     } else {
       moreTokens = false;
     }
   }
 }
Beispiel #3
0
  /**
   * Splits the given filter text into words with a word-instance {@link BreakIterator}.
   *
   * <p>A segment is kept only when its first character is a letter or digit, so whitespace and
   * punctuation segments are skipped. A BreakIterator is used instead of {@code String.split} or
   * regular expressions so that the code compiles against JCL Foundation (bug 80053) and splits in
   * an NL-sensitive way (bug 90579).
   *
   * @param text the text to break up
   * @return an array of words, in the order they appear in {@code text}
   */
  private String[] getWords(String text) {
    BreakIterator wordBreaks = BreakIterator.getWordInstance();
    wordBreaks.setText(text);

    List<String> found = new ArrayList<String>();
    int from = wordBreaks.first();
    while (from != java.text.BreakIterator.DONE && from < text.length()) {
      int to = wordBreaks.following(from);
      if (to == java.text.BreakIterator.DONE) {
        // No further boundary: the segment runs to the end of the text.
        to = text.length();
      }
      if (Character.isLetterOrDigit(text.charAt(from))) {
        found.add(text.substring(from, to));
      }
      from = to;
    }
    return found.toArray(new String[found.size()]);
  }
 /**
  * Converts a pixel position into the closest caret position, writing the result into {@code o}.
  *
  * <p>Offsets on any line go from (start, clickAfter = true) to (end, clickAfter = false),
  * excluding (start, false) and (end, true).
  *
  * @param p the point to convert; {@code p.y} selects the line via {@code yInset} and {@code
  *     lineHeight}, {@code p.x} selects the column via {@code xInset} and measured string widths
  * @param o the selection whose {@code caret} and {@code clickAfter} fields are overwritten
  * @return the same {@code o} instance that was passed in
  */
 public Selection point2Offset(Point p, Selection o) {
   // Above the text area: snap to the very beginning.
   if (p.y < yInset) {
     o.caret = 0;
     o.clickAfter = true;
     return o;
   }
   int line = (p.y - yInset) / lineHeight;
   // Below the last line: snap to the end of the contents.
   if (line >= lineCount) {
     o.caret = contents.length();
     o.clickAfter = false;
     return o;
   }
   int target = p.x - xInset;
   // At or left of the line's left edge: snap to the start of the line.
   if (target <= 0) {
     o.caret = lineStarts[line];
     o.clickAfter = true;
     return o;
   }
   // Binary-search bounds: offsets into contents and the pixel width measured up to each.
   int lowGuess = lineStarts[line];
   int lowWidth = 0;
   int highGuess = lineStarts[line + 1];
   int highWidth = fm.stringWidth(contents.substring(lineStarts[line], highGuess));
   // At or right of the full line width: snap to the end of the line.
   if (target >= highWidth) {
     o.caret = lineStarts[line + 1];
     o.clickAfter = false;
     return o;
   }
   // Narrow [lowGuess, highGuess] until the two offsets are adjacent or an exact width is hit.
   while (lowGuess < highGuess - 1) {
     int guess = (lowGuess + highGuess) / 2;
     int width = fm.stringWidth(contents.substring(lineStarts[line], guess));
     if (width <= target) {
       lowGuess = guess;
       lowWidth = width;
       if (width == target) break;
     } else {
       highGuess = guess;
       highWidth = width;
     }
   }
   // at end, either lowWidth < target < width(low+1), or lowWidth = target
   // Move both ends onto boundaries reported by charBreaker; previous() is relative to the
   // position left by following(), so the order of these two calls matters.
   // NOTE(review): presumably charBreaker has been given the same text as contents — confirm.
   int highBound = charBreaker.following(lowGuess);
   int lowBound = charBreaker.previous();
   // we are now at character boundaries
   // Re-measure only when a bound moved away from the offset whose width is already known.
   if (lowBound != lowGuess)
     lowWidth = fm.stringWidth(contents.substring(lineStarts[line], lowBound));
   if (highBound != highGuess)
     highWidth = fm.stringWidth(contents.substring(lineStarts[line], highBound));
   // we now have the right widths
   // Pick whichever boundary is nearer to the target; ties go to the high boundary.
   if (target - lowWidth < highWidth - target) {
     o.caret = lowBound;
     o.clickAfter = true;
   } else {
     o.caret = highBound;
     o.clickAfter = false;
   }
   // we now have the closest!
   return o;
 }
 /**
  * Creates a copy of this iterator; all status, including the current position, is kept.
  *
  * <p>The wrapped ICU iterator is cloned as well, so the copy does not share it with this
  * instance.
  *
  * @return copy of this iterator
  * @throws InternalError if cloning is not supported; the {@code CloneNotSupportedException} is
  *     attached as the cause
  */
 @Override
 public Object clone() {
   try {
     BreakIterator cloned = (BreakIterator) super.clone();
     cloned.wrapped = (com.ibm.icu4jni.text.BreakIterator) wrapped.clone();
     return cloned;
   } catch (CloneNotSupportedException e) {
     // initCause keeps the original stack trace and works on class libraries that lack the
     // InternalError(String, Throwable) constructor.
     InternalError error = new InternalError(e.getMessage());
     error.initCause(e);
     throw error;
   }
 }
 /**
  * Replaces the displayed text.
  *
  * <p>Stores the text, hands it to the character, word and line break iterators, marks the line
  * layout as stale, notifies the text listener (if one is set) and requests a repaint.
  *
  * @param text the new contents
  */
 public void setText2(String text) {
   contents = text;

   // All three iterators must look at the new contents.
   charBreaker.setText(text);
   wordBreaker.setText(text);
   lineBreaker.setText(text);
   redoLines = true;

   if (textListener != null) {
     TextEvent changed = new TextEvent(this, TextEvent.TEXT_VALUE_CHANGED);
     textListener.textValueChanged(changed);
   }
   repaint(16);
 }
Beispiel #7
0
 /**
  * Returns a copy of this iterator. The copy carries the same status information, including the
  * current position, and gets its own clone of the wrapped native iterator.
  *
  * @return a copy of this iterator.
  */
 @Override
 public Object clone() {
   try {
     BreakIterator copy = (BreakIterator) super.clone();
     copy.wrapped = (NativeBreakIterator) wrapped.clone();
     return copy;
   } catch (CloneNotSupportedException unsupported) {
     throw new AssertionError(unsupported); // android-changed
   }
 }
 /**
  * LIU: Finds the longest prefix of {@code line} that fits into {@code width}, where the prefix
  * must end on a boundary reported by {@code breaker}. If even the smallest subunit is too long,
  * the first boundary (the value of {@code breaker.first()}) is returned.
  *
  * @param fMtr metrics to use
  * @param line the string to be fitted into width
  * @param width line.substring(0, result) must be <= width
  * @param breaker the BreakIterator that will be used to find subunits; its text is replaced by
  *     {@code line}
  * @return maximum characters, at boundaries returned by breaker, that fit into width, or zero on
  *     failure
  */
 private int findFittingBreak(FontMetrics fMtr, String line, int width, BreakIterator breaker) {
   breaker.setText(line);
   int fitting = breaker.first();
   for (int candidate = breaker.next();
       candidate != BreakIterator.DONE;
       candidate = breaker.next()) {
     if (visibleWidth(fMtr, line.substring(0, candidate)) <= width) {
       fitting = candidate;
     } else {
       // The first prefix that is too wide ends the search.
       break;
     }
   }
   return fitting;
 }
  /**
   * Splits the input into sentences and words using en-US break iterators and prints the result.
   *
   * <p>The sentence boundaries are computed first, then every word is mapped to the sentences it
   * occurs in, and both results are handed to {@code printProcessedData}.
   *
   * @param inputText the text to analyse
   */
  static void parseWordDataQ1(String inputText) {
    final Locale usEnglish = new Locale("en", "US");

    ArrayList<Integer> sentenceEnds =
        findSentenceBoundaries(inputText, BreakIterator.getSentenceInstance(usEnglish));
    HashMap<String, WordTuple> wordsBySentence =
        findWordsInSentences(inputText, BreakIterator.getWordInstance(usEnglish), sentenceEnds);

    printProcessedData(sentenceEnds, wordsBySentence);
  }
  /**
   * Converts a string to title case, one word-iterator segment at a time.
   *
   * <p>The text is split with a default-locale word {@link BreakIterator}; every segment
   * (including whitespace and punctuation segments) is passed through {@code
   * StringProcessing.wordToTitleCase} and the results are concatenated in order.
   *
   * @param toConvert the text to convert; must not be {@code null}
   * @return the concatenation of all converted segments; empty if {@code toConvert} is empty
   */
  public static String convertStringToTitleCase(String toConvert) {
    BreakIterator wordBreaker = BreakIterator.getWordInstance();
    wordBreaker.setText(toConvert);

    // A StringBuilder avoids re-copying the whole result for every segment.
    StringBuilder converted = new StringBuilder(toConvert.length());
    int start = wordBreaker.first();
    for (int end = wordBreaker.next();
        end != BreakIterator.DONE;
        start = end, end = wordBreaker.next()) {
      converted.append(StringProcessing.wordToTitleCase(toConvert.substring(start, end)));
    }

    return converted.toString();
  }
Beispiel #11
0
 /**
  * Demonstrates word iteration with {@link BreakIterator} on a fixed sample sentence.
  *
  * <p>Prints one line to standard output. Each segment is written as {@code begin-end [text];}
  * and the line ends with the last boundary followed by a dash, because the start offset is
  * printed before the iterator reports that no further boundary exists.
  */
 public static void javaBreakIterator() {
   final String text = "Let's pause, and then reflect.";
   BreakIterator wordIterator = BreakIterator.getWordInstance();
   wordIterator.setText(text);

   StringBuilder line = new StringBuilder();
   int begin = wordIterator.first();
   while (begin != BreakIterator.DONE) {
     line.append(begin).append("-");
     int end = wordIterator.next();
     if (end == BreakIterator.DONE) {
       break;
     }
     line.append(end).append(" [").append(text.substring(begin, end)).append("];");
     begin = end;
   }
   System.out.println(line.toString());
 }
Beispiel #12
0
 /**
  * Extracts a short description from a longer one.
  *
  * <p>If the description contains a period, the first sentence (as determined by a US-locale
  * sentence {@link BreakIterator}) is trimmed and passed through {@code removeSpaceBetweenLine}.
  * Otherwise the first line, split on {@code NEW_LINE}, is returned trimmed.
  *
  * @param description the full description, may be {@code null}
  * @return the short description, or {@code null} if {@code description} is {@code null}
  */
 static String extractShortDescription(String description) {
   if (description == null) {
     return null;
   }
   if (description.indexOf(".") == -1) {
     // No sentence terminator: fall back to the first line.
     String[] lines = description.split(NEW_LINE);
     return lines[0].trim();
   }
   BreakIterator sentences = BreakIterator.getSentenceInstance(Locale.US);
   sentences.setText(description);
   int from = sentences.first();
   int to = sentences.next();
   String firstSentence = description.substring(from, to).trim();
   return removeSpaceBetweenLine(firstSentence);
 }
  /**
   * Collects every boundary that the iterator reports after the first one.
   *
   * <p>With a sentence iterator these are the end offsets of the sentences in {@code target}.
   *
   * @param target the text to scan; it becomes the iterator's text
   * @param iterator the break iterator to use
   * @return the boundaries in ascending order, excluding the one returned by {@code first()};
   *     empty when the text has no further boundary
   */
  static ArrayList<Integer> findSentenceBoundaries(String target, BreakIterator iterator) {
    iterator.setText(target);

    ArrayList<Integer> ends = new ArrayList<Integer>();
    int boundary = iterator.first();
    // Advance first, then record: the starting boundary itself is never stored.
    while (boundary != BreakIterator.DONE && (boundary = iterator.next()) != BreakIterator.DONE) {
      ends.add(boundary);
    }
    return ends;
  }
 /**
  * Constructs a {@code LineBreakMeasurer} by delegating to the three-argument constructor,
  * supplying the iterator returned by {@link BreakIterator#getLineInstance()} as the break
  * iterator.
  *
  * <p>The {@code /* ... *}{@code /} markers in the code below are decompiler line-number
  * artifacts, not documentation.
  *
  * @param paramAttributedCharacterIterator passed through as the first constructor argument
  * @param paramFontRenderContext passed through as the last constructor argument
  */
 /*     */ public LineBreakMeasurer(
     AttributedCharacterIterator paramAttributedCharacterIterator,
     FontRenderContext paramFontRenderContext)
       /*     */ {
   /* 277 */ this(
       paramAttributedCharacterIterator, BreakIterator.getLineInstance(), paramFontRenderContext);
   /*     */ }
Beispiel #15
0
  /**
   * Asserts that a new language code and its obsolete predecessor behave as the same locale.
   *
   * @param newCode the current language code
   * @param oldCode the obsolete language code that {@code getLanguage()} is expected to report
   * @param displayName the expected display language for every combination of the two locales
   */
  private static void assertObsolete(String newCode, String oldCode, String displayName) {
    final Locale newLocale = new Locale(newCode);
    final Locale oldLocale = new Locale(oldCode);

    // Either code should get you the same locale.
    assertEquals(newLocale, oldLocale);

    // No matter what code you used to create the locale, you should get the old code back.
    assertEquals(oldCode, newLocale.getLanguage());
    assertEquals(oldCode, oldLocale.getLanguage());

    // Check we get the right display name, for each locale rendered in each locale.
    final Locale[] both = {newLocale, oldLocale};
    for (Locale inLocale : both) {
      for (Locale subject : both) {
        assertEquals(displayName, subject.getDisplayLanguage(inLocale));
      }
    }

    // Check that none of the 'getAvailableLocales' methods are accidentally returning two
    // equal locales (because to ICU they're different, but we mangle one into the other).
    assertOnce(newLocale, BreakIterator.getAvailableLocales());
    assertOnce(newLocale, Calendar.getAvailableLocales());
    assertOnce(newLocale, Collator.getAvailableLocales());
    assertOnce(newLocale, DateFormat.getAvailableLocales());
    assertOnce(newLocale, DateFormatSymbols.getAvailableLocales());
    assertOnce(newLocale, NumberFormat.getAvailableLocales());
    assertOnce(newLocale, Locale.getAvailableLocales());
  }
Beispiel #16
0
  /**
   * Points the iterator at a window of the given text.
   *
   * <p>The window is {@code [start, end]} widened by {@code WINDOW_WIDTH} on both sides and
   * clamped to the bounds of {@code charSequence}. The window's start is remembered in {@code
   * mOffsetShift}, and the window text is handed to {@code mIterator}.
   *
   * @param charSequence the full text
   * @param start start of the range of interest, in {@code charSequence} coordinates
   * @param end end of the range of interest, in {@code charSequence} coordinates
   */
  public void setCharSequence(CharSequence charSequence, int start, int end) {
    mOffsetShift = Math.max(0, start - WINDOW_WIDTH);
    final int windowEnd = Math.min(charSequence.length(), end + WINDOW_WIDTH);

    // Copy only the window; converting the whole sequence to a String first would copy the
    // entire text on every call.
    mString = charSequence.subSequence(mOffsetShift, windowEnd).toString();
    mIterator.setText(mString);
  }
  /**
   * Maps every word of the text to the sentences it occurs in.
   *
   * <p>Only segments whose first character is a letter or digit count as words. Words are keyed
   * by their lower-case form, and one sentence index is recorded per occurrence.
   *
   * @param target the text to scan; it becomes the iterator's text
   * @param wordIterator the break iterator used to find word boundaries
   * @param sentences ascending sentence end offsets
   * @return a map from lower-cased word to a tuple holding its list of sentence indexes
   */
  static HashMap<String, WordTuple> findWordsInSentences(
      String target, BreakIterator wordIterator, ArrayList<Integer> sentences) {

    HashMap<String, WordTuple> wordMap = new HashMap<String, WordTuple>();
    wordIterator.setText(target);

    int start = wordIterator.first();
    for (int end = wordIterator.next();
        end != BreakIterator.DONE;
        start = end, end = wordIterator.next()) {
      String word = target.substring(start, end);
      if (!Character.isLetterOrDigit(word.charAt(0))) {
        continue; // whitespace or punctuation segment
      }

      int sentenceNo = sentenceIndexFor(end, sentences);
      String key = word.toLowerCase();
      WordTuple known = wordMap.get(key);
      if (known != null) {
        // Seen before: append this occurrence to the word's existing list.
        known.getSentenceList().add(sentenceNo);
      } else {
        // First occurrence: start a new list and register the word.
        ArrayList<Integer> occurrences = new ArrayList<Integer>();
        occurrences.add(sentenceNo);
        WordTuple created = new WordTuple();
        created.setSentenceList(occurrences);
        wordMap.put(key, created);
      }
    }
    return wordMap;
  }

  /**
   * Returns the index of the first sentence whose end offset is at or after {@code wordEnd}, or 0
   * when no sentence qualifies.
   */
  private static int sentenceIndexFor(int wordEnd, ArrayList<Integer> sentences) {
    for (int i = 0; i < sentences.size(); i++) {
      if (wordEnd <= sentences.get(i)) {
        return i;
      }
    }
    return 0;
  }
 /**
  * Returns the position of the next boundary after the given offset, or {@code DONE} if there is
  * no boundary after it.
  *
  * <p>The offset is translated into the iterator's window by subtracting {@code mOffsetShift};
  * the result is translated back by adding it again.
  *
  * @param offset the given start position to search from.
  * @return the position of the first boundary following the given offset, or {@code
  *     BreakIterator.DONE}.
  */
 public int nextBoundary(int offset) {
   final int next = mIterator.following(offset - mOffsetShift);
   return (next == BreakIterator.DONE) ? BreakIterator.DONE : next + mOffsetShift;
 }
 /**
  * Returns the position of the boundary preceding the given offset, or {@code DONE} if the given
  * offset specifies the starting position.
  *
  * <p>The offset is translated into the iterator's window by subtracting {@code mOffsetShift};
  * the result is translated back by adding it again.
  *
  * @param offset the given start position to search from.
  * @return the position of the last boundary preceding the given offset, or {@code
  *     BreakIterator.DONE}.
  */
 public int prevBoundary(int offset) {
   final int previous = mIterator.preceding(offset - mOffsetShift);
   return (previous == BreakIterator.DONE) ? BreakIterator.DONE : previous + mOffsetShift;
 }
 /**
  * Verifies that {@code bi.isBoundary(offset)} rejects the offset with an {@code
  * IllegalArgumentException}.
  *
  * @param bi the iterator under test
  * @param offset an offset that is expected to be invalid
  * @throws RuntimeException if the call completes without an IllegalArgumentException
  */
 private static void checkIsBoundaryException(BreakIterator bi, int offset) {
   boolean rejected;
   try {
     bi.isBoundary(offset);
     rejected = false;
   } catch (IllegalArgumentException expected) {
     rejected = true; // OK
   }
   if (!rejected) {
     throw new RuntimeException(bi + ": isBoundary() doesn't throw an IAE with offset " + offset);
   }
 }
Beispiel #21
0
  /**
   * If <code>offset</code> is within a word, returns the index of the last character of that word
   * plus one, otherwise returns BreakIterator.DONE.
   *
   * <p>The offsets that are considered to be part of a word are the indexes of its characters,
   * <i>as well as</i> the index of its last character plus one. If offset is the index of a low
   * surrogate character, BreakIterator.DONE will be returned.
   *
   * <p>Valid range for offset is [0..textLength] (note the inclusive upper bound). The returned
   * value is within [offset..textLength] or BreakIterator.DONE.
   *
   * @throws IllegalArgumentException if offset is not valid.
   */
  public int getEnd(int offset) {
    final int shifted = offset - mOffsetShift;
    checkOffsetIsValid(shifted);

    if (isAfterLetterOrDigit(shifted)) {
      // Either the offset already is the end of the word, or the end is the next boundary.
      final int wordEnd = mIterator.isBoundary(shifted) ? shifted : mIterator.following(shifted);
      return wordEnd + mOffsetShift;
    }
    if (isOnLetterOrDigit(shifted)) {
      // The offset is the first character of a word.
      return mIterator.following(shifted) + mOffsetShift;
    }
    return BreakIterator.DONE;
  }
 /**
  * Verifies that {@code bi.preceding(offset)} rejects the offset with an {@code
  * IllegalArgumentException}.
  *
  * @param bi the iterator under test
  * @param offset an offset that is expected to be invalid
  * @throws RuntimeException if the call completes without an IllegalArgumentException
  */
 private static void checkPrecedingException(BreakIterator bi, int offset) {
   boolean rejected;
   try {
     bi.preceding(offset);
     rejected = false;
   } catch (IllegalArgumentException expected) {
     rejected = true; // OK
   }
   if (!rejected) {
     throw new RuntimeException(bi + ": preceding() doesn't throw an IAE with offset " + offset);
   }
 }
Beispiel #23
0
 /**
  * Sets / updates the text of the dayLabels. These are the week column headers above the days on
  * the Calendar part of the <code>CDateTime</code>.
  *
  * <p>Labels flagged as compact show a single character of the formatted day name: the first one
  * for left-to-right locales other than Chinese, the last one otherwise.
  */
 private void updateDaysOfWeek() {
   if (dayPanel == null) {
     return;
   }

   Calendar tmpcal = cdt.getCalendarInstance();
   tmpcal.set(Calendar.DAY_OF_WEEK, tmpcal.getFirstDayOfWeek());
   Locale locale = cdt.getLocale();
   boolean ltr =
       (ComponentOrientation.getOrientation(locale).isLeftToRight()
           && !locale.getLanguage().equals("zh")); // $NON-NLS-1$
   BreakIterator iterator = BreakIterator.getCharacterInstance(locale);

   for (int day = 0; day < dayLabels.length; day++) {
     String label = getFormattedDate("E", tmpcal.getTime()); // $NON-NLS-1$
     if (dayLabels[day].getData(CDT.Key.Compact, Boolean.class)) {
       iterator.setText(label);
       int start;
       int end;
       if (ltr) {
         // First character of the name.
         start = iterator.first();
         end = iterator.next();
       } else {
         // Last character of the name.
         end = iterator.last();
         start = iterator.previous();
       }
       label = label.substring(start, end);
     }
     dayLabels[day].setText(label);
     tmpcal.add(Calendar.DAY_OF_WEEK, 1);
   }
 }
Beispiel #24
0
  /**
   * Checks line breaking of a short English text with the US-locale line iterator.
   *
   * <p>Also sets the JVM default locale to {@code Locale.US}.
   *
   * @param harness the harness that receives the check results
   */
  public void test(TestHarness harness) {
    // Just to be explicit: we're only testing the US locale here.
    final Locale us = Locale.US;
    Locale.setDefault(us);

    BreakIterator lineBreaks = BreakIterator.getLineInstance(us);
    final String[] expectedPieces = {
      "How ", "much ", "time ", "is ", "left?  ", "We ", "don't ", "know."
    };
    check("How much", "How much time is left?  We don't know.", expectedPieces, lineBreaks, harness);
  }
  /**
   * Splits {@code TEST_STRING} into sentences with a US-locale sentence iterator, collecting each
   * sentence in a list and printing it to standard output.
   */
  @Test
  public void testSentenceDetection() {
    BreakIterator sentences = BreakIterator.getSentenceInstance(Locale.US);
    sentences.setText(TEST_STRING);

    List<String> sentenceList = new ArrayList<String>();
    int start = sentences.first();
    for (int end = sentences.next();
        end != BreakIterator.DONE;
        start = end, end = sentences.next()) {
      String sentence = TEST_STRING.substring(start, end);
      sentenceList.add(sentence);

      System.out.println("Sentence: " + sentence);
    }
  }
  /**
   * Points the iterator at a window of the given text.
   *
   * <p>The window is {@code [start, end]} widened by {@code WINDOW_WIDTH} on both sides and
   * clamped to the bounds of {@code charSequence}. Only the window is copied into {@code
   * mString}; a {@code SpannableStringBuilder} is cut with its own {@code substring} method.
   *
   * @param charSequence the full text
   * @param start start of the range of interest, in {@code charSequence} coordinates
   * @param end end of the range of interest, in {@code charSequence} coordinates
   */
  public void setCharSequence(CharSequence charSequence, int start, int end) {
    mOffsetShift = Math.max(0, start - WINDOW_WIDTH);
    final int windowEnd = Math.min(charSequence.length(), end + WINDOW_WIDTH);

    mString =
        (charSequence instanceof SpannableStringBuilder)
            ? ((SpannableStringBuilder) charSequence).substring(mOffsetShift, windowEnd)
            : charSequence.subSequence(mOffsetShift, windowEnd).toString();
    mIterator.setText(mString);
  }
  /**
   * Called to summarize a document when no hits were found. By default this just returns the first
   * {@code maxPassages} sentences; subclasses can override to customize.
   *
   * <p>Each returned passage spans two consecutive boundaries of {@code bi} and has a score of
   * {@code Float.NaN}.
   *
   * @param fieldName the field being summarized (not used by this implementation)
   * @param bi the break iterator, expected to be positioned at offset 0
   * @param maxPassages the maximum number of passages to return
   * @return up to {@code maxPassages} passages, fewer if the iterator runs out of boundaries
   */
  protected Passage[] getEmptyHighlight(String fieldName, BreakIterator bi, int maxPassages) {
    // BreakIterator should be un-next'd:
    List<Passage> summary = new ArrayList<>();
    int start = bi.current();
    assert start == 0;

    // The size check comes first so the iterator is not advanced once enough passages exist.
    for (int end;
        summary.size() < maxPassages && (end = bi.next()) != BreakIterator.DONE;
        start = end) {
      Passage sentence = new Passage();
      sentence.score = Float.NaN;
      sentence.startOffset = start;
      sentence.endOffset = end;
      summary.add(sentence);
    }

    return summary.toArray(new Passage[summary.size()]);
  }
 /**
  * Creates a tokenizer over the whole of the given document and positions it on the first word.
  *
  * <p>If the document contains no word, or its text cannot be read, the tokenizer is marked as
  * having no more tokens.
  *
  * @param document the document to tokenize
  */
 public DocumentWordTokenizer(Document document) {
   this.document = document;
   // Create a text segment over the entire document
   text = new Segment();
   sentanceIterator = BreakIterator.getSentenceInstance();
   try {
     document.getText(0, document.getLength(), text);
     sentanceIterator.setText(text);
     currentWordPos = getNextWordStart(text, 0);
     if (currentWordPos == -1) {
       // A word position of -1 means the string was all white space
       moreTokens = false;
     } else {
       currentWordEnd = getNextWordEnd(text, currentWordPos);
       nextWordPos = getNextWordStart(text, currentWordEnd);
     }
   } catch (BadLocationException unreadable) {
     // The text could not be read, so there is nothing to tokenize.
     moreTokens = false;
   }
 }
Beispiel #29
0
  /**
   * Inserts newline characters into the input so that lines are wrapped at roughly {@code width}
   * characters, using a locale-specific line {@link BreakIterator} to choose break positions.
   *
   * <p>The input is returned unchanged when {@code width} is smaller than 5 or is at least the
   * input length.
   *
   * @param input the text to wrap, may be {@code null}
   * @param width the line width to wrap at
   * @param locale the locale used to create the line break iterator
   * @return the wrapped text; an empty string if {@code input} is {@code null}
   */
  public static String wordWrap(String input, int width, Locale locale) {
    if (input == null) return "";
    if (width < 5) return input;
    if (width >= input.length()) return input;
    // The buffer is edited in place while it is scanned, so its length can grow during the loop.
    StringBuffer buf = new StringBuffer(input);
    boolean endOfLine = false;
    int lineStart = 0;
    for (int i = 0; i < buf.length(); i++) {
      // A newline already in the text starts a new line.
      if (buf.charAt(i) == '\n') {
        lineStart = i + 1;
        endOfLine = true;
      }
      // Still within the allowed width of the current line: nothing to do yet.
      if (i <= (lineStart + width) - 1) continue;
      if (!endOfLine) {
        int limit = i - lineStart - 1;
        // Look for break opportunities within the text of the current line only.
        BreakIterator breaks = BreakIterator.getLineInstance(locale);
        breaks.setText(buf.substring(lineStart, i));
        int end = breaks.last();
        // The last boundary is the end of the line text and the next character is not
        // whitespace: step back to an earlier break opportunity.
        if (end == limit + 1 && !Character.isWhitespace(buf.charAt(lineStart + end)))
          end = breaks.preceding(end - 1);
        // The boundary is still the end of the line text: overwrite the character at that
        // position with a newline.
        // NOTE(review): lineStart ends up at the position of the newline just written rather
        // than the character after it — confirm this is intended.
        if (end != -1 && end == limit + 1) {
          buf.replace(lineStart + end, lineStart + end + 1, "\n");
          lineStart += end;
          continue;
        }
        if (end != -1 && end != 0) {
          // Break at the boundary found inside the line.
          buf.insert(lineStart + end, '\n');
          lineStart = lineStart + end + 1;
        } else {
          // No usable boundary: break at the current position.
          buf.insert(i, '\n');
          lineStart = i + 1;
        }
      } else {
        // A newline was seen earlier: break at the current position and clear the flag.
        buf.insert(i, '\n');
        lineStart = i + 1;
        endOfLine = false;
      }
    }

    return buf.toString();
  }
Beispiel #30
0
 /**
  * {@inheritDoc}
  *
  * <p>Boundaries that do not come right after a letter or digit are skipped. The offset is
  * translated into the iterator's window by subtracting {@code mOffsetShift}, and the result is
  * translated back by adding it again.
  */
 public int following(int offset) {
   int candidate = mIterator.following(offset - mOffsetShift);
   while (candidate != BreakIterator.DONE) {
     if (isAfterLetterOrDigit(candidate)) {
       return candidate + mOffsetShift;
     }
     candidate = mIterator.following(candidate);
   }
   return BreakIterator.DONE;
 }