Exemple #1
  private String cleanString(String string) {
    String patt = "'";
    Pattern r = Pattern.compile(patt);

    StringBuilder sb = new StringBuilder(string);
    Matcher m = r.matcher(sb);
    int start = 0;
    String replString = "\\'";
    while (m.find(start)) {

      sb.replace(m.start(), m.end(), "\\'");
      start = m.start() + replString.length();

    patt = "\n";
    r = Pattern.compile(patt);
    m = r.matcher(sb);
    start = 0;
    replString = "\' + char(13) + char(10) + \'";
    while (m.find(start)) {
      sb.replace(m.start(), m.end(), replString);
      start = m.start() + replString.length();
    return sb.toString();
 /**
  * Applies the size filter.
  *
  * @param sb message to filter
  * @param front true to apply the front sizer, false for the back sizer
  * @param player whether or not it is for a player
  */
 private void filterSizer(StringBuilder sb, boolean front, boolean player) {
   // Choose the marker pattern for the requested side, then walk every marker found in sb.
   Pattern regex = front ? EXPAND_FRONT : EXPAND_BACK;
   Matcher match = regex.matcher(sb);
   // Length before any edit; "sb.length() - size" below is the net growth of sb so far and is
   // added to the match offset.
   int size = sb.length();
   while (match.find()) {
     // Group 1 is the text to size; groups 2 and 3 are the player and console target sizes.
     int playerSize = Integer.parseInt(match.group(2));
     int consoleSize = Integer.parseInt(match.group(3));
     String string = match.group(1);
     if (player) {
           // NOTE(review): the call that receives these arguments is not visible in this excerpt.
           // The text is kept as-is when its measured size exceeds playerSize - 2, otherwise it
           // is passed through TextSizer.expand.
           match.start() + sb.length() - size,
           (TextSizer.measureString(string) > playerSize - 2
               ? string
               : TextSizer.expand(string, playerSize, front)));
     } else {
           // Console variant: compares the plain character count against consoleSize.
           match.start() + sb.length() - size,
           (string.length() > consoleSize
               ? string
               : TextSizer.expandConsole(string, consoleSize, front)));
Exemple #3
  /**
   * Index initialization: replaces {@code index} with a fresh map and scans the content of every
   * speech in {@code subtitle} for words.
   */
  private void initialIndex() {
    index = new HashMap<String, ArrayList<IndexWord>>();
    int indexSpeech = 0;
    for (Speech speech : subtitle) {
      String text = speech.content;
      // Overwrite every <...> tag with a filler of the same length so that the offsets of the
      // remaining text do not move (presumably repeatChar returns that many spaces; confirm).
      Matcher matcherTag = Pattern.compile("<[^>]*>").matcher(text);
      while (matcherTag.find()) {
        String fill = repeatChar(' ', matcherTag.group().length());
        text =
            text.substring(0, matcherTag.start())
                + fill
                + text.substring(matcherTag.end(), text.length());

      // A word is a run of at least three letters or apostrophes.
      Pattern patternWord = Pattern.compile("[\\p{L}']{3,}");
      Matcher matcherWord = patternWord.matcher(text);
      while (matcherWord.find()) {
        String word = matcherWord.group();
        // Words are grouped in the index under the stem of their lower-cased form.
        String stem = Stemmator.stemmingWord(word.toLowerCase(), language);
        if (!index.containsKey(stem)) index.put(stem, new ArrayList<IndexWord>());
        // NOTE(review): the statement that stores indexWord is not visible in this excerpt.
        IndexWord indexWord =
            new IndexWord(word, stem, indexSpeech, matcherWord.start(), matcherWord.end());
Exemple #4
  // priorityHelper parses the task title and sets the Task's importance.
  // It looks for a trailing "<level> priority" phrase (case-insensitive), stores the level as the
  // task importance, removes the phrase from the title and writes the trimmed title back.
  // Returns true if at least one phrase was found.
  private static boolean priorityHelper(Task task) {
    String inputText = task.getValue(Task.TITLE);
    String[] importanceStrings = {
      "(?i)()(\\shigh(est)?|\\slow(est)?|\\stop|\\sleast) ?priority$"
    boolean result = false;
    for (String importanceString : importanceStrings) {
      Pattern importancePattern = Pattern.compile(importanceString);
      // Re-match against the shortened title until the pattern no longer matches.
      while (true) {
        Matcher m = importancePattern.matcher(inputText);
        if (m.find()) {
          result = true;
          // Group 2 is the level word including its leading whitespace, hence the trim().
          task.setValue(Task.IMPORTANCE, strToPriority(m.group(2).trim()));
          // Keep the first matched character unless the match starts at index 0.
          int start = m.start() == 0 ? 0 : m.start() + 1;
          inputText = inputText.substring(0, start) + inputText.substring(m.end());

        } else {
    task.setValue(Task.TITLE, inputText.trim());
    return result;
 private Multiplier multiplier(String expression, int offset) {
   Matcher matcher = MULTIPLIER_PREFIX_PATTERN.matcher(expression);
   if (!matcher.find()) {
     return null;
   if (matcher.start() > 0) {
     fail(offset + matcher.start(), "illegal multiplier position");
   Matcher digitMatcher = DIGIT_PATTERN.matcher(expression);
   if (!digitMatcher.find()) {
     return null;
   String digitStr = expression.substring(0, digitMatcher.end());
   int number = 0;
   try {
     number = Integer.parseInt(digitStr);
   } catch (NumberFormatException e) {
     fail(offset, e);
   if (number <= 0) {
     fail(offset, "illegal 0 multiplier");
   String subexpression = expression.substring(matcher.end(), expression.length() - 1);
   return new Multiplier(number, subexpression, matcher.end());
Exemple #6
  /**
   * Scans {@code spannable} for {@code [img src=NAME/]} markers and resolves each NAME to a
   * drawable resource of the application package.
   *
   * @param context used to look up resources and the package name
   * @param spannable text to scan
   * @return true if at least one marker passed the span check below
   */
  public boolean addImages(Context context, Spannable spannable) {
    Pattern refImg = Pattern.compile("\\Q[img src=\\E([a-zA-Z0-9_]+?)\\Q/]\\E");
    boolean hasChanges = false;

    Matcher matcher = refImg.matcher(spannable);
    while (matcher.find()) {
      // "set" stays true only while every ImageSpan touching the match lies fully inside it.
      boolean set = true;
      for (ImageSpan span : spannable.getSpans(matcher.start(), matcher.end(), ImageSpan.class)) {
        if (spannable.getSpanStart(span) >= matcher.start()
            && spannable.getSpanEnd(span) <= matcher.end()) {
          // NOTE(review): this branch is empty in this excerpt.
        } else {
          set = false;
      // Group 1 is the resource name between "src=" and "/]".
      String resname = spannable.subSequence(matcher.start(1), matcher.end(1)).toString().trim();
      int id = context.getResources().getIdentifier(resname, "drawable", context.getPackageName());
      Drawable icon = context.getResources().getDrawable(id); // ,this.getTheme());
      // The icon is sized to a square of one text line height of tv_test1.
      icon.setBounds(0, 0, tv_test1.getLineHeight(), tv_test1.getLineHeight());
      if (set) {
        hasChanges = true;
            // NOTE(review): the call that receives this span is not visible in this excerpt.
            new ImageSpan(icon, ImageSpan.ALIGN_BASELINE),
    return hasChanges;
 /**
  * Runs the regular expression over the spannable string and, where it matches, substitutes an
  * emoji image.
  *
  * @param context used to load the image resources
  * @param spannableString text to scan and decorate
  * @param patten pattern that recognises an emoji key
  * @param start offset compared against each match start (see the check below)
  * @throws Exception declared by the method; the reflective field read and the integer parsing
  *     below can fail
  */
 public static void dealExpression(
     Context context, SpannableString spannableString, Pattern patten, int start)
     throws Exception {
   Matcher matcher = patten.matcher(spannableString);
   while (matcher.find()) {
     String key = matcher.group();
     Log.d("Key", key);
     // NOTE(review): the body of this check is not visible in this excerpt.
     if (matcher.start() < start) {
     // The resource field name is "emoji_" plus the part of the key between its first "]" and
     // its last "[". NOTE(review): the call that looks the field up is not visible here.
     Field field =
             "emoji_" + key.substring(key.indexOf("]") + 1, key.lastIndexOf("[")));
     // The field is read with a null receiver and its value parsed as the resource id.
     int resId = Integer.parseInt(field.get(null).toString());
     if (resId != 0) {
       Bitmap bitmap = BitmapFactory.decodeResource(context.getResources(), resId);
       ImageSpan imageSpan = new ImageSpan(bitmap);
       int end = matcher.start() + key.length();
           // NOTE(review): the call that receives these arguments is not visible in this excerpt.
           imageSpan, matcher.start(), end, Spannable.SPAN_INCLUSIVE_EXCLUSIVE);
       // Recurse with the end of this match as the new start while text remains after it.
       if (end < spannableString.length()) {
         dealExpression(context, spannableString, patten, end);
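 /**
  * Replaces matching text in an existing spannable with smiley images.
  *
  * @param context context used to create the image spans
  * @param spannable spannable to decorate
  * @return true if at least one smiley was applied
  */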
 public static boolean addSmiles(Context context, Spannable spannable) {
   boolean hasChanges = false;
   // Each emoticon entry maps a pattern to the value handed to the ImageSpan constructor below.
   for (Entry<Pattern, Integer> entry : emoticons.entrySet()) {
     Matcher matcher = entry.getKey().matcher(spannable);
     while (matcher.find()) {
       boolean set = true;
       // ImageSpans lying fully inside the match are removed; one reaching outside it clears
       // "set".
       for (ImageSpan span : spannable.getSpans(matcher.start(), matcher.end(), ImageSpan.class))
         if (spannable.getSpanStart(span) >= matcher.start()
             && spannable.getSpanEnd(span) <= matcher.end()) spannable.removeSpan(span);
         else {
           set = false;
       if (set) {
         hasChanges = true;
             // NOTE(review): the call that receives this span is not visible in this excerpt.
             new ImageSpan(context, entry.getValue()),
   return hasChanges;
Exemple #9
 protected int continueSearch(Matcher matcher, int pos, boolean forward) {
   boolean hasNext = false;
   int start = 0, end = 0;
   if (!forward) {
     while (matcher.find()) {
       hasNext = true;
       start = matcher.start();
       end = matcher.end();
   } else {
     hasNext = matcher.find();
     if (!hasNext) {
       return -1;
     start = matcher.start();
     end = matcher.end();
   if (hasNext) {
     Document doc = getDocument();
     getCaret().setDot(pos + end);
     getCaret().moveDot(pos + start);
     return pos + start;
   return -1;
Exemple #10
  /**
   * Text handling for Weibo posts: @mentions, #topics# and http://t.cn/... short links.
   *
   * @param tv text view whose text is processed
   */
  public static void textFormat(TextView tv) {

    // The view's text is cast to Spannable as-is; this throws ClassCastException otherwise.
    Spannable sp = (Spannable) tv.getText();
    String text = tv.getText().toString();

    // m1: t.cn short links, m2: @mentions, m3: #topic# markers.
    // NOTE(review): the dot in "t.cn" is unescaped, so it matches any character.
    Matcher m1 = Pattern.compile("http://t.cn/[a-zA-Z0-9]+").matcher(text);
    Matcher m2 = Pattern.compile("@[^:||^:||\\s]+").matcher(text);
    Matcher m3 = Pattern.compile("#[^#]+#").matcher(text);

    // Use the regular expressions below.
    // NOTE(review): in all three loops the call that receives the span arguments is not visible
    // in this excerpt.
    while (m1.find()) {
      int start, end;
      start = m1.start();
      end = m1.end();
          new ForegroundColorSpan(0xff444444), start, end, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);

    while (m2.find()) {
      int start, end;
      start = m2.start();
      end = m2.end();
          new ForegroundColorSpan(0xff444444), start, end, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);

    while (m3.find()) {
      int start, end;
      start = m3.start();
      end = m3.end();
          new ForegroundColorSpan(0xff444444), start, end, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
  /**
   * Compares the generated catalog DDL, statement by statement, with the stored gold version for
   * the current schema version. Differences limited to index and constraint names in
   * "alter table" statements are tolerated.
   */
  public void testCurrentVersionStructureAgainstGold() throws Throwable {
    int latestVersion = CatalogVersions.getCurrentVersion().getSchemaVersion();
    String[] lastKnown = UpgradeTestUtils.getGoldVersion(latestVersion);
    String[] current = generateCatalog();
    // Groups 4 and 6 capture the index name and the constraint name.
    Pattern thanksHibernate =
        Pattern.compile("(alter table )(\\w+)( add index )(\\w+)(.*add constraint )(\\w+)(.*)");
    // The last element of current is excluded from both the count and the comparison.
    assertEquals("number of stmts in schema ddl", lastKnown.length, current.length - 1);
    for (int i = 0; i < current.length - 1; i++) {
      if (!lastKnown[i].equals(current[i])) {
        // accept differences in index, constraint names
        if (lastKnown[i].startsWith("alter table") && current[i].startsWith("alter table")) {
          Matcher last = thanksHibernate.matcher(lastKnown[i]);
          Matcher now = thanksHibernate.matcher(current[i]);
          if (last.matches() && now.matches()) {
            String knownConstraint = lastKnown[i].substring(last.start(6), last.end(6));
            String knownIndex = lastKnown[i].substring(last.start(4), last.end(4));

            // Rebuild the current statement from its own pieces around the two names.
            // NOTE(review): knownIndex and knownConstraint are not appended anywhere in this
            // excerpt; the statements using them appear to be missing.
            StringBuilder buf = new StringBuilder();
            buf.append(current[i].substring(0, now.start(4)));
            buf.append(current[i].substring(now.end(4), now.start(6)));
            String munged = buf.toString();
            if (lastKnown[i].equals(munged)) continue;
        assertEquals("schema ddl stmt " + i, lastKnown[i], current[i]);
Exemple #12
  public String replaceTokens(String text, List<String> values) {
    StringBuilder output = new StringBuilder();
    Matcher tokenMatcher = tokenPattern.matcher(text);

    int cursor = 0;
    while (tokenMatcher.find()) {
      // A token is defined as a sequence of the format "{...}".
      // A key is defined as the content between the brackets.
      int tokenStart = tokenMatcher.start();
      int tokenEnd = tokenMatcher.end();
      int keyStart = tokenMatcher.start(1);
      int keyEnd = tokenMatcher.end(1);

      output.append(text.substring(cursor, tokenStart));

      String token = text.substring(tokenStart, tokenEnd);
      String key = text.substring(keyStart, keyEnd);

      Integer index = Integer.parseInt(key);

      if (values.size() >= index) {
        String value = values.get(index);
      } else {

      cursor = tokenEnd;

    return output.toString();
Exemple #13
 /**
  * Builds a spannable from {@code text} and scans it with every pattern in
  * {@code ANDROID_EMOTICONS}.
  *
  * <p>NOTE(review): several statements of this method are not visible in this excerpt, including
  * the call that would attach the ImageSpan; the local names suggest decompiled code.
  *
  * @param context passed to the ImageSpan constructor
  * @param text source text
  * @return the spannable created from {@code text}
  */
 public static Spannable getSmiledText(Context context, CharSequence text) {
   Spannable spannable = spannableFactory.newSpannable(text);
   // Raw Entry: keys are cast to Pattern and values to Integer below.
   for (Entry entry : ANDROID_EMOTICONS.entrySet()) {
     Matcher matcher = ((Pattern) entry.getKey()).matcher(spannable);
     while (matcher.find()) {
       for (Object obj :
           (ImageSpan[]) spannable.getSpans(matcher.start(), matcher.end(), ImageSpan.class)) {
         // True when an existing ImageSpan reaches outside the current match.
         if (spannable.getSpanStart(obj) < matcher.start()
             || spannable.getSpanEnd(obj) > matcher.end()) {
           Object obj2 = null;
       int i = 1;
       if (obj2 != null) {
             new ImageSpan(context, ((Integer) entry.getValue()).intValue()),
   return spannable;
Exemple #14
  /**
   * @param desc description text to scan
   * @return the description with the classification text removed
   */
  private String extractClassification(String desc) {
    String result = desc;
    Matcher mat = classificationPattern.matcher(desc);

    boolean matched = false;
    if (mat.find()) {
      matched = true;
      String category = mat.group(2);
      result = desc.substring(0, mat.start()) + desc.substring(mat.end());
      if (debug) System.out.println("       Category: " + category);

    mat = Pattern.compile(",? ?(classified .*)").matcher(desc);
    if (mat.find()) {
      // strip out the excess text regardless
      // XXX overwriting previous result.
      result = desc.substring(0, mat.start()) + desc.substring(mat.end());

      if (!matched) {
        // if we didn't previously match a sub-set of the clause, record the category
        String category = mat.group(1);
        if (debug) System.out.println("       Category: " + category);

    mat = Pattern.compile("text too brief to classify,? ?").matcher(result);
    if (mat.find()) {
      result = result.substring(0, mat.start()) + result.substring(mat.end());
      if (debug) System.out.println("       Category: text too brief to classify");

    return result;
Exemple #15
  /**
   * @param desc description text to scan
   * @return the description with the contents phrase removed
   */
  private String extractContents(String desc) {
    Pattern p = Pattern.compile("((?:an?|the) (.*?)(?: of (.*))?)(?:,|\\s*located)");
    Pattern p2 = Pattern.compile("((incomplete .*?)(?: of (.*))?)(?:,|\\s*located)");

    String result = desc;
    Matcher mat = p.matcher(desc);
    Matcher mat2 = p2.matcher(desc);
    mat = mat.find() ? mat : mat2.find() ? mat2 : null;

    if (mat != null) {
      String matchedText = mat.group(1);
      int start = mat.start();
      int end =
          (matchedText.indexOf("located") >= 0)
              ? end = mat.start(1) + matchedText.indexOf("located")
              : mat.end(1);

      String contents = desc.substring(mat.start(1), end);
      result = desc.substring(0, start) + desc.substring(end);

      contents = StringUtils.trimToEmpty(contents);
      if (contents.lastIndexOf(",") == contents.length() - 1) {
        contents = contents.substring(0, contents.length() - 1);
      LOGGER.debug("       Contents: " + contents);

    return result;
  /**
   * Recursive method that finds the matches.
   *
   * <p>Advances {@code mMatcher} by one match and translates the match bounds into line/column
   * pairs through {@code mCharacters}. NOTE(review): the recursive call and the statements inside
   * the conditions below are not visible in this excerpt.
   */
  private void findMatch() {
    int startLine;
    int startColumn;
    int endLine;
    int endColumn;
    boolean foundMatch;
    boolean ignore = false;

    foundMatch = mMatcher.find();
    // No match at all, for a pattern that is not flagged illegal, and nothing counted so far.
    if (!foundMatch && !mIllegalPattern && (mMatchCount == 0)) {
    } else if (foundMatch) {
      // Each mCharacters entry is indexed by character offset: element [0] is read as the line
      // and element [1] as the column. The end position uses the last matched character.
      startLine = (mCharacters.get(mMatcher.start()))[0].intValue();
      startColumn = (mCharacters.get(mMatcher.start()))[1].intValue();
      endLine = (mCharacters.get(mMatcher.end() - 1))[0].intValue();
      endColumn = (mCharacters.get(mMatcher.end() - 1))[1].intValue();
      if (mIgnoreComments) {
        // A match that intersects a comment is flagged as ignored.
        final FileContents theFileContents = getFileContents();
        ignore =
            theFileContents.hasIntersectionWithComment(startLine, startColumn, endLine, endColumn);
      if (!ignore) {
        if (mIllegalPattern || (mCheckForDuplicates && ((mMatchCount - 1) > mDuplicateLimit))) {
      if ((mErrorCount < mErrorLimit) && (ignore || mIllegalPattern || mCheckForDuplicates)) {
  private static String filter(String str) {
    if (str.length() > 63) {
      throw new IllegalArgumentException(
              + " is too long to be a "
              + "valid PostgreSQL name. By default names must be shorter "
              + "than 64, but it has "
              + str.length()
              + " characters");
    Pattern quotesPattern = Pattern.compile("(\"+)");
    Matcher matcher = quotesPattern.matcher(str);
    while (matcher.find()) {
      if (((matcher.end() - matcher.start()) & 1) == 1) { // lenght is uneven
        throw new IllegalArgumentException(
            "The name '"
                + str
                + "' is"
                + "illegal because contains an open quote at "
                + matcher.start());

    return str;
Exemple #18
  /**
   * Converts a wildcard expression into a compiled regular expression.
   *
   * <p>The text between wildcard matches of {@code SPLIT_PATTERN} is quoted literally; the
   * wildcards themselves must be {@code *} or {@code ?}. NOTE(review): the statements that
   * translate the two wildcards and append the tail are not visible in this excerpt.
   *
   * @param pattern wildcard expression
   * @return the compiled pattern
   * @throws IllegalStateException if {@code SPLIT_PATTERN} matches anything other than * or ?
   */
  private Pattern toPattern(final String pattern) {
    final StringBuilder sb = new StringBuilder();

    final Matcher m = SPLIT_PATTERN.matcher(pattern);
    // End of the previous wildcard; everything from here to the next wildcard is literal text.
    int lastEnd = 0;
    while (m.find()) {
      if (lastEnd < m.start()) {
        sb.append(Pattern.quote(pattern.substring(lastEnd, m.start())));

      final String matched = pattern.substring(m.start(), m.end());
      if ("*".equals(matched)) {
      } else if ("?".equals(matched)) {
      } else {
        throw new IllegalStateException("Wildcard character does not match * nor ?");
      lastEnd = m.end();

    // Literal text after the last wildcard.
    if (lastEnd < pattern.length()) {


    return Pattern.compile(sb.toString());
  private static String processHtml(final String source, StringBuilder errorMessages) {
    if (M_evilTags == null) init();

    // normalize all variants of the "<br>" HTML tag to be "<br />\n"
    // TODO call a method to do this in each process routine
    String Html = M_patternTagBr.matcher(source).replaceAll("<br />");

    // process text and tags
    StringBuilder buf = new StringBuilder();
    if (Html != null) {
      try {
        int start = 0;
        Matcher m = M_patternTag.matcher(Html);

        // if there are no tags, return as is
        if (!m.find()) return Html;

        // if there are tags, make sure they are safe
        while (m.find()) {
          // append text that isn't part of a tag
          if (m.start() > start) buf.append(Html.substring(start, m.start()));
          start = m.end();

          buf.append(checkTag(m.group(), errorMessages));

        // tail
        if (Html.length() > start) buf.append(Html.substring((start)));
      } catch (Exception e) {
        M_log.warn("FormattedText.processEscapedHtml M_patternTag.matcher(Html):", e);
    return new String(buf.toString());
Exemple #20
  /**
   * Evaluates {@code path} as an XPath expression against {@code doc}, after rewriting the path
   * for the document's namespace.
   *
   * @param path XPath-like path; a relative path that lacks the namespace prefix gets it prepended
   * @param doc document to query; its root element's namespace URI is registered for the prefix
   * @return the selected nodes, or {@code null} when XPath creation or evaluation fails
   */
  public static List getNcMLElements(String path, Document doc) {

    // XPath doesn't support default namespaces, so we add nc as a prefix for the tags within the
    // namespace!!!
    if (!path.startsWith(NS_PREFIX_ON_TAG) && !path.startsWith("/")) path = NS_PREFIX_ON_TAG + path;

    // Each "/" that is directly followed by a word character starts a new path step.
    Pattern pattern = Pattern.compile("/\\w");
    Matcher matcher = pattern.matcher(path);

    StringBuilder sb = new StringBuilder();
    int currentChar = 0;
    while (matcher.find()) {

      // NOTE(review): substring takes an end index, not a length. The second argument equals
      // matcher.start() + 1 only while currentChar is 0; confirm this is intended.
      sb.append(path.substring(currentChar, matcher.start() - currentChar + 1));
      if (!sb.toString().endsWith("/")) sb.append("/");
      // NOTE(review): no prefix is appended after the "/" in this excerpt; a statement may be
      // missing here.
      currentChar = matcher.start() + 1;

    sb.append(path.substring(currentChar, path.length()));

    XPath xpath;
    try {

      xpath = XPath.newInstance(sb.toString());
      xpath.addNamespace(NS_PREFIX, doc.getRootElement().getNamespaceURI());
      return xpath.selectNodes(doc);

    } catch (JDOMException e) {
      // NOTE(review): the handler body is empty in this excerpt, so the failure is not reported.


    return null;
  /**
   * Decorates the keys of a JSON text with their documentation.
   *
   * <p>Every {@code "key":} occurrence is looked up by position in the key finder and, when
   * {@code docReference} has documentation for the resulting dot-notated key, replaced by the
   * output of {@code surroundWith.getDocumentedKey}.
   *
   * @param json JSON text to decorate
   * @return the decorated text assembled so far
   */
  public String documentedJson(String json) {
    JSONReaderImpl jsonReader = new JSONReaderImpl();
    JsonKeyFinder keyFinder = new JsonKeyFinder(jsonReader);
    try {
      // NOTE(review): the parsing call that would fill keyFinder is not visible in this excerpt.
    } catch (SAJException e) {
      // TODO Auto-generated catch block

    StringBuffer documentedJson = new StringBuffer();
    // Matches a quoted key made of letters and underscores, including the trailing colon.
    Pattern pattern = Pattern.compile("\\\"([a-zA-Z_]+)\\\"\\:");
    Matcher matcher = pattern.matcher(json);
    int afterLastMatch = 0;
    while (matcher.find()) {
      // Keys are looked up by the offset of the last matched character, i.e. the colon.
      String dotNotatedKey =
          keyFinder.getKeys().get(matcher.start() + matcher.group(0).length() - 1);
      String keyDoc = docReference.getDocumentationForKey(dotNotatedKey);

      // after last match to beginning of this one:
      documentedJson.append(json.substring(afterLastMatch, matcher.start()));

      // documented key
      if (keyDoc != null) {
        documentedJson.append(surroundWith.getDocumentedKey(matcher.group(0), keyDoc));
      } else documentedJson.append(matcher.group(0));

      afterLastMatch = matcher.start() + matcher.group(0).length();

    // NOTE(review): nothing in this excerpt appends the text after the last match.
    return documentedJson.toString();
Exemple #22
 /**
  * Copies a template from {@code in} to {@code out}, processing every snippet reference that
  * {@code refStart} matches.
  *
  * <p>NOTE(review): the statements that write the snippet text, and the copy of the template
  * tail, are not visible in this excerpt.
  *
  * @param in source of the template text
  * @param out destination of the processed text
  * @param refs true when the reference marker itself is dropped from the output; false when the
  *     marker is kept and the template is skipped up to the next {@code refEnd}
  * @throws IOException if reading or writing fails
  * @throws IllegalArgumentException if a referenced snippet is unknown, or if no {@code refEnd}
  *     follows a reference when {@code refs} is false
  */
 private void replace(Reader in, Writer out, boolean refs) throws IOException {
   final String template = IoUtils.read(in);
   final Matcher matcher = refStart.matcher(template);
   // matchPos: where the next search starts. appendPos: start of the text not yet written.
   int matchPos = 0;
   int appendPos = 0;
   while (matcher.find(matchPos)) {
     final String name = matcher.group(1);
     if (!snippets.containsKey(name)) {
       throw new IllegalArgumentException("Snippet '" + name + "' not defined.");
     if (refs) {
       // Write everything before the marker and continue right after it.
       out.write(template.substring(appendPos, matcher.start()));
       matchPos = appendPos = matcher.end();
     } else {
       // Write everything up to and including the marker, then jump to the refEnd marker.
       out.write(template.substring(appendPos, matcher.end()));
       appendPos = template.indexOf(refEnd, matcher.end());
       if (appendPos < 0) {
         throw new IllegalArgumentException(
             "No refEnd marker found for refStart '"
                 + template.substring(matcher.start(), matcher.end())
                 + "'");
       // Searching resumes after refEnd, while appendPos stays on it so that it is written later.
       matchPos = appendPos + refEnd.length();
Exemple #23
  private String addDataRights(String content, String classification, Artifact artifact) {
    String toReturn = content;
    PageOrientation orientation = WordRendererUtil.getPageOrientation(artifact);
    DataRightInput request = new DataRightInput();
    request.addData(artifact.getGuid(), classification, orientation, 0);

    DataRightProvider provider = new DataRightProviderImpl();
    DataRightResult dataRights = provider.getDataRights(request);
    String footer = dataRights.getContent(artifact.getGuid(), orientation);

    Matcher startFtr = START_PATTERN.matcher(footer);
    Matcher endFtr = END_PATTERN.matcher(footer);
    if (startFtr.find() && endFtr.find()) {
      ChangeSet ftrCs = new ChangeSet(footer);
      ftrCs.delete(0, startFtr.end());
      ftrCs.delete(endFtr.start(), footer.length());
      footer = ftrCs.applyChangesToSelf().toString();

    ChangeSet cs = new ChangeSet(content);
    while (startFtr.find()) {
      if (endFtr.find()) {
        cs.replace(startFtr.end(), endFtr.start(), footer);
    toReturn = cs.applyChangesToSelf().toString();
    return toReturn;
 /**
  * Finds the word, as defined by {@code p}, that covers document offset {@code offs}.
  *
  * @param offs document offset to look at
  * @param p pattern describing what a word is
  * @return a token of type DEFAULT spanning the word, or {@code null} when the offset has no
  *     paragraph element, the line is empty, or no match covers the offset
  */
 public Token getWordAt(int offs, Pattern p) {
   Token word = null;
   try {
     Element line = getParagraphElement(offs);
     if (line == null) {
       return word;
     // Only the text of the line containing offs is searched; the end is clamped to the document
     // length.
     int lineStart = line.getStartOffset();
     int lineEnd = Math.min(line.getEndOffset(), getLength());
     Segment seg = new Segment();
     getText(lineStart, lineEnd - lineStart, seg);
     if (seg.count > 0) {
       // we need to get the word using the words pattern p
       Matcher m = p.matcher(seg);
       // Offset of offs relative to the start of the line.
       int o = offs - lineStart;
       while (m.find()) {
         // Both bounds are inclusive, so an offset right after a word still selects that word.
         if (m.start() <= o && o <= m.end()) {
           word = new Token(TokenType.DEFAULT, m.start() + lineStart, m.end() - m.start());
   } catch (BadLocationException ex) {
     Logger.getLogger(SyntaxDocument.class.getName()).log(Level.SEVERE, null, ex);
   } finally {
     // NOTE(review): returning from finally also discards any unchecked exception thrown above.
     return word;
Exemple #25
  /**
   * Interactive regular-expression tester: reads a pattern from standard input, then repeatedly
   * reads a line and reports whether the whole line matches. An empty line ends the program.
   *
   * <p>NOTE(review): the statements that print the input characters themselves are not visible in
   * this excerpt; only the group delimiters are printed here.
   *
   * @param args unused
   * @throws PatternSyntaxException if the entered pattern is not a valid regular expression
   */
  public static void main(String[] args) throws PatternSyntaxException {
    Scanner in = new Scanner(System.in);
    System.out.println("Enter pattern: ");
    String patternString = in.nextLine();

    Pattern pattern = Pattern.compile(patternString);

    while (true) {
      System.out.println("Enter string to match: ");
      String input = in.nextLine();
      if (input == null || input.equals("")) return;
      Matcher matcher = pattern.matcher(input);
      // matches() requires the entire input to match, not just a part of it.
      if (matcher.matches()) {
        int g = matcher.groupCount();
        if (g > 0) {
          // Walk the input and emit group boundary markers at each position i.
          for (int i = 0; i < input.length(); i++) {
            // Print any empty groups
            for (int j = 1; j <= g; j++)
              if (i == matcher.start(j) && i == matcher.end(j)) System.out.print("()");
            // Print ( for non-empty groups starting here
            for (int j = 1; j <= g; j++)
              if (i == matcher.start(j) && i != matcher.end(j)) System.out.print('(');
            // Print ) for non-empty groups ending here
            for (int j = 1; j <= g; j++)
              if (i + 1 != matcher.start(j) && i + 1 == matcher.end(j)) System.out.print(')');
      } else System.out.println("No match");
Exemple #26
 /**
  * Splits {@code fullText} into {@code head} and {@code body} at the introduction heading.
  *
  * <p>The heading is a line consisting of up to two digits, an optional ". " and {@code intro}.
  * If no such line exists, the split falls back to the "Abstract" heading.
  *
  * @param intro heading text to look for; it is inserted into the regular expression unquoted
  */
 private void splitHead(String intro) {
   Pattern pattern = Pattern.compile("^[0-9]{0,2}(\\. ){0,1}" + intro + "$", Pattern.MULTILINE);
   Matcher matcher = pattern.matcher(fullText);
   if (matcher.find()) {
     if (debug) logger.info("Found " + intro + " at " + matcher.end());
     // The heading line itself goes to the body.
     head = fullText.substring(0, matcher.start());
     body = fullText.substring(matcher.start());
   } else {
     // try "...." after Abstract
     if (debug) logger.info("Trying to find abstract");
     Pattern abstractPattern = Pattern.compile("\\s[0-9]*Abstract\\s");
     Matcher abstractMatcher = abstractPattern.matcher(fullText);
     int abstractOffSet = 0;
     if (abstractMatcher.find()) {
       if (debug) logger.info("Found Abstract");
       abstractOffSet = abstractMatcher.end();
       // Look for a run of four or more dots that ends after the Abstract heading.
       Pattern pointPattern = Pattern.compile("\\.{4,}");
       Matcher pointMatcher = pointPattern.matcher(fullText);
       while (pointMatcher.find()) {
         if (pointMatcher.end() > abstractOffSet) {
           head = fullText.substring(0, pointMatcher.end());
           body = fullText.substring(pointMatcher.end());
       // NOTE(review): as shown, these two assignments run after the loop above and would
       // overwrite its result; statements between them appear to be missing from this excerpt.
       head = fullText.substring(0, abstractMatcher.start());
       body = fullText.substring(abstractMatcher.start());
     // Apparently abstract wasn't divided by points
Exemple #27
    public void prepareRequireSearch(final String file) {
      // if an extension is specified, try more targetted searches
      if (file.lastIndexOf('.') > file.lastIndexOf('/')) {
        Matcher matcher = null;
        if ((matcher = sourcePattern.matcher(file)).find()) {
          // source extensions
          suffixType = SuffixType.Source;

          // trim extension to try other options
          searchFile = file.substring(0, matcher.start());
        } else if ((matcher = extensionPattern.matcher(file)).find()) {
          // extension extensions
          suffixType = SuffixType.Extension;

          // trim extension to try other options
          searchFile = file.substring(0, matcher.start());
        } else {
          // unknown extension, fall back to search with extensions
          suffixType = SuffixType.Both;
          searchFile = file;
      } else {
        // try all extensions
        suffixType = SuffixType.Both;
        searchFile = file;
Exemple #28
  /**
   * Method to split a text by headings. As of now we assume a heading has a leading number
   * followed by a whitespace character and some text beginning with an upper-case letter, such as
   * "3 Related Work".
   */
  private void splitByHeading() {
    // A heading line: optional digits, one whitespace character, then an upper-case letter.
    Pattern pattern = Pattern.compile("^[0-9]*\\s[A-Z].*", Pattern.MULTILINE);
    Matcher matcher;
    // Offset of the searched substring within body; 0 when the whole body is searched.
    int add = 0;
    // try to find headings after abstract
    Pattern abstractPattern = Pattern.compile("^(Abstract).{0,5}$", Pattern.MULTILINE);
    Matcher abstractMatcher = abstractPattern.matcher(body);
    if (abstractMatcher.find()) {
      // Search only the text after the Abstract line; match offsets are then relative to it.
      add = abstractMatcher.end();
      matcher = pattern.matcher(body.substring(abstractMatcher.end()));
    } else {
      matcher = pattern.matcher(body);

    if (matcher.find()) {
      // found at least once
      if (debug)
            // NOTE(review): the call that receives this message is not visible in this excerpt.
            // Also, "+ add + matcher.start()" concatenates the two numbers as text instead of
            // adding them.
            "Splitting by heading at "
                + add
                + matcher.start()
                + " which is the heading: "
                + matcher.group());
      // Split at the first heading: everything before it becomes head, the rest stays body.
      head = body.substring(0, add + matcher.start());
      body = body.substring(add + matcher.start());
Exemple #29
  /** Returns the places of possible breaks between sentences. */
  private static List<BreakPosition> getBreaks(String paragraph, Rule rule) {
    List<BreakPosition> res = new ArrayList<BreakPosition>();

    Matcher bbm = null;
    if (rule.getBeforebreak() != null) bbm = rule.getCompiledBeforebreak().matcher(paragraph);
    Matcher abm = null;
    if (rule.getAfterbreak() != null) abm = rule.getCompiledAfterbreak().matcher(paragraph);

    if (bbm == null && abm == null) return res;

    if (abm != null) if (!abm.find()) return res;

    if (bbm == null) bbm = DEFAULT_BEFOREBREAK_PATTERN.matcher(paragraph);

    while (bbm.find()) {
      int bbe = bbm.end();
      if (abm == null) res.add(new BreakPosition(bbe, rule));
      else {
        int abs = abm.start();
        while (abs < bbe) {
          boolean found = abm.find();
          if (!found) return res;
          abs = abm.start();
        if (abs == bbe) res.add(new BreakPosition(bbe, rule));

    return res;
  /**
   * Processes the variable references in {@code source} that {@code PATTERN_VARIABLE} matches,
   * looking the variable names up as system properties.
   *
   * <p>NOTE(review): the statement that appends the property value is not visible in this
   * excerpt.
   *
   * @param source text containing variable references
   * @return the text assembled from {@code source}
   */
  public static String interpret(String source) {
    StringBuilder result = new StringBuilder();

    Matcher matcher = PATTERN_VARIABLE.matcher(source);

    // Start of the part of source that has not been copied to result yet.
    int current = 0;
    while (matcher.find()) {
      if (matcher.group(1) == null) {
        // Group 1 did not participate: group 3 is the name of a system property.
        String var = matcher.group(3);

        String value = System.getProperty(var);
        // A reference to an unset property is left in place.
        if (value != null) {
          result.append(source, current, matcher.start());
          current = matcher.end();
      } else {
        // Group 1 participated: copy up to the match and resume at the start of group 2
        // (presumably dropping an escape marker captured by group 1; confirm against the pattern).
        result.append(source, current, matcher.start());
        current = matcher.start(2);

    if (current < source.length()) {
      result.append(source, current, source.length());

    return result.toString();