/**
 * Trims the given text and escapes it for embedding in a single-quoted string expression.
 *
 * <p>After trimming, every single quote is prefixed with a backslash and every line feed is
 * replaced by the fragment {@code ' + char(13) + char(10) + '}, which closes the literal,
 * concatenates a CR/LF pair and reopens it. NOTE(review): the {@code char(n)} syntax suggests the
 * target is a SQL dialect; confirm against the callers.
 *
 * @param string the raw text; must not be {@code null}
 * @return the trimmed and escaped text
 */
private String cleanString(String string) {
    // Strings are immutable, so the trimmed value has to be captured rather than discarded.
    String trimmed = string.trim();
    // Quotes first: the newline replacement introduces quotes that must stay unescaped.
    String quotesEscaped = trimmed.replace("'", "\\'");
    return quotesEscaped.replace("\n", "' + char(13) + char(10) + '");
}
/**
 * Applies the size filter: every sizer tag found in the message is replaced by its text,
 * expanded to the requested width unless the text is already too wide.
 *
 * <p>Group 1 of the pattern is the text, group 2 the player width and group 3 the console width.
 *
 * @param sb message to filter; rewritten in place
 * @param front true if apply front sizer, false for back sizer
 * @param player whether or not it is for a player
 * @throws NumberFormatException if a width group is not a valid integer
 */
private void filterSizer(StringBuilder sb, boolean front, boolean player) {
    Pattern regex = front ? EXPAND_FRONT : EXPAND_BACK;
    // Match against an immutable snapshot. Editing the builder while a live matcher is
    // scanning it shifts the text under the matcher and corrupts later match offsets.
    String original = sb.toString();
    Matcher match = regex.matcher(original);
    StringBuilder rebuilt = new StringBuilder(original.length());
    int cursor = 0;
    boolean changed = false;
    while (match.find()) {
        int playerSize = Integer.parseInt(match.group(2));
        int consoleSize = Integer.parseInt(match.group(3));
        String string = match.group(1);
        String replacement;
        if (player) {
            replacement =
                TextSizer.measureString(string) > playerSize - 2
                    ? string
                    : TextSizer.expand(string, playerSize, front);
        } else {
            replacement =
                string.length() > consoleSize
                    ? string
                    : TextSizer.expandConsole(string, consoleSize, front);
        }
        rebuilt.append(original, cursor, match.start()).append(replacement);
        cursor = match.end();
        changed = true;
    }
    if (!changed) {
        return;
    }
    rebuilt.append(original, cursor, original.length());
    sb.setLength(0);
    sb.append(rebuilt);
}
/**
 * Rebuilds the word index from the subtitle.
 *
 * <p>For every speech, markup tags ({@code <...>}) are overwritten with filler produced by
 * {@code repeatChar(' ', tagLength)} so that word offsets are not shifted, then every run of
 * three or more letters/apostrophes is stemmed and recorded under its stem together with the
 * speech number and the word's start and end offsets.
 */
private void initialIndex() {
    index = new HashMap<String, ArrayList<IndexWord>>();
    final Pattern tagPattern = Pattern.compile("<[^>]*>");
    final Pattern wordPattern = Pattern.compile("[\\p{L}']{3,}");
    int speechNumber = 0;
    for (Speech speech : subtitle) {
        String raw = speech.content;
        // Tags are located in the untouched text and blanked in a working copy.
        Matcher tags = tagPattern.matcher(raw);
        StringBuilder masked = new StringBuilder(raw);
        while (tags.find()) {
            masked.replace(tags.start(), tags.end(), repeatChar(' ', tags.group().length()));
        }
        Matcher words = wordPattern.matcher(masked.toString());
        while (words.find()) {
            String word = words.group();
            String stem = Stemmator.stemmingWord(word.toLowerCase(), language);
            ArrayList<IndexWord> bucket = index.get(stem);
            if (bucket == null) {
                bucket = new ArrayList<IndexWord>();
                index.put(stem, bucket);
            }
            bucket.add(new IndexWord(word, stem, speechNumber, words.start(), words.end()));
        }
        speechNumber++;
    }
}
// priorityHelper parses the string and sets the Task's importance private static boolean priorityHelper(Task task) { String inputText = task.getValue(Task.TITLE); String[] importanceStrings = { "()((^|[^\\w!])!+|(^|[^\\w!])!\\d)($|[^\\w!])", "()(?i)((\\s?bang){1,})$", "(?i)(\\spriority\\s?(\\d)$)", "(?i)(\\sbang\\s?(\\d)$)", "(?i)()(\\shigh(est)?|\\slow(est)?|\\stop|\\sleast) ?priority$" }; boolean result = false; for (String importanceString : importanceStrings) { Pattern importancePattern = Pattern.compile(importanceString); while (true) { Matcher m = importancePattern.matcher(inputText); if (m.find()) { result = true; task.setValue(Task.IMPORTANCE, strToPriority(m.group(2).trim())); int start = m.start() == 0 ? 0 : m.start() + 1; inputText = inputText.substring(0, start) + inputText.substring(m.end()); } else { break; } } } task.setValue(Task.TITLE, inputText.trim()); return result; }
/**
 * Parses a leading multiplier prefix from the expression.
 *
 * <p>Reports a failure through {@code fail} when the prefix is not at index 0, when the digits
 * cannot be parsed as an {@code int}, or when the parsed number is not positive. The
 * sub-expression is the text between the end of the prefix and the last character of the
 * expression (the last character is dropped).
 *
 * @param expression the text to parse
 * @param offset position of {@code expression} in the enclosing input, used for error reporting
 * @return the parsed multiplier, or {@code null} if no multiplier prefix or no digits are found
 */
private Multiplier multiplier(String expression, int offset) {
    Matcher prefix = MULTIPLIER_PREFIX_PATTERN.matcher(expression);
    if (!prefix.find()) {
        return null;
    }
    if (prefix.start() > 0) {
        fail(offset + prefix.start(), "illegal multiplier position");
    }

    Matcher digits = DIGIT_PATTERN.matcher(expression);
    if (!digits.find()) {
        return null;
    }

    int count = 0;
    try {
        count = Integer.parseInt(expression.substring(0, digits.end()));
    } catch (NumberFormatException e) {
        fail(offset, e);
    }
    if (count <= 0) {
        fail(offset, "illegal 0 multiplier");
    }

    int bodyStart = prefix.end();
    String body = expression.substring(bodyStart, expression.length() - 1);
    return new Multiplier(count, body, bodyStart);
}
public boolean addImages(Context context, Spannable spannable) { Pattern refImg = Pattern.compile("\\Q[img src=\\E([a-zA-Z0-9_]+?)\\Q/]\\E"); boolean hasChanges = false; Matcher matcher = refImg.matcher(spannable); while (matcher.find()) { boolean set = true; for (ImageSpan span : spannable.getSpans(matcher.start(), matcher.end(), ImageSpan.class)) { if (spannable.getSpanStart(span) >= matcher.start() && spannable.getSpanEnd(span) <= matcher.end()) { spannable.removeSpan(span); } else { set = false; break; } } String resname = spannable.subSequence(matcher.start(1), matcher.end(1)).toString().trim(); int id = context.getResources().getIdentifier(resname, "drawable", context.getPackageName()); Drawable icon = context.getResources().getDrawable(id); // ,this.getTheme()); icon.setBounds(0, 0, tv_test1.getLineHeight(), tv_test1.getLineHeight()); if (set) { hasChanges = true; spannable.setSpan( new ImageSpan(icon, ImageSpan.ALIGN_BASELINE), matcher.start(), matcher.end(), Spannable.SPAN_EXCLUSIVE_EXCLUSIVE); } } return hasChanges; }
/**
 * Runs the pattern over the spannable string and replaces every match at or after
 * {@code start} with the corresponding emoji image.
 *
 * <p>The drawable is looked up reflectively as {@code R.drawable.emoji_<name>}, where
 * {@code <name>} is the part of the match between its first {@code ']'} and its last
 * {@code '['}. Matches whose resource id is 0 are left as text.
 *
 * <p>All matches are handled in a single pass. The method does not call itself, so the stack
 * depth does not grow with the number of emoji and the text is scanned only once.
 *
 * @param context used to decode the emoji bitmaps
 * @param spannableString the text to decorate; modified in place
 * @param patten the emoji pattern
 * @param start matches beginning before this offset are ignored
 * @throws Exception if the reflective drawable lookup fails (for example no such field)
 */
public static void dealExpression(
    Context context, SpannableString spannableString, Pattern patten, int start)
    throws Exception {
    Matcher matcher = patten.matcher(spannableString);
    while (matcher.find()) {
        String key = matcher.group();
        Log.d("Key", key);
        if (matcher.start() < start) {
            continue;
        }
        Field field =
            R.drawable.class.getDeclaredField(
                "emoji_" + key.substring(key.indexOf("]") + 1, key.lastIndexOf("[")));
        int resId = Integer.parseInt(field.get(null).toString());
        if (resId != 0) {
            Bitmap bitmap = BitmapFactory.decodeResource(context.getResources(), resId);
            ImageSpan imageSpan = new ImageSpan(bitmap);
            spannableString.setSpan(
                imageSpan, matcher.start(), matcher.end(), Spannable.SPAN_INCLUSIVE_EXCLUSIVE);
        }
    }
}
/**
 * Decorates the spannable with an image span for every emoticon occurrence.
 *
 * <p>For each match, image spans lying entirely inside the matched range are removed. As soon
 * as a span reaching outside the range is met, the match is skipped (spans removed up to that
 * point stay removed).
 *
 * @param context used to create the image spans
 * @param spannable the text to decorate; modified in place
 * @return {@code true} if at least one image span was set
 */
public static boolean addSmiles(Context context, Spannable spannable) {
    boolean changed = false;
    for (Entry<Pattern, Integer> emoticon : emoticons.entrySet()) {
        Matcher hit = emoticon.getKey().matcher(spannable);
        while (hit.find()) {
            boolean rangeIsFree = true;
            for (ImageSpan existing :
                spannable.getSpans(hit.start(), hit.end(), ImageSpan.class)) {
                boolean reachesOutside =
                    spannable.getSpanStart(existing) < hit.start()
                        || spannable.getSpanEnd(existing) > hit.end();
                if (reachesOutside) {
                    rangeIsFree = false;
                    break;
                }
                spannable.removeSpan(existing);
            }
            if (!rangeIsFree) {
                continue;
            }
            changed = true;
            spannable.setSpan(
                new ImageSpan(context, emoticon.getValue()),
                hit.start(),
                hit.end(),
                Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
        }
    }
    return changed;
}
/**
 * Advances the search and selects the match that was found.
 *
 * <p>Searching forward takes the matcher's next match. Searching backward consumes all
 * remaining matches and takes the last one. The selection is made by placing the caret dot at
 * the match end and moving it to the match start.
 *
 * @param matcher matcher positioned on the text being searched
 * @param pos offset added to the matcher's indices to obtain caret positions
 * @param forward {@code true} to take the next match, {@code false} to take the last match
 * @return {@code pos} plus the start of the selected match, or -1 if nothing matched
 */
protected int continueSearch(Matcher matcher, int pos, boolean forward) {
    int start = 0;
    int end = 0;
    if (forward) {
        if (!matcher.find()) {
            return -1;
        }
        start = matcher.start();
        end = matcher.end();
    } else {
        boolean found = false;
        while (matcher.find()) {
            found = true;
            start = matcher.start();
            end = matcher.end();
        }
        if (!found) {
            return -1;
        }
    }
    getCaret().setDot(pos + end);
    getCaret().moveDot(pos + start);
    getCaret().setSelectionVisible(true);
    return pos + start;
}
/** * 关于微博的文本处理: @、##、http://t.cn/.... * * @param tv */ public static void textFormat(TextView tv) { Spannable sp = (Spannable) tv.getText(); String text = tv.getText().toString(); Matcher m1 = Pattern.compile("http://t.cn/[a-zA-Z0-9]+").matcher(text); Matcher m2 = Pattern.compile("@[^:||^:||\\s]+").matcher(text); Matcher m3 = Pattern.compile("#[^#]+#").matcher(text); // 下面使用正则表达式 while (m1.find()) { int start, end; start = m1.start(); end = m1.end(); sp.setSpan( new ForegroundColorSpan(0xff444444), start, end, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE); } while (m2.find()) { int start, end; start = m2.start(); end = m2.end(); sp.setSpan( new ForegroundColorSpan(0xff444444), start, end, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE); } while (m3.find()) { int start, end; start = m3.start(); end = m3.end(); sp.setSpan( new ForegroundColorSpan(0xff444444), start, end, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE); } }
@Test public void testCurrentVersionStructureAgainstGold() throws Throwable { int latestVersion = CatalogVersions.getCurrentVersion().getSchemaVersion(); String[] lastKnown = UpgradeTestUtils.getGoldVersion(latestVersion); String[] current = generateCatalog(); Pattern thanksHibernate = Pattern.compile("(alter table )(\\w+)( add index )(\\w+)(.*add constraint )(\\w+)(.*)"); assertEquals("number of stmts in schema ddl", lastKnown.length, current.length - 1); for (int i = 0; i < current.length - 1; i++) { if (!lastKnown[i].equals(current[i])) { // accept differences in index, constraint names if (lastKnown[i].startsWith("alter table") && current[i].startsWith("alter table")) { Matcher last = thanksHibernate.matcher(lastKnown[i]); Matcher now = thanksHibernate.matcher(current[i]); if (last.matches() && now.matches()) { String knownConstraint = lastKnown[i].substring(last.start(6), last.end(6)); String knownIndex = lastKnown[i].substring(last.start(4), last.end(4)); StringBuilder buf = new StringBuilder(); buf.append(current[i].substring(0, now.start(4))); buf.append(knownIndex); buf.append(current[i].substring(now.end(4), now.start(6))); buf.append(knownConstraint); buf.append(current[i].substring(now.end(6))); String munged = buf.toString(); if (lastKnown[i].equals(munged)) continue; } } assertEquals("schema ddl stmt " + i, lastKnown[i], current[i]); } } }
public String replaceTokens(String text, List<String> values) { StringBuilder output = new StringBuilder(); Matcher tokenMatcher = tokenPattern.matcher(text); int cursor = 0; while (tokenMatcher.find()) { // A token is defined as a sequence of the format "{...}". // A key is defined as the content between the brackets. int tokenStart = tokenMatcher.start(); int tokenEnd = tokenMatcher.end(); int keyStart = tokenMatcher.start(1); int keyEnd = tokenMatcher.end(1); output.append(text.substring(cursor, tokenStart)); String token = text.substring(tokenStart, tokenEnd); String key = text.substring(keyStart, keyEnd); Integer index = Integer.parseInt(key); if (values.size() >= index) { String value = values.get(index); output.append(value); } else { output.append(token); } cursor = tokenEnd; } output.append(text.substring(cursor)); return output.toString(); }
/**
 * Creates a spannable copy of the text in which every emoticon occurrence carries an image
 * span.
 *
 * <p>For each match, image spans lying entirely inside the matched range are removed. If a
 * span reaching outside the range is met, no new span is set for that match.
 *
 * @param context used to create the image spans
 * @param text the source text
 * @return a new spannable holding the decorated text
 */
public static Spannable getSmiledText(Context context, CharSequence text) {
    Spannable spannable = spannableFactory.newSpannable(text);
    for (Entry<?, ?> entry : ANDROID_EMOTICONS.entrySet()) {
        Pattern pattern = (Pattern) entry.getKey();
        int drawableId = ((Integer) entry.getValue()).intValue();
        Matcher matcher = pattern.matcher(spannable);
        while (matcher.find()) {
            boolean set = true;
            for (ImageSpan span :
                spannable.getSpans(matcher.start(), matcher.end(), ImageSpan.class)) {
                if (spannable.getSpanStart(span) < matcher.start()
                    || spannable.getSpanEnd(span) > matcher.end()) {
                    // An existing image span sticks out of the match: leave this match alone.
                    set = false;
                    break;
                }
                spannable.removeSpan(span);
            }
            if (set) {
                spannable.setSpan(
                    new ImageSpan(context, drawableId),
                    matcher.start(),
                    matcher.end(),
                    Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
            }
        }
    }
    return spannable;
}
/** * @param desc * @return */ private String extractClassification(String desc) { String result = desc; Matcher mat = classificationPattern.matcher(desc); boolean matched = false; if (mat.find()) { matched = true; String category = mat.group(2); result = desc.substring(0, mat.start()) + desc.substring(mat.end()); if (debug) System.out.println(" Category: " + category); } mat = Pattern.compile(",? ?(classified .*)").matcher(desc); if (mat.find()) { // strip out the excess text regardless // XXX overwriting previous result. result = desc.substring(0, mat.start()) + desc.substring(mat.end()); if (!matched) { // if we didn't previously match a sub-set of the clause, record the category String category = mat.group(1); if (debug) System.out.println(" Category: " + category); } } mat = Pattern.compile("text too brief to classify,? ?").matcher(result); if (mat.find()) { result = result.substring(0, mat.start()) + result.substring(mat.end()); if (debug) System.out.println(" Category: text too brief to classify"); } return result; }
/**
 * Extracts the contents phrase from a description and stores it on {@code ms}.
 *
 * <p>The phrase starts with an article ({@code a}, {@code an}, {@code the}) or, failing that,
 * with {@code incomplete}, and runs up to the next comma or the word {@code located}. If the
 * captured phrase itself contains {@code located}, it is cut off there. A trailing comma is
 * dropped from the stored contents.
 *
 * @param desc the description to parse
 * @return the description with the contents phrase removed, or {@code desc} unchanged when no
 *     phrase is found
 */
private String extractContents(String desc) {
    Pattern articlePhrase =
        Pattern.compile("((?:an?|the) (.*?)(?: of (.*))?)(?:,|\\s*located)");
    Pattern incompletePhrase =
        Pattern.compile("((incomplete .*?)(?: of (.*))?)(?:,|\\s*located)");

    Matcher mat = articlePhrase.matcher(desc);
    if (!mat.find()) {
        mat = incompletePhrase.matcher(desc);
        if (!mat.find()) {
            return desc;
        }
    }

    String matchedText = mat.group(1);
    int locatedAt = matchedText.indexOf("located");
    // The greedy "of ..." part can swallow "located"; stop the contents there if it did.
    int end = (locatedAt >= 0) ? mat.start(1) + locatedAt : mat.end(1);

    String contents = StringUtils.trimToEmpty(desc.substring(mat.start(1), end));
    if (contents.endsWith(",")) {
        contents = contents.substring(0, contents.length() - 1);
    }
    ms.setContents(contents);
    LOGGER.debug(" Contents: " + contents);

    return desc.substring(0, mat.start()) + desc.substring(end);
}
/**
 * Finds the matches and logs them as required.
 *
 * <p>If the very first search finds nothing and the pattern is not an illegal one, message 0
 * is logged. Each match found is located through {@code mCharacters}, where element 0 is read
 * as the line and element 1 as the column. Matches intersecting a comment are ignored when
 * {@code mIgnoreComments} is set. Counted matches raise an error when the pattern is illegal
 * or the duplicate limit is exceeded.
 *
 * <p>The search continues while the error limit has not been reached and the last match was
 * ignored, the pattern is illegal, or duplicates are being checked. This is a loop, not
 * recursion, so the stack depth does not grow with the number of matches.
 */
private void findMatch() {
    while (true) {
        if (!mMatcher.find()) {
            if (!mIllegalPattern && (mMatchCount == 0)) {
                logMessage(0);
            }
            return;
        }

        final int startLine = (mCharacters.get(mMatcher.start()))[0].intValue();
        final int startColumn = (mCharacters.get(mMatcher.start()))[1].intValue();
        final int endLine = (mCharacters.get(mMatcher.end() - 1))[0].intValue();
        final int endColumn = (mCharacters.get(mMatcher.end() - 1))[1].intValue();

        boolean ignore = false;
        if (mIgnoreComments) {
            final FileContents theFileContents = getFileContents();
            ignore =
                theFileContents.hasIntersectionWithComment(
                    startLine, startColumn, endLine, endColumn);
        }

        if (!ignore) {
            mMatchCount++;
            if (mIllegalPattern
                || (mCheckForDuplicates && ((mMatchCount - 1) > mDuplicateLimit))) {
                mErrorCount++;
                logMessage(startLine);
            }
        }

        final boolean keepSearching =
            (mErrorCount < mErrorLimit) && (ignore || mIllegalPattern || mCheckForDuplicates);
        if (!keepSearching) {
            return;
        }
    }
}
private static String filter(String str) { if (str.length() > 63) { throw new IllegalArgumentException( str + " is too long to be a " + "valid PostgreSQL name. By default names must be shorter " + "than 64, but it has " + str.length() + " characters"); } Pattern quotesPattern = Pattern.compile("(\"+)"); Matcher matcher = quotesPattern.matcher(str); while (matcher.find()) { if (((matcher.end() - matcher.start()) & 1) == 1) { // lenght is uneven throw new IllegalArgumentException( "The name '" + str + "' is" + "illegal because contains an open quote at " + matcher.start()); } } return str; }
/**
 * Converts a wildcard expression into an anchored regular expression.
 *
 * <p>{@code *} becomes {@code .*}, {@code ?} becomes {@code .}, and all other text, including
 * the text after the last wildcard, is quoted so that it matches literally.
 *
 * @param pattern the wildcard expression
 * @return the compiled, {@code ^...$}-anchored pattern
 * @throws IllegalStateException if {@code SPLIT_PATTERN} matches something other than
 *     {@code *} or {@code ?}
 */
private Pattern toPattern(final String pattern) {
    final StringBuilder sb = new StringBuilder();
    sb.append('^');
    final Matcher m = SPLIT_PATTERN.matcher(pattern);
    int lastEnd = 0;
    while (m.find()) {
        if (lastEnd < m.start()) {
            sb.append(Pattern.quote(pattern.substring(lastEnd, m.start())));
        }
        final String matched = m.group();
        if ("*".equals(matched)) {
            sb.append(".*");
        } else if ("?".equals(matched)) {
            sb.append(".");
        } else {
            throw new IllegalStateException("Wildcard character does not match * nor ?");
        }
        lastEnd = m.end();
    }
    if (lastEnd < pattern.length()) {
        // The tail is literal text too: quote it rather than treating it as regex syntax.
        sb.append(Pattern.quote(pattern.substring(lastEnd)));
    }
    sb.append('$');
    return Pattern.compile(sb.toString());
}
private static String processHtml(final String source, StringBuilder errorMessages) { if (M_evilTags == null) init(); // normalize all variants of the "<br>" HTML tag to be "<br />\n" // TODO call a method to do this in each process routine String Html = M_patternTagBr.matcher(source).replaceAll("<br />"); // process text and tags StringBuilder buf = new StringBuilder(); if (Html != null) { try { int start = 0; Matcher m = M_patternTag.matcher(Html); // if there are no tags, return as is if (!m.find()) return Html; m.reset(Html); // if there are tags, make sure they are safe while (m.find()) { // append text that isn't part of a tag if (m.start() > start) buf.append(Html.substring(start, m.start())); start = m.end(); buf.append(checkTag(m.group(), errorMessages)); } // tail if (Html.length() > start) buf.append(Html.substring((start))); } catch (Exception e) { M_log.warn("FormattedText.processEscapedHtml M_patternTag.matcher(Html):", e); } } return new String(buf.toString()); }
public static List getNcMLElements(String path, Document doc) { // XPath doesn't support default namespaces, so we add nc as a prefix for the tags within the // namespace!!! if (!path.startsWith(NS_PREFIX_ON_TAG) && !path.startsWith("/")) path = NS_PREFIX_ON_TAG + path; Pattern pattern = Pattern.compile("/\\w"); Matcher matcher = pattern.matcher(path); StringBuilder sb = new StringBuilder(); int currentChar = 0; while (matcher.find()) { sb.append(path.substring(currentChar, matcher.start() - currentChar + 1)); if (!sb.toString().endsWith("/")) sb.append("/"); sb.append(NS_PREFIX_ON_TAG); currentChar = matcher.start() + 1; } sb.append(path.substring(currentChar, path.length())); XPath xpath; try { xpath = XPath.newInstance(sb.toString()); xpath.addNamespace(NS_PREFIX, doc.getRootElement().getNamespaceURI()); return xpath.selectNodes(doc); } catch (JDOMException e) { e.printStackTrace(); } return null; }
public String documentedJson(String json) { JSONReaderImpl jsonReader = new JSONReaderImpl(); JsonKeyFinder keyFinder = new JsonKeyFinder(jsonReader); jsonReader.setContentHandler(keyFinder); try { jsonReader.parse(json); } catch (SAJException e) { // TODO Auto-generated catch block e.printStackTrace(); } StringBuffer documentedJson = new StringBuffer(); Pattern pattern = Pattern.compile("\\\"([a-zA-Z_]+)\\\"\\:"); Matcher matcher = pattern.matcher(json); int afterLastMatch = 0; while (matcher.find()) { String dotNotatedKey = keyFinder.getKeys().get(matcher.start() + matcher.group(0).length() - 1); String keyDoc = docReference.getDocumentationForKey(dotNotatedKey); // after last match to beginning of this one: documentedJson.append(json.substring(afterLastMatch, matcher.start())); // documented key if (keyDoc != null) { documentedJson.append(surroundWith.getDocumentedKey(matcher.group(0), keyDoc)); } else documentedJson.append(matcher.group(0)); afterLastMatch = matcher.start() + matcher.group(0).length(); } documentedJson.append(json.substring(afterLastMatch)); return documentedJson.toString(); }
/**
 * Copies the template from {@code in} to {@code out}, inserting the named snippet at every
 * {@code refStart} marker.
 *
 * <p>When {@code refs} is true the marker itself is replaced by the snippet. Otherwise the
 * marker is kept, the snippet is written after it, and the old text up to the next
 * {@code refEnd} is dropped (the {@code refEnd} marker is kept). Each snippet is wrapped in
 * {@code prefix} and {@code postfix}.
 *
 * @param in source of the template text
 * @param out destination for the expanded text
 * @param refs whether markers are replaced (true) or kept around the snippet (false)
 * @throws IOException if reading or writing fails
 * @throws IllegalArgumentException if a snippet is not defined, or a {@code refEnd} marker is
 *     missing when {@code refs} is false
 */
private void replace(Reader in, Writer out, boolean refs) throws IOException {
    final String template = IoUtils.read(in);
    final Matcher marker = refStart.matcher(template);
    int searchFrom = 0;
    int copiedUpTo = 0;
    while (marker.find(searchFrom)) {
        final String name = marker.group(1);
        if (!snippets.containsKey(name)) {
            throw new IllegalArgumentException("Snippet '" + name + "' not defined.");
        }
        final int markerStart = marker.start();
        final int markerEnd = marker.end();
        if (refs) {
            out.write(template.substring(copiedUpTo, markerStart));
            copiedUpTo = markerEnd;
            searchFrom = markerEnd;
        } else {
            out.write(template.substring(copiedUpTo, markerEnd));
            copiedUpTo = template.indexOf(refEnd, markerEnd);
            if (copiedUpTo < 0) {
                throw new IllegalArgumentException(
                    "No refEnd marker found for refStart '"
                        + template.substring(markerStart, markerEnd)
                        + "'");
            }
            searchFrom = copiedUpTo + refEnd.length();
        }
        out.write(prefix);
        out.write(snippets.get(name));
        out.write(postfix);
    }
    out.write(template.substring(copiedUpTo));
}
/**
 * Inserts the data-rights footer for the artifact into the content.
 *
 * <p>The footer is requested from a {@code DataRightProviderImpl} for the artifact's guid,
 * classification and page orientation. If the footer contains both a {@code START_PATTERN}
 * and an {@code END_PATTERN} match, only the text between them is kept. In the content, each
 * start match is then paired with the next end match and the text between them is replaced by
 * the footer.
 *
 * @param content the document content
 * @param classification the data-rights classification to request
 * @param artifact the artifact the content belongs to
 * @return the content with the footers replaced
 */
private String addDataRights(String content, String classification, Artifact artifact) {
    PageOrientation orientation = WordRendererUtil.getPageOrientation(artifact);

    DataRightInput request = new DataRightInput();
    request.addData(artifact.getGuid(), classification, orientation, 0);
    DataRightProvider provider = new DataRightProviderImpl();
    DataRightResult dataRights = provider.getDataRights(request);

    String footer = dataRights.getContent(artifact.getGuid(), orientation);
    Matcher startFtr = START_PATTERN.matcher(footer);
    Matcher endFtr = END_PATTERN.matcher(footer);
    if (startFtr.find() && endFtr.find()) {
        // Keep only what lies between the start and end markers of the footer.
        ChangeSet footerChanges = new ChangeSet(footer);
        footerChanges.delete(0, startFtr.end());
        footerChanges.delete(endFtr.start(), footer.length());
        footer = footerChanges.applyChangesToSelf().toString();
    }

    // Reuse both matchers on the content.
    startFtr.reset(content);
    endFtr.reset(content);
    ChangeSet contentChanges = new ChangeSet(content);
    while (startFtr.find()) {
        if (endFtr.find()) {
            contentChanges.replace(startFtr.end(), endFtr.start(), footer);
        }
    }
    return contentChanges.applyChangesToSelf().toString();
}
public Token getWordAt(int offs, Pattern p) { Token word = null; try { Element line = getParagraphElement(offs); if (line == null) { return word; } int lineStart = line.getStartOffset(); int lineEnd = Math.min(line.getEndOffset(), getLength()); Segment seg = new Segment(); getText(lineStart, lineEnd - lineStart, seg); if (seg.count > 0) { // we need to get the word using the words pattern p Matcher m = p.matcher(seg); int o = offs - lineStart; while (m.find()) { if (m.start() <= o && o <= m.end()) { word = new Token(TokenType.DEFAULT, m.start() + lineStart, m.end() - m.start()); break; } } } } catch (BadLocationException ex) { Logger.getLogger(SyntaxDocument.class.getName()).log(Level.SEVERE, null, ex); } finally { return word; } }
public static void main(String[] args) throws PatternSyntaxException { Scanner in = new Scanner(System.in); System.out.println("Enter pattern: "); String patternString = in.nextLine(); Pattern pattern = Pattern.compile(patternString); while (true) { System.out.println("Enter string to match: "); String input = in.nextLine(); if (input == null || input.equals("")) return; Matcher matcher = pattern.matcher(input); if (matcher.matches()) { System.out.println("Match"); int g = matcher.groupCount(); if (g > 0) { for (int i = 0; i < input.length(); i++) { // Print any empty groups for (int j = 1; j <= g; j++) if (i == matcher.start(j) && i == matcher.end(j)) System.out.print("()"); // Print ( for non-empty groups starting here for (int j = 1; j <= g; j++) if (i == matcher.start(j) && i != matcher.end(j)) System.out.print('('); System.out.print(input.charAt(i)); // Print ) for non-empty groups ending here for (int j = 1; j <= g; j++) if (i + 1 != matcher.start(j) && i + 1 == matcher.end(j)) System.out.print(')'); } System.out.println(); } } else System.out.println("No match"); } }
private void splitHead(String intro) { Pattern pattern = Pattern.compile("^[0-9]{0,2}(\\. ){0,1}" + intro + "$", Pattern.MULTILINE); Matcher matcher = pattern.matcher(fullText); if (matcher.find()) { if (debug) logger.info("Found " + intro + " at " + matcher.end()); head = fullText.substring(0, matcher.start()); body = fullText.substring(matcher.start()); } else { // try "...." after Abstract if (debug) logger.info("Trying to find abstract"); Pattern abstractPattern = Pattern.compile("\\s[0-9]*Abstract\\s"); Matcher abstractMatcher = abstractPattern.matcher(fullText); int abstractOffSet = 0; if (abstractMatcher.find()) { if (debug) logger.info("Found Abstract"); abstractOffSet = abstractMatcher.end(); Pattern pointPattern = Pattern.compile("\\.{4,}"); Matcher pointMatcher = pointPattern.matcher(fullText); while (pointMatcher.find()) { if (pointMatcher.end() > abstractOffSet) { head = fullText.substring(0, pointMatcher.end()); body = fullText.substring(pointMatcher.end()); return; } } head = fullText.substring(0, abstractMatcher.start()); body = fullText.substring(abstractMatcher.start()); } // Apparently abstract wasn't divided by points splitByHeading(); } }
public void prepareRequireSearch(final String file) { // if an extension is specified, try more targetted searches if (file.lastIndexOf('.') > file.lastIndexOf('/')) { Matcher matcher = null; if ((matcher = sourcePattern.matcher(file)).find()) { // source extensions suffixType = SuffixType.Source; // trim extension to try other options searchFile = file.substring(0, matcher.start()); } else if ((matcher = extensionPattern.matcher(file)).find()) { // extension extensions suffixType = SuffixType.Extension; // trim extension to try other options searchFile = file.substring(0, matcher.start()); } else { // unknown extension, fall back to search with extensions suffixType = SuffixType.Both; searchFile = file; } } else { // try all extensions suffixType = SuffixType.Both; searchFile = file; } }
/** * Method to split a text by headings. As of now we assume a Heading has a leading number followed * by a whitespace character and some text beginning with upper case letters, such as "3 Related * Work" */ private void splitByHeading() { Pattern pattern = Pattern.compile("^[0-9]*\\s[A-Z].*", Pattern.MULTILINE); Matcher matcher; int add = 0; // try to find headings after abstract Pattern abstractPattern = Pattern.compile("^(Abstract).{0,5}$", Pattern.MULTILINE); Matcher abstractMatcher = abstractPattern.matcher(body); if (abstractMatcher.find()) { add = abstractMatcher.end(); matcher = pattern.matcher(body.substring(abstractMatcher.end())); } else { matcher = pattern.matcher(body); } if (matcher.find()) { // found at least once if (debug) logger.info( "Splitting by heading at " + add + matcher.start() + " which is the heading: " + matcher.group()); head = body.substring(0, add + matcher.start()); body = body.substring(add + matcher.start()); } }
/**
 * Returns the places of possible breaks between sentences.
 *
 * <p>A break position is the end of a before-break match. When the rule also has an
 * after-break pattern, the position only counts if an after-break match starts exactly there.
 * A rule with neither pattern yields no breaks; a rule with only an after-break pattern uses
 * {@code DEFAULT_BEFOREBREAK_PATTERN} for the before-break side.
 *
 * @param paragraph the text to scan
 * @param rule the segmentation rule supplying the patterns
 * @return the break positions found, in text order
 */
private static List<BreakPosition> getBreaks(String paragraph, Rule rule) {
    List<BreakPosition> breaks = new ArrayList<BreakPosition>();

    Matcher before =
        rule.getBeforebreak() != null ? rule.getCompiledBeforebreak().matcher(paragraph) : null;
    Matcher after =
        rule.getAfterbreak() != null ? rule.getCompiledAfterbreak().matcher(paragraph) : null;

    if (before == null && after == null) {
        return breaks;
    }
    // Position the after-break matcher on its first match, or give up if there is none.
    if (after != null && !after.find()) {
        return breaks;
    }
    if (before == null) {
        before = DEFAULT_BEFOREBREAK_PATTERN.matcher(paragraph);
    }

    while (before.find()) {
        int candidate = before.end();
        if (after == null) {
            breaks.add(new BreakPosition(candidate, rule));
            continue;
        }
        // Advance the after-break matcher until it reaches or passes the candidate.
        int afterStart = after.start();
        while (afterStart < candidate) {
            if (!after.find()) {
                return breaks;
            }
            afterStart = after.start();
        }
        if (afterStart == candidate) {
            breaks.add(new BreakPosition(candidate, rule));
        }
    }
    return breaks;
}
/**
 * Expands variable references in the source text using system properties.
 *
 * <p>For each {@code PATTERN_VARIABLE} match: when group 1 is absent, group 3 is taken as a
 * system property name and the match is replaced by the property value, or left untouched if
 * the property is not set. When group 1 is present, the text from the start of the match up
 * to the start of group 2 is dropped and the rest is kept literally (presumably an escaped
 * reference; confirm against the pattern definition).
 *
 * @param source the text to expand
 * @return the expanded text
 */
public static String interpret(String source) {
    StringBuilder expanded = new StringBuilder();
    Matcher reference = PATTERN_VARIABLE.matcher(source);
    int copiedUpTo = 0;
    while (reference.find()) {
        if (reference.group(1) != null) {
            expanded.append(source, copiedUpTo, reference.start());
            copiedUpTo = reference.start(2);
            continue;
        }
        String value = System.getProperty(reference.group(3));
        if (value == null) {
            // Unknown property: the reference stays in the output as written.
            continue;
        }
        expanded.append(source, copiedUpTo, reference.start());
        expanded.append(value);
        copiedUpTo = reference.end();
    }
    if (copiedUpTo < source.length()) {
        expanded.append(source, copiedUpTo, source.length());
    }
    return expanded.toString();
}