/**
 * Returns the additional attributes found on the first line, parsing them on first use.
 *
 * <p>The map is cached in {@code attributes}, so later calls return the same instance. Keys are
 * lower-cased with {@link Locale#ENGLISH}; values are stored exactly as matched.
 *
 * @return the cached attribute map; empty when the first line has no additional attributes
 */
protected Map<String, String> getAttributes() {
  if (attributes == null) {
    attributes = new HashMap<String, String>();
    matcher = ADDITIONAL_ATTRIBUTES_PATTERN.matcher(firstLine);
    if (matcher.find()) {
      // Group 2 of the outer match holds the raw attribute list.
      Matcher pairs = ADDITIONAL_ATTRIBUTE_PATTERN.matcher(matcher.group(2));
      while (pairs.find()) {
        String name = pairs.group(1);
        String content = pairs.group(2);
        attributes.put(name.toLowerCase(Locale.ENGLISH), content);
      }
    }
  }
  return attributes;
}
/** * Import an XQuery library module from the given document. The namespace and preferred prefix of * the module are extracted from the module itself. The MIME type of the document is set to * "application/xquery" as a side-effect. * * @param module the non-XML document that holds the library module's source * @return this service, to chain calls * @throws DatabaseException if the module is an XML document, or the module declaration cannot be * found at the top of the document */ public QueryService importModule(Document module) { if (module instanceof XMLDocument) throw new DatabaseException("module cannot be an XML document: " + module); Matcher matcher = MODULE_DECLARATION_DQUOTE.matcher(module.contentsAsString()); if (!matcher.find()) { matcher = MODULE_DECLARATION_SQUOTE.matcher(module.contentsAsString()); if (!matcher.find()) throw new DatabaseException("couldn't find a module declaration at the top of " + module); } module.metadata().setMimeType("application/xquery"); String moduleNamespace = matcher.group(1); // TODO: should do URILiteral processing here to replace entity and character references and // normalize // whitespace, but since it seems that eXist doesn't do it either (bug?) there's no reason to // rush. Document prevModule = moduleMap.get(moduleNamespace); if (prevModule != null && !prevModule.equals(module)) throw new DatabaseException( "module " + moduleNamespace + " already bound to " + prevModule + ", can't rebind to " + module); moduleMap.put(moduleNamespace, module); return this; }
public boolean checkCondition(String format, ScoreBoardEvent event) { boolean triggerCondition = true; Matcher m = conditionPattern.matcher(format); if (!m.find()) throw new IllegalArgumentException("No conditions in format : " + format); do { String specifier = m.group(1); String comparator = m.group(2); String targetValue = m.group(3); if (null == comparator || null == targetValue) continue; String value = scoreBoardValues.get(specifier).getValue(); if (triggerCondition) { triggerCondition = false; // If current trigger event value == previous value after processing // (e.g. conversion to min:sec) then ignore, to prevent multiple consecutive // identical triggers if (value.equals( scoreBoardValues.get(specifier).getPreviousValue(event.getPreviousValue()))) return false; } try { if (!checkConditionValue(value, comparator, targetValue)) return false; } catch (IllegalArgumentException iaE) { return false; } } while (m.find()); return true; }
/**
 * Expands every format specifier in {@code format} to its current value.
 *
 * <p>Each match of {@code formatPattern} is replaced by the result of
 * {@code getFormatSpecifierValue}; text between matches is copied unchanged.
 *
 * @param format the text containing format specifiers
 * @return the text with all specifiers substituted
 */
public String parse(String format) {
  StringBuffer buffer = new StringBuffer();
  Matcher m = formatPattern.matcher(format);
  while (m.find()) {
    // The value is literal text: quote it so '$' and '\' are not treated as group
    // references or escapes by appendReplacement.
    m.appendReplacement(buffer, Matcher.quoteReplacement(getFormatSpecifierValue(m.group())));
  }
  m.appendTail(buffer);
  return buffer.toString();
}
/** * Leave only the value for RCS keywords. * * <p>For example, <code>$Revision: 1.1 $</code> becomes <code>1.0</code>. */ public String replaceRcsKeywords(String text) { if (matcher == null) { matcher = Pattern.compile( "\\$(Author|Date|Header|Id|Locker|Log|Name|RCSFile|Revision|Source|State): (.+?) \\$") .matcher(text); } else { matcher.reset(text); } StringBuffer buffer = new StringBuffer(); while (matcher.find()) { String string = matcher.group(2); // For the Date: keyword, have a shot at reformatting string if ("Date".equals(matcher.group(1))) { try { DateFormat dateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss"); Date date = dateFormat.parse(string); string = date.toString(); } catch (ParseException e) { } // if we can't parse, return unchanged } matcher.appendReplacement(buffer, string); } matcher.appendTail(buffer); return buffer.toString(); }
/** * Get dependencies of a source file. * * @param path The canonical path of source file. * @return Path of dependencies. */ private ArrayList<String> getDependencies(String path) { if (!dependenceMap.containsKey(path)) { ArrayList<String> dependencies = new ArrayList<String>(); Matcher m = PATTERN_REQUIRE.matcher(read(path, charset)); while (m.find()) { // Decide which root path to use. // Path wrapped in <> is related to root path. // Path wrapped in "" is related to parent folder of the source file. String root = null; if (m.group(1).equals("<")) { root = this.root; } else { root = new File(path).getParent(); } // Get path of required file. String required = m.group(2); File f = new File(root, required); if (f.exists()) { dependencies.add(canonize(f)); } else { App.exit("Cannot find required file " + required + " in " + path); } } dependenceMap.put(path, dependencies); } return dependenceMap.get(path); }
/**
 * Reads {@code fileName} line by line and builds the instance list.
 *
 * <p>Each line is expected to have at least three comma-separated fields. The first field is
 * added to {@code docIds}; everything after the second comma becomes the instance data. Lines
 * that do not have that shape are skipped. Carriage returns are removed before matching.
 *
 * @return the instances built from the file, piped through the tokenising pipes set up here
 * @throws IOException if the file cannot be opened
 */
private InstanceList readFile() throws IOException {
  ArrayList<Pipe> pipeList = new ArrayList<Pipe>();
  pipeList.add(new CharSequence2TokenSequence(Pattern.compile("\\p{L}\\p{L}+")));
  pipeList.add(new TokenSequence2FeatureSequence());
  InstanceList testing = new InstanceList(new SerialPipes(pipeList));

  // Compiled once instead of once per line.
  Pattern linePattern = Pattern.compile("^(.*?),(.*?),(.*)$");

  // The stream and scanner are opened only after the pipes exist and are closed on every
  // exit path, including a failure in the Scanner constructor (e.g. unsupported encoding).
  try (FileInputStream in = new FileInputStream(fileName);
      Scanner scanner = new Scanner(in, encoding)) {
    while (scanner.hasNextLine()) {
      String text = scanner.nextLine().replace("\r", "");
      Matcher matcher = linePattern.matcher(text);
      if (matcher.find()) {
        docIds.add(matcher.group(1));
        testing.addThruPipe(new Instance(matcher.group(3), null, "test instance", null));
      }
    }
  }
  return testing;
}
/** * Takes a line and looks for an archive request tag. If one is found, the information from the * tag is returned as an <CODE>ArchiveRequest</CODE>. If a tag is found, the data in the tag is * stored in the <CODE>HashMap</CODE> provided. * * @param currentLine The line in which to look for the archive tag. * @param group The <CODE>ArchiveGroup</CODE> to which to add the <CODE>ArchiveRequest</CODE>. Can * be <CODE>null</CODE>. * @param template The <CODE>Template</CODE> to which the <CODE>ArchiveRequest</CODE> belongs. * @return The archive request tag data as an instance of <CODE>ArchiveRequest</CODE>, <CODE>null * </CODE> if the line passed in does not contain an arFile tag. */ private ArchiveRequest parseArchiveRequestTag( String currentLine, ArchiveGroup group, Template template) { ArchiveRequest archiveTag; Matcher currentMatcher = archiveRequestPattern.matcher(currentLine); if (currentMatcher.find()) { String fileName = currentMatcher.group(1); archiveTag = template.getArchiveRequest(fileName); if (archiveTag == null) { archiveTag = new ArchiveRequest(fileName); template.addArchiveRequest(archiveTag); } if (group != null) { // Creating a new request object because the requests // in the groups flagged as not in database will mean the // group - request association is not in database. Requests // in the archiveRequests map flagged as not in database // means the request record is not there. String requestFileName = archiveTag.getFileName(); String requestFileLocation = archiveTag.getFileLocation(); ArchiveRequest groupRequest = new ArchiveRequest(requestFileLocation, requestFileName); group.addArchiveRequest(groupRequest); } } else archiveTag = null; return archiveTag; }
/**
 * Handles "assemble" processing instructions for the element on top of the stack.
 *
 * <p>Every name/value pair matched in {@code data} is applied to the top element: names starting
 * with '@' are stored in its {@code inst} map, all others are converted with
 * {@code guessUntypedValue} and appended to its {@code args}. Other targets, or an empty stack,
 * are only logged.
 */
@Override
public void processingInstruction(String target, String data) throws SAXException {
  _logger.fine("Processing Instruction " + target);
  _logger.fine("Processing Instruction data: " + data);

  if (!target.equals("assemble") || _stack.isEmpty()) {
    return;
  }

  ElementInfo element = _stack.get(_stack.size() - 1);
  Matcher pairs = PROCESSING_INSTRUCTION.matcher(data);
  while (pairs.find()) {
    if (pairs.groupCount() != 2) {
      continue;
    }
    String name = pairs.group(1);
    if (name.charAt(0) != '@') {
      element.args.add(guessUntypedValue(name, pairs.group(2)));
    } else {
      element.inst.put(name, pairs.group(2));
    }
    _logger.fine(
        "Processing Instruction for "
            + element.data.getClass()
            + "\n\ttarget = "
            + target
            + "\n\t"
            + name
            + "="
            + pairs.group(2));
  }
}
/**
 * Splits {@code text} into plain parts and hyperlink parts.
 *
 * <p>Every match of {@code urlpat} that forms a valid {@link URL} (with "http://" prepended when
 * the match contains no ':') becomes a part styled with {@code urlstyle} and carrying a
 * {@code ChatAttribute.HYPERLINK} attribute. Matches that are not valid URLs stay part of the
 * surrounding plain text.
 *
 * @param s the parser state (not used here)
 * @param text the text to split
 * @param attrs the attributes applied to the plain parts and used as base for link parts
 * @return the first part of the resulting chain
 */
protected RichText.Part text(PState s, String text, Map<? extends Attribute, ?> attrs)
    throws IOException {
  RichText.Part ret = null;
  int searchFrom = 0; // where the next URL search starts
  int emitted = 0; // end of the text already turned into parts
  Matcher m = urlpat.matcher(text);
  while (m.find(searchFrom)) {
    searchFrom = m.end();
    String matched = m.group();
    URL url;
    try {
      String su = matched;
      if (su.indexOf(':') < 0) su = "http://" + su;
      url = new URL(su);
    } catch (java.net.MalformedURLException e) {
      // Not usable as a link: leave it to be emitted with the following plain text.
      continue;
    }
    // Only the text since the previously emitted part precedes this link; starting at 0
    // would repeat everything before an earlier link.
    RichText.Part lead = new RichText.TextPart(text.substring(emitted, m.start()), attrs);
    if (ret == null) ret = lead;
    else ret.append(lead);
    Map<Attribute, Object> na = new HashMap<Attribute, Object>(attrs);
    na.putAll(urlstyle);
    na.put(ChatAttribute.HYPERLINK, new FuckMeGentlyWithAChainsaw(url));
    ret.append(new RichText.TextPart(matched, na));
    emitted = m.end();
  }
  if (ret == null) return (new RichText.TextPart(text, attrs));
  // The tail starts where emission stopped, so a trailing malformed match is not dropped.
  ret.append(new RichText.TextPart(text.substring(emitted), attrs));
  return (ret);
}
/**
 * Substitutes variables and colour codes in a message.
 *
 * <p>Each match of {@code variablePattern} in the original message has its first character
 * dropped, and surrounding braces removed, to obtain the variable name. The first occurrence
 * of the matched text in the message is then replaced by the variable's value (empty if
 * unset). Afterwards, for each match of {@code colorPattern}, everything but the last matched
 * character is replaced by the character {@code \247}.
 *
 * @param message the message to format
 * @return the formatted message
 */
public synchronized String format(String message) {
  Matcher references = variablePattern.matcher(message);
  while (references.find()) {
    String token = references.group();
    String name = token.substring(1);
    boolean braced = name.startsWith("{") && name.endsWith("}");
    if (braced) {
      name = name.substring(1, name.length() - 1);
    }
    String value = variables.get(name);
    String replacement = (value == null) ? "" : value;
    message = message.replaceFirst(Pattern.quote(token), Matcher.quoteReplacement(replacement));
  }

  // NOTE(review): offsets come from the message as it was when this matcher was created;
  // this stays aligned only while each colour match is two characters long — confirm pattern.
  Matcher colours = colorPattern.matcher(message);
  while (colours.find()) {
    String head = message.substring(0, colours.start());
    String tail = message.substring(colours.end() - 1);
    message = head + "\247" + tail;
  }
  return message;
}
/**
 * Converts follow-up patient ids to the id captured by {@code FOLLOW_UP_PATIENT_ID_REGEX}.
 *
 * @param patientIds the ids to convert
 * @return a new list, in the same order, holding group 1 of the match for ids that match the
 *     pattern and the unchanged id for those that do not
 */
private List<String> convertFollowupPatientIds(List<String> patientIds) {
  List<String> converted = new ArrayList<String>();
  for (String patientId : patientIds) {
    Matcher followUp = FOLLOW_UP_PATIENT_ID_REGEX.matcher(patientId);
    String id = patientId;
    if (followUp.find()) {
      id = followUp.group(1);
    }
    converted.add(id);
  }
  return converted;
}
public URLFinder(String text) { Matcher matcher = dfPattern.matcher(text); urls = new ArrayList<String>(); locations = new ArrayList<Integer[]>(); while (matcher.find()) { urls.add(matcher.group(1)); locations.add(new Integer[] {matcher.start(1), matcher.end(1)}); } }
/**
 * Builds the word-frequency table from a text file.
 *
 * <p>Every line is lower-cased and each run of word characters ({@code \w+}) is counted in
 * {@code nWords}.
 *
 * @param file path of the text file to read
 * @throws IOException if the file cannot be opened or read
 */
public BasicSpellChecker(String file) throws IOException {
  Pattern p = Pattern.compile("\\w+");
  // try-with-resources: the reader is closed even when reading fails part-way.
  try (BufferedReader in = new BufferedReader(new FileReader(file))) {
    String line;
    while ((line = in.readLine()) != null) {
      Matcher m = p.matcher(line.toLowerCase());
      while (m.find()) {
        String word = m.group();
        nWords.put(word, nWords.containsKey(word) ? nWords.get(word) + 1 : 1);
      }
    }
  }
}
/**
 * Returns the result captured from the first line, extracting it on first use.
 *
 * <p>Once a result has been found it is cached in {@code result}. While none is found, every
 * call re-runs {@code RESULT_PATTERN} against the first line.
 *
 * @return group 1 of the match, or {@code null} if the first line does not match
 */
public String getResult() {
  if (result == null) {
    matcher = RESULT_PATTERN.matcher(firstLine);
    if (matcher.find()) {
      result = matcher.group(1);
    }
  }
  return result;
}
/**
 * Returns the numeric status captured from the first line, parsing it on first use.
 *
 * <p>The parsed value is cached in {@code status}. NOTE(review): when the first line does not
 * match {@code STATUS_PATTERN}, {@code status} stays {@code null} and unboxing it for the
 * {@code int} return throws {@link NullPointerException} — confirm callers never hit this.
 *
 * @return the status parsed from group 1 of the match
 * @throws NumberFormatException if the captured text is not a valid integer
 */
public int getStatus() {
  if (status == null) {
    matcher = STATUS_PATTERN.matcher(firstLine);
    if (matcher.find()) {
      status = Integer.parseInt(matcher.group(1));
    }
  }
  return status;
}
/** * Removes all sections (both header and content) with the given sectionName * * @param sectionName the name of the section (case insensitive) to remove. * @param markup the markup to be stripped * @return the stripped markup */ public static String stripSection(String markup, String sectionName) { Pattern p = Pattern.compile( "(={2,})\\s*" + sectionName + "\\s*\\1.*?([^=]\\1[^=])", Pattern.CASE_INSENSITIVE + Pattern.DOTALL); Matcher m = p.matcher(markup); StringBuffer sb = new StringBuffer(); int lastIndex = 0; while (m.find()) { sb.append(markup.substring(lastIndex, m.start())); sb.append(m.group(2)); lastIndex = m.end(); } sb.append(markup.substring(lastIndex)); markup = sb.toString(); // if this was the last section in the doc, then it won't be discarded because we can't tell // where it ends. // best we can do is delete the title and the paragraph below it. p = Pattern.compile( "(={2,})\\s*" + sectionName + "\\s*\\1\\W*.*?\n\n", Pattern.CASE_INSENSITIVE + Pattern.DOTALL); m = p.matcher(markup); sb = new StringBuffer(); lastIndex = 0; while (m.find()) { sb.append(markup.substring(lastIndex, m.start())); lastIndex = m.end() - 2; } sb.append(markup.substring(lastIndex)); return sb.toString(); }
/* * This is comlicated. MOST AUs have articles that live below and issue level TOC * that is, * <blah>/<journal_id>/vol#/iss#/ is a toc with no relevant metadata * <blah>/<journal_id>/vol#/iss#/xxx is an article with metadata * (eg Economist Voice V1) * BUT * in some AUs there are issues with only 1 article, in which case * <blah>/<journal_id>/vol#/iss#/ is an abstract with metadata * (eg Rhodes Cook V4) * and a few AUs with a mixture * (eg Forum for Health Economics V5) * So to identify ALL articles, we'll also have to capture issue level items and then look * at the html and if it has article metadata in it, count it as an article. * */ @Override protected ArticleFiles createArticleFiles(CachedUrl cu) { String url = cu.getUrl(); Matcher mat = pattern.matcher(url); if (mat.find()) { // we matched, but could this pattern potentially be a toc? Matcher tocmat = TOC_pattern.matcher(url); // if we could be a TOC then we must have metadata to be considered an article if (tocmat.find()) { if (hasArticleMetadata(cu)) { return processUrl(cu, mat); } } else { // we're not a potential TOC, so treat this as an article without checking return processUrl(cu, mat); } return null; // this was a TOC, not an article } log.warning("Mismatch between article iterator factory and article iterator: " + url); return null; }
/**
 * Loads "username:password" pairs from a text file.
 *
 * <p>On each line the first two runs of 1-16 word characters are taken as username and
 * password; lines with fewer than two such runs are skipped. The number of loaded accounts is
 * printed to standard output.
 *
 * @param fileName path of the accounts file
 * @return the accounts in file order, each formatted as {@code username:password}
 * @throws RuntimeException wrapping any exception raised while reading the file
 */
private static List<String> loadAccounts(String fileName) {
  List<String> accounts = new ArrayList<String>();
  Pattern pattern = Pattern.compile("[\\w]{1,16}");
  // try-with-resources: the reader is closed even when reading fails part-way.
  try (BufferedReader reader = new BufferedReader(new FileReader(new File(fileName)))) {
    String line;
    while ((line = reader.readLine()) != null) {
      Matcher matcher = pattern.matcher(line);
      if (!matcher.find()) {
        continue;
      }
      String username = matcher.group();
      if (!matcher.find()) {
        continue;
      }
      String password = matcher.group();
      accounts.add(username + ":" + password);
    }
  } catch (Exception exception) {
    throw new RuntimeException(exception);
  }
  System.out.println("Loaded " + accounts.size() + " accounts.");
  return accounts;
}
/** * Strips all non-article links from the given markup; anything like [[this]] is removed unless it * goes to a wikipedia article, redirect, or disambiguation page. * * @param markup the text to be stripped * @return the stripped text */ public static String stripIsolatedLinks(String markup) { Vector<Integer> linkStack = new Vector<Integer>(); Pattern p = Pattern.compile("(\\[\\[|\\]\\])"); Matcher m = p.matcher(markup); StringBuffer sb = new StringBuffer(); int lastIndex = 0; while (m.find()) { String tag = markup.substring(m.start(), m.end()); if (tag.equals("[[")) linkStack.add(m.start()); else { if (!linkStack.isEmpty()) { int linkStart = linkStack.lastElement(); linkStack.remove(linkStack.size() - 1); if (linkStack.isEmpty()) { sb.append(markup.substring(lastIndex, linkStart)); // we have the whole link, with other links nested inside if it's an image String linkMarkup = markup.substring(linkStart + 2, m.start()); // System.out.println(" - " + linkStart + ", " + m.end() + ", " + markup.length()) ; if (markup.substring(Math.max(0, linkStart - 10), linkStart).matches("(?s).*(\\W*)\n") && (m.end() >= markup.length() - 1 || markup .substring(m.end(), Math.min(markup.length() - 1, m.end() + 10)) .matches("(?s)(\\W*)(\n.*|$)"))) { // discarding link } else { sb.append("[["); sb.append(linkMarkup); sb.append("]]"); } lastIndex = m.end(); } } } } if (!linkStack.isEmpty()) System.err.println( "MarkupStripper | Warning: links were not well formed, so we cannot guarantee that they were stripped out correctly. "); sb.append(markup.substring(lastIndex)); return sb.toString(); }
void findRemoveDirectives(boolean clean) { // if ( clean ) editor.startCompoundEdit(); Sketch sketch = editor.getSketch(); for (int i = 0; i < sketch.getCodeCount(); i++) { SketchCode code = sketch.getCode(i); String program = code.getProgram(); StringBuffer buffer = new StringBuffer(); Matcher m = pjsPattern.matcher(program); while (m.find()) { String mm = m.group(); // TODO this urgently needs tests .. /* remove framing */ mm = mm.replaceAll("^\\/\\*\\s*@pjs", "").replaceAll("\\s*\\*\\/\\s*$", ""); /* fix multiline nice formatting */ mm = mm.replaceAll("[\\s]*([^;\\s\\n\\r]+)[\\s]*,[\\s]*[\\n\\r]+", "$1,"); /* fix multiline version without semicolons */ mm = mm.replaceAll("[\\s]*([^;\\s\\n\\r]+)[\\s]*[\\n\\r]+", "$1;"); mm = mm.replaceAll("\n", " ").replaceAll("\r", " "); // System.out.println(mm); if (clean) { m.appendReplacement(buffer, ""); } else { String[] directives = mm.split(";"); for (String d : directives) { // System.out.println(d); parseDirective(d); } } } if (clean) { m.appendTail(buffer); // TODO: not working! code.setProgram(buffer.toString()); code.setModified(true); } } if (clean) { // editor.stopCompoundEdit(); editor.setText(sketch.getCurrentCode().getProgram()); sketch.setModified(false); sketch.setModified(true); } }
/**
 * Looks for an archive tag in a line and returns the matching <CODE>ArchiveGroup</CODE>.
 *
 * <p>The group is looked up in the template by the file name captured from the tag. When the
 * template has none, a new group is created from that file name's parent directory and simple
 * name and added to the template.
 *
 * @param currentLine The line in which to look for the archive tag.
 * @param template The <CODE>Template</CODE> to which the <CODE>ArchiveGroup</CODE> belongs.
 * @return The <CODE>ArchiveGroup</CODE> for the tag, or <CODE>null</CODE> if the line does not
 *     contain a tag.
 */
private ArchiveGroup parseArchiveGroupTag(String currentLine, Template template) {
  Matcher tag = archiveRequestPattern.matcher(currentLine);
  if (!tag.find()) {
    return null;
  }

  String fileName = tag.group(1);
  ArchiveGroup archiveGroup = template.getArchiveGroup(fileName);
  if (archiveGroup != null) {
    return archiveGroup;
  }

  File groupFile = new File(fileName);
  archiveGroup = new ArchiveGroup(groupFile.getParent(), groupFile.getName());
  template.addArchiveGroup(archiveGroup);
  return archiveGroup;
}
/**
 * Extracts the double-quoted arguments of a command line.
 *
 * <p>Each shortest {@code "..."} span is passed through {@code unquote} and then trimmed.
 *
 * @param command the command line to scan
 * @return the processed arguments in order of appearance; empty if there are none
 */
private static List<String> findQuotedArguments(String command) {
  List<String> arguments = new ArrayList<>();
  Matcher quoted = Pattern.compile("\".*?\"").matcher(command);
  while (quoted.find()) {
    String unquoted = unquote(quoted.group());
    arguments.add(unquoted.trim());
  }
  return arguments;
}
/**
 * Substitutes positional parameters into a query before it is processed further.
 *
 * <p>Group 1 of every {@code PRE_SUB_PATTERN} match is read as a 1-based index into
 * {@code params}; the match is replaced by that parameter, inserted literally.
 *
 * @param query the query text containing placeholders
 * @param params the values to substitute, each expected to be a {@link String}; may be null
 * @return the query with placeholders replaced, or {@code query} itself if {@code params} is
 *     null
 */
private String presub(String query, Object[] params) {
  if (params == null) {
    return query;
  }
  Matcher placeholders = PRE_SUB_PATTERN.matcher(query);
  StringBuffer out = new StringBuffer();
  while (placeholders.find()) {
    int position = Integer.parseInt(placeholders.group(1)) - 1;
    String literal = (String) params[position];
    // Escapes '\' and '$' so the parameter is not read as replacement syntax.
    placeholders.appendReplacement(out, Matcher.quoteReplacement(literal));
  }
  placeholders.appendTail(out);
  return out.toString();
}
public void run() { // each file is processed into a local hash table and then merged with the global results // this will cause much less contention on the global table, but still avoids a sequential // update Hashtable<String, Integer> local_results = new Hashtable<String, Integer>(WordCountJ.HASH_SIZE, WordCountJ.LF); // grab a file to work on String cf; while ((cf = files.poll()) != null) { try { BufferedReader input = new BufferedReader(new FileReader(cf)); String text; // well go line-by-line... maybe this is not the fastest while ((text = input.readLine()) != null) { // parse words Matcher matcher = pattern.matcher(text); while (matcher.find()) { String word = matcher.group(1); if (local_results.containsKey(word)) { local_results.put(word, 1 + local_results.get(word)); } else { local_results.put(word, 1); } } } input.close(); } catch (Exception e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); return; } // merge local hashmap with shared one,could have a // seperate thread do this but that might be cheating Iterator<Map.Entry<String, Integer>> updates = local_results.entrySet().iterator(); while (updates.hasNext()) { Map.Entry<String, Integer> kv = updates.next(); String k = kv.getKey(); Integer v = kv.getValue(); synchronized (results) { if (results.containsKey(k)) { results.put(k, v + results.get(k)); } else { results.put(k, v); } } } local_results.clear(); } }
/** Set this news object using the news specified by newsFile */ public void setNews(String newsFile) throws IOException { Pattern pDocId = Pattern.compile("<DOC-ID>(.*)</DOC-ID>"); Pattern pTitle = Pattern.compile("<TITLE>(.*)</TITLE>"); Pattern pTimestamp = Pattern.compile("<TIMESTAMP>(.*)</TIMESTAMP>"); Pattern pContent = Pattern.compile("<CONTENT>(.*)</CONTENT>"); Matcher m = null; BufferedReader br = new BufferedReader(new FileReader(newsFile)); String line = ""; String news = ""; while ((line = br.readLine()) != null) { news += line; } // get news metadata and content m = pDocId.matcher(news); if (m.find()) { this.docId = m.group(1); } m = pTitle.matcher(news); if (m.find()) { this.title = m.group(1); } m = pTimestamp.matcher(news); if (m.find()) { this.timestamp = m.group(1); } m = pContent.matcher(news); if (m.find()) { this.content = m.group(1); } }
/**
 * Removes all section headers.
 *
 * <p>A header is a run of two or more '=' characters, followed by text containing no '=',
 * followed by the same run of '=' characters.
 *
 * @param markup the text to be stripped
 * @return the stripped markup
 */
public static String stripHeadings(String markup) {
  Matcher headings = Pattern.compile("(={2,})([^=]+)(\\1)").matcher(markup);
  // Replacing every match with nothing keeps exactly the text between headings.
  return headings.replaceAll("");
}
/**
 * Returns the text captured by {@code PARENTHESIS_PATTERN} from the first line of a successful
 * response.
 *
 * <p>The lookup is done at most once; its outcome (including "not found") is remembered via
 * {@code extraCreated}.
 *
 * @return group 1 of the match; {@code null} when the status is not {@code SC_SUCCESS} or the
 *     first line does not match
 */
public String getExtra() {
  if (getStatus() != SC_SUCCESS) {
    return null;
  }
  if (!extraCreated) {
    matcher = PARENTHESIS_PATTERN.matcher(firstLine);
    if (matcher.find()) {
      extra = matcher.group(1);
    }
    extraCreated = true;
  }
  return extra;
}
/**
 * Reads the response line by line, echoing each line to standard output, and stores in
 * {@code cdbId} the number that follows a {@code <cdbId>} tag.
 *
 * <p>If several lines carry the tag, the last one wins. The stream is not closed here.
 *
 * @param response the response body to scan
 * @throws IOException if reading the response fails
 */
private void processResponse(InputStream response) throws IOException {
  Pattern idPattern = Pattern.compile("<cdbId>\\s*(\\d+)");
  BufferedReader in = new BufferedReader(new InputStreamReader(response));
  for (String line = in.readLine(); line != null; line = in.readLine()) {
    System.out.println(line);
    Matcher idMatch = idPattern.matcher(line);
    if (!idMatch.find()) {
      continue;
    }
    String idText = idMatch.group(1);
    if (idText == null) {
      continue;
    }
    cdbId = Integer.parseInt(idText);
  }
}
/** * Replaces instances of Java reserved words that could not appear in a valid Java condition or * Java variable name that are being used as variable names in string. * * @param string the string in which the Java reserved words should be replaced. * @return string with the Java reserved words replaced with a substitute names. */ private static String replaceReservedWords(String string) { // cheap hack so that pattern never need to look for a key word at // the beginning or end of string. That way one may simplify the pattern // to looking for a reserved word that is not prefixed or suffix with a // letter or number. string = "(" + string + ")"; for (int i = 0; i < reservedWords.length; i++) { String reservedWord = reservedWords[i]; Pattern p = Pattern.compile("([\\W])(" + reservedWord + ")([\\W])"); Matcher m = p.matcher(string); while (m.find()) { string = m.replaceFirst(m.group(1) + "daikon" + reservedWord + m.group(3)); m = p.matcher(string); } } return string.substring(1, string.length() - 1); }