/**
 * Returns the text enclosed by the first {@code <name>...</name>} pair found in the document's
 * raw content.
 *
 * <p>The lookup is a plain substring search for the delimiters {@code "<" + name + ">"} and
 * {@code "</" + name + ">"}; the content is not parsed as XML.
 *
 * @param name tag name of the property to look up
 * @param doc document whose {@code getContent()} result is searched
 * @return the text between the first matching delimiter pair, or {@code null} when the content
 *     is {@code null} or contains no such pair
 */
public Object getProperty(String name, Document doc) {
    String rawdoc = doc.getContent();
    // substringBetween returns the first match and is null-safe; indexing [0] into the result of
    // substringsBetween throws a NullPointerException whenever the tag is absent.
    return org.apache.commons.lang3.StringUtils.substringBetween(
            rawdoc, "<" + name + ">", "</" + name + ">");
}
/**
 * Evaluates a string expression in the following example format
 * {@code "this is a test string with {param_name_1} and {param_name_n}"}, where each
 * {@code param_name_...} is resolved against the supplied map of parameter names to parameter
 * values. A placeholder whose name has no mapping, or is mapped to {@code null}, is replaced
 * with the empty string.
 *
 * <p>Parameter names and values are both treated literally: regex metacharacters in a name and
 * {@code $} / {@code \} in a value carry no special meaning.
 *
 * @param expression The string expression to evaluate.
 * @param params Map of parameters which include the parameter names defined within the
 *     expression; a {@code null} map is treated as having no mappings.
 * @return The evaluated string; {@code expression} itself when it is blank or contains no
 *     dynamic part.
 */
public static String evaluateExpression(
        final String expression, final Map<String, String> params) {
    if (StringUtils.isBlank(expression)
            || !DYNAMIC_EXPRESSION_PATTERN.matcher(expression).find()) {
        return expression;
    }

    final String[] variableNames =
            StringUtils.substringsBetween(
                    expression, VARIABLE_EXPRESSION_START, VARIABLE_EXPRESSION_END);
    // substringsBetween yields null (not an empty array) when no delimiter pair is found.
    if (variableNames == null) {
        return expression;
    }

    String result = expression;
    for (final String variableName : variableNames) {
        if (StringUtils.isBlank(variableName)) {
            continue;
        }
        final String value = params != null ? params.get(variableName) : null;

        // Quote the name so characters such as '.' or '+' only match themselves.
        final StringBuilder variableExpression = new StringBuilder();
        variableExpression
                .append(VARIABLE_EXPRESSION_REGEX_START)
                .append(java.util.regex.Pattern.quote(variableName))
                .append(VARIABLE_EXPRESSION_REGEX_END);

        // replaceAll interprets '$' and '\' in the replacement; quote it so values are
        // inserted verbatim instead of throwing or being read as group references.
        result =
                result.replaceAll(
                        variableExpression.toString(),
                        java.util.regex.Matcher.quoteReplacement(
                                value != null ? value : StringUtils.EMPTY));
    }
    return result;
}
/**
 * Extracts every property listed by {@code getPropertiesNames()} from the document's raw
 * content.
 *
 * <p>For each property name the text between the first {@code <name>} / {@code </name>}
 * delimiter pair is taken (plain substring search) and passed through
 * {@code StringEscapeUtils.escapeXml} before being stored.
 *
 * @param doc document whose {@code getContent()} result is searched
 * @return a map with one entry per property name; the value is the XML-escaped text, or
 *     {@code null} when the content holds no delimiter pair for that name
 */
public Map<String, Object> getAllProperties(Document doc) {
    Map<String, Object> allProperties = new HashMap<String, Object>();
    String rawdoc = doc.getContent();
    List<String> properties = getPropertiesNames();
    for (String propertyName : properties) {
        // substringBetween returns the first match or null; the previous substringsBetween(...)[0]
        // threw a NullPointerException as soon as one property was missing from the content.
        String propertyValue =
                org.apache.commons.lang3.StringUtils.substringBetween(
                        rawdoc, "<" + propertyName + ">", "</" + propertyName + ">");
        // escapeXml passes null through unchanged, so a missing property stays null.
        propertyValue = org.apache.commons.lang3.StringEscapeUtils.escapeXml(propertyValue);
        allProperties.put(propertyName, propertyValue);
    }
    return allProperties;
}
/**
 * Reads a result file into a map from key to list of integers.
 *
 * <p>The file is resolved as
 * {@code getFilePath() + "/MCLAlgorithm/ClusterResults/R-MCL and MCL test/" + fileName}. Each
 * non-blank line is split on {@code ':'}; the first part is the key and the comma-separated
 * text between the first {@code '['} and the following {@code ']'} of the second part is
 * parsed as integers. Blank lines and empty list entries are skipped.
 *
 * @param fileName name of the file inside the results directory
 * @return map from each line's key to its parsed integers (later duplicates of a key overwrite
 *     earlier ones)
 * @throws FileNotFoundException if the file cannot be opened
 * @throws IllegalArgumentException if a non-blank line lacks the {@code key:[...]} shape; a
 *     non-numeric list entry surfaces as {@link NumberFormatException}
 */
private static Map<String, List<Integer>> readCluster(String fileName)
        throws FileNotFoundException {
    Map<String, List<Integer>> map = new HashMap<>();
    File clusterFile =
            new File(
                    getFilePath()
                            + "/MCLAlgorithm/ClusterResults/R-MCL and MCL test/"
                            + fileName);
    // try-with-resources: the Scanner (and the underlying file handle) was never closed before.
    try (Scanner src = new Scanner(clusterFile)) {
        // hasNextLine pairs with nextLine; hasNext looked for tokens and let blank lines through
        // to the split below, where they failed with an index error.
        while (src.hasNextLine()) {
            String rawLine = src.nextLine();
            if (rawLine.trim().isEmpty()) {
                continue;
            }
            String[] line = rawLine.split(":");
            String members =
                    line.length > 1 ? StringUtils.substringBetween(line[1], "[", "]") : null;
            if (members == null) {
                throw new IllegalArgumentException(
                        "Malformed line in " + fileName + ": " + rawLine);
            }
            List<Integer> list = new ArrayList<>();
            for (String member : members.split(",")) {
                // Trim before the emptiness test so entries like " " are skipped, not parsed.
                String trimmed = member.trim();
                if (!trimmed.isEmpty()) {
                    list.add(Integer.valueOf(trimmed));
                }
            }
            map.put(line[0], list);
        }
    }
    return map;
}
/**
 * Returns the text of the second {@code <title>...</title>} pair in the given raw XML.
 *
 * <p>The lookup is a plain substring search; the input is not parsed as XML.
 * NOTE(review): the second occurrence (index 1) is taken deliberately as in the original code —
 * presumably the first title belongs to an enclosing element; confirm against the feed format.
 *
 * @param rawXML raw XML text to search; may be {@code null}
 * @return the text between the second {@code <title>} / {@code </title>} pair, or {@code null}
 *     when the input contains fewer than two such pairs
 */
public static String getSourceName(String rawXML) {
    String[] titles =
            org.apache.commons.lang3.StringUtils.substringsBetween(rawXML, "<title>", "</title>");
    // substringsBetween returns null when nothing matches, and may hold a single entry; both
    // cases previously crashed on titles[1].
    if (titles == null || titles.length < 2) {
        return null;
    }
    return titles[1];
}
/**
 * Collects the text found between every {@code <item>} / {@code </item>} delimiter pair of the
 * given raw XML, using a plain substring search (no XML parsing).
 *
 * @param rawXML raw XML text to search
 * @return the result of {@code StringUtils.substringsBetween} for those delimiters, returned
 *     unchanged — per the Commons Lang documentation that is {@code null} when nothing matches
 */
public static String[] getXMLItemList(String rawXML) {
    final String openTag = "<item>";
    final String closeTag = "</item>";
    final String[] items =
            org.apache.commons.lang3.StringUtils.substringsBetween(rawXML, openTag, closeTag);
    return items;
}
/** * Driver method * * @param args input file, output file, summary size */ public static void main(String[] args) { Instant start = Instant.now(); String articlesFromInput = ""; System.out.println("-- Reading Input File --"); try { articlesFromInput = FileUtils.readFileToString(new File(args[0]), "UTF-8"); } catch (IOException e) { System.out.println("-- Cannot parse input file --"); } System.out.println("-- Done Reading Input File --"); float summarySize = Float.parseFloat(args[2]); String[] tagContents = StringUtils.substringsBetween(articlesFromInput, "<article>", "</article"); List<String> articleContents = Arrays.asList(tagContents); // StringBuilder aggregatedSummaries = new StringBuilder(); System.out.println("-- Let's Process the Articles --"); int articleNumber = 1; for (String article : articleContents) { // article = removeGarbage(article); // remove garbage for current // // article System.out.println("Processing article " + articleNumber + "/" + articleContents.size()); /* Getting sentences from the current article */ TextContent t = new TextContent(); t.setText(article); t.setSentenceBoundary(); String[] content = t.getSentence(); // content has the sentences /* * In case there is no sentence in the article, write empty string * in the output file */ if (content.length == 0) { // aggregatedSummaries.append("<summary>" + "" + "</summary>" + // "\n"); articleNumber++; writeSummaries(args[1], "<summary>" + "" + "</summary>" + "\n"); continue; } /* * In case there is one sentence in the article, write write that * sentence in the output file */ if (content.length == 1) { // aggregatedSummaries.append("<summary>" + content[0] + // "</summary>" + "\n"); articleNumber++; writeSummaries(args[1], "<summary>" + content[0] + "</summary>" + "\n"); continue; } /* Generating summaries using Classifier4j */ int articleSummaryLength = Math.round((float) content.length * summarySize); String summary = summarize(article, articleSummaryLength).trim(); // 
aggregatedSummaries.append("<summary>" + summary + "</summary>" + // "\n"); writeSummaries(args[1], "<summary>" + summary + "</summary>" + "\n"); articleNumber++; } // let's move to the second article // writeSummaries(args[1], aggregatedSummaries.toString()); Instant end = Instant.now(); System.out.println(Duration.between(start, end)); } // end driver method