/**
 * Title layout: a {@code dddd-dd-dd} date-like prefix, one whitespace character, any run of
 * the filler tokens "Bastians", "Mittagessen", whitespace or "-", then the remainder (group 2).
 * Compiled once because {@link Pattern} instances are immutable and safe to share.
 */
private static final Pattern LUNCH_TITLE_PATTERN = Pattern.compile("\\d{4}-\\d{2}-\\d{2}\\s(Bastians|Mittagessen|\\s|-)*(.*?)");

/**
 * Extracts the lunch name from a page title.
 * <p>
 * The entire title has to match {@link #LUNCH_TITLE_PATTERN}; whatever follows the date
 * prefix and the filler tokens is trimmed and returned as the lunch name.
 *
 * @param title the page title; may be {@code null}
 * @return the trimmed remainder of the title, or {@code null} when the title is {@code null},
 *         does not match the expected layout, or nothing is left after trimming
 * @throws ParseException declared for callers; not raised by the logic written in this method
 */
public String extractLunchNameFromTitle(final String title) throws ParseException {
    // Pattern.matcher(null) would throw a NullPointerException; treat "no title" as "no name".
    if (title == null) {
        return null;
    }

    final Matcher matcher = LUNCH_TITLE_PATTERN.matcher(title);
    if (!matcher.matches()) {
        return null;
    }

    // Sibling methods treat the return value of stringUtil.trim as nullable, so guard here too.
    final String result = stringUtil.trim(matcher.group(2));
    if (result == null || result.isEmpty()) {
        return null;
    }
    return result;
}
/**
 * Determines the lunch name, preferring the page content over the page title.
 * <p>
 * The content is consulted first via {@code extractLunchNameFromContent}; a
 * {@code ParseException} raised there is logged at trace level and does not abort the
 * lookup. If the content yields no usable name, the title is consulted via
 * {@code extractLunchNameFromTitle}. A name is considered usable only when, after trimming,
 * it is non-null and longer than one character.
 *
 * @param title the page title, used as fallback source
 * @param htmlContent the page content, used as primary source
 * @return the first usable name found, or {@code null} if neither source provides one
 * @throws ParseException if the title-based lookup raises it
 */
public String extractLunchNameFromTitleOrContent(final String title, final String htmlContent) throws ParseException {
    // Primary source: the page content. Failures here are tolerated.
    try {
        final String fromContent = stringUtil.trim(extractLunchNameFromContent(htmlContent));
        final boolean usable = fromContent != null && fromContent.length() > 1;
        if (usable) {
            logger.debug("lunch name found in content: '" + fromContent + "'");
            return fromContent;
        }
    } catch (ParseException e) {
        logger.trace("extract lunch title form content failed", e);
    }

    // Fallback source: the page title.
    final String fromTitle = stringUtil.trim(extractLunchNameFromTitle(title));
    if (fromTitle == null || fromTitle.length() <= 1) {
        logger.debug("no lunch name found");
        return null;
    }
    logger.debug("lunch name found in title: '" + fromTitle + "'");
    return fromTitle;
}
public String extractLunchNameFromContent(final String htmlContent) throws ParseException { final Document document = Jsoup.parse(htmlContent); { final Elements elements = document.getElementsByClass("tipMacro"); for (final Element element : elements) { for (final Element td : element.getElementsByTag("p")) { final String innerHtml = td.html(); final String result = stringUtil.trim(htmlUtil.filterHtmlTages(innerHtml)); if (result != null && result.length() > 0) { logger.debug("found lunch lame " + result); return result; } } } } { int pos = 0; pos = parseUtil.indexOf(htmlContent, "ac:name=\"tip\"", pos); try { pos = parseUtil.indexOf(htmlContent, "INLINE", pos); } catch (final ParseException e) { // nop } final int pstart = parseUtil.indexOf(htmlContent, "<ac:rich-text-body>", pos); final int pend = parseUtil.indexOf(htmlContent, "</ac:rich-text-body>", pstart); final String result = stringUtil.trim(htmlUtil.filterHtmlTages(htmlContent.substring(pstart, pend))); if (result != null && result.length() > 0) { logger.debug("found lunch name " + result); return result; } } logger.debug("extractLunchNameFromContent failed " + htmlContent); return null; }