/**
 * Returns the decoded, whitespace-collapsed content of the first TITLE element in the given
 * source, or null if the source has no TITLE element.
 */
private static String getTitle(Source source) {
   final Element title = source.getFirstElement(HTMLElementName.TITLE);
   // A TITLE element never holds nested tags, so decoding its raw content
   // (with whitespace collapsed) is all that is needed.
   return title != null
       ? CharacterReference.decodeCollapseWhiteSpace(title.getContent())
       : null;
 }
Beispiel #2
0
 /**
  * Returns the textual content inside the first HTML element with the given name found in the
  * given HTML source, with tags removed and character references decoded.
  *
  * @param source the parsed HTML source to search
  * @param elementName the name of the element whose text is wanted
  * @return the extracted text, or null if no element with that name is found
  */
 private String getElementContent(Source source, String elementName) {
   Element el = source.getNextElement(0, elementName);
   if (el == null) {
     return null;
   }
   // The text extractor already decodes character references, so its output is returned
   // as-is. Decoding a second time would corrupt text that legitimately contains
   // reference-like sequences (e.g. markup "&amp;lt;" must yield "&lt;", not "<").
   return el.getTextExtractor().toString();
 }