@Override public InputStream createFilteredInputStream(ArchivalUnit au, InputStream in, String encoding) { NodeFilter[] filters = new NodeFilter[] { // handled by parent: script, sfxlink, stylesheet HtmlNodeFilters.tag("noscript"), // toc - first top block ad // http://www.birpublications.org/toc/bjr/87/1044 HtmlNodeFilters.tagWithAttributeRegex("div", "class", "literatumAd"), // page header: login, register, etc., and journal menu such as // subscribe, alerts, ... HtmlNodeFilters.tagWithAttributeRegex("div", "id", "pageHeader"), // page footer HtmlNodeFilters.tagWithAttributeRegex("div", "id", "pageFooter"), // toc - BJR logo image right below pageHeader HtmlNodeFilters.tagWithAttributeRegex("div", "class", "^widget general-image"), // toc, abs, full, ref - menu above breadcrumbs HtmlNodeFilters.tagWithAttributeRegex("div", "class", "menuXml"), // toc - free.gif image tied to an abs HtmlNodeFilters.tagWithAttributeRegex("img", "src", "free.gif"), // toc - access icon container HtmlNodeFilters.tagWithAttribute("td", "class", "accessIconContainer"), // toc - pulldown with sections - may add citedby later HtmlNodeFilters.tagWithAttribute("div", "class", "publicationTooldropdownContainer"), // toc - right column, current issue HtmlNodeFilters.tagWithAttributeRegex("div", "class", "literatumBookIssueNavigation"), // toc, abs - share social media HtmlNodeFilters.tagWithAttributeRegex("div", "class", "general-bookmark-share"), // toc - right column impact factor block - no unique name found HtmlNodeFilters.tagWithAttributeRegex( "div", "class", "widget\\s+layout-one-column\\s+none\\s+widget-regular\\s+widget-border-toggle"), // ref - this seems unused but may get turned on // http://www.birpublications.org/doi/ref/10.1259/bjr.20130571 HtmlNodeFilters.tagWithAttribute("div", "id", "MathJax_Message"), // full - section choose pulldown appeared in multiple sections // http://www.birpublications.org/doi/full/10.1259/dmfr.20120050 HtmlNodeFilters.tagWithAttribute("div", "class", "sectionJumpTo"), // toc, abs, full, text and ref right column - most read // http://www.birpublications.org/toc/bjr/88/1052 HtmlNodeFilters.tagWithAttributeRegex("div", "class", "literatumMostReadWidget"), // abs - right column all literatumArticleToolsWidget // except Download Citation // http://www.birpublications.org/doi/abs/10.1259/bjr.20140472 HtmlNodeFilters.allExceptSubtree( HtmlNodeFilters.tagWithAttributeRegex("div", "class", "literatumArticleToolsWidget"), HtmlNodeFilters.tagWithAttributeRegex("a", "href", "/action/showCitFormats\\?")), }; // super.createFilteredInputStream adds bir filter to the baseAtyponFilters // and returns the filtered input stream using an array of NodeFilters that // combine the two arrays of NodeFilters. return super.createFilteredInputStream(au, in, encoding, filters); }