/**
 * Runs each of the nine HtmlHash variants (E, F, G, H, I, J, K, L, O) through the hash
 * filter and checks that every one of them comes out as {@code HtmlHashEFiltered}.
 */
public void testFilterE() throws Exception {
   // all these should match, once filtered, the string HtmlHashEFiltered
   String[] variants = {
     HtmlHashE, HtmlHashF, HtmlHashG, HtmlHashH, HtmlHashI,
     HtmlHashJ, HtmlHashK, HtmlHashL, HtmlHashO
   };
   // Checked in the same order as listed; the first mismatch fails the test.
   for (String variant : variants) {
     InputStream filtered =
         fact.createFilteredInputStream(mau, new StringInputStream(variant), ENC);
     assertEquals(HtmlHashEFiltered, StringUtil.fromInputStream(filtered));
   }
 }
  /**
   * Filters {@code HtmlHashN} and {@code HtmlHashP} and compares each result with its
   * own expected filtered string.
   */
  public void testFilterViewedBy() throws Exception {
    InputStream filteredN =
        fact.createFilteredInputStream(mau, new StringInputStream(HtmlHashN), ENC);
    String actualN = StringUtil.fromInputStream(filteredN);
    assertEquals(HtmlHashNFiltered, actualN);

    InputStream filteredP =
        fact.createFilteredInputStream(mau, new StringInputStream(HtmlHashP), ENC);
    String actualP = StringUtil.fromInputStream(filteredP);
    assertEquals(HtmlHashPFiltered, actualP);
  }
  /** Filters {@code HtmlHashD} and expects the result to equal {@code HtmlHashDFiltered}. */
  public void testFilterD() throws Exception {
    InputStream filtered =
        fact.createFilteredInputStream(mau, new StringInputStream(HtmlHashD), ENC);
    assertEquals(HtmlHashDFiltered, StringUtil.fromInputStream(filtered));
  }
  /** Filters {@code HtmlHashB} and expects the result to equal {@code HtmlHashBFiltered}. */
  public void testFilterB() throws Exception {
    InputStream filtered =
        fact.createFilteredInputStream(mau, new StringInputStream(HtmlHashB), ENC);
    assertEquals(HtmlHashBFiltered, StringUtil.fromInputStream(filtered));
  }
  /**
   * Filters {@code orgString} with the default encoding and asserts on the outcome.
   *
   * <p>Only the first string goes through the filter; the second one is used untouched
   * as the constant to compare against.
   *
   * @param orgString raw content fed to the filter
   * @param finalString unfiltered constant the filtered content is compared with
   */
  private void assertFilterToString(String orgString, String finalString) throws Exception {
    InputStream filteredStream =
        fact.createFilteredInputStream(
            mau, new StringInputStream(orgString), Constants.DEFAULT_ENCODING);
    String filtered = StringUtil.fromInputStream(filteredStream);

    // NOTE(review): three-argument form; the first argument is presumably the failure
    // message, so the filtered text is what gets reported on mismatch - confirm against
    // the assertEquals overloads of the test base class.
    assertEquals(filtered, finalString, filtered);
  }
 /**
  * Filters {@code nameToHash} through {@code fact} for {@code au}, using the default
  * encoding, and asserts that the filtered text equals {@code expectedStr}.
  *
  * @param au archival unit handed to the filter factory
  * @param fact filter factory under test
  * @param nameToHash raw content to filter
  * @param expectedStr text the filtered content must equal
  */
 private static void doFilterTest(
     ArchivalUnit au, FilterFactory fact, String nameToHash, String expectedStr)
     throws PluginException, IOException {
   InputStream filtered =
       fact.createFilteredInputStream(
           au, new StringInputStream(nameToHash), Constants.DEFAULT_ENCODING);
   String actualStr = StringUtil.fromInputStream(filtered);
   assertEquals(expectedStr, actualStr);
 }
  /** Filters {@code HtmlHashA} and expects the result to equal {@code HtmlHashAFiltered}. */
  public void testFilterA() throws Exception {
    // viewed-by test
    InputStream filtered =
        fact.createFilteredInputStream(mau, new StringInputStream(HtmlHashA), ENC);
    assertEquals(HtmlHashAFiltered, StringUtil.fromInputStream(filtered));
  }
  /**
   * Checks the content returned by {@code openForHashing()} for {@code /001file.html},
   * first with the filter enabled and then with it disabled.
   *
   * <p>With the filter enabled the hashed content is expected to be the reduced text
   * form; with it disabled, the full HTML of the file. Each stream is closed in a
   * {@code finally} block so it is released even when reading or the assertion fails.
   *
   * @param sau simulated archival unit whose root URL holds the file to check
   * @throws Exception if the content cannot be opened or read
   */
  protected void checkFilter(SimulatedArchivalUnit sau) throws Exception {
    log.debug("checkFilter()");
    CachedUrl cu = sau.makeCachedUrl(sau.getUrlRoot() + "/001file.html");

    // Filter on: expect the reduced text form.
    enableFilter(sau, true);
    InputStream is = cu.openForHashing();
    try {
      String expected = "001file.html This is file 1, depth 0, branch 0. foobar ";
      assertEquals(expected, StringUtil.fromInputStream(is));
    } finally {
      is.close();
    }

    // Filter off: a fresh CachedUrl is made before hashing, and the full HTML is expected.
    enableFilter(sau, false);
    cu = sau.makeCachedUrl(sau.getUrlRoot() + "/001file.html");
    is = cu.openForHashing();
    try {
      String expected =
          "<HTML><HEAD><TITLE>001file.html</TITLE></HEAD><BODY>\n"
              + "This is file 1, depth 0, branch 0.<br><!-- comment -->    "
              + "Citation String   foobar<br><script>"
              + "(defun fact (n) (cond ((= n 0) 1) (t (fact (sub1 n)))))</script>\n"
              + "</BODY></HTML>";
      assertEquals(expected, StringUtil.fromInputStream(is));
    } finally {
      is.close();
    }
  }
  /**
   * Extracts metadata from the test dataset XML resource, expects exactly six records,
   * and validates each record in list order.
   */
  public void testSimpleDatasetXML() throws Exception {
    log.debug3("testSimpleDatasetXML");
    String xml_url = TAR_A_BASE + SUBDIR + "dataset.xml";
    String file_input = StringUtil.fromInputStream(getResourceAsStream(testDatasetFile));

    List<ArticleMetadata> mdList =
        extractFromContent(xml_url, "text/xml", file_input, nocheck_mle, null);
    assertEquals(6, mdList.size());

    for (ArticleMetadata mdRecord : mdList) {
      validateDatasetMetadataRecord(mdRecord);
    }
  }
  /**
   * Runs a series of filter checks. Most cases filter one input and compare it with its
   * expected filtered string; the whitespace case instead filters two inputs and expects
   * the two filtered results to be equal to each other.
   */
  public void testFiltering() throws Exception {
    /* impactFactor test */
    InputStream tagsIn =
        fact.createFilteredInputStream(mau, new StringInputStream(tagsHtmlHash), ENC);
    assertEquals(tagsHtmlHashFiltered, StringUtil.fromInputStream(tagsIn));

    /* whiteSpace test */
    InputStream spaceIn1 =
        fact.createFilteredInputStream(mau, new StringInputStream(WhiteSpace1), ENC);
    InputStream spaceIn2 =
        fact.createFilteredInputStream(mau, new StringInputStream(WhiteSpace2), ENC);
    // Both streams are created before either is read; the first is read first.
    String spaceText1 = StringUtil.fromInputStream(spaceIn1);
    String spaceText2 = StringUtil.fromInputStream(spaceIn2);
    assertEquals(spaceText1, spaceText2);

    /* rightCol test */
    InputStream rightColIn =
        fact.createFilteredInputStream(mau, new StringInputStream(rightColHtml), ENC);
    assertEquals(rightColHtmlFiltered, StringUtil.fromInputStream(rightColIn));

    /* mathjax test */
    InputStream mathJaxIn =
        fact.createFilteredInputStream(mau, new StringInputStream(mathJaxHtml), ENC);
    assertEquals(mathJaxHtmlFiltered, StringUtil.fromInputStream(mathJaxIn));

    // header & footer test
    InputStream hrtagsIn =
        fact.createFilteredInputStream(mau, new StringInputStream(hrtagsHtmlHash), ENC);
    assertEquals(hrtagsHtmlHashFiltered, StringUtil.fromInputStream(hrtagsIn));

    // metrics test
    InputStream metricsIn =
        fact.createFilteredInputStream(mau, new StringInputStream(metricsHtml), ENC);
    assertEquals(metricsHtmlFiltered, StringUtil.fromInputStream(metricsIn));

    // misc test
    InputStream miscIn =
        fact.createFilteredInputStream(mau, new StringInputStream(miscHtml), ENC);
    assertEquals(miscHtmlFiltered, StringUtil.fromInputStream(miscIn));
  }