/**
   * Casts three votes for the same URL and checks {@code getRepairCandidates} at thresholds 0
   * through 3.
   *
   * <p>participant1 and participant2 each add "content 1" then "content 2"; participant3 adds
   * "content 3" then "content 2". The expected candidates are all three voters at thresholds 0 and
   * 1, only participant1 and participant2 at threshold 2, and nobody at threshold 3.
   */
  public void testHeadNotPopular() throws Exception {
    VersionCounts versionCounts = VersionCounts.make();

    // The byte[] returned by addVersion is not needed in this test, so it is not kept.
    VoteBlock vb1 = makeVoteBlock("http://test.com/foo1");
    addVersion(vb1, "content 1 for foo1");
    addVersion(vb1, "content 2 for foo1");

    VoteBlock vb2 = makeVoteBlock("http://test.com/foo1");
    addVersion(vb2, "content 1 for foo1");
    addVersion(vb2, "content 2 for foo1");

    VoteBlock vb3 = makeVoteBlock("http://test.com/foo1");
    addVersion(vb3, "content 3 for foo1");
    addVersion(vb3, "content 2 for foo1");

    versionCounts.vote(vb1, participant1);
    versionCounts.vote(vb2, participant2);
    versionCounts.vote(vb3, participant3);

    Map<ParticipantUserData, HashResult> repairCandidates;

    // Thresholds 0 and 1: every voter is a candidate.
    repairCandidates = versionCounts.getRepairCandidates(0);
    assertSameElements(
        SetUtil.set(participant1, participant2, participant3), repairCandidates.keySet());

    repairCandidates = versionCounts.getRepairCandidates(1);
    assertSameElements(
        SetUtil.set(participant1, participant2, participant3), repairCandidates.keySet());

    // Threshold 2: participant3 drops out.
    repairCandidates = versionCounts.getRepairCandidates(2);
    assertSameElements(SetUtil.set(participant1, participant2), repairCandidates.keySet());

    // Threshold 3: nobody qualifies.
    repairCandidates = versionCounts.getRepairCandidates(3);
    assertEmpty(repairCandidates.keySet());
  }
 /**
  * Creates a key store from the properties returned by {@code initProps()} and checks its
  * aliases ("mykey" and "mycert"), that only "mycert" resolves to a certificate, and that the
  * store type is "JCEKS".
  */
 public void testDefaults() throws Exception {
   KeyStore keyStore = KeyStoreUtil.createKeyStore(initProps());

   // KeyStore.aliases() is an Enumeration; collect it into a List before comparing.
   List aliasList = ListUtil.fromIterator(new EnumerationIterator(keyStore.aliases()));
   assertIsomorphic(SetUtil.set("mykey", "mycert"), SetUtil.theSet(aliasList));

   assertNotNull(keyStore.getCertificate("mycert"));
   assertNull(keyStore.getCertificate("foocert"));
   assertEquals("JCEKS", keyStore.getType());
 }
 // Ensure that scanning continues after a nested parser throws an error.
 // XXX Need to cause an IOException while reading CSS from the stream.
 // NOTE(review): the "xxx" name prefix presumably keeps the test runner from picking this
 // method up until the XXX above is resolved -- confirm.
 public void xxxtestDoCrawlStyleError() throws IOException {
   final String firstImport = "http://example.com/blah1.html";
   final String secondImport = "http://example.com/blah2.html";
   final String anchorTarget = "http://example.com/blah3.html";

   StringBuilder html = new StringBuilder();
   html.append("<html><head>");
   html.append("<style type=\"text/css\">\n");
   html.append("<!--\n");
   // ensure css parser got invoked
   html.append("@import url('").append(firstImport).append("');\n");
   // and that this causes an error
   html.append("foo {bgcolor: #FFFF};");
   // so that this one isn't found
   html.append("@import url('").append(secondImport).append("');\n");
   html.append("-->\n");
   html.append("  </style>\n");
   // and this one is
   html.append("<a href=").append(anchorTarget).append("></a>");
   html.append("</head></html>");

   assertEquals(SetUtil.set(firstImport, anchorTarget), parseSingleSource(html.toString()));
 }
  /**
   * A double-quoted href whose value contains single quotes is expected to come back from
   * {@code parseSingleSource} with those single quotes intact.
   */
  public void testKeepsSingleQuoteInUrl() throws IOException {
    final String quotedUrl = "http://www.example.com/link'with'quotes.html";
    final String html =
        "<html><head><title>Test</title></head><body><a href=\"" + quotedUrl + "\">Link</a>";

    assertEquals(SetUtil.set(quotedUrl), parseSingleSource(html));
  }
  /**
   * Extracts links from a page based at {@code http://www.example.com/blah/}. Hrefs without a
   * leading slash are expected to resolve under {@code /blah/}, while those starting with "/"
   * resolve against the host root. Some attribute values carry a leading space.
   */
  public void testRelativeLinksWithLeadingSlash() throws IOException {
    final String baseUrl = "http://www.example.com/blah/";

    final String branch1 = "http://www.example.com/blah/branch1/index.html";
    final String branch2 = "http://www.example.com/blah/branch2/index.html";
    final String journalToc = "http://www.example.com/journals/american_imago/toc/aim60.1.html";
    final String stylesheet = "http://www.example.com/css/foo.css";
    final String script = "http://www.example.com/javascript/bar.js";

    StringBuilder page = new StringBuilder("<html><head><title>Test</title></head><body>");
    page.append("<a href= branch1/index.html>link1</a>");
    page.append("Filler, with <b>bold</b> tags and<i>others</i>");
    page.append("<a href=\" branch2/index.html\">link2</a>");
    page.append("<a href =\" /journals/american_imago/toc/aim60.1.html\">");
    page.append("<link rel=\"stylesheet\" href=\"/css/foo.css\" >");
    page.append("<script type=\"text/javascript\" src=\"/javascript/bar.js\"></script>");
    page.append("Number 1, Spring 2003</a>");
    final String html = page.toString();

    MockCachedUrl cachedUrl = new MockCachedUrl(baseUrl);
    cachedUrl.setContent(html);

    extractor.extractUrls(mau, new StringInputStream(html), ENC, baseUrl, cb);

    assertEquals(
        SetUtil.set(branch1, branch2, journalToc, stylesheet, script), cb.getFoundUrls());
  }
  /**
   * Creates a key store with {@code PROP_KEYSTORE_FILE} pointing at a file in a temp directory,
   * checks that the file did not exist beforehand and does afterwards, then reloads the store
   * from that file and checks its aliases, certificates and type.
   */
  public void testStore() throws Exception {
    File keyStoreFile = new File(getTempDir(), "test.ks");
    Properties props = initProps();
    props.put(KeyStoreUtil.PROP_KEYSTORE_FILE, keyStoreFile.toString());

    // The file must only come into existence through createKeyStore().
    assertFalse(keyStoreFile.exists());
    KeyStore created = KeyStoreUtil.createKeyStore(props);
    assertTrue(keyStoreFile.exists());

    // Everything below inspects the copy read back from disk, not the in-memory instance.
    KeyStore reloaded = loadKeyStore(created.getType(), keyStoreFile, PASSWD);
    List reloadedAliases = ListUtil.fromIterator(new EnumerationIterator(reloaded.aliases()));
    assertIsomorphic(SetUtil.set("mykey", "mycert"), SetUtil.theSet(reloadedAliases));
    assertNotNull(reloaded.getCertificate("mycert"));
    assertNull(reloaded.getCertificate("foocert"));
    assertEquals("JCEKS", reloaded.getType());
  }
  /**
   * Checks that a voter who lists the same content several times is not counted several times.
   *
   * <p>Three participants vote on the same URL. participant2 repeats "content 1" four times and
   * participant3 repeats "content 2" four times. All three are expected as repair candidates at
   * threshold 2, and none at threshold 4.
   */
  public void testMultipleIdenticalVersions() throws Exception {
    VersionCounts versionCounts = VersionCounts.make();

    // The byte[] returned by addVersion is not needed in this test, so it is not kept.
    VoteBlock vb1 = makeVoteBlock("http://test.com/foo1");
    addVersion(vb1, "content 1 for foo1");
    addVersion(vb1, "content 2 for foo1");

    VoteBlock vb2 = makeVoteBlock("http://test.com/foo1");
    addVersion(vb2, "content 1 for foo1");
    addVersion(vb2, "content 1 for foo1");
    addVersion(vb2, "content 1 for foo1");
    addVersion(vb2, "content 1 for foo1");
    addVersion(vb2, "content 2 for foo1");

    VoteBlock vb3 = makeVoteBlock("http://test.com/foo1");
    addVersion(vb3, "content 1 for foo1");
    addVersion(vb3, "content 2 for foo1");
    addVersion(vb3, "content 2 for foo1");
    addVersion(vb3, "content 2 for foo1");
    addVersion(vb3, "content 2 for foo1");

    versionCounts.vote(vb1, participant1);
    versionCounts.vote(vb2, participant2);
    versionCounts.vote(vb3, participant3);

    Map<ParticipantUserData, HashResult> repairCandidates;
    repairCandidates = versionCounts.getRepairCandidates(2);
    assertSameElements(
        SetUtil.set(participant1, participant2, participant3), repairCandidates.keySet());

    // With only three candidates, no version should reach a threshold
    // of 4, unless counting multiples is wrong.
    repairCandidates = versionCounts.getRepairCandidates(4);
    assertEmpty(repairCandidates.keySet());
  }
  /**
   * Numeric character references inside href values are expected to be decoded: {@code &#38;}
   * becomes "&amp;", and {@code &#46&#46/} (written without semicolons) becomes "../" before
   * the relative URL is resolved against the base tag.
   */
  public void testResolvesHtmlEntities() throws IOException {
    final String pageStart = "<html><head><title>Test</title></head><body>";

    final String decodedUrl =
        "http://www.example.com/bioone/?request=get-toc&issn=0044-7447&volume=32&issue=1";
    final String encodedLinkPage =
        pageStart
            + "<a href=http://www.example.com/bioone/?"
            + "request=get-toc&#38;issn=0044-7447&#38;volume=32&issue=1>link1</a>";
    assertEquals(SetUtil.set(decodedUrl), parseSingleSource(encodedLinkPage));

    // ensure character entities processed before rel url resolution
    final String encodedDotsPage =
        pageStart
            + "<base href=http://www.example.com/foo/bar>"
            + "<a href=&#46&#46/xxx>link1</a>";
    assertEquals(SetUtil.set("http://www.example.com/xxx"), parseSingleSource(encodedDotsPage));
  }
  /**
   * Carriage returns embedded in a quoted href value are expected to be dropped from the
   * extracted URL.
   */
  public void testIgnoresCRInUrl() throws IOException {
    final String cleanedUrl = "http://www.example.com/linkwithspace.html";

    StringBuilder page = new StringBuilder();
    page.append("<html><head><title>Test</title></head><body>");
    page.append("<a href=\"http://www.example.com/link\rwith\rspace.html\">Link</a>");

    assertEquals(SetUtil.set(cleanedUrl), parseSingleSource(page.toString()));
  }
  /**
   * A newline between the tag name and its {@code src} attribute is expected not to prevent the
   * URL from being extracted.
   */
  public void testIgnoresNewLineInField() throws IOException {
    final String imageUrl = "http://www.example.com/link.html";
    final String html =
        "<html><head><title>Test</title></head><body><img\nsrc=\"" + imageUrl + "\">Link</a>";

    assertEquals(SetUtil.set(imageUrl), parseSingleSource(html));
  }
  /** A double-quoted href value is expected to be extracted without the surrounding quotes. */
  public void testParsesFileWithQuotedUrls() throws IOException {
    final String linkUrl = "http://www.example.com/link3.html";
    final String html =
        "<html><head><title>Test</title></head><body><a href=\"" + linkUrl + "\">link3</a>";

    assertEquals(SetUtil.set(linkUrl), parseSingleSource(html));
  }
 /**
  * Test that a single link is found.
  *
  * <p>Failures propagate directly to the test runner so the original assertion message and
  * stack trace are reported, rather than being caught and re-raised with an empty message.
  */
 public void testOneUrl() throws Exception {
   Set urls = SetUtil.set("http://www.foo.com/blah.jpg");
   assertEquals(urls, extractUrls(constructValidRDF(urls)));
 }
  /**
   * Builds content around {@code url} with {@code makeContent(url, startTag, endTag)}, runs the
   * extractor over it with base URL {@code http://www.example.com}, and checks the found URLs.
   *
   * <p>NOTE(review): {@code au} is never read; extraction always uses the fixture's
   * {@code mau}. Confirm whether that is intended.
   *
   * @param url the URL embedded in the generated content
   * @param startTag passed through to {@code makeContent}
   * @param endTag passed through to {@code makeContent}
   * @param au unused by this method
   * @param shouldParse if true exactly {@code url} must be found, otherwise nothing
   */
  private void singleTagParse(
      String url, String startTag, String endTag, ArchivalUnit au, boolean shouldParse)
      throws IOException {
    final String baseUrl = "http://www.example.com";
    final String content = makeContent(url, startTag, endTag);

    MockCachedUrl cachedUrl = new MockCachedUrl(baseUrl);
    cachedUrl.setContent(content);

    MyLinkExtractorCallback callback = new MyLinkExtractorCallback();
    extractor.extractUrls(mau, new StringInputStream(content), ENC, baseUrl, callback);

    Set expected = shouldParse ? SetUtil.set(url) : SetUtil.set();
    assertEquals("Misparsed: " + content, expected, callback.getFoundUrls());
  }
  /**
   * The target of a {@code <meta http-equiv="refresh">} tag is expected to be extracted, both
   * with and without a space after the semicolon in the {@code content} attribute.
   */
  public void testHttpEquiv() throws IOException {
    final String refreshTarget = "http://example.com/blah.html";
    final String[] separators = {"0; url=", "0;url="};

    for (String separator : separators) {
      String html =
          "<html><head><meta http-equiv=\"refresh\" content=\""
              + separator
              + refreshTarget
              + "\"></head></html>";
      assertEquals(SetUtil.set(refreshTarget), parseSingleSource(html));
    }
  }
  /**
   * Links inside a comment terminated by the malformed {@code --!>} are expected to be skipped;
   * only the link after the comment should be found.
   */
  public void testSkipsMalformedComments() throws IOException {
    final String visibleUrl = "http://www.example.com/link3.html";

    StringBuilder page = new StringBuilder("<html><head><title>Test</title></head><body>");
    page.append("<!--<a href=http://www.example.com/link1.html>link1</a>");
    page.append("Filler, with <b>bold</b> tags and<i>others</i>");
    page.append("<a href=http://www.example.com/link2.html>link2</a>--!>");
    page.append("<a href=http://www.example.com/link3.html>link3</a>");

    assertEquals(SetUtil.set(visibleUrl), parseSingleSource(page.toString()));
  }
  // Example #16
  // 0
  /**
   * Checks the two-argument {@code getRepairCandidates(threshold, excluded)}: with a single
   * voter, excluding the hash of the second version added leaves the voter a candidate, while
   * excluding the hash of the first version added removes it.
   */
  public void testHeadNotAllowed() throws Exception {
    VersionCounts counts = VersionCounts.make();

    VoteBlock voteBlock = makeVoteBlock("http://test.com/foo1");
    byte[] firstAddedHash = addVersion(voteBlock, "content 1 for foo1");
    byte[] secondAddedHash = addVersion(voteBlock, "content 2 for foo1");

    counts.vote(voteBlock, participant1);

    // No exclusions: the lone voter is a candidate.
    Map<ParticipantUserData, HashResult> candidates = counts.getRepairCandidates(0);
    assertSameElements(SetUtil.set(participant1), candidates.keySet());

    // Same, but with an excluded version that doesn't matter.
    candidates = counts.getRepairCandidates(0, SetUtil.set(HashResult.make(secondAddedHash)));
    assertSameElements(SetUtil.set(participant1), candidates.keySet());

    // Same, but with an excluded version that does matter.
    candidates = counts.getRepairCandidates(0, SetUtil.set(HashResult.make(firstAddedHash)));
    assertEmpty(candidates);
  }
  /**
   * With the fixture's extractor left as it is, URLs that appear only inside
   * {@code javascript:} attribute values are expected not to be extracted at all.
   */
  public void testDontParseJSByDefault() throws IOException {
    final String imgPopupUrl = "http://www.example.com/link1.html";

    StringBuilder page = new StringBuilder("<html><head><title>Test</title></head><body>");
    page.append("<a href = javascript:newWindow('http://www.example.com/link3.html')</a>");
    page.append("<a href = javascript:popup('http://www.example.com/link2.html')</a>");
    page.append("<img src = javascript:popup('").append(imgPopupUrl).append("') </img>");

    assertEquals(SetUtil.set(), parseSingleSource(page.toString()));
  }
 /**
  * Wraps two links between {@code openScript} and {@code closeScript}, follows them with a
  * third link, and asserts that only the third link is extracted.
  *
  * @param openScript text placed before the two links that should be skipped
  * @param closeScript text placed after the two links that should be skipped
  * @param failMsg message reported if the assertion fails
  */
 private void doScriptSkipTest(String openScript, String closeScript, String failMsg)
     throws IOException {
   final String linkAfterScript = "http://www.example.com/link3.html";

   StringBuilder page = new StringBuilder("<html><head><title>Test</title></head><body>");
   page.append(openScript);
   page.append("<a href=http://www.example.com/link1.html>link1</a>");
   page.append("Filler, with <b>bold</b> tags and<i>others</i>");
   page.append("<a href=http://www.example.com/link2.html>link2</a>");
   page.append(closeScript);
   page.append("<a href=http://www.example.com/link3.html>link3</a>");

   assertEquals(failMsg, SetUtil.set(linkAfterScript), parseSingleSource(page.toString()));
 }
  // Example #19
  // 0
  /**
   * Checks {@code getSortedRepairCandidatesMap} and {@code getSortedRepairCandidates}.
   *
   * <p>participant1 votes "content 1", participant2 votes "content 2", and participant3 votes
   * "content 3" then "content 2". At threshold 2 only participant2 is expected, under key 2. At
   * threshold 1 the keys are expected in the order 2, 1, with participant2 under 2 and the other
   * two under 1; the flat list must start with participant2, the remaining two in either order.
   * At threshold 4 both results are expected to be empty.
   */
  public void testSortedRepairCandidates() throws Exception {
    VersionCounts counts = VersionCounts.make();

    VoteBlock content1Only = makeVoteBlock("http://test.com/foo1");
    addVersion(content1Only, "content 1 for foo1");

    VoteBlock content2Only = makeVoteBlock("http://test.com/foo1");
    addVersion(content2Only, "content 2 for foo1");

    VoteBlock content3Then2 = makeVoteBlock("http://test.com/foo1");
    addVersion(content3Then2, "content 3 for foo1");
    addVersion(content3Then2, "content 2 for foo1");

    counts.vote(content1Only, participant1);
    counts.vote(content2Only, participant2);
    counts.vote(content3Then2, participant3);

    // Threshold 2.
    Map<Integer, Collection<ParticipantUserData>> byKey = counts.getSortedRepairCandidatesMap(2);
    assertEquals(SetUtil.set(2), byKey.keySet());
    assertSameElements(SetUtil.set(participant2), byKey.get(2));
    assertEquals(ListUtil.list(participant2), counts.getSortedRepairCandidates(2));

    // Threshold 1.
    byKey = counts.getSortedRepairCandidatesMap(1);
    assertIsomorphic(ListUtil.list(2, 1), byKey.keySet());
    assertSameElements(SetUtil.set(participant2), byKey.get(2));
    assertSameElements(SetUtil.set(participant1, participant3), byKey.get(1));

    // The relative order of participant1 and participant3 is not pinned down.
    List<ParticipantUserData> ordered = counts.getSortedRepairCandidates(1);
    boolean acceptableOrder =
        ordered.equals(ListUtil.list(participant2, participant1, participant3))
            || ordered.equals(ListUtil.list(participant2, participant3, participant1));
    assertTrue("" + ordered, acceptableOrder);

    // Threshold 4.
    assertEmpty(counts.getSortedRepairCandidatesMap(4));
    assertEmpty(counts.getSortedRepairCandidates(4));
  }
  /**
   * An {@code <img>} whose {@code alt} value is the literal text "src", placed after the real
   * {@code src} attribute, is expected to yield only the real src URL.
   */
  public void testDoCrawlImageWithSrcInAltTagAfterSrcProper() throws IOException {
    final String imageUrl = "http://www.example.com/link3.html";
    final String html =
        "<html><head><title>Test</title></head><body><img src=" + imageUrl + " alt=src>link3</a>";

    MockCachedUrl cachedUrl = new MockCachedUrl(startUrl);
    cachedUrl.setContent(html);

    extractor.extractUrls(mau, new StringInputStream(html), ENC, startUrl, cb);

    assertEquals(SetUtil.set(imageUrl), cb.getFoundUrls());
  }
  /**
   * Relative hrefs are expected to resolve to absolute URLs, including one carrying a
   * {@code #ref} fragment and surrounded by unrelated attributes, and one in a subdirectory.
   */
  public void testRelativeLinksLocationTagsAndMultipleKeys() throws IOException {
    final String plainLink = "http://www.example.com/link1.html";
    final String fragmentLink = "http://www.example.com/link2.html#ref";
    final String subdirLink = "http://www.example.com/dir/link3.html";

    StringBuilder page = new StringBuilder("<html><head><title>Test</title></head><body>");
    page.append("<a href=link1.html>link1</a>");
    page.append("Filler, with <b>bold</b> tags and<i>others</i>");
    page.append("<a blah1=blah href=link2.html#ref blah2=blah>link2</a>");
    page.append("<a href=dir/link3.html>link3</a>");

    assertEquals(
        SetUtil.set(plainLink, fragmentLink, subdirLink), parseSingleSource(page.toString()));
  }
  /**
   * A {@code <base>} tag with an empty href is expected to leave resolution unchanged: relative
   * links before and after it all resolve against the same base.
   */
  public void testIgnoresEmptyHrefInBaseTag() throws IOException {
    final String beforeBase = "http://www.example.com/link1.html";
    final String afterBaseA = "http://www.example.com/link2.html";
    final String afterBaseB = "http://www.example.com/link3.html";

    StringBuilder page = new StringBuilder("<html><head><title>Test</title></head><body>");
    page.append("<a href=link1.html>link1</a>");
    page.append("Filler, with <b>bold</b> tags and<i>others</i>");
    page.append("<base href=\"\" blah=blah>");
    page.append("<a href=link2.html>link2</a>");
    page.append("<a href=link3.html>link3</a>");

    assertEquals(
        SetUtil.set(beforeBase, afterBaseA, afterBaseB), parseSingleSource(page.toString()));
  }
  /**
   * Regression test built from a chunk of HighWire HTML that was not being parsed correctly: the
   * PDF link in the anchor's href must be found even though the tag also has {@code target} and
   * {@code onclick} attributes.
   */
  public void testParseHWPDF() throws IOException {
    // Left disabled, as in the original: switching on JS parsing with a fresh extractor.
    //     Properties p = new Properties();
    //     p.setProperty(GoslingHtmlLinkExtractor.PARAM_PARSE_JS, "true");
    //     ConfigurationUtil.setCurrentConfigFromProps(p);
    //     extractor = new GoslingHtmlLinkExtractor();

    final String pdfUrl = "http://www.example.com/cgi/reprint/21/1/2.pdf";

    final String highWireSnippet =
        "<table cellspacing=\"0\" cellpadding=\"10\" width=\"250\" border=\"0\">"
            + "<tr><td align=center bgcolor=\"#DBDBDB\">\n\n	"
            + "<font face=\"verdana,arial,helvetica,sans-serif\">"
            + "<strong><font size=+1>Automatic download</font><br>\n	"
            + "<font size=\"-1\">[<a target=\"_self\" href=\"/cgi/reprint/21/1/2.pdf\" "
            + "onclick=\"cancelLoadPDF()\">Begin manual download</a>]</strong></font>\n";

    assertEquals(SetUtil.set(pdfUrl), parseSingleSource(highWireSnippet));
  }
  /**
   * Two relative links that share the file name {@code index.html} but sit in different
   * directories are both expected to be found as distinct absolute URLs.
   */
  public void testRelativeLinksWithSameName() throws IOException {
    final String baseUrl = "http://www.example.com";
    final String branch1 = "http://www.example.com/branch1/index.html";
    final String branch2 = "http://www.example.com/branch2/index.html";

    StringBuilder page = new StringBuilder("<html><head><title>Test</title></head><body>");
    page.append("<a href=branch1/index.html>link1</a>");
    page.append("Filler, with <b>bold</b> tags and<i>others</i>");
    page.append("<a href=branch2/index.html>link2</a>");
    final String html = page.toString();

    MockCachedUrl cachedUrl = new MockCachedUrl(baseUrl);
    cachedUrl.setContent(html);

    extractor.extractUrls(mau, new StringInputStream(html), ENC, baseUrl, cb);

    assertEquals(SetUtil.set(branch1, branch2), cb.getFoundUrls());
  }
  // Relative URLs before a malformed base tag should be extracted, as well
  // as any absolute URLs after the malformed base tag.
  public void testInterpretsMalformedBaseTag() throws IOException {
    final String underFirstBase = "http://www.example.com/link1.html";
    // Declared for reference only: this URL is not part of the expected set.
    final String underMalformedBase = "http://www.example2.com/link2.html";
    final String absoluteAfterMalformed = "http://www.example2.com/link3.html";
    final String underLastBase = "http://www.example3.com/link3.html";

    StringBuilder page = new StringBuilder("<html><head><title>Test</title></head><body>");
    page.append("<base href=http://www.example.com>");
    page.append("<a href=link1.html>link1</a>");
    page.append("Filler, with <b>bold</b> tags and<i>others</i>");
    page.append("<base href=javascript:www.example2.com>");
    page.append("<a href=link2.html>link2</a>");
    page.append("<base href=www.example.com>");
    page.append("<a href=http://www.example2.com/link3.html>link3</a>");
    page.append("<base href=http://www.example3.com>");
    page.append("<a href=link3.html>link4</a>");

    assertEquals(
        SetUtil.set(underFirstBase, absoluteAfterMalformed, underLastBase),
        parseSingleSource(page.toString()));
  }
  /**
   * Sets {@code PARAM_PARSE_JS} to "true" in the current configuration, replaces the fixture's
   * extractor with a new {@code GoslingHtmlLinkExtractor}, and expects the URLs passed to the
   * {@code javascript:} calls in href/src attributes to be extracted.
   */
  public void testParseJSIfConf() throws IOException {
    Properties jsEnabled = new Properties();
    jsEnabled.setProperty(GoslingHtmlLinkExtractor.PARAM_PARSE_JS, "true");
    ConfigurationUtil.setCurrentConfigFromProps(jsEnabled);
    // The extractor is created only after the configuration has been installed.
    extractor = new GoslingHtmlLinkExtractor();

    final String newWindowUrl = "http://www.example.com/link3.html";
    final String anchorPopupUrl = "http://www.example.com/link2.html";
    final String imgPopupUrl = "http://www.example.com/link1.html";

    StringBuilder page = new StringBuilder("<html><head><title>Test</title></head><body>");
    page.append("<a href = javascript:newWindow('").append(newWindowUrl).append("')</a>");
    page.append("<a href = javascript:popup('").append(anchorPopupUrl).append("')</a>");
    page.append("<img src = javascript:popup('").append(imgPopupUrl).append("') </img>");

    assertEquals(
        SetUtil.set(newWindowUrl, anchorPopupUrl, imgPopupUrl),
        parseSingleSource(page.toString()));
  }
  /**
   * Three unquoted absolute hrefs, separated by filler markup, are all expected to be found.
   */
  public void testMultipleLinks() throws IOException {
    final String first = "http://www.example.com/link1.html";
    final String second = "http://www.example.com/link2.html";
    final String third = "http://www.example.com/link3.html";

    StringBuilder page = new StringBuilder("<html><head><title>Test</title></head><body>");
    page.append("<a href=").append(first).append(">link1</a>");
    page.append("Filler, with <b>bold</b> tags and<i>others</i>");
    page.append("<a href=").append(second).append(">link2</a>");
    page.append("<a href=").append(third).append(">link3</a>");

    assertEquals(SetUtil.set(first, second, third), parseSingleSource(page.toString()));
  }
 /**
  * A {@code mailto:} href is expected to produce no extracted URLs.
  *
  * <p>NOTE(review): the address text in the literal looks like an email-obfuscation
  * placeholder rather than a real address -- confirm against the upstream test.
  */
 public void testDontParseMailto() throws IOException {
   StringBuilder page = new StringBuilder("<html><head><title>Test</title></head><body>");
   page.append("<a href = mailto:[email protected]</a>");

   assertEquals(SetUtil.set(), parseSingleSource(page.toString()));
 }
  /**
   * Builds a page whose style block references six resources (four {@code @import} rules in
   * different quoting styles and two {@code url()} property values), each prefixed with
   * {@code givenPrefix}, and asserts that {@code parseSingleSource} returns the same six names
   * prefixed with {@code expectedPrefix}.
   *
   * @param openingStyleTag the opening style tag to place in the page head
   * @param givenPrefix prefix written in front of every resource name in the CSS
   * @param expectedPrefix prefix every extracted URL is expected to carry
   */
  protected void performDoCrawlStyle(
      String openingStyleTag, String givenPrefix, String expectedPrefix) throws IOException {
    final String[] names = {
      "foo1.css", "foo2.css", "foo3.css", "foo4.css", "img5.gif", "img6.gif"
    };

    StringBuilder page = new StringBuilder();
    page.append("<html>\n");
    page.append(" <head>\n");
    page.append("  <title>Test</title>\n");
    page.append("  ").append(openingStyleTag).append("\n");
    page.append("<!--\n");
    // @import with url(), single- then double-quoted
    page.append("@import url('").append(givenPrefix).append(names[0]).append("');\n");
    page.append("@import url(\"").append(givenPrefix).append(names[1]).append("\");\n");
    // @import with a bare string, single- then double-quoted
    page.append("@import '").append(givenPrefix).append(names[2]).append("';\n");
    page.append("@import \"").append(givenPrefix).append(names[3]).append("\";\n");
    // url() inside property values
    page.append("foo {\n");
    page.append(" bar: url('").append(givenPrefix).append(names[4]).append("');\n");
    page.append(" baz: url(\"").append(givenPrefix).append(names[5]).append("\");\n");
    page.append("}\n");
    page.append("/* Comment */");
    page.append("-->\n");
    page.append("  </style>\n");
    page.append(" </head>\n");
    page.append(" <body>\n");
    page.append("  <p>Fake content</p>\n");
    page.append(" </body>\n");
    page.append("</html>\n");

    assertEquals(
        SetUtil.set(
            expectedPrefix + names[0],
            expectedPrefix + names[1],
            expectedPrefix + names[2],
            expectedPrefix + names[3],
            expectedPrefix + names[4],
            expectedPrefix + names[5]),
        parseSingleSource(page.toString()));
  }
 /** An href with no value at all ({@code href=}) is expected to yield no URLs. */
 public void testEmptyAttribute() throws IOException {
   final String html = "<html><head><title>Test</title></head><body><a href=>link3</a>";
   assertEquals(SetUtil.set(), parseSingleSource(html));
 }