/**
 * Writes the contacts found in a link trace to {@code out} as tab-separated text.
 *
 * <p>Every time a link goes down, one line is written:
 * {@code id1 <TAB> id2 <TAB> begin <TAB> end}, where {@code begin} and {@code end} are the
 * times at which the link came up and went down, each multiplied by {@code timeMul}. Links
 * that are already part of the reader's reference state after seeking to
 * {@code links.minTime()} are treated as having come up at that time.
 *
 * <p>Links that are still up when the reader runs out of events produce no output line.
 *
 * <p>The link reader and the writer wrapping {@code out} are always closed before this method
 * returns, including when an exception is thrown part-way through.
 *
 * @param links the link trace to export
 * @param out the stream the text is written to; it is closed by this method
 * @param timeMul factor applied to every begin and end time before it is written
 * @throws IOException if reading the trace or writing to {@code out} fails
 * @throws IllegalStateException if a link goes down without a recorded start time
 */
public static void toCRAWDAD(LinkTrace links, OutputStream out, double timeMul)
      throws IOException {

    StatefulReader<LinkEvent, Link> linkReader = links.getReader();
    BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(out));
    Map<Link, Long> activeContacts = new AdjacencyMap.Links<Long>();

    try {
      linkReader.seek(links.minTime());
      // Links that are up in the initial state have no up event; start them at minTime.
      for (Link l : linkReader.referenceState()) {
        activeContacts.put(l, links.minTime());
      }
      while (linkReader.hasNext()) {
        for (LinkEvent lev : linkReader.next()) {
          Link l = lev.link();
          if (lev.isUp()) {
            activeContacts.put(l, linkReader.time());
          } else {
            Long upTime = activeContacts.get(l);
            if (upTime == null) {
              // Fail with a descriptive error instead of an NPE from auto-unboxing.
              throw new IllegalStateException(
                  "Link "
                      + l.id1()
                      + "-"
                      + l.id2()
                      + " went down at time "
                      + linkReader.time()
                      + " without a recorded start time");
            }
            double b = upTime * timeMul;
            double e = linkReader.time() * timeMul;
            activeContacts.remove(l);
            bw.write(l.id1() + "\t" + l.id2() + "\t" + b + "\t" + e + "\n");
          }
        }
      }
    } finally {
      // Release both resources even if reading or writing failed; the nested finally makes
      // sure the writer is closed when closing the reader itself throws.
      try {
        linkReader.close();
      } finally {
        bw.close();
      }
    }
  }
Ejemplo n.º 2
0
  /**
   * Handles a crawler event: logs problem events and stores every downloaded page.
   *
   * <p>{@code ERROR} events are logged at error level, {@code TOO_DEEP} and {@code SKIPPED}
   * events at warn level. Any event other than {@code DOWNLOADED} is otherwise ignored.
   *
   * <p>For a {@code DOWNLOADED} event a {@link BookPage} is populated from the current book and
   * the downloaded page, the page content is run through a {@link FileContentParser}, the
   * result is handed to {@code im.addPage}, and {@code counter} is incremented. A failure while
   * parsing or while adding the page is logged and does not abort the handler; in particular
   * the page is still added when its content could not be parsed.
   *
   * @param le the crawler event to handle
   */
  public void crawled(LinkEvent le) {

    switch (le.getID()) {
      case LinkEvent.ERROR:
        logger.error(
            "Crawling error occured during download of URL: "
                + le.getLink().getURL()
                + " Root cause:	"
                + le.getException());
        break;

      case LinkEvent.TOO_DEEP:
      case LinkEvent.SKIPPED:
        logger.warn("Crawling event: " + le.getName() + ", URL: " + le.getLink().getURL());
        break;
      default:
        break;
    }

    // Only successfully downloaded pages are turned into book pages.
    if (le.getID() != LinkEvent.DOWNLOADED) {
      return;
    }

    logger.info("Crawling event: " + le.getName() + ", URL: " + le.getLink().getURL());

    Link l = le.getLink();
    Page p = l.getPage();

    BookPage page = new BookPage();
    page.setBookName(book.getName());
    // The running counter makes the page name distinct within the book.
    page.setName(book.getName() + counter);
    page.setParentName(book.getName());
    page.setTitle(p.getTitle());
    page.setLocation(p.getURL().toString());
    page.setChildCount(counter);
    page.setContentType(book.getContentType());
    page.setPath(p.getURL().getPath());

    try {
      ByteArrayInputStream is = new ByteArrayInputStream(p.getContentBytes());

      FileContentParser parser = new FileContentParser();
      parser.processContentType(p.getContentType(), is, page);
    } catch (ParserException e) {
      logger.error("Could not parse content of URL: " + p.getURL(), e);
    } catch (Exception e) {
      // Deliberately broad: one bad page must not stop the crawl callback.
      logger.error("Unexpected error while processing content of URL: " + p.getURL(), e);
    }

    try {
      im.addPage(page);
    } catch (IOException e) {
      logger.error("Could not add page for URL: " + p.getURL(), e);
    }

    counter++;
  }