/**
 * Writes the completed contacts of a link trace to {@code out}, one contact per line, as
 * tab-separated fields: {@code id1}, {@code id2}, begin time, end time.
 *
 * <p>Links present in the reader's reference state at {@code links.minTime()} are treated as
 * contacts beginning at that time. A line is written only when a link goes down, so contacts
 * that are still up when the reader runs out of events are not written.
 *
 * <p>The reader and the writer (and therefore {@code out}) are closed before this method
 * returns, whether it completes normally or throws.
 *
 * @param links   the trace to export
 * @param out     the stream to write to; closed by this method
 * @param timeMul factor applied to the begin and end times before they are written
 * @throws IOException if reading the trace or writing the output fails
 */
public static void toCRAWDAD(LinkTrace links, OutputStream out, double timeMul) throws IOException {
    StatefulReader<LinkEvent, Link> linkReader = links.getReader();
    try {
        BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(out));
        try {
            // Start time of every contact that is currently up.
            Map<Link, Long> activeContacts = new AdjacencyMap.Links<Long>();

            linkReader.seek(links.minTime());
            for (Link l : linkReader.referenceState()) {
                activeContacts.put(l, links.minTime());
            }

            while (linkReader.hasNext()) {
                for (LinkEvent lev : linkReader.next()) {
                    Link l = lev.link();
                    if (lev.isUp()) {
                        activeContacts.put(l, linkReader.time());
                    } else {
                        // A down event for a link with no recorded start makes get() return
                        // null, and the unboxing below then throws NullPointerException.
                        double b = activeContacts.get(l) * timeMul;
                        double e = linkReader.time() * timeMul;
                        activeContacts.remove(l);
                        bw.write(l.id1() + "\t" + l.id2() + "\t" + b + "\t" + e + "\n");
                    }
                }
            }
        } finally {
            // Flushes buffered output and closes the underlying stream even on failure.
            bw.close();
        }
    } finally {
        linkReader.close();
    }
}
public void crawled(LinkEvent le) { switch (le.getID()) { case LinkEvent.ERROR: logger.error( "Crawling error occured during download of URL: " + le.getLink().getURL() + " Root cause: " + le.getException()); break; case LinkEvent.TOO_DEEP: case LinkEvent.SKIPPED: logger.warn("Crawling event: " + le.getName() + ", URL: " + le.getLink().getURL()); break; default: break; } if (le.getID() != LinkEvent.DOWNLOADED) return; logger.info("Crawling event: " + le.getName() + ", URL: " + le.getLink().getURL()); Link l = le.getLink(); Page p = l.getPage(); BookPage page = new BookPage(); page.setBookName(book.getName()); page.setName(book.getName() + counter); page.setParentName(book.getName()); page.setTitle(p.getTitle()); page.setLocation(p.getURL().toString()); page.setChildCount(counter); page.setContentType(book.getContentType()); page.setPath(p.getURL().getPath()); System.out.println(p.getContentBytes()); try { ByteArrayInputStream is = new ByteArrayInputStream(p.getContentBytes()); FileContentParser parser = new FileContentParser(); parser.processContentType(p.getContentType(), is, page); // } catch (UnsupportedEncodingException e1) { } catch (ParserException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } try { im.addPage(page); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } counter++; }