Пример #1
0
  /**
   * Feeds the IDs of all blogs whose country is {@code 'BR'} to a pool of crawler threads and
   * waits for those threads to finish.
   *
   * <p>Starts {@code numCrawler} {@code CrawlerC} workers sharing one bounded queue, pages through
   * the {@code blogs} table 100 rows at a time, offers each {@code blogID} to the queue, and
   * finally enqueues one {@code CrawlerC.NO_MORE_WORK} marker per worker before joining them.
   *
   * @throws Exception if a query or result-set access fails, or if the calling thread is
   *     interrupted while waiting on the queue or on a worker
   */
  public static void getBlogs() throws Exception {
    final int pageSize = 100;

    BlockingQueue<String> queue = new ArrayBlockingQueue<String>(numCrawler * 4);

    CrawlerC[] crawler = new CrawlerC[numCrawler];
    for (int i = 0; i < crawler.length; i++) {
      crawler[i] = new CrawlerC(queue);
      crawler[i].start();
    }

    try {
      // MySQL LIMIT offsets are zero-based: start at row 0 and advance only after a page has been
      // consumed, so no leading rows are skipped.
      int offset = 0;
      while (true) {
        // NOTE(review): the query has no ORDER BY, so the database does not guarantee a stable
        // row order between pages; consider ordering by a unique column.
        ResultSet rs =
            myStm.executeQuery(
                "SELECT blogID from blogs where country = 'BR' LIMIT " + offset + "," + pageSize);
        System.out.println("\n---" + offset + "---");

        int rowsInPage = 0;
        try {
          // A fresh ResultSet is positioned before its first row, so next() alone visits every
          // row; calling first() beforehand would make the loop skip row one of each page.
          while (rs.next()) {
            rowsInPage++;
            // Best effort: an ID that cannot be queued within the timeout is reported and dropped.
            if (!queue.offer(rs.getString("blogID"), 60, TimeUnit.SECONDS)) {
              System.out.println("Offer.Timeout");
            }
          }
        } finally {
          rs.close();
        }

        if (rowsInPage == 0) {
          break; // an empty page means the table has been exhausted
        }
        offset += pageSize;
      }
    } finally {
      // Always tell the workers to stop, even when paging failed, so they do not poll forever.
      // The queue is deliberately NOT cleared first: IDs still waiting in it must be processed,
      // and the FIFO order guarantees each worker sees them before its shutdown marker.
      for (int i = 0; i < crawler.length; i++) {
        queue.put(CrawlerC.NO_MORE_WORK);
      }
      for (int i = 0; i < crawler.length; i++) {
        crawler[i].join();
      }
    }
  }
Пример #2
0
  /**
   * Worker loop: repeatedly takes a blog ID from the queue {@code q}, fetches up to 25 entries of
   * that blog's Blogger comment feed, and inserts the 20-digit profile ID of each entry's first
   * author into the {@code author} table.
   *
   * <p>The loop ends when the {@code NO_MORE_WORK} marker is received or when the thread is
   * interrupted; {@code myStm} is closed on the way out. Any other failure while handling one
   * blog is reported and the loop moves on to the next blog ID.
   */
  public void run() {
    while (true) {
      try {
        String blogID = q.poll(60, TimeUnit.SECONDS);
        if (blogID == null) {
          System.out.println("Poll.Timeout");
          continue;
        }

        // Identity comparison is kept on purpose: the marker is recognised by reference, so a
        // real blog ID with the same text cannot stop the worker. This presumably relies on the
        // producer enqueuing this very NO_MORE_WORK instance -- confirm against the producer.
        if (blogID == NO_MORE_WORK) {
          break;
        }

        URL feedUrl = new URL("http://www.blogger.com/feeds/" + blogID + "/comments/default");
        Query myQuery = new Query(feedUrl);
        myQuery.setMaxResults(25);

        System.out.print(r + "+,");
        Feed resultFeed = myService.query(myQuery, Feed.class);

        for (Entry entry : resultFeed.getEntries()) {
          // An entry without authors must not abort the remaining entries of this feed.
          if (entry.getAuthors().isEmpty()) {
            continue;
          }
          String authorUri = entry.getAuthors().get(0).getUri();
          if (authorUri == null) {
            continue;
          }

          // Only digits survive the replaceAll, so concatenating profileID into the SQL below
          // cannot inject anything.
          String profileID = authorUri.replaceAll("[^\\d]", "");
          if (profileID.length() != 20) {
            continue;
          }

          try {
            myStm.executeUpdate(
                "INSERT IGNORE INTO author SET profileID = '" + profileID + "'");
          } catch (Exception e) {
            // Keep going with the next entry, but do not hide the failure.
            System.err.println(r + ": insert of profileID " + profileID + " failed: " + e);
          }
        }

      } catch (InterruptedException e) {
        // Restore the flag and stop; treating an interrupt as an ordinary error would make the
        // worker impossible to cancel.
        Thread.currentThread().interrupt();
        break;
      } catch (Exception e) {
        System.out.print(r + "ERR,");
        System.err.println(r + ": " + e);
      }
    }

    System.out.println("Bye(" + r + ")");
    try {
      myStm.close();
    } catch (Exception ignored) {
      // Nothing useful can be done if closing fails while the worker is shutting down.
    }
  }