コード例 #1
0
  /**
   * Creates the crawler: obtains the shared {@code TwitterRetry} instance, requests its OAuth
   * access token and prints that token to standard output.
   *
   * <p>If any step throws, the stack trace is printed and the JVM is terminated with exit status
   * {@code 1}; the constructor never propagates the exception to the caller.
   */
  public SnowballCrawler() {
    try {
      twitter = TwitterRetry.getInstance();

      // Requesting the token up front surfaces credential problems before any crawling starts.
      final AccessToken accessToken = twitter.getOAuthAccessToken();
      final String tokenLine = "Access Token " + accessToken;
      System.out.println(tokenLine);
    } catch (Exception failure) {
      // Without a working client there is nothing to do: report and abort the process.
      failure.printStackTrace();
      System.exit(1);
    }
  }
コード例 #2
0
  /**
   * Crawls the network around the hard-coded seed account {@code "keaneofficial"} and saves the
   * result.
   *
   * <p>The followers of the seed account are looked up and form the initial user list. The list is
   * then walked by index; for every visited user the friends are fetched and recorded as links,
   * friends not yet in the list are appended to it (so they are visited later), and the user's
   * timeline is stored. The walk ends when the end of the list is reached or as soon as the list
   * holds {@code NUMBER_NODES} users or more; because friends are appended while a user is being
   * processed, the list may end up longer than {@code NUMBER_NODES}.
   *
   * <p>Any exception raised while processing a single user is reported on standard output as an
   * authorization problem, links pointing to that user are dropped, and the crawl continues with
   * the next user. The user itself stays in the user list. Finally users, links and statuses are
   * handed to {@code Output}.
   *
   * @throws Exception if anything outside the per-user processing fails (the seed lookups or
   *     saving the output)
   */
  public void crawl() throws Exception {
    String startnode = "keaneofficial";

    // Kept as HashMap (not Map) because these exact objects are passed to Output below.
    HashMap<Long, Set<Long>> links = new HashMap<Long, Set<Long>>();
    HashMap<Long, List<Status>> statuses = new HashMap<Long, List<Status>>();

    User seedUser = twitter.showUser(startnode);
    HashSet<Long> initialUsers = twitter.getFollowersIDs(seedUser.getId(), START_NUMBER_LINKS);
    ResponseList<User> users =
        twitter.lookupUsers(this.convertLongs(new ArrayList<Long>(initialUsers)));

    // users grows inside the loop, so both bounds are re-evaluated on every iteration.
    for (int i = 0; i < users.size() && users.size() < NUMBER_NODES; i++) {
      User user = users.get(i);

      try {
        // Fetch the friends before touching any of the maps: this call might throw an
        // "unauthorized" exception, and in that case nothing should be recorded for the user.
        // The element type of the result is not visible here, hence the wildcard and the cast.
        Iterator<?> friends = twitter.getFriends(user, -1).iterator();

        Set<Long> userLinks = new HashSet<Long>();
        links.put(user.getId(), userLinks);

        while (friends.hasNext()) {
          User friend = (User) friends.next();

          // A non-positive threshold disables the filter.
          if (SKIP_NODES_WITH_MORE_THAN > 0
              && friend.getFriendsCount() > SKIP_NODES_WITH_MORE_THAN) {
            continue;
          }

          userLinks.add(friend.getId());

          // Queue the friend for a later visit unless it is already known.
          if (!users.contains(friend)) {
            users.add(friend);
          }
        }

        // save statuses
        Paging paging = new Paging(1, NUMBER_STATUSES);
        ResponseList<Status> userStatuses = twitter.getUserTimeline(user.getId(), paging);
        statuses.put(user.getId(), userStatuses);

        System.out.println("Done with: @" + user.getScreenName());

      } catch (Exception e) {
        // NOTE(review): every exception is reported as "not authorized", whatever its real cause.
        System.out.println(
            "Error - not authorized for: " + user.getScreenName() + " - skipping...");

        // Remove links to the user that could not be fetched. Set.remove compares by equals and
        // is a no-op when the id is absent, so no scan of the individual link sets is needed.
        final Long failedId = user.getId();
        for (Set<Long> linksPerUser : links.values()) {
          linksPerUser.remove(failedId);
        }
      }
    }

    Output.saveUsers(users, usersOutputFile);
    Output.saveToCSVFile(links, networkOutputFile);
    Output.saveStatuses(statuses, contentOutputFile);
  }