/**
 * Creates a crawler backed by the client returned from {@code TwitterRetry.getInstance()}.
 *
 * <p>The OAuth access token is requested right away and echoed to standard output. Any
 * exception raised during this setup is printed as a stack trace and terminates the JVM
 * with exit status 1, so a crawler instance never exists without a usable client.
 */
public SnowballCrawler() {
    try {
        twitter = TwitterRetry.getInstance();
        // Fetching the token up front surfaces authentication problems before any crawling starts.
        String tokenMessage = "Access Token " + twitter.getOAuthAccessToken();
        System.out.println(tokenMessage);
    } catch (Exception setupFailure) {
        setupFailure.printStackTrace();
        System.exit(1);
    }
}
public void crawl() throws Exception { String startnode = "keaneofficial"; ArrayList<Long> usersIds = new ArrayList<Long>(); HashMap<Long, HashSet<Long>> network = new HashMap<Long, HashSet<Long>>(); HashMap<Long, Set<Long>> links = new HashMap<Long, Set<Long>>(); HashMap<Long, List<Status>> statuses = new HashMap<Long, List<Status>>(); User user1 = twitter.showUser(startnode); HashSet<Long> initialUsers = twitter.getFollowersIDs(user1.getId(), START_NUMBER_LINKS); ResponseList<User> users = twitter.lookupUsers(this.convertLongs(new ArrayList(initialUsers))); for (int i = 0; i < users.size() && users.size() < NUMBER_NODES; i++) { User user = (User) users.get(i); // it.next(); try { // save links Iterator it2 = twitter .getFriends(user, -1) .iterator(); // this might throw an "unauthorized" exception that's why it should // come first links.put(user.getId(), new HashSet<Long>()); int countFriends = 0; while (it2.hasNext()) { User friend = (User) it2.next(); if (SKIP_NODES_WITH_MORE_THAN > 0 && friend.getFriendsCount() > SKIP_NODES_WITH_MORE_THAN) continue; links.get(user.getId()).add(friend.getId()); if (!users.contains(friend)) users.add(friend); countFriends++; } // successfully fetched user info, save user // ret.add(user); // save statuses Paging paging = new Paging(1, NUMBER_STATUSES); ResponseList<Status> userStatuses = twitter.getUserTimeline(user.getId(), paging); statuses.put(user.getId(), userStatuses); System.out.println("Done with: @" + user.getScreenName()); } catch (Exception e) { // System.err.println(e.getMessage()); System.out.println( "Error - not authorized for: " + user.getScreenName() + " - skipping..."); // remove links to people that had "unauthorized access" exception for (Set<Long> linksPerUser : links.values()) { ArrayList<Long> markedForRemoval = new ArrayList<Long>(); for (Long curLink : linksPerUser) { if (curLink == user.getId()) { markedForRemoval.add(curLink); } } for (Long toRemoveLink : markedForRemoval) { 
linksPerUser.remove(toRemoveLink); } } } } Output.saveUsers(users, usersOutputFile); Output.saveToCSVFile(links, networkOutputFile); Output.saveStatuses(statuses, contentOutputFile); }