Example #1
0
  /**
   * Copies issue and pull-request counters from MongoDB into MySQL.
   *
   * <p>Reads every document of the {@code issueandpull} collection in the {@code ghcrawlerV3}
   * database and writes its open/closed issue and pull counts to the {@code repotest} row whose
   * {@code full_name} equals the document's {@code fn} field. All updates are sent in one
   * transaction that is committed only after the last document has been processed.
   *
   * @throws Exception if either database cannot be reached, a document lacks one of the expected
   *     fields, or an update fails
   */
  public void getIssueAndPull() throws Exception {
    MongoClient mongoClient = new MongoClient(MongoInfo.getMongoServerIp(), 27017);
    try {
      MongoDatabase database = mongoClient.getDatabase("ghcrawlerV3");
      FindIterable<Document> issueIterable = database.getCollection("issueandpull").find();

      Connection connection = MysqlInfo.getMysqlConnection();
      try {
        connection.setAutoCommit(false);
        String sql =
            "update repotest set open_issues = ?,closed_issues = ?,open_pull=?,closed_pull=? where full_name = ?";
        PreparedStatement stmt = connection.prepareStatement(sql);
        try {
          JsonParser parser = new JsonParser();
          for (Document document : issueIterable) {
            JsonObject repoIssue = parser.parse(document.toJson()).getAsJsonObject();
            String repoName = repoIssue.get("fn").getAsString();
            System.out.println(repoName);

            stmt.setInt(1, repoIssue.get("openissue").getAsInt());
            stmt.setInt(2, repoIssue.get("closedissue").getAsInt());
            stmt.setInt(3, repoIssue.get("openpull").getAsInt());
            stmt.setInt(4, repoIssue.get("closedpull").getAsInt());
            stmt.setString(5, repoName);
            stmt.execute();
          }
          connection.commit();
        } finally {
          // Release the statement even when a row fails half-way through.
          stmt.close();
        }
      } finally {
        connection.close();
      }
    } finally {
      mongoClient.close();
    }
  }
Example #2
0
  /**
   * Copies per-repository commit counts from MongoDB into MySQL.
   *
   * <p>Reads every document of the {@code commitnumber} collection in the {@code ghcrawlerV3}
   * database and stores its {@code commitnumber} value in the {@code commit} column of the
   * {@code repotest} row whose {@code full_name} equals the document's {@code fn} field. All
   * updates are sent in one transaction that is committed after the last document.
   *
   * @throws Exception if either database cannot be reached, a document lacks one of the expected
   *     fields, or an update fails
   */
  public void getCommitCount() throws Exception {
    MongoClient mongoClient = new MongoClient(MongoInfo.getMongoServerIp(), 27017);
    try {
      MongoDatabase database = mongoClient.getDatabase("ghcrawlerV3");
      FindIterable<Document> commitIterable = database.getCollection("commitnumber").find();

      Connection connection = MysqlInfo.getMysqlConnection();
      try {
        connection.setAutoCommit(false);
        // Prepared once and reused for every row; preparing inside the loop leaked one
        // statement per document.
        String sql = "update repotest set commit = ? where full_name = ?";
        PreparedStatement stmt = connection.prepareStatement(sql);
        try {
          JsonParser parser = new JsonParser();
          for (Document document : commitIterable) {
            JsonObject repoJsonObject = parser.parse(document.toJson()).getAsJsonObject();
            int commit = repoJsonObject.get("commitnumber").getAsInt();
            String fullName = repoJsonObject.get("fn").getAsString();
            System.out.println(fullName);

            stmt.setInt(1, commit);
            stmt.setString(2, fullName);
            stmt.execute();
          }
          connection.commit();
        } finally {
          stmt.close();
        }
      } finally {
        connection.close();
      }
    } finally {
      mongoClient.close();
    }
  }
Example #3
0
  /**
   * Imports repository collaborators from MongoDB into MySQL.
   *
   * <p>Builds a {@code full_name -> id} lookup from the {@code repo} collection, then inserts one
   * {@code collaborator(user_id, repo_id)} row per document of the {@code assignees} collection
   * and finally writes the number of collaborators seen for each repository to
   * {@code repotest.collaborator}. Everything is sent in one transaction that is committed at the
   * end.
   *
   * <p>Assignee documents whose {@code fn} is not present in the {@code repo} collection are
   * reported on {@code System.err} and skipped.
   *
   * @throws Exception if either database cannot be reached, a document lacks one of the expected
   *     fields, or a statement fails
   */
  public void getCollaborators() throws Exception {
    Connection connection = MysqlInfo.getMysqlConnection();
    try {
      connection.setAutoCommit(false);
      String conSql = "insert into collaborator(user_id,repo_id) values(?,?);";
      PreparedStatement conStmt = connection.prepareStatement(conSql);
      try {
        String repoSql = "update repotest set collaborator = ? where id = ?";
        PreparedStatement repoStmt = connection.prepareStatement(repoSql);
        try {
          MongoClient mongoClient = new MongoClient(MongoInfo.getMongoServerIp(), 27017);
          try {
            MongoDatabase database = mongoClient.getDatabase("ghcrawlerV3");
            JsonParser parser = new JsonParser();

            // Repository full name -> numeric id, used to resolve the "fn" of each assignee.
            Map<String, Integer> repoMap = new HashMap<String, Integer>();
            FindIterable<Document> repoIterable = database.getCollection("repo").find();
            for (Document document : repoIterable) {
              JsonObject repoJsonObject = parser.parse(document.toJson()).getAsJsonObject();
              int id = repoJsonObject.get("id").getAsInt();
              String fullName = repoJsonObject.get("full_name").getAsString();
              System.out.println(id);
              repoMap.put(fullName, id);
            }

            // Repository id -> number of collaborator rows inserted for it.
            Map<Integer, Integer> collaboratorMap = new HashMap<Integer, Integer>();
            FindIterable<Document> collaboratorIterable =
                database.getCollection("assignees").find();
            for (Document document : collaboratorIterable) {
              JsonObject contriJsonObject = parser.parse(document.toJson()).getAsJsonObject();
              int userId = contriJsonObject.get("id").getAsInt();
              String repoName = contriJsonObject.get("fn").getAsString();

              // Unboxing a missing entry used to abort the whole import with a bare
              // NullPointerException; skip the orphaned assignee instead.
              Integer repoId = repoMap.get(repoName);
              if (repoId == null) {
                System.err.println(
                    "skipping collaborator " + userId + ": unknown repository " + repoName);
                continue;
              }

              conStmt.setInt(1, userId);
              conStmt.setInt(2, repoId);
              conStmt.execute();

              Integer seen = collaboratorMap.get(repoId);
              collaboratorMap.put(repoId, seen == null ? 1 : seen + 1);
            }

            for (Map.Entry<Integer, Integer> entry : collaboratorMap.entrySet()) {
              repoStmt.setInt(1, entry.getValue());
              repoStmt.setInt(2, entry.getKey());
              repoStmt.execute();
            }
          } finally {
            mongoClient.close();
          }
          connection.commit();
        } finally {
          repoStmt.close();
        }
      } finally {
        conStmt.close();
      }
    } finally {
      connection.close();
    }
  }
Example #4
0
  /**
   * Crawls issues and pull requests for a fixed list of repositories and appends the results to
   * the {@code issuescp} and {@code pullscp} collections of the {@code ghcrawlerV3} database.
   *
   * <p>For each repository the {@code pullcacheB} and {@code issuecacheB} collections are dropped,
   * the issue and pull crawlers are run, and whatever the two cache collections then contain is
   * saved into the target collections.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    String[] repo = {
      "c9s/App-gh", "joshsh/sesametools", "jbr/sibilant", "r1k0/kigen"
    }; // "pouchdb/pouchdb"
    Mongo mongo = new Mongo(MongoInfo.getMongoServerIp(), 27017);
    try {
      DB db = mongo.getDB("ghcrawlerV3");
      DBCollection pullcache = db.getCollection("pullcacheB");
      DBCollection issuecache = db.getCollection("issuecacheB");
      DBCollection pulls = db.getCollection("pullscp");
      DBCollection issues = db.getCollection("issuescp");

      for (String fullName : repo) {
        // Start from empty caches so only this repository's documents get copied below.
        pullcache.drop();
        issuecache.drop();

        // NOTE(review): presumably the crawlers write into pullcacheB / issuecacheB - confirm.
        PullCrawlerB pullCrawler = new PullCrawlerB();
        IssueCrawlerB issueCrawler = new IssueCrawlerB();
        issueCrawler.crawlIssues(fullName);
        pullCrawler.crawlPulls(fullName);

        saveAllInto(issuecache, issues);
        saveAllInto(pullcache, pulls);
      }
    } finally {
      // The client was never closed before, leaving its connections open until JVM exit.
      mongo.close();
    }
  }

  /** Saves every document of {@code source} into {@code target} using a no-timeout cursor. */
  private static void saveAllInto(DBCollection source, DBCollection target) {
    DBCursor cursor = source.find();
    try {
      cursor.addOption(com.mongodb.Bytes.QUERYOPTION_NOTIMEOUT);
      while (cursor.hasNext()) {
        target.save(cursor.next());
      }
    } finally {
      cursor.close();
    }
  }
Example #5
0
  /**
   * Builds a word-frequency table from repository descriptions.
   *
   * <p>Reads the {@code description} of every document in the {@code repository} collection of
   * the {@code ghcrawlerV3} database, replaces every character outside {@code [a-zA-Z0-9]} with a
   * space, lower-cases the resulting words and counts them. Every word that occurs at least twice
   * is then written to the MySQL {@code word} table with {@code replace into}, all in one
   * transaction.
   *
   * @throws Exception if either database cannot be reached or a statement fails
   */
  public void analysisClasses() throws Exception {
    HashMap<String, Integer> map = new HashMap<String, Integer>();

    Mongo mongo = new Mongo(MongoInfo.getMongoServerIp(), 27017);
    try {
      DB db = mongo.getDB("ghcrawlerV3");
      DBCursor repos = db.getCollection("repository").find();
      try {
        repos.addOption(com.mongodb.Bytes.QUERYOPTION_NOTIMEOUT);
        while (repos.hasNext()) {
          Object rawDescription = repos.next().get("description");
          if (rawDescription == null) {
            continue;
          }
          String description =
              rawDescription.toString().replaceAll("[^a-zA-Z0-9]", " ").replaceAll("\\s+", " ");
          // Split once; the previous loop re-split the whole description twice per word.
          String[] tokens = description.split(" ");
          for (String token : tokens) {
            // A description that is empty or starts with a non-alphanumeric character yields an
            // empty first token, which is not a word.
            if (token.isEmpty()) {
              continue;
            }
            // Locale.ROOT keeps the ASCII-only tokens stable regardless of the JVM's default
            // locale (e.g. Turkish dotless i).
            String word = token.toLowerCase(java.util.Locale.ROOT);
            Integer seen = map.get(word);
            map.put(word, seen == null ? 1 : seen + 1);
          }
        }
      } finally {
        repos.close();
      }
    } finally {
      mongo.close();
    }

    Connection connection = MysqlInfo.getMysqlConnection();
    try {
      connection.setAutoCommit(false);
      String sqlInsert = "replace into word values(?,?)";
      // One statement reused for all rows instead of preparing a new one per word.
      PreparedStatement stmt = connection.prepareStatement(sqlInsert);
      try {
        Set<String> words = map.keySet();
        for (String word : words) {
          int count = map.get(word);
          if (count < 2) {
            continue;
          }
          stmt.setString(1, word);
          stmt.setInt(2, count);
          stmt.execute();
        }
      } finally {
        stmt.close();
      }
      connection.commit();
    } finally {
      connection.close();
    }
  }
Example #6
0
  /**
   * Imports per-repository language statistics from MongoDB into MySQL.
   *
   * <p>Builds a {@code full_name -> id} lookup from the {@code repo} collection, inserts one
   * {@code language(repo_id, language, count)} row per document of the {@code languages}
   * collection, and finally stores, for each repository, the language with the highest count in
   * {@code repotest.language} (on a tie the document read last wins). Everything is sent in one
   * transaction that is committed at the end.
   *
   * <p>Language documents whose {@code fn} is not present in the {@code repo} collection are
   * reported on {@code System.err} and skipped.
   *
   * @throws Exception if either database cannot be reached, a document does not have the expected
   *     layout, or a statement fails
   */
  public void analyseLanguage() throws Exception {
    Connection connection = MysqlInfo.getMysqlConnection();
    try {
      connection.setAutoCommit(false);
      String lanSql = "insert into language(repo_id,language,count) values(?,?,?);";
      PreparedStatement lanStmt = connection.prepareStatement(lanSql);
      try {
        String repoSql = "update repotest set language = ? where id = ?";
        PreparedStatement repoStmt = connection.prepareStatement(repoSql);
        try {
          MongoClient mongoClient = new MongoClient(MongoInfo.getMongoServerIp(), 27017);
          try {
            MongoDatabase database = mongoClient.getDatabase("ghcrawlerV3");
            JsonParser parser = new JsonParser();

            // Repository full name -> numeric id, used to resolve the "fn" of each document.
            Map<String, Integer> repoMap = new HashMap<String, Integer>();
            FindIterable<Document> repoIterable = database.getCollection("repo").find();
            for (Document document : repoIterable) {
              JsonObject repoJsonObject = parser.parse(document.toJson()).getAsJsonObject();
              int id = repoJsonObject.get("id").getAsInt();
              String fullName = repoJsonObject.get("full_name").getAsString();
              System.out.println(id);
              repoMap.put(fullName, id);
            }

            // Repository id -> its top language so far, and that language's count.
            Map<Integer, String> languageMap = new HashMap<Integer, String>();
            Map<Integer, Integer> lanNumMap = new HashMap<Integer, Integer>();

            FindIterable<Document> languageIterable = database.getCollection("languages").find();
            for (Document document : languageIterable) {
              String json = document.toJson();
              // NOTE(review): assumes the second comma-separated piece of the serialized
              // document is the `"<language>" : <count>` pair - confirm against the
              // `languages` schema; a name containing ',' or ':' would break this.
              String[] items = json.split(",")[1].split(":");
              String language = items[0].trim().replaceAll("\"", "");
              int num = Integer.parseInt(items[1].trim());
              System.out.println(language + "\t" + num);

              JsonObject lanJsonObject = parser.parse(json).getAsJsonObject();
              String repoName = lanJsonObject.get("fn").getAsString();

              // Unboxing a missing entry used to abort the whole import with a bare
              // NullPointerException; skip the orphaned document instead.
              Integer repoId = repoMap.get(repoName);
              if (repoId == null) {
                System.err.println(
                    "skipping language " + language + ": unknown repository " + repoName);
                continue;
              }

              Integer best = lanNumMap.get(repoId);
              if (best == null || num >= best) {
                languageMap.put(repoId, language);
                lanNumMap.put(repoId, num);
              }

              lanStmt.setInt(1, repoId);
              lanStmt.setString(2, language);
              lanStmt.setInt(3, num);
              lanStmt.execute();
            }

            for (Map.Entry<Integer, String> entry : languageMap.entrySet()) {
              repoStmt.setString(1, entry.getValue());
              repoStmt.setInt(2, entry.getKey());
              repoStmt.execute();
            }
          } finally {
            mongoClient.close();
          }
          connection.commit();
        } finally {
          repoStmt.close();
        }
      } finally {
        lanStmt.close();
      }
    } finally {
      connection.close();
    }
  }
Example #7
0
  /**
   * Copies repository records from MongoDB into the MySQL {@code repotest} table.
   *
   * <p>First stamps {@code updatetime.repo_update_time} with the current local time. Then every
   * document of the {@code repository} collection in the {@code ghcrawlerV3} database is written
   * with {@code replace into}, all in one transaction committed after the last document. The
   * {@code hot}, {@code mature}, {@code popular} and {@code nb} columns are 0..10 scores derived
   * from the star, fork, subscriber and size counts respectively.
   *
   * <p>A missing or null {@code description} or {@code pushed_at} is stored as an empty string; a
   * missing or null {@code language} is stored as {@code "unknown"}.
   *
   * @throws Exception if either database cannot be reached, a document lacks a required field, or
   *     a statement fails
   */
  public void getRepo() throws Exception {
    MongoClient mongoClient = new MongoClient(MongoInfo.getMongoServerIp(), 27017);
    try {
      MongoDatabase database = mongoClient.getDatabase("ghcrawlerV3");
      FindIterable<Document> repoIterable = database.getCollection("repository").find();

      Connection connection = MysqlInfo.getMysqlConnection();
      try {
        // Refresh the update time. This runs before setAutoCommit(false) below, so it is
        // presumably committed on its own, independent of the import transaction.
        String updateSql = "update updatetime set repo_update_time = ?";
        PreparedStatement updateStmt = connection.prepareStatement(updateSql);
        try {
          SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
          updateStmt.setString(1, sdf.format(new Date()));
          updateStmt.execute();
        } finally {
          updateStmt.close();
        }

        connection.setAutoCommit(false);
        String sql =
            "replace into repotest(id,full_name,description,fork,owner_id,owner_name,owner_type,create_time,push_time,update_time,stargazers,subscribers,fork_num,size,hot,mature,popular,nb,gitclone_url,github_url,language) values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?);";
        PreparedStatement stmt = connection.prepareStatement(sql);
        try {
          JsonParser parser = new JsonParser();
          for (Document document : repoIterable) {
            JsonObject repoJsonObject = parser.parse(document.toJson()).getAsJsonObject();

            int id = repoJsonObject.get("id").getAsInt();
            System.out.println(id);
            stmt.setInt(1, id);

            String fullName = repoJsonObject.get("full_name").getAsString();
            System.out.println(fullName);
            stmt.setString(2, fullName);

            stmt.setString(3, optionalString(repoJsonObject, "description", ""));

            // fork flag is stored as 1/0
            stmt.setInt(4, repoJsonObject.get("fork").getAsBoolean() ? 1 : 0);

            JsonObject owner = repoJsonObject.get("owner").getAsJsonObject();
            stmt.setInt(5, owner.get("id").getAsInt());
            // owner name is the part of "owner/repo" before the slash
            stmt.setString(6, fullName.split("/")[0]);
            // owner type: 2 for "Organization", 1 for anything else
            stmt.setInt(7, owner.get("type").getAsString().equals("Organization") ? 2 : 1);

            stmt.setString(8, repoJsonObject.get("created_at").getAsString());
            stmt.setString(9, optionalString(repoJsonObject, "pushed_at", ""));
            stmt.setString(10, repoJsonObject.get("updated_at").getAsString());

            int starCount = repoJsonObject.get("stargazers_count").getAsInt();
            int subscriber = repoJsonObject.get("subscribers_count").getAsInt();
            int forksCount = repoJsonObject.get("forks_count").getAsInt();
            int size = repoJsonObject.get("size").getAsInt();
            stmt.setInt(11, starCount);
            stmt.setInt(12, subscriber);
            stmt.setInt(13, forksCount);
            stmt.setInt(14, size);

            stmt.setInt(15, scaledLogScore(starCount)); // hot
            stmt.setInt(16, scaledLogScore(forksCount)); // mature
            stmt.setInt(17, scaledLogScore(subscriber)); // popular
            stmt.setInt(18, scaledLogScore(size)); // nb

            stmt.setString(19, repoJsonObject.get("clone_url").getAsString());
            stmt.setString(20, repoJsonObject.get("html_url").getAsString());
            stmt.setString(21, optionalString(repoJsonObject, "language", "unknown"));

            stmt.execute();
          }
          connection.commit();
        } finally {
          stmt.close();
        }
      } finally {
        connection.close();
      }
    } finally {
      mongoClient.close();
    }
  }

  /**
   * Maps a count to a 0..10 score as {@code floor(log10(count) * 2.5)}, capped at 10.
   *
   * <p>Counts of zero or less score 0. Without this guard {@code Math.log10(0)} is negative
   * infinity, which the {@code int} cast turns into {@code Integer.MIN_VALUE}.
   */
  private static int scaledLogScore(int count) {
    if (count <= 0) {
      return 0;
    }
    return Math.min((int) (Math.log10(count) * 2.5), 10);
  }

  /** Returns the string value of {@code key}, or {@code fallback} if it is absent or JSON null. */
  private static String optionalString(JsonObject object, String key, String fallback) {
    if (object.has(key) && !object.get(key).isJsonNull()) {
      return object.get(key).getAsString();
    }
    return fallback;
  }