/**
 * Copies per-repository issue and pull-request counters from the Mongo collection
 * {@code issueandpull} into the MySQL table {@code repotest}.
 *
 * <p>Every document is read for the integer fields {@code openissue}, {@code closedissue},
 * {@code openpull}, {@code closedpull} and the repository full name {@code fn}; the
 * {@code repotest} row whose {@code full_name} equals {@code fn} is updated. All updates run in
 * one transaction that is committed after the last document and rolled back if any step fails.
 * The statement, the connection and the Mongo client are closed on every exit path.
 *
 * @throws Exception if reading from Mongo, decoding a document or writing to MySQL fails
 */
public void getIssueAndPull() throws Exception {
    MongoClient mongoClient = new MongoClient(MongoInfo.getMongoServerIp(), 27017);
    try {
        MongoDatabase database = mongoClient.getDatabase("ghcrawlerV3");
        FindIterable<Document> issueIterable = database.getCollection("issueandpull").find();

        Connection connection = MysqlInfo.getMysqlConnection();
        try {
            connection.setAutoCommit(false);
            String sql = "update repotest set open_issues = ?,closed_issues = ?,open_pull=?,closed_pull=? where full_name = ?";
            PreparedStatement stmt = connection.prepareStatement(sql);
            try {
                JsonParser parser = new JsonParser();
                for (Document document : issueIterable) {
                    JsonObject repoIssue = parser.parse(document.toJson()).getAsJsonObject();
                    String repoName = repoIssue.get("fn").getAsString();
                    System.out.println(repoName);

                    stmt.setInt(1, repoIssue.get("openissue").getAsInt());
                    stmt.setInt(2, repoIssue.get("closedissue").getAsInt());
                    stmt.setInt(3, repoIssue.get("openpull").getAsInt());
                    stmt.setInt(4, repoIssue.get("closedpull").getAsInt());
                    stmt.setString(5, repoName);
                    stmt.execute();
                }
                connection.commit();
            } catch (Exception e) {
                // Undo the partially applied updates before propagating the failure.
                connection.rollback();
                throw e;
            } finally {
                stmt.close();
            }
        } finally {
            connection.close();
        }
    } finally {
        mongoClient.close();
    }
}
/**
 * Copies the commit count of every repository from the Mongo collection {@code commitnumber}
 * into the {@code commit} column of the MySQL table {@code repotest}.
 *
 * <p>Each document is read for the integer field {@code commitnumber} and the repository full
 * name {@code fn}. One prepared statement is reused for all rows (the statement used to be
 * re-prepared for every document and never closed). All updates run in one transaction that is
 * committed after the last document and rolled back if any step fails.
 *
 * @throws Exception if reading from Mongo, decoding a document or writing to MySQL fails
 */
public void getCommitCount() throws Exception {
    MongoClient mongoClient = new MongoClient(MongoInfo.getMongoServerIp(), 27017);
    try {
        MongoDatabase database = mongoClient.getDatabase("ghcrawlerV3");
        FindIterable<Document> commitIterable = database.getCollection("commitnumber").find();

        Connection connection = MysqlInfo.getMysqlConnection();
        try {
            connection.setAutoCommit(false);
            String sql = "update repotest set commit = ? where full_name = ?";
            PreparedStatement stmt = connection.prepareStatement(sql);
            try {
                JsonParser parser = new JsonParser();
                for (Document document : commitIterable) {
                    JsonObject repoJsonObject = parser.parse(document.toJson()).getAsJsonObject();
                    int commitCount = repoJsonObject.get("commitnumber").getAsInt();
                    String fullName = repoJsonObject.get("fn").getAsString();
                    System.out.println(fullName);

                    stmt.setInt(1, commitCount);
                    stmt.setString(2, fullName);
                    stmt.execute();
                }
                connection.commit();
            } catch (Exception e) {
                // Undo the partially applied updates before propagating the failure.
                connection.rollback();
                throw e;
            } finally {
                stmt.close();
            }
        } finally {
            connection.close();
        }
    } finally {
        mongoClient.close();
    }
}
/**
 * Imports repository collaborators from Mongo into MySQL.
 *
 * <p>Steps:
 * <ol>
 *   <li>Build a {@code full_name -> id} lookup from the Mongo collection {@code repo}.</li>
 *   <li>For every document in {@code assignees} (fields {@code id} and {@code fn}), insert a
 *       {@code (user_id, repo_id)} row into {@code collaborator} and count it for its repo.</li>
 *   <li>Write each repo's collaborator count to {@code repotest.collaborator}.</li>
 * </ol>
 *
 * <p>An assignee whose {@code fn} is not present in the {@code repo} collection is skipped with
 * a warning on stderr; previously this case aborted the whole import with a
 * {@code NullPointerException}. Everything runs in one transaction that is committed at the end
 * and rolled back if any step fails; all resources are closed on every exit path.
 *
 * @throws Exception if reading from Mongo, decoding a document or writing to MySQL fails
 */
public void getCollaborators() throws Exception {
    Connection connection = MysqlInfo.getMysqlConnection();
    try {
        connection.setAutoCommit(false);
        String conSql = "insert into collaborator(user_id,repo_id) values(?,?);";
        PreparedStatement conStmt = connection.prepareStatement(conSql);
        try {
            String repoSql = "update repotest set collaborator = ? where id = ?";
            PreparedStatement repoStmt = connection.prepareStatement(repoSql);
            try {
                MongoClient mongoClient = new MongoClient(MongoInfo.getMongoServerIp(), 27017);
                try {
                    MongoDatabase database = mongoClient.getDatabase("ghcrawlerV3");
                    JsonParser parser = new JsonParser();

                    // Lookup table: repository full name -> repository id.
                    Map<String, Integer> repoMap = new HashMap<String, Integer>();
                    FindIterable<Document> repoIterable = database.getCollection("repo").find();
                    for (Document document : repoIterable) {
                        JsonObject repoJsonObject =
                                parser.parse(document.toJson()).getAsJsonObject();
                        int id = repoJsonObject.get("id").getAsInt();
                        String fullName = repoJsonObject.get("full_name").getAsString();
                        System.out.println(id);
                        repoMap.put(fullName, id);
                    }

                    // Insert one collaborator row per assignee and tally them per repository.
                    Map<Integer, Integer> collaboratorMap = new HashMap<Integer, Integer>();
                    FindIterable<Document> assigneeIterable =
                            database.getCollection("assignees").find();
                    for (Document document : assigneeIterable) {
                        JsonObject assignee = parser.parse(document.toJson()).getAsJsonObject();
                        int userId = assignee.get("id").getAsInt();
                        String repoName = assignee.get("fn").getAsString();

                        Integer repoId = repoMap.get(repoName);
                        if (repoId == null) {
                            // No matching repo document: there is no id to reference.
                            System.err.println(
                                    "Skipping collaborator " + userId + ": unknown repo " + repoName);
                            continue;
                        }

                        conStmt.setInt(1, userId);
                        conStmt.setInt(2, repoId);
                        conStmt.execute();

                        Integer seen = collaboratorMap.get(repoId);
                        collaboratorMap.put(repoId, seen == null ? 1 : seen + 1);
                    }

                    // Store the per-repository totals.
                    for (Map.Entry<Integer, Integer> entry : collaboratorMap.entrySet()) {
                        repoStmt.setInt(1, entry.getValue());
                        repoStmt.setInt(2, entry.getKey());
                        repoStmt.execute();
                    }

                    connection.commit();
                } catch (Exception e) {
                    // Undo the partially applied inserts/updates before propagating the failure.
                    connection.rollback();
                    throw e;
                } finally {
                    mongoClient.close();
                }
            } finally {
                repoStmt.close();
            }
        } finally {
            conStmt.close();
        }
    } finally {
        connection.close();
    }
}
public static void main(String[] args) { // TODO Auto-generated method stub String[] repo = { "c9s/App-gh", "joshsh/sesametools", "jbr/sibilant", "r1k0/kigen" }; // "pouchdb/pouchdb" Mongo mongo = new Mongo(MongoInfo.getMongoServerIp(), 27017); DB db = mongo.getDB("ghcrawlerV3"); DBCollection pullcache = db.getCollection("pullcacheB"); DBCollection issuecache = db.getCollection("issuecacheB"); DBCollection pulls = db.getCollection("pullscp"); DBCollection issues = db.getCollection("issuescp"); for (int i = 0; i < repo.length; i++) { pullcache.drop(); issuecache.drop(); PullCrawlerB pullCrawler = new PullCrawlerB(); IssueCrawlerB issueCrawler = new IssueCrawlerB(); issueCrawler.crawlIssues(repo[i]); pullCrawler.crawlPulls(repo[i]); DBCursor issuecursor = issuecache.find(); issuecursor.addOption(com.mongodb.Bytes.QUERYOPTION_NOTIMEOUT); while (issuecursor.hasNext()) { issues.save(issuecursor.next()); } issuecursor.close(); DBCursor pullcursor = pullcache.find(); pullcursor.addOption(com.mongodb.Bytes.QUERYOPTION_NOTIMEOUT); while (pullcursor.hasNext()) { pulls.save(pullcursor.next()); } pullcursor.close(); } }
public void analysisClasses() throws Exception { // Map<String,Integer> wordMap = new HashMap<String, Integer>(); // String sql = "select description from repotest"; // Connection connection = MysqlInfo.getMysqlConnection(); // PreparedStatement stmt = connection.prepareStatement(sql); // ResultSet resultSet = stmt.executeQuery(); // while (resultSet.next()) { // String description = resultSet.getString("description"); // String[] items = description.split(" "); // for (String item : items) { // if(!wordMap.containsKey(item)){ // wordMap.put(item, 1); // }else{ // wordMap.put(item, wordMap.get(item)+1); // System.out.println(item+"---------------"); // } // } // } Mongo mongo = new Mongo(MongoInfo.getMongoServerIp(), 27017); DB db = mongo.getDB("ghcrawlerV3"); DBCollection repo = db.getCollection("repository"); DBCursor repos = repo.find(); repos.addOption(com.mongodb.Bytes.QUERYOPTION_NOTIMEOUT); HashMap<String, Integer> map = new HashMap<String, Integer>(); while (repos.hasNext()) { DBObject object = repos.next(); if (object.get("description") != null) { // System.out.println(object.get("description").toString().replaceAll("[^a-zA-Z'0-9]", " // ").replaceAll("\\s+", " ")); String description = object .get("description") .toString() .replaceAll("[^a-zA-Z0-9]", " ") .replaceAll("\\s+", " "); for (int i = 0; i < description.split(" ").length; i++) { String word = description.split(" ")[i].toLowerCase(); if (map.containsKey(word)) { map.put(word, map.get(word) + 1); } else { map.put(word, 1); } } } } Connection connection = MysqlInfo.getMysqlConnection(); String sqlInsert = "replace into word values(?,?)"; Set<String> words = map.keySet(); connection.setAutoCommit(false); for (String word : words) { int count = map.get(word); if (count < 2) { continue; } PreparedStatement stmt = connection.prepareStatement(sqlInsert); stmt.setString(1, word); stmt.setInt(2, count); stmt.execute(); stmt.close(); } connection.commit(); connection.close(); }
public void analyseLanguage() throws Exception { // get mysql connection Connection connection = MysqlInfo.getMysqlConnection(); connection.setAutoCommit(false); String lanSql = "insert into language(repo_id,language,count) values(?,?,?);"; PreparedStatement lanStmt = connection.prepareStatement(lanSql); String repoSql = "update repotest set language = ? where id = ?"; PreparedStatement repoStmt = connection.prepareStatement(repoSql); // get repos from mongo MongoClient mongoClient = new MongoClient(MongoInfo.getMongoServerIp(), 27017); MongoDatabase database = mongoClient.getDatabase("ghcrawlerV3"); FindIterable<Document> repoIterable = database.getCollection("repo").find(); JsonParser parser = new JsonParser(); Map<String, Integer> repoMap = new HashMap<String, Integer>(); for (Document document : repoIterable) { String json = document.toJson(); JsonObject repoJsonObject = parser.parse(json).getAsJsonObject(); int id = repoJsonObject.get("id").getAsInt(); String full_name = repoJsonObject.get("full_name").getAsString(); System.out.println(id); repoMap.put(full_name, id); } Map<Integer, String> languageMap = new HashMap<Integer, String>(); // the most language line of each repo Map<Integer, Integer> lanNumMap = new HashMap<Integer, Integer>(); FindIterable<Document> collaboratorIterable = database.getCollection("languages").find(); for (Document document : collaboratorIterable) { String json = document.toJson(); String[] items = json.split(",")[1].split(":"); String language = items[0].trim().replaceAll("\"", ""); int num = Integer.parseInt(items[1].trim()); System.out.println(language + "\t" + num); JsonObject lanJsonObject = parser.parse(json).getAsJsonObject(); String repoName = lanJsonObject.get("fn").getAsString(); int repo_id = repoMap.get(repoName); if (lanNumMap.containsKey(repo_id)) { if (num >= lanNumMap.get(repo_id)) { languageMap.put(repo_id, language); lanNumMap.put(repo_id, num); } } else { languageMap.put(repo_id, language); lanNumMap.put(repo_id, 
num); } lanStmt.setInt(1, repo_id); lanStmt.setString(2, language); lanStmt.setInt(3, num); lanStmt.execute(); } Set<Integer> keySet = languageMap.keySet(); for (Integer repoId : keySet) { String language = languageMap.get(repoId); repoStmt.setString(1, language); repoStmt.setInt(2, repoId); repoStmt.execute(); } mongoClient.close(); connection.commit(); lanStmt.close(); repoStmt.close(); connection.close(); }
/** * fetch the repo info from mongo to mysql * * @throws Exception */ public void getRepo() throws Exception { // fetch from mongo MongoClient mongoClient = new MongoClient(MongoInfo.getMongoServerIp(), 27017); MongoDatabase database = mongoClient.getDatabase("ghcrawlerV3"); FindIterable<Document> repoIterable = database.getCollection("repository").find(); // get mysql connection Connection connection = MysqlInfo.getMysqlConnection(); // refresh update time String updateSql = "update updatetime set repo_update_time = ?"; PreparedStatement updateStmt = connection.prepareStatement(updateSql); Date time = Calendar.getInstance().getTime(); SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); updateStmt.setString(1, sdf.format(time)); updateStmt.execute(); connection.setAutoCommit(false); String sql = "replace into repotest(id,full_name,description,fork,owner_id,owner_name,owner_type,create_time,push_time,update_time,stargazers,subscribers,fork_num,size,hot,mature,popular,nb,gitclone_url,github_url,language) values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?);"; // String sql = "replace into // repotest(id,full_name,description,fork,owner_id,owner_name,owner_type,create_time,push_time,update_time,stargazers,subscribers,fork_num,size,hot,mature,popular,nb) values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?);"; PreparedStatement stmt = connection.prepareStatement(sql); JsonParser parser = new JsonParser(); for (Document document : repoIterable) { String json = document.toJson(); JsonObject repoJsonObject = parser.parse(json).getAsJsonObject(); int id = repoJsonObject.get("id").getAsInt(); System.out.println(id); stmt.setInt(1, id); String full_Name = repoJsonObject.get("full_name").getAsString(); System.out.println(full_Name); stmt.setString(2, full_Name); String description = ""; if (repoJsonObject.has("description") && !repoJsonObject.get("description").isJsonNull()) { description = repoJsonObject.get("description").getAsString(); } stmt.setString(3, description); 
boolean fork = repoJsonObject.get("fork").getAsBoolean(); int forkNum = fork ? 1 : 0; stmt.setInt(4, forkNum); int owner_id = repoJsonObject.get("owner").getAsJsonObject().get("id").getAsInt(); stmt.setInt(5, owner_id); String[] items = full_Name.split("/"); String owner_name = items[0]; stmt.setString(6, owner_name); String ownerType = repoJsonObject.get("owner").getAsJsonObject().get("type").getAsString(); int ot_num = 1; if (ownerType.equals("Organization")) { ot_num = 2; } stmt.setInt(7, ot_num); String createTime = repoJsonObject.get("created_at").getAsString(); stmt.setString(8, createTime); String pushTime = ""; if (repoJsonObject.has("pushed_at") && !repoJsonObject.get("pushed_at").isJsonNull()) { pushTime = repoJsonObject.get("pushed_at").getAsString(); } stmt.setString(9, pushTime); String updateTime = repoJsonObject.get("updated_at").getAsString(); stmt.setString(10, updateTime); int starCount = repoJsonObject.get("stargazers_count").getAsInt(); stmt.setInt(11, starCount); int subscriber = repoJsonObject.get("subscribers_count").getAsInt(); stmt.setInt(12, subscriber); int forksCount = repoJsonObject.get("forks_count").getAsInt(); stmt.setInt(13, forksCount); int size = repoJsonObject.get("size").getAsInt(); stmt.setInt(14, size); int hot = (int) (Math.log10(starCount) * 2.5); if (hot > 10) { hot = 10; } stmt.setInt(15, hot); int mature = (int) (Math.log10(forksCount) * 2.5); if (mature > 10) { mature = 10; } stmt.setInt(16, mature); int popular = (int) (Math.log10(subscriber) * 2.5); if (popular > 10) { popular = 10; } stmt.setInt(17, popular); int nb = (int) (Math.log10(size) * 2.5); if (nb > 10) { nb = 10; } stmt.setInt(18, nb); String gitclone_url = repoJsonObject.get("clone_url").getAsString(); stmt.setString(19, gitclone_url); String github_url = repoJsonObject.get("html_url").getAsString(); stmt.setString(20, github_url); String language = "unknown"; if (!repoJsonObject.get("language").isJsonNull()) { language = 
repoJsonObject.get("language").getAsString(); } stmt.setString(21, language); stmt.execute(); } connection.commit(); stmt.close(); connection.close(); mongoClient.close(); }