/** * 提取网页中所有的IssueComment元素 * * @param source */ private List<IssueCommentEvent> processComment( NodeList nodeList, List<IssueCommentEvent> icList) { SimpleNodeIterator sni = nodeList.elements(); while (sni.hasMoreNodes()) { Node node = sni.nextNode(); if (node.getText().matches("div id=\"issuecomment-.*\".*+")) { IssueCommentEvent i = new IssueCommentEvent(); // TODO 解析comment工作 Node actorNode = DownloadUtil.getSomeChild(node, "class=\"author\""); i.setActor(actorNode.toPlainTextString()); Node contentNode = DownloadUtil.getSomeChild(node, "div class=\"comment-body"); i.setCommentBody(contentNode.toPlainTextString()); Node timeNode = DownloadUtil.getSomeChild(node, "datetime"); Pattern pattern = Pattern.compile("datetime=\".*\""); Matcher matcher = pattern.matcher(timeNode.getText()); if (matcher.find()) { String time = matcher.group().split("\"")[1]; i.setCreatedAt(time); System.out.println(time); } icList.add(i); } else { // 得到该节点的子节点列表 NodeList childList = node.getChildren(); // 孩子节点为空,说明是值节点 if (null != childList) { // 如果孩子结点不为空则递归调用 processComment(childList, icList); } } } return icList; }
/**
 * Parses the pull-request page at {@code url} into event objects and stores them through
 * the Hibernate {@code session}.
 *
 * <p>The page is downloaded and parsed first. If that fails, or if the artifact id derived
 * from the page differs from {@code url}, a message is printed and nothing is persisted.
 * Otherwise the pull-request events and issue comments found on the page are tagged with
 * the shared metadata (artifact id, page URL, source type {@code "net"}, repository) and
 * saved in a single transaction. For every pull-request event whose action is
 * {@code "open"} an {@link ArtifactOwner} row is saved as well.
 *
 * <p>If saving or committing fails, the transaction is rolled back and the original
 * exception is rethrown.
 *
 * @param url address of the pull-request page to process
 */
public void getDataFromPullUrl(String url) {
    NodeList nodeList;
    String artifactId;
    try {
        String homePage = htmlAnalyzer.getResource(url);
        nodeList = htmlAnalyzer.getNodeListByHtmlPage(homePage);
        artifactId = processArtifactId(homePage);
        if (!artifactId.equals(url)) {
            System.out.println("artifact 已经转换为其他形式");
            return;
        }
    } catch (Exception e) {
        System.out.println("出现异常");
        // Keep the cause visible instead of discarding it.
        System.err.println("getDataFromPullUrl failed for " + url + ": " + e);
        return;
    }

    List<PullRequestEvent> pList = new ArrayList<PullRequestEvent>();
    List<IssueCommentEvent> icList = new ArrayList<IssueCommentEvent>();
    // prList and psList are never filled in this method: review-comment parsing is
    // disabled below and nothing here produces push events. Their loops are kept so that
    // enabling those parsers later needs no further changes.
    List<PullRequestReviewCommentEvent> prList = new ArrayList<PullRequestReviewCommentEvent>();
    List<PushEvent> psList = new ArrayList<PushEvent>();

    /*
     * Processing flow. A complete pull request roughly consists of:
     *   1. a pull request is opened;
     *   2. the owner or collaborators comment on the code;
     *   3. the owner reviews the code and comments on specific parts of it;
     *   4. the submitter revises the code;
     *   5. the owner of the main branch accepts the code and merges (pushes) it.
     */
    pList = this.processOpenPull(nodeList, pList);
    pList = this.processClosePull(nodeList, pList);
    pList = this.processMileStone(nodeList, pList);
    pList = this.processRemoveMileStone(nodeList, pList);
    pList = this.processReference(nodeList, pList);
    pList = this.processLabled(nodeList, pList);
    pList = this.processUnLabled(nodeList, pList);
    icList = this.processComment(nodeList, icList);
    /*prList = this.processPullRequestReviewComment(nodeList, prList);*/
    pList = this.processAssigned(nodeList, pList);
    pList = this.processUnassigned(nodeList, pList);

    // Fill in the metadata shared by all objects: artifact id, page URL, source type
    // ("net") and the repository of the page.
    for (IssueCommentEvent i : icList) {
        i.setArtifactId(artifactId);
        i.setHtmlUrl(url);
        i.setSourceType("net");
        i.setRepo(getRepo());
    }
    for (PullRequestEvent p : pList) {
        p.setArtifactId(artifactId);
        p.setPullrequestHtmlUrl(url);
        p.setSourceType("net");
        p.setRepo(getRepo());
    }
    for (PullRequestReviewCommentEvent pr : prList) {
        pr.setArtifactId(artifactId);
        pr.setHtmlUrl(url);
        pr.setSourceType("net");
        pr.setRepo(getRepo());
    }
    for (PushEvent ps : psList) {
        ps.setArtifactId(artifactId);
        ps.setHtmlUrl(url);
        ps.setSourceType("net");
        ps.setRepo(getRepo());
    }

    // Persist everything in one transaction.
    session.beginTransaction();
    try {
        for (IssueCommentEvent ic : icList) {
            session.save(ic);
        }
        for (PullRequestEvent p : pList) {
            session.save(p);
            // Constant-first comparison: an event without an action is simply not "open".
            if ("open".equals(p.getAction())) {
                ArtifactOwner artifactOwner = new ArtifactOwner();
                artifactOwner.setArtifactId(artifactId);
                artifactOwner.setOwner(p.getActor());
                session.save(artifactOwner);
            }
        }
        for (PullRequestReviewCommentEvent pr : prList) {
            session.save(pr);
        }
        for (PushEvent ps : psList) {
            session.save(ps);
        }
        session.getTransaction().commit();
    } catch (RuntimeException e) {
        // Do not leave a half-finished transaction open on the session.
        try {
            session.getTransaction().rollback();
        } catch (RuntimeException rollbackFailure) {
            // Report the rollback problem but keep the original failure as the one thrown.
            System.err.println("Rollback failed for " + url + ": " + rollbackFailure);
        }
        throw e;
    }
}