Ejemplo n.º 1
0
 /**
  * Recursively collects the issue comments found in a parsed page.
  *
  * <p>A node counts as a comment when its tag text matches {@code div id="issuecomment-..."}.
  * Such a node is converted to an {@link IssueCommentEvent} and is not searched any deeper; every
  * other node has its children scanned recursively.
  *
  * <p>A missing author or body child yields an empty string, and a missing or malformed
  * {@code datetime} attribute leaves the creation time unset, so one incomplete comment does not
  * abort the whole scan.
  *
  * @param nodeList nodes to scan
  * @param icList accumulator that receives one event per comment node
  * @return the same {@code icList} instance that was passed in
  */
 private List<IssueCommentEvent> processComment(
     NodeList nodeList, List<IssueCommentEvent> icList) {
   SimpleNodeIterator sni = nodeList.elements();
   while (sni.hasMoreNodes()) {
     Node node = sni.nextNode();
     if (node.getText().matches("div id=\"issuecomment-.*\".*+")) {
       IssueCommentEvent i = new IssueCommentEvent();
       // NOTE(review): getSomeChild presumably returns null when no child matches - confirm.
       Node actorNode = DownloadUtil.getSomeChild(node, "class=\"author\"");
       i.setActor(actorNode == null ? "" : actorNode.toPlainTextString());
       Node contentNode = DownloadUtil.getSomeChild(node, "div class=\"comment-body");
       i.setCommentBody(contentNode == null ? "" : contentNode.toPlainTextString());
       Node timeNode = DownloadUtil.getSomeChild(node, "datetime");
       if (timeNode != null) {
         Pattern pattern = Pattern.compile("datetime=\".*\"");
         Matcher matcher = pattern.matcher(timeNode.getText());
         if (matcher.find()) {
           // The attribute value is the text between the first pair of double quotes.
           String[] quoted = matcher.group().split("\"");
           if (quoted.length > 1) {
             String time = quoted[1];
             i.setCreatedAt(time);
             System.out.println(time);
           }
         }
       }
       icList.add(i);
     } else {
       // A null child list means the node is a leaf; otherwise descend into it.
       NodeList childList = node.getChildren();
       if (null != childList) {
         processComment(childList, icList);
       }
     }
   }
   return icList;
 }
Ejemplo n.º 2
0
  /**
   * Recursively collects the "open" events of a pull request page.
   *
   * <p>A node whose tag text contains {@code div id="issue-} is turned into a
   * {@link PullRequestEvent} with action {@code "open"} and is not searched any deeper; all other
   * nodes have their children scanned recursively.
   *
   * <p>A missing body or author child yields an empty string, and a missing or malformed
   * {@code datetime} attribute leaves the creation time unset.
   *
   * @param nodeList nodes to scan
   * @param pList accumulator that receives one event per matching node
   * @return the same {@code pList} instance that was passed in
   */
  public List<PullRequestEvent> processOpenPull(NodeList nodeList, List<PullRequestEvent> pList) {
    SimpleNodeIterator sni = nodeList.elements();
    while (sni.hasMoreNodes()) {
      Node node = sni.nextNode();
      if (node.getText().contains("div id=\"issue-")) {
        PullRequestEvent pullRequestEvent = new PullRequestEvent();
        pullRequestEvent.setAction("open");
        // NOTE(review): getSomeChild presumably returns null when no child matches - confirm.
        Node commentNode = DownloadUtil.getSomeChild(node, "div class=\"comment-body");
        pullRequestEvent.setBody(commentNode == null ? "" : commentNode.toPlainTextString());
        Node actorNode = DownloadUtil.getSomeChild(node, "class=\"author");
        pullRequestEvent.setActor(actorNode == null ? "" : actorNode.toPlainTextString());
        Node timeNode = DownloadUtil.getSomeChild(node, "datetime");
        if (timeNode != null) {
          Pattern pattern = Pattern.compile("datetime=\".*\"");
          Matcher matcher = pattern.matcher(timeNode.getText());
          if (matcher.find()) {
            // The attribute value is the text between the first pair of double quotes.
            String[] quoted = matcher.group().split("\"");
            if (quoted.length > 1) {
              pullRequestEvent.setCreatedAt(quoted[1]);
            }
          }
        }
        pList.add(pullRequestEvent);
      } else {
        // A null child list means the node is a leaf; otherwise descend into it.
        NodeList childList = node.getChildren();
        if (null != childList) {
          processOpenPull(childList, pList);
        }
      }
    }
    return pList;
  }
Ejemplo n.º 3
0
  /**
   * Recursively collects the review comments that belong to one diff discussion.
   *
   * <p>A node whose tag text contains {@code div id="discussion_r} is turned into a
   * {@link PullRequestReviewCommentEvent} tagged with {@code discussionId} and is not searched any
   * deeper; all other nodes have their children scanned recursively.
   *
   * <p>A missing author or body child yields an empty string, and a missing or malformed
   * {@code datetime} attribute leaves the creation time unset.
   *
   * @param nodeList nodes to scan
   * @param prList accumulator that receives one event per review comment
   * @param discussionId identifier stored on every event produced by this call
   * @return the same {@code prList} instance that was passed in
   */
  public List<PullRequestReviewCommentEvent> processSubPullRequestReviewComment(
      NodeList nodeList, List<PullRequestReviewCommentEvent> prList, String discussionId) {
    SimpleNodeIterator sni2 = nodeList.elements();
    while (sni2.hasMoreNodes()) {
      Node node2 = sni2.nextNode();
      if (node2.getText().contains("div id=\"discussion_r")) {
        PullRequestReviewCommentEvent p = new PullRequestReviewCommentEvent();
        p.setDiscussionId(discussionId);

        // NOTE(review): getSomeChild presumably returns null when no child matches - confirm.
        Node actorNode = DownloadUtil.getSomeChild(node2, "class=\"author\"");
        String actor = actorNode == null ? "" : actorNode.toPlainTextString();
        p.setActor(actor);
        System.out.println(actor);

        Node contentNode = DownloadUtil.getSomeChild(node2, "div class=\"comment-body");
        String body = contentNode == null ? "" : contentNode.toPlainTextString();
        p.setCommentBody(body);
        System.out.println(body.trim());

        Node timeNode = DownloadUtil.getSomeChild(node2, "datetime");
        if (timeNode != null) {
          Pattern pattern = Pattern.compile("datetime=\".*\"");
          Matcher matcher = pattern.matcher(timeNode.getText());
          if (matcher.find()) {
            // The attribute value is the text between the first pair of double quotes.
            String[] quoted = matcher.group().split("\"");
            if (quoted.length > 1) {
              p.setCreatedAt(quoted[1]);
            }
          }
        }
        prList.add(p);
      } else {
        // A null child list means the node is a leaf; otherwise descend into it.
        NodeList childList = node2.getChildren();
        if (null != childList) {
          processSubPullRequestReviewComment(childList, prList, discussionId);
        }
      }
    }
    return prList;
  }
Ejemplo n.º 4
0
  /**
   * Recursively collects the branch-deletion events of a page.
   *
   * <p>A node whose tag text contains {@code discussion-item-head_ref_deleted} is turned into a
   * {@link DeleteEvent} and is not searched any deeper; all other nodes have their children
   * scanned recursively.
   *
   * <p>The deleted ref is read from the first quoted value of the {@code span title="..."} child.
   * A missing ref or timestamp is left unset and a missing author yields an empty string, so one
   * incomplete entry does not abort the whole scan.
   *
   * @param nodeList nodes to scan
   * @param dList accumulator that receives one event per deletion entry
   * @return the same {@code dList} instance that was passed in
   */
  public List<DeleteEvent> processDelete(NodeList nodeList, List<DeleteEvent> dList) {
    SimpleNodeIterator sni = nodeList.elements();
    while (sni.hasMoreNodes()) {
      Node node = sni.nextNode();
      if (node.getText().contains("discussion-item-head_ref_deleted")) {
        DeleteEvent d = new DeleteEvent();
        // NOTE(review): getSomeChild presumably returns null when no child matches - confirm.
        Node deleteNode = DownloadUtil.getSomeChild(node, "span title=\"");
        if (deleteNode != null) {
          // An empty title attribute produces no second element, hence the length check.
          String[] quoted = deleteNode.getText().split("\"");
          if (quoted.length > 1) {
            d.setRef(quoted[1]);
            System.out.println(quoted[1]);
          }
        }
        Node actorNode = DownloadUtil.getSomeChild(node, "class=\"author\"");
        String actor = actorNode == null ? "" : actorNode.toPlainTextString();
        d.setActor(actor);
        System.out.println(actor);
        Node timeNode = DownloadUtil.getSomeChild(node, "datetime");
        if (timeNode != null) {
          Pattern pattern = Pattern.compile("datetime=\".*\"");
          Matcher matcher = pattern.matcher(timeNode.getText());
          if (matcher.find()) {
            // The attribute value is the text between the first pair of double quotes.
            String[] quoted = matcher.group().split("\"");
            if (quoted.length > 1) {
              d.setDeleteAt(quoted[1]);
            }
          }
        }
        dList.add(d);
      } else {
        // A null child list means the node is a leaf; otherwise descend into it.
        NodeList childList = node.getChildren();
        if (null != childList) {
          processDelete(childList, dList);
        }
      }
    }

    return dList;
  }
Ejemplo n.º 5
0
  /** Checks that a link wrapping an image and some text exposes both as typed child nodes. */
  public void testScan() throws ParserException {
    String html = "<A HREF=\"mytest.html\"><IMG SRC=\"abcd.jpg\">Hello World</A>";
    createParser(html, "http://www.yahoo.com");
    Tag[] prototypes = {new LinkTag(), new ImageTag()};
    parser.setNodeFactory(new PrototypicalNodeFactory(prototypes));
    parseAndAssertNodeCount(1);
    assertTrue("Node should be a link node", node[0] instanceof LinkTag);

    // Copy the link's children into an array so they can be checked by position.
    LinkTag link = (LinkTag) node[0];
    Node[] children = new Node[10];
    int count = 0;
    SimpleNodeIterator it = link.children();
    while (it.hasMoreNodes()) {
      children[count] = it.nextNode();
      count++;
    }
    assertEquals("Number of data nodes", Integer.valueOf(2), Integer.valueOf(count));
    assertTrue("First data node should be an Image Node", children[0] instanceof ImageTag);
    assertTrue("Second data node shouls be a String Node", children[1] instanceof Text);

    // The image URL must be resolved against the page URL; the text must be untouched.
    ImageTag image = (ImageTag) children[0];
    assertEquals("Image URL", "http://www.yahoo.com/abcd.jpg", image.getImageURL());
    Text text = (Text) children[1];
    assertEquals("String Contents", "Hello World", text.getText());
  }
Ejemplo n.º 6
0
 /**
  * Checks the link URL, link text and the five child nodes (image, font, b, /b, /font) of a
  * banner link whose image tag ends with a dangling {@code align=} attribute.
  */
 public void testLinkDataContents() throws ParserException {
   String html =
       "<a href=\"http://transfer.go.com/cgi/atransfer.pl?goto=http://www.signs.movies.com&name=114332&srvc=nws&context=283&guid=4AD5723D-C802-4310-A388-0B24E1A79689\" target=\"_new\"><img src=\"http://ad.abcnews.com/ad/sponsors/buena_vista_pictures/bvpi-ban0003.gif\" width=468 height=60 border=\"0\" alt=\"See Signs in Theaters 8-2 - Starring Mel Gibson\" align=><font face=\"verdana,arial,helvetica\" SIZE=\"1\"><b></b></font></a>";
   createParser(html, "http://transfer.go.com");
   Tag[] prototypes = {new LinkTag(), new ImageTag()};
   parser.setNodeFactory(new PrototypicalNodeFactory(prototypes));
   parseAndAssertNodeCount(1);
   assertTrue("Node 0 should be a link tag", node[0] instanceof LinkTag);

   LinkTag link = (LinkTag) node[0];
   assertEquals(
       "Link URL",
       "http://transfer.go.com/cgi/atransfer.pl?goto=http://www.signs.movies.com&name=114332&srvc=nws&context=283&guid=4AD5723D-C802-4310-A388-0B24E1A79689",
       link.getLink());
   assertEquals("Link Text", "", link.getLinkText());

   // Copy the link's children into an array so they can be checked by position.
   Node[] children = new Node[10];
   int count = 0;
   SimpleNodeIterator it = link.children();
   while (it.hasMoreNodes()) {
     children[count] = it.nextNode();
     count++;
   }
   assertEquals("There should be 5 contained nodes in the link tag", 5, count);

   // Child 0: the banner image and its attributes.
   assertTrue("First contained node should be an image tag", children[0] instanceof ImageTag);
   ImageTag image = (ImageTag) children[0];
   assertEquals(
       "Image Location",
       "http://ad.abcnews.com/ad/sponsors/buena_vista_pictures/bvpi-ban0003.gif",
       image.getImageURL());
   assertEquals("Image Height", "60", image.getAttribute("HEIGHT"));
   assertEquals("Image Width", "468", image.getAttribute("WIDTH"));
   assertEquals("Image Border", "0", image.getAttribute("BORDER"));
   assertEquals(
       "Image Alt", "See Signs in Theaters 8-2 - Starring Mel Gibson", image.getAttribute("ALT"));

   // Children 1 and 2: the opening font and b tags.
   assertTrue("Second contained node should be Tag", children[1] instanceof Tag);
   Tag fontOpen = (Tag) children[1];
   assertEquals(
       "Tag Contents", "font face=\"verdana,arial,helvetica\" SIZE=\"1\"", fontOpen.getText());
   assertTrue("Third contained node should be Tag", children[2] instanceof Tag);
   Tag boldOpen = (Tag) children[2];
   assertEquals("Tag Contents", "b", boldOpen.getText());

   // Children 3 and 4: the matching end tags, innermost first.
   assertTrue("Fourth contained node should be a Tag", children[3] instanceof Tag);
   Tag boldClose = (Tag) children[3];
   assertTrue("Fourth contained node should be an EndTag", boldClose.isEndTag());
   assertEquals("Fourth Tag contents", "/b", boldClose.getText());
   assertTrue("Fifth contained node should be a Tag", children[4] instanceof Tag);
   Tag fontClose = (Tag) children[4];
   assertTrue("Fifth contained node should be an EndTag", fontClose.isEndTag());
   assertEquals("Fifth Tag contents", "/font", fontClose.getText());
 }
Ejemplo n.º 7
0
  /**
   * Looks up a frame by name, ignoring case.
   *
   * <p>Both the requested name and each frame's name are converted to upper case with the
   * supplied locale before they are compared. The first matching frame wins.
   *
   * @param name The name of the frame to retrieve.
   * @param locale The locale to use when converting to uppercase.
   * @return The matching frame, or <code>null</code> if no frame has that name.
   */
  public FrameTag getFrame(String name, Locale locale) {
    String wanted = name.toUpperCase(locale);
    SimpleNodeIterator frames = getFrames().elements();
    while (frames.hasMoreNodes()) {
      Node candidate = frames.nextNode();
      if (!(candidate instanceof FrameTag)) {
        continue;
      }
      FrameTag frame = (FrameTag) candidate;
      if (frame.getFrameName().toUpperCase(locale).equals(wanted)) {
        return frame;
      }
    }
    return null;
  }
Ejemplo n.º 8
0
 /**
  * Recursively collects the review comments of a pull request page. Works together with
  * {@code processSubPullRequestReviewComment}, which parses the comments of one discussion.
  *
  * <p>A node whose tag text contains {@code div id="diff-for-comment-} marks a discussion: the
  * first quoted value in its tag text is used as the discussion id and its children are handed to
  * the helper. All other nodes have their children scanned recursively.
  *
  * @param nodeList nodes to scan
  * @param prList accumulator that receives the parsed review comments
  * @return the accumulator holding the parsed review comments
  */
 public List<PullRequestReviewCommentEvent> processPullRequestReviewComment(
     NodeList nodeList, List<PullRequestReviewCommentEvent> prList) {
   SimpleNodeIterator sni = nodeList.elements();
   while (sni.hasMoreNodes()) {
     Node node = sni.nextNode();
     if (node.getText().contains("div id=\"diff-for-comment-")) {
       String discussionId = node.getText().split("\"")[1];
       System.out.println(discussionId);
       NodeList subNodeList = node.getChildren();
       // getChildren() returns null for a node without children; the helper would fail on it.
       if (null != subNodeList) {
         prList = processSubPullRequestReviewComment(subNodeList, prList, discussionId);
       }
     } else {
       // A null child list means the node is a leaf; otherwise descend into it.
       NodeList childList = node.getChildren();
       if (null != childList) {
         processPullRequestReviewComment(childList, prList);
       }
     }
   }
   return prList;
 }
Ejemplo n.º 9
0
 /**
  * Walks the node tree depth-first and gathers the trimmed plain text of every leaf node.
  *
  * <p>A node is a leaf when {@code getChildren()} returns {@code null}. Leaves whose trimmed text
  * is empty are skipped.
  *
  * @param list nodes to walk
  * @param valueList accumulator that receives the non-empty leaf texts in document order
  */
 private void processNodeList(NodeList list, List<String> valueList) {
   for (SimpleNodeIterator it = list.elements(); it.hasMoreNodes(); ) {
     Node current = it.nextNode();
     NodeList children = current.getChildren();
     if (children != null) {
       // Inner node: keep descending.
       processNodeList(children, valueList);
       continue;
     }
     // Leaf node: keep its text unless it is blank after trimming.
     String text = current.toPlainTextString().trim();
     if (text.length() > 0) {
       valueList.add(text);
     }
   }
 }
Ejemplo n.º 10
0
  /**
   * Recursively collects the events in which another artifact references this pull request.
   *
   * <p>A node whose tag text contains {@code div class="discussion-item discussion-item-ref"} is
   * turned into a {@link PullRequestEvent} with action {@code "ref"} and is not searched any
   * deeper; all other nodes have their children scanned recursively.
   *
   * <p>The event body is the plain text of the {@code title-link} child, and the base ref is the
   * first {@code word/word/word/id} path found in that child's tag text. Missing children yield
   * empty strings; a missing or malformed {@code datetime} attribute leaves the creation time
   * unset.
   *
   * @param nodeList nodes to scan
   * @param pList accumulator that receives one event per reference entry
   * @return the same {@code pList} instance that was passed in
   */
  public List<PullRequestEvent> processReference(NodeList nodeList, List<PullRequestEvent> pList) {
    SimpleNodeIterator sni = nodeList.elements();
    while (sni.hasMoreNodes()) {
      Node node = sni.nextNode();
      if (node.getText().contains("div class=\"discussion-item discussion-item-ref\"")) {
        PullRequestEvent pullRequestEvent = new PullRequestEvent();
        pullRequestEvent.setAction("ref");
        Node anotherArtifactNode = DownloadUtil.getSomeChild(node, "class=\"title-link\"");
        pullRequestEvent.setBody(
            anotherArtifactNode == null ? "" : anotherArtifactNode.toPlainTextString());
        Pattern artifactPattern = Pattern.compile("[a-zA-Z]+/[a-zA-Z]+/[a-zA-Z]+/[a-z[0-9]]+");
        Matcher artifactMatcher =
            artifactPattern.matcher(
                anotherArtifactNode == null ? "" : anotherArtifactNode.getText());
        if (artifactMatcher.find()) {
          String anotherArtifact = artifactMatcher.group();
          pullRequestEvent.setPullrequestBaseRef(anotherArtifact);
          System.out.println(anotherArtifact);
        }
        Node actorNode = DownloadUtil.getSomeChild(node, "class=\"author\"");
        pullRequestEvent.setActor(actorNode == null ? "" : actorNode.toPlainTextString());
        // Guard the timestamp lookup the same way as the other child lookups above.
        Node timeNode = DownloadUtil.getSomeChild(node, "datetime");
        if (timeNode != null) {
          Pattern pattern = Pattern.compile("datetime=\".*\"");
          Matcher matcher = pattern.matcher(timeNode.getText());
          if (matcher.find()) {
            // The attribute value is the text between the first pair of double quotes.
            String[] quoted = matcher.group().split("\"");
            if (quoted.length > 1) {
              pullRequestEvent.setCreatedAt(quoted[1]);
            }
          }
        }
        pList.add(pullRequestEvent);
      } else {
        // A null child list means the node is a leaf; otherwise descend into it.
        NodeList childList = node.getChildren();
        if (null != childList) {
          processReference(childList, pList);
        }
      }
    }
    return pList;
  }
Ejemplo n.º 11
0
  /**
   * Recursively collects the "labeled" events of a pull request page.
   *
   * <p>A node whose tag text contains {@code class="discussion-item discussion-item-labeled"} is
   * turned into a {@link PullRequestEvent} with action {@code "labeled"} and is not searched any
   * deeper; all other nodes have their children scanned recursively.
   *
   * <p>The label names are the plain texts of the nodes returned by
   * {@code DownloadUtil.getLableList} for {@code style="color:}, joined with commas. A missing
   * author yields an empty string; a missing or malformed {@code datetime} attribute leaves the
   * creation time unset.
   *
   * @param nodeList nodes to scan
   * @param pList accumulator that receives one event per labeled entry
   * @return the same {@code pList} instance that was passed in
   */
  public List<PullRequestEvent> processLabled(NodeList nodeList, List<PullRequestEvent> pList) {
    SimpleNodeIterator sni = nodeList.elements();
    while (sni.hasMoreNodes()) {
      Node node = sni.nextNode();
      if (node.getText().contains("class=\"discussion-item discussion-item-labeled\"")) {
        PullRequestEvent pullRequestEvent = new PullRequestEvent();
        pullRequestEvent.setAction("labeled");
        List<Node> lableList = new ArrayList<Node>();
        lableList = DownloadUtil.getLableList(node, "style=\"color:", lableList);
        // Build the comma-separated label list without repeated string concatenation.
        StringBuilder joined = new StringBuilder();
        for (int i = 0; i < lableList.size(); i++) {
          if (i > 0) {
            joined.append(",");
          }
          joined.append(lableList.get(i).toPlainTextString());
        }
        String lables = joined.toString();
        System.out.println(lables);
        pullRequestEvent.setPullrequestBaseLabels(lables);
        // NOTE(review): getSomeChild presumably returns null when no child matches - confirm.
        Node actorNode = DownloadUtil.getSomeChild(node, "class=\"author\"");
        pullRequestEvent.setActor(actorNode == null ? "" : actorNode.toPlainTextString());
        Node timeNode = DownloadUtil.getSomeChild(node, "datetime");
        if (timeNode != null) {
          Pattern pattern = Pattern.compile("datetime=\".*\"");
          Matcher matcher = pattern.matcher(timeNode.getText());
          if (matcher.find()) {
            // The attribute value is the text between the first pair of double quotes.
            String[] quoted = matcher.group().split("\"");
            if (quoted.length > 1) {
              pullRequestEvent.setCreatedAt(quoted[1]);
            }
          }
        }
        pList.add(pullRequestEvent);
      } else {
        // A null child list means the node is a leaf; otherwise descend into it.
        NodeList childList = node.getChildren();
        if (null != childList) {
          processLabled(childList, pList);
        }
      }
    }
    return pList;
  }
Ejemplo n.º 12
0
  /**
   * Checks that an unterminated image tag inside a link is still parsed as the link's only child
   * and that its backslash-style source is resolved against the page URL.
   */
  public void testBadImageInLinkBug() throws ParserException {
    String html =
        "<a href=\"registration.asp?EventID=1272\"><img border=\"0\" src=\"\\images\\register.gif\"</a>";
    createParser(html, "http://www.fedpage.com/Event.asp?EventID=1272");
    parseAndAssertNodeCount(1);
    assertTrue("Node should be a HTMLLinkTag", node[0] instanceof LinkTag);

    // Copy the link's children into an array so they can be checked by position.
    LinkTag link = (LinkTag) node[0];
    Node[] children = new Node[10];
    int count = 0;
    SimpleNodeIterator it = link.children();
    while (it.hasMoreNodes()) {
      children[count] = it.nextNode();
      count++;
    }
    assertEquals("Number of contained internal nodes", 1, count);
    assertTrue(children[0] instanceof ImageTag);

    ImageTag image = (ImageTag) children[0];
    String expectedUrl = "http://www.fedpage.com/images\\register.gif";
    assertEquals("Image Tag Location", expectedUrl, image.getImageURL());
  }
Ejemplo n.º 13
0
  /**
   * Recursively collects the "unassigned" events of a pull request page.
   *
   * <p>A node whose tag text contains {@code class="discussion-item discussion-item-unassigned"}
   * is turned into a {@link PullRequestEvent} and is not searched any deeper; all other nodes
   * have their children scanned recursively.
   *
   * <p>The assignee is read from the {@code author} child and the actor from the
   * {@code discussion-item-entity} child; when the latter is absent the assignee is used as the
   * actor. A missing or malformed {@code datetime} attribute leaves the creation time unset.
   *
   * <p>NOTE(review): the earlier comment on this method said the person being unassigned is the
   * one named second in the entry - confirm that the assignee/actor mapping below matches the
   * page layout. The action is recorded as {@code "assigned"} although this method handles
   * unassignment; confirm whether {@code "unassigned"} was intended before changing it.
   *
   * @param nodeList nodes to scan
   * @param pList accumulator that receives one event per unassigned entry
   * @return the same {@code pList} instance that was passed in
   */
  private List<PullRequestEvent> processUnassigned(
      NodeList nodeList, List<PullRequestEvent> pList) {
    SimpleNodeIterator sni = nodeList.elements();
    while (sni.hasMoreNodes()) {
      Node node = sni.nextNode();
      if (node.getText().contains("class=\"discussion-item discussion-item-unassigned\"")) {
        PullRequestEvent pEvent = new PullRequestEvent();
        pEvent.setAction("assigned");
        Node assignedNode = DownloadUtil.getSomeChild(node, "class=\"author\"");
        String assignee = assignedNode == null ? "" : assignedNode.toPlainTextString();
        pEvent.setPullrequestAssgnee(assignee);
        Node actorNode = DownloadUtil.getSomeChild(node, "class=\"discussion-item-entity\"");
        // Resolve the actor once so the log line below cannot dereference a missing node.
        String actor = actorNode != null ? actorNode.toPlainTextString() : assignee;
        pEvent.setActor(actor);
        System.out.println(actor);
        Node timeNode = DownloadUtil.getSomeChild(node, "datetime");
        if (timeNode != null) {
          Pattern pattern = Pattern.compile("datetime=\".*\"");
          Matcher matcher = pattern.matcher(timeNode.getText());
          if (matcher.find()) {
            // The attribute value is the text between the first pair of double quotes.
            String[] quoted = matcher.group().split("\"");
            if (quoted.length > 1) {
              pEvent.setCreatedAt(quoted[1]);
            }
          }
        }
        pList.add(pEvent);
      } else {
        // A null child list means the node is a leaf; otherwise descend into it.
        NodeList childList = node.getChildren();
        if (null != childList) {
          processUnassigned(childList, pList);
        }
      }
    }
    return pList;
  }
Ejemplo n.º 14
0
  /**
   * Recursively collects the "milestone removed" events of a pull request page.
   *
   * <p>A node whose tag text contains
   * {@code div class="discussion-item discussion-item-demilestoned"} is turned into a
   * {@link PullRequestEvent} with action {@code "removeMilestone"} and is not searched any
   * deeper; all other nodes have their children scanned recursively.
   *
   * <p>The event body is the {@code word/word/word/...} path found in the tag text of the
   * {@code discussion-item-entity} child, cut at the first double quote. A missing entity child
   * leaves the body unset, a missing author yields an empty string, and a missing or malformed
   * {@code datetime} attribute leaves the creation time unset.
   *
   * @param nodeList nodes to scan
   * @param pList accumulator that receives one event per demilestoned entry
   * @return the same {@code pList} instance that was passed in
   */
  public List<PullRequestEvent> processRemoveMileStone(
      NodeList nodeList, List<PullRequestEvent> pList) {
    SimpleNodeIterator sni = nodeList.elements();
    while (sni.hasMoreNodes()) {
      Node node = sni.nextNode();
      if (node.getText().contains("div class=\"discussion-item discussion-item-demilestoned\"")) {
        PullRequestEvent p = new PullRequestEvent();
        p.setAction("removeMilestone");
        // NOTE(review): getSomeChild presumably returns null when no child matches - confirm.
        Node milestoneNode = DownloadUtil.getSomeChild(node, "class=\"discussion-item-entity\"");
        if (milestoneNode != null) {
          Pattern milestonePattern = Pattern.compile("[a-zA-Z]+/[a-zA-Z]+/[a-zA-Z]+/.*+");
          Matcher milestoneMatcher = milestonePattern.matcher(milestoneNode.getText());
          if (milestoneMatcher.find()) {
            // The match starts with letters, so the part before the first quote always exists.
            String milestone = milestoneMatcher.group().split("\"")[0];
            p.setBody(milestone);
          }
        }
        Node actorNode = DownloadUtil.getSomeChild(node, "class=\"author\"");
        p.setActor(actorNode == null ? "" : actorNode.toPlainTextString());
        Node timeNode = DownloadUtil.getSomeChild(node, "datetime");
        if (timeNode != null) {
          Pattern pattern = Pattern.compile("datetime=\".*\"");
          Matcher matcher = pattern.matcher(timeNode.getText());
          if (matcher.find()) {
            // The attribute value is the text between the first pair of double quotes.
            String[] quoted = matcher.group().split("\"");
            if (quoted.length > 1) {
              p.setCreatedAt(quoted[1]);
            }
          }
        }
        pList.add(p);
      } else {
        // A null child list means the node is a leaf; otherwise descend into it.
        NodeList childList = node.getChildren();
        if (null != childList) {
          processRemoveMileStone(childList, pList);
        }
      }
    }
    return pList;
  }
Ejemplo n.º 15
0
  /**
   * Output a string representing this object tag.
   *
   * <p>The result lists the object's attributes, then its parameters (or
   * {@code "No Params found."} when there are none), then any child nodes that are not
   * {@code PARAM} tags under a "Miscellaneous items" heading.
   *
   * @return A string showing the contents of the object tag.
   */
  public String toString() {
    StringBuffer ret = new StringBuffer(500);
    ret.append("Object Tag\n");
    ret.append("**********\n");
    ret.append("ClassId = ");
    ret.append(getObjectClassId());
    ret.append("\n");
    ret.append("CodeBase = ");
    ret.append(getObjectCodeBase());
    ret.append("\n");
    ret.append("CodeType = ");
    ret.append(getObjectCodeType());
    ret.append("\n");
    ret.append("Data = ");
    ret.append(getObjectData());
    ret.append("\n");
    ret.append("Height = ");
    ret.append(getObjectHeight());
    ret.append("\n");
    ret.append("Standby = ");
    ret.append(getObjectStandby());
    ret.append("\n");
    ret.append("Type = ");
    ret.append(getObjectType());
    ret.append("\n");
    ret.append("Width = ");
    ret.append(getObjectWidth());
    ret.append("\n");

    // An iterator is never null, so "no parameters" has to be detected on the map itself.
    HashMap parameters = getObjectParams();
    if (null == parameters || parameters.isEmpty()) {
      ret.append("No Params found.\n");
    } else {
      int cnt = 0;
      for (Iterator params = parameters.entrySet().iterator(); params.hasNext(); cnt++) {
        Map.Entry entry = (Map.Entry) params.next();
        String paramName = (String) entry.getKey();
        String paramValue = (String) entry.getValue();
        ret.append(cnt);
        ret.append(": Parameter name = ");
        ret.append(paramName);
        ret.append(", Parameter value = ");
        ret.append(paramValue);
        ret.append("\n");
      }
    }

    // Everything that is not a PARAM tag is reported as a miscellaneous item.
    boolean found = false;
    for (SimpleNodeIterator e = children(); e.hasMoreNodes(); ) {
      Node node = e.nextNode();
      if (node instanceof Tag && ((Tag) node).getTagName().equals("PARAM")) {
        continue;
      }
      if (!found) {
        ret.append("Miscellaneous items :\n");
      } else {
        ret.append(" ");
      }
      found = true;
      ret.append(node.toString());
    }
    if (found) {
      ret.append("\n");
    }
    ret.append("End of Object Tag\n");
    ret.append("*****************\n");

    return ret.toString();
  }
Ejemplo n.º 16
0
  /**
   * Downloads the page at {@code this.url} and fills this object's product fields from it.
   *
   * <p>Every composite tag of the page is inspected once. Depending on the tag type and its
   * {@code id} / {@code class} attributes the method sets {@code category}, {@code subCategory},
   * {@code brand}, {@code freight}, {@code pname}, {@code pid}, {@code weight}, {@code intro} and
   * {@code items}, and adds entries to {@code dimNames}.
   *
   * @throws IOException if the connection cannot be opened
   * @throws ParserException if the page cannot be parsed
   * @throws ParseException declared by the signature; presumably raised by {@code readScript}
   */
  private void scanPage() throws IOException, ParserException, ParseException {
    // The default-timeout properties must be in place before the connection is created;
    // setting them afterwards does not affect a connection that already exists.
    System.setProperty("sun.net.client.defaultConnectTimeout", "30000000");
    System.setProperty("sun.net.client.defaultReadTimeout", "30000000");

    URL u = new URL(this.url);
    HttpURLConnection conn = (HttpURLConnection) u.openConnection();
    Parser parser = new Parser(conn);
    parser.setEncoding("UTF-8");
    NodeFilter filter = new NodeClassFilter(CompositeTag.class);
    NodeList tags = parser.extractAllNodesThatMatch(filter);
    SimpleNodeIterator iter = tags.elements();

    while (iter.hasMoreNodes()) {
      CompositeTag tag = (CompositeTag) iter.nextNode();
      String id = tag.getAttribute("id");
      String cls = tag.getAttribute("class");

      if (tag instanceof LinkTag) {
        // Links are only interesting when they carry one of the known class markers.
        if (cls == null) {
          continue;
        }
        LinkTag lt = (LinkTag) tag;
        if (cls.startsWith("gae-click*Product-Page*Breadcrumb*Category")) {
          this.category = lt.getStringText();
        } else if (cls.startsWith("gae-click*Product-Page*Breadcrumb*Sub-Category")) {
          this.subCategory = lt.getStringText();
        } else if (cls.startsWith("gae-click*Product-Page*Breadcrumb*Brand")) {
          this.brand = lt.getStringText();
        } else if (cls.startsWith("gae-click*Product-Page*PrForm*Free-Shipping")) {
          this.freight = "Free Shipping!";
        } else if (cls.equalsIgnoreCase("link fn")) {
          this.pname = lt.getStringText();
        }
      } else if (tag instanceof LabelTag) {
        LabelTag lt = (LabelTag) tag;
        if ((id != null) && id.startsWith("label") && (cls != null) && cls.startsWith("d")) {
          // Keep the label text up to (not including) the first '(' and drop line breaks.
          String l = lt.getLabel();
          l = l.replace("\n", "");
          int idx = l.indexOf('(');
          if (idx > 0) {
            l = l.substring(0, idx);
          }
          this.dimNames.put(cls, l);
        }
      } else if (tag instanceof SelectTag) {
        // Nothing is extracted from select tags.
        continue;
      } else if (tag instanceof Span) {
        if ((id != null) && id.equalsIgnoreCase("sku")) {
          // The product id is whatever follows the first '#' in the SKU text.
          String sku = tag.getStringText();
          this.pid = sku.substring(sku.indexOf('#') + 1);
        }
      } else if (tag instanceof Bullet) {
        String text = ((Bullet) tag).getStringText().trim();
        if (text.startsWith("Weight")) {
          int idx = text.indexOf(":");
          this.weight = text.substring(idx + 1).trim();
        }
      } else if (tag instanceof Div) {
        if (cls != null && cls.equalsIgnoreCase("description")) {
          // Only read the description when the div really starts with a bullet list; a
          // differently structured page would otherwise fail on the cast.
          Node first = ((Div) tag).getChild(0);
          if (first instanceof BulletList) {
            StringBuilder sb = new StringBuilder();
            SimpleNodeIterator bls = ((BulletList) first).elements();
            while (bls.hasMoreNodes()) {
              Node n = bls.nextNode();
              if (n instanceof Bullet) {
                sb.append(((Bullet) n).getStringText());
              }
            }
            this.intro = sb.toString();
          }
        }
      } else if ((this.items == null) && (tag instanceof ScriptTag)) {
        // Only the first script tag seen while items is still unset is parsed.
        this.items = readScript((ScriptTag) tag);
      }
    }
  }