Beispiel #1
0
 /**
  * Cleans {@code text} with HtmlCleaner, evaluates {@code xpathStr} on the resulting tree and
  * collects one string per match.
  *
  * <p>A matched {@link TagNode} contributes its inner HTML; any other match contributes its
  * {@code toString()} value.
  *
  * @param text the markup to search
  * @return the collected strings (empty when nothing matched or the XPath evaluation failed), or
  *     {@code null} when the cleaner returned no root node
  */
 @Override
 public List<String> selectList(String text) {
   HtmlCleaner cleaner = new HtmlCleaner();
   TagNode root = cleaner.clean(text);
   if (root == null) {
     return null;
   }

   List<String> collected = new ArrayList<String>();
   Object[] hits;
   try {
     hits = root.evaluateXPath(xpathStr);
   } catch (XPatherException e) {
     e.printStackTrace();
     return collected;
   }
   if (hits == null) {
     return collected;
   }

   for (Object hit : hits) {
     String value = hit instanceof TagNode ? cleaner.getInnerHtml((TagNode) hit) : hit.toString();
     collected.add(value);
   }
   return collected;
 }
Beispiel #2
0
    /**
     * Appends an object-replacement character ({@code U+FFFC}) for the image referenced by {@code
     * node} and either spans it with the cached drawable or registers a loader callback for it.
     *
     * <p>The reference is taken from the first attribute that is present among {@code src}, {@code
     * href} and {@code xlink:href}. The cache is only used when {@code fakeImages} is off.
     *
     * @param node the tag being rendered
     * @param builder text builder that receives the placeholder character
     * @param start offset in {@code builder} where the tag's content begins
     * @param end not used by this handler
     */
    @Override
    public void handleTagNode(TagNode node, SpannableStringBuilder builder, int start, int end) {
      String src = null;
      for (String attribute : new String[] {"src", "href", "xlink:href"}) {
        src = node.getAttributeByName(attribute);
        if (src != null) {
          break;
        }
      }
      builder.append("\uFFFC");

      String resolvedHref = spine.resolveHref(src);
      boolean useCachedImage = imageCache.containsKey(resolvedHref) && !fakeImages;

      if (!useCachedImage) {
        LOG.debug("Loading href: " + resolvedHref);
        ImageCallback callback =
            new ImageCallback(resolvedHref, builder, start, builder.length(), fakeImages);
        loader.registerCallback(resolvedHref, callback);
        return;
      }

      Drawable cached = imageCache.get(resolvedHref);
      setImageSpan(builder, cached, start, builder.length());
      LOG.debug("Got cached href: " + resolvedHref);
    }
Beispiel #3
0
 /**
  * Inserts a hidden {@code input} element carrying the flow id as the first child of {@code
  * formNode}.
  *
  * @param flowID value written to the input's {@code value} attribute
  * @param formNode node that receives the new child at index 0
  */
 private void addFlowId(String flowID, TagNode formNode) {
   TagNode hiddenInput = new TagNode("input");
   // Attribute order is kept: name, type, value.
   hiddenInput.setAttribute("name", Constants.FLOW_ID_FIELD_NAME);
   hiddenInput.setAttribute("type", "hidden");
   hiddenInput.setAttribute("value", String.valueOf(flowID));
   formNode.insertChild(0, hiddenInput);
 }
  /**
   * Collects the attachment links found in the first {@code div} with class {@code MultiFile-list}
   * under {@code pNode} and appends them to {@code knouNoticeInfo.AttacheFile}.
   *
   * <p>Only direct child tags named {@code a} are considered. Each link's {@code href} is prefixed
   * with {@code http://ep.knou.ac.kr}. Nothing is added when the XPath evaluation fails or yields
   * no element.
   *
   * @param htmlSerializer not used by this method
   * @param pNode node the XPath expression is evaluated on
   * @param knouNoticeInfo notice record that receives one {@link KnouNoticeFileInfo} per link
   */
  protected void getFileAttache(
      SimpleHtmlSerializer htmlSerializer, TagNode pNode, KnouNoticeInfo knouNoticeInfo) {

    String expressionContent = "//div[@class=\"MultiFile-list\"]";
    Object[] myNodeBody;
    try {
      myNodeBody = pNode.evaluateXPath(expressionContent);
    } catch (XPatherException e) {
      // No result array exists in this case, so there is nothing to inspect below.
      e.printStackTrace();
      return;
    }

    if (myNodeBody == null || myNodeBody.length == 0 || !(myNodeBody[0] instanceof TagNode)) {
      return;
    }

    TagNode listNode = (TagNode) myNodeBody[0];
    // Original note (translated): "entry 0 is not needed, removed" -- the loop nevertheless
    // visits every child, starting at index 0.
    for (TagNode child : listNode.getChildTags()) {
      if (!"a".equals(child.getName().trim())) {
        continue;
      }
      KnouNoticeFileInfo attacheFileInfo = new KnouNoticeFileInfo();
      attacheFileInfo.href = "http://ep.knou.ac.kr" + child.getAttributeByName("href");
      attacheFileInfo.fileName = child.getText().toString();
      knouNoticeInfo.AttacheFile.add(attacheFileInfo);
    }
  }
  /**
   * Logs in through {@code trSession} when needed, downloads the page described by {@code
   * getFinalUrlParams()} and parses its {@code BODY} element into table elements.
   *
   * <p>A login is attempted when the session state is neither {@code LOGIN_OK} nor {@code
   * LOGIN_NOT_COMPLETED}. If the state is not {@code LOGIN_OK} afterwards, it is set to {@code
   * LOGIN_FAILED}.
   *
   * @param urls not used by this method
   * @return the parsed elements, or {@code null} when the login did not succeed, the request
   *     method is neither GET nor POST, no {@code BODY} element was found, or an {@link
   *     IOException} occurred
   */
  @Override
  protected List<TableElement> doInBackground(String... urls) {
    try {
      if (!trSession.getState().equals(LoginState.LOGIN_OK)
          && !trSession.getState().equals(LoginState.LOGIN_NOT_COMPLETED)) {
        trSession.processLogin();
      }
      if (trSession.getState().equals(LoginState.LOGIN_OK)) {
        TagNode result = null;
        if (getRequestMethod().equals(GET)) {
          result = DataProviderUtil.downloadGetUrl(trSession, getFinalUrlParams());
        }
        if (getRequestMethod().equals(POST)) {
          result = DataProviderUtil.downloadPostUrl(trSession, getFinalUrlParams(), null);
        }
        if (result != null) {
          TagNode body = result.findElementByName("BODY", true);
          if (body != null) {
            return HtmlParserUtils.parseHtml(body);
          }
        }

      } else {
        trSession.setState(LoginState.LOGIN_FAILED);
      }
    } catch (IOException e) {
      // Log.e throws on a null message and IOException.getMessage() may be null, so the message
      // is stringified; the throwable is passed along to keep the stack trace. The tag text is
      // left unchanged for anyone filtering on it.
      Log.e("Error on reading address.", String.valueOf(e.getMessage()), e);
    }
    return null;
  }
 /**
  * Returns the text of the first node that {@code xpath} selects in {@code html}.
  *
  * @param html markup to clean and search
  * @param xpath expression evaluated on the cleaned tree
  * @return the text content of the first match
  * @throws Exception if the expression cannot be evaluated, selects nothing, or the first match
  *     is not a {@link TagNode}
  */
 public String GetInnerTextByXpath(String html, String xpath) throws Exception {
   TagNode root = new HtmlCleaner().clean(html);
   Object firstMatch = root.evaluateXPath(xpath)[0];
   return ((TagNode) firstMatch).getText().toString();
 }
Beispiel #7
0
 /**
  * Tells whether {@code tagNode} carries the attribute {@code attName} with the value {@code
  * attValue}, compared with or without case sensitivity depending on {@code isCaseSensitive}.
  *
  * @param tagNode node to test
  * @return {@code false} when the node, the attribute name or the expected value is {@code null},
  *     or when the values differ
  */
 public boolean satisfy(TagNode tagNode) {
   if (tagNode == null || attName == null || attValue == null) {
     return false;
   }
   String actual = tagNode.getAttributeByName(attName);
   if (isCaseSensitive) {
     return attValue.equals(actual);
   }
   return attValue.equalsIgnoreCase(actual);
 }
 /**
  * Decorates the body tag: when the model is an explanation its message and message detail are
  * added as {@code data-message} / {@code data-messageDetail} attributes, and in every case the
  * script content for the visitor's script resource is appended as a child.
  *
  * @param tagName not used by this method
  * @param tagNode the body node to decorate
  */
 @Override
 protected void handleBody(String tagName, TagNode tagNode) {
   boolean modelIsExplanation = modelSerializer.isExplanation();
   if (modelIsExplanation) {
     Explanation details = modelSerializer.getObject(Explanation.class);
     String message = details.getMessage();
     tagNode.addAttribute("data-message", message);
     String messageDetail = details.getMessageDetail();
     tagNode.addAttribute("data-messageDetail", messageDetail);
   }
   // Disabled alternative: tagNode.addChild(modelSerializer.getPageModelScript());
   tagNode.addChild(modelSerializer.getScriptContent(visitor.getScriptResource()));
 }
 /**
  * Applies a single attribute action to {@code tagNode}.
  *
  * <p>{@code REMOVE} calls {@code removeFromTree()} on the node; {@code MODIFY} adds the attribute
  * named by the action with the action's replacement value. Any other action type does nothing.
  *
  * @param action the action to apply; its type selects the operation
  * @param tagNode the node the action is applied to
  */
 private void handleAction(TagAttributeAction action, TagNode tagNode) {
   switch (action.getType()) {
     case REMOVE:
       tagNode.removeFromTree();
       break;
     case MODIFY:
       tagNode.addAttribute(action.getName(), action.getReplace());
       break;
   }
 }
    /**
     * Looks for the login form in {@code html} and, when it is present, fills in the password and
     * user fields and submits the form through JavaScript URLs loaded into {@code webView}.
     *
     * @param html markup of the page to inspect
     * @return {@code true} if the page contains the login form, {@code false} otherwise
     */
    private boolean autenticaUsuario(String html) {
      HtmlCleaner cleaner = new HtmlCleaner();
      TagNode root = cleaner.clean(html);

      String formulario_name = context.getString(R.string.janusmob_formulario);
      String campo_usuario_name = context.getString(R.string.janusmob_campo_usuario);
      String campo_senha_name = context.getString(R.string.janusmob_campo_senha);

      TagNode loginform = null;
      for (TagNode node : root.getElementsByName("form", true)) {
        // Constant-first comparison: a form without a name attribute yields null here.
        if (formulario_name.equals(node.getAttributeByName("name"))) {
          loginform = node;
          break;
        }
      }

      if (loginform == null) {
        return false;
      }

      // Disabled alternative that posted the form data directly instead of scripting the page:
      //
      // StringBuffer postData = new StringBuffer(String.format(
      // "%s=%s&%s=%s", campo_usuario_name, usuario,
      // campo_senha_name, senha));
      //
      // for (TagNode node : loginform.getElementsByName("input", true)) {
      // if (!(node.getAttributeByName("name")
      // .equals(campo_usuario_name) || node.getAttributeByName(
      // "name").equals(campo_senha_name))) {
      // postData.append("&")
      // .append(node.getAttributeByName("name"))
      // .append("=")
      // .append(node.getAttributeByName("value"));
      // }
      // }
      //
      // webView.postUrl(
      // context.getString(R.string.janusmob_pagina_login),
      // EncodingUtils.getBytes(postData.toString(), "base64"));

      // NOTE(review): the values are inserted into the script without quoting or escaping, so
      // they are evaluated as JavaScript expressions. Confirm that senha/usuario arrive already
      // quoted; otherwise non-numeric values will not be assigned as strings.
      webView.loadUrl(
          String.format(
              "javascript:document.getElementById(\"%s\").value=%s;", campo_senha_name, senha));

      webView.loadUrl(
          String.format(
              "javascript:document.getElementById(\"%s\").value=%s;", campo_usuario_name, usuario));

      webView.loadUrl(
          String.format("javascript:document.getElementById('%s').submit();", formulario_name));

      return true;
    }
 /**
  * Extracts the login error message from {@code html}.
  *
  * @param html markup of the page to inspect
  * @return the text of the first {@code span} whose id equals the configured error-span id, or
  *     the default login error message from the resources when no such span exists
  */
 private String getMensagemErro(String html) {
   TagNode page = new HtmlCleaner().clean(html);
   String errorSpanId = context.getString(R.string.janusmob_span_erro_id);
   for (TagNode span : page.getElementsByName("span", true)) {
     String spanId = span.getAttributeByName("id");
     if (errorSpanId.equals(spanId)) {
       return span.getText().toString();
     }
   }
   return context.getString(R.string.janusmob_mensagem_erro_login);
 }
 /**
  * Tells whether {@code html} contains an {@code a} element whose id equals the configured logout
  * link id.
  *
  * @param html markup of the page to inspect
  * @return {@code true} when such a link is present, {@code false} otherwise
  */
 private boolean usuarioLogado(String html) {
   TagNode page = new HtmlCleaner().clean(html);
   String logoutLinkId = context.getString(R.string.janusmob_link_logout_id);
   TagNode[] links = page.getElementsByName("a", true);
   for (int i = 0; i < links.length; i++) {
     String linkId = links[i].getAttributeByName("id");
     if (logoutLinkId.equals(linkId)) {
       return true;
     }
   }
   return false;
 }
Beispiel #13
0
 /**
  * Adds {@code child} to this node's children.
  *
  * <p>{@code null} is ignored. A {@link List} is handed to {@code addChildren}. Anything else is
  * appended to {@code children}, and a {@link TagNode} child additionally gets this node as its
  * parent.
  *
  * @param child the item to add; may be {@code null}
  */
 public void addChild(Object child) {
   if (child == null) {
     return;
   }
   if (child instanceof List) {
     addChildren((List) child);
     return;
   }
   children.add(child);
   if (child instanceof TagNode) {
     ((TagNode) child).parent = this;
   }
 }
Beispiel #14
0
 /**
  * Copies the type of each known flow action onto the matching elements of the form.
  *
  * <p>Every element under {@code formNode} that has an {@code action} attribute is looked up in
  * {@code currentActions} by that attribute's value. When an action with a non-null type is
  * found, the element's {@code actionType} attribute is set to the type's string form.
  *
  * @param currentActions flow actions keyed by action name
  * @param formNode root of the form markup to annotate
  */
 private void processActions(Map<String, FlowAction> currentActions, TagNode formNode) {
   @SuppressWarnings("unchecked")
   List<TagNode> taggedNodes = formNode.getElementListHavingAttribute("action", true);
   for (TagNode tagged : taggedNodes) {
     FlowAction matched = currentActions.get(tagged.getAttributeByName("action"));
     if (matched == null) {
       continue;
     }
     FlowActionType actionType = matched.getType();
     if (actionType == null) {
       continue;
     }
     tagged.setAttribute("actionType", actionType.toString());
   }
 }
 /**
  * Downloads {@code http://www.baidu.com}, selects the links matched by {@code //p[@id='nv']/a}
  * and prints each link's text and {@code href} to standard output.
  *
  * <p>Any exception is caught and its stack trace printed.
  *
  * @param args not used
  */
 public static void main(String[] args) throws Exception {
   try {
     HtmlCleaner cleaner = new HtmlCleaner();
     //			cleaner.clean(new File("s"));
     URL url = new URL("http://www.baidu.com");
     TagNode node = cleaner.clean(url, "utf-8");
     // Fixed declaration: the original read "node.Object[] tagNodes", which is not valid Java.
     Object[] tagNodes = node.evaluateXPath("//p[@id='nv']/a");
     for (Object tagNode : tagNodes) {
       System.out.println(((TagNode) tagNode).getText());
       System.out.println(((TagNode) tagNode).getAttributeByName("href"));
     }
   } catch (Exception exception) {
     exception.printStackTrace();
   }
 }
Beispiel #16
0
    /**
     * Appends an object-replacement character ({@code U+FFFC}) for the image referenced by {@code
     * node} and registers a loader callback for the resolved reference.
     *
     * <p>The reference is taken from the first attribute that is present among {@code src}, {@code
     * href} and {@code xlink:href}.
     *
     * @param node the tag being rendered
     * @param builder text builder that receives the placeholder character
     * @param start offset in {@code builder} where the tag's content begins
     * @param end not used by this handler
     */
    @Override
    public void handleTagNode(TagNode node, SpannableStringBuilder builder, int start, int end) {
      String src = null;
      for (String attribute : new String[] {"src", "href", "xlink:href"}) {
        src = node.getAttributeByName(attribute);
        if (src != null) {
          break;
        }
      }
      builder.append("\uFFFC");

      String resolvedHref = spine.resolveHref(src);
      ImageCallback callback = new ImageCallback(builder, start, builder.length());
      loader.registerCallback(resolvedHref, callback);
    }
Beispiel #17
0
  /**
   * Loads {@code /debugbar.html} from the classpath, blanks its {@code {viewDataDocumentUrl}}
   * placeholder, cleans the markup and returns the first child of the second child of the
   * cleaned root.
   *
   * @return the selected node of the debug bar markup
   * @throws RuntimeException wrapping the {@link IOException} when the resource cannot be read;
   *     the failure is logged before it is thrown
   */
  private TagNode loadDebugBar() {
    try {
      InputStream resource = FormParser.class.getResourceAsStream("/debugbar.html");
      String markup =
          new String(new StreamUtils().readStream(resource)).replace("{viewDataDocumentUrl}", "");

      TagNode root = htmlCleaner.clean(markup);
      // presumably index 1 of the root is <body> (after <head>) -- confirm against the cleaner
      TagNode bodyNode = (TagNode) root.getChildren().get(1);
      return (TagNode) bodyNode.getChildren().get(0);
    } catch (IOException e) {
      RuntimeException failure = new RuntimeException("Failed to load debugBar.", e);
      logger.error(failure.getMessage(), failure);
      throw failure;
    }
  }
Beispiel #18
0
    /**
     * Turns the span {@code [start, end)} into a link for the tag's {@code href}.
     *
     * <p>Nothing happens when the tag has no {@code href}. If the lower-cased href starts with one
     * of {@code externalProtocols}, a {@link URLSpan} is set; otherwise a {@link ClickableSpan}
     * is set that resolves the href through {@code spine} and navigates to it when clicked.
     *
     * @param node the tag being rendered
     * @param builder text builder that receives the span
     * @param start start offset of the span
     * @param end end offset of the span
     */
    @Override
    public void handleTagNode(TagNode node, SpannableStringBuilder builder, int start, int end) {
      final String href = node.getAttributeByName("href");
      if (href == null) {
        return;
      }

      // External links: handed to a plain URL span.
      String lowered = href.toLowerCase(Locale.US);
      for (String protocol : this.externalProtocols) {
        if (lowered.startsWith(protocol)) {
          builder.setSpan(new URLSpan(href), start, end, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
          return;
        }
      }

      // Everything else is treated as an internal navigation link.
      ClickableSpan internalLink =
          new ClickableSpan() {
            @Override
            public void onClick(View widget) {
              navigateTo(spine.resolveHref(href));
            }
          };
      builder.setSpan(internalLink, start, end, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
    }
  /**
   * Parses the stored score sheet with the given id.
   *
   * <p>The race name is read from the first {@code h1}, the date from the first {@code div} with
   * class {@code date} (format {@code dd. MM. yyyy}) and the volume from the text before the first
   * dot in the {@code div} with class {@code volume}. The resulting race and race volume are
   * passed through {@code checkPossibleMatches} and stored in {@code raceEntity} and {@code
   * raceVolume}. Every table found from the {@code men} section is then processed with {@code
   * man} set to {@code true}, followed by the {@code women} section with {@code man} set to
   * {@code false}.
   *
   * @param id primary key of the {@link ScoreSheetEntity} to parse
   * @throws XPatherException if an XPath expression cannot be evaluated
   * @throws ParseException if the date text does not match the expected format
   */
  public void parseScoreSheet(String id) throws XPatherException, ParseException {
    ScoreSheetEntity sheet = em.find(ScoreSheetEntity.class, id);
    TagNode page = cleaner.clean(sheet.getContent());

    // Race
    TagNode headingNode = (TagNode) page.evaluateXPath("//body//h1")[0];
    String raceName = headingNode.getText().toString();
    RaceEntity raceCandidate = new RaceEntity();
    raceCandidate.setName(raceName);
    raceEntity = (RaceEntity) checkPossibleMatches(raceCandidate, RaceEntity.class);

    // Race volume
    RaceVolumeEntity volumeCandidate = new RaceVolumeEntity();

    TagNode dateNode = (TagNode) page.evaluateXPath("//body//div[@class='date']")[0];
    String dateText = dateNode.getText().toString();
    volumeCandidate.setDate(new SimpleDateFormat("dd. MM. yyyy").parse(dateText));
    volumeCandidate.setRace(raceEntity);

    TagNode volumeNode = (TagNode) page.evaluateXPath("//body//div[@class='volume']")[0];
    String volumeText = volumeNode.getText().toString();
    String volumeNumber = volumeText.substring(0, volumeText.indexOf("."));
    volumeCandidate.setVolume(Integer.valueOf(volumeNumber));
    raceVolume = (RaceVolumeEntity) checkPossibleMatches(volumeCandidate, RaceVolumeEntity.class);

    // Men's categories
    man = true;
    TagNode menSection = (TagNode) page.evaluateXPath("//body//div[@id='men']")[0];
    for (Object table : menSection.evaluateXPath("//table")) {
      processCategory((TagNode) table);
    }

    // Women's categories
    man = false;
    TagNode womenSection = (TagNode) page.evaluateXPath("//body//div[@id='women']")[0];
    for (Object table : womenSection.evaluateXPath("//table")) {
      processCategory((TagNode) table);
    }
  }
Beispiel #20
0
 /**
  * Collects the image URLs of all elements that {@code XPathExpression} selects under {@code
  * node}.
  *
  * <p>Each URL is the element's {@code src} attribute prefixed with {@code http://www.krka.si}.
  * Matches that are not elements, or that have no {@code src} attribute, are skipped.
  *
  * @param node node the expression is evaluated on
  * @param XPathExpression expression selecting the image elements
  * @return the collected URLs; empty when nothing matched or the evaluation failed (the stack
  *     trace is printed in that case)
  */
 public static ArrayList<String> dobisliko(TagNode node, String XPathExpression) {
   ArrayList<String> imageUrls = new ArrayList<String>();
   try {
     // Evaluate the expression once instead of twice per loop iteration.
     Object[] matches = node.evaluateXPath(XPathExpression);
     if (matches != null) {
       for (Object match : matches) {
         if (!(match instanceof TagNode)) {
           continue;
         }
         String src = ((TagNode) match).getAttributeByName("src");
         if (src == null) {
           continue;
         }
         imageUrls.add("http://www.krka.si" + src);
       }
     }
   } catch (XPatherException e) {
     e.printStackTrace();
   }
   return imageUrls;
 }
Beispiel #21
0
    /**
     * Records the start offset of any tag that has an {@code id} attribute in {@code anchors},
     * then delegates to the wrapped handler.
     *
     * @param node the tag being rendered
     * @param builder text builder passed on to the wrapped handler
     * @param start offset stored for the tag's id
     * @param end passed on to the wrapped handler
     */
    @Override
    public void handleTagNode(TagNode node, SpannableStringBuilder builder, int start, int end) {
      String anchorId = node.getAttributeByName("id");
      boolean hasAnchor = anchorId != null;
      if (hasAnchor) {
        anchors.put(anchorId, start);
      }

      wrappedHandler.handleTagNode(node, builder, start, end);
    }
Beispiel #22
0
  /**
   * Collects the text of all elements that {@code XPathExpression} selects under {@code node}.
   *
   * <p>Matches that are not elements are skipped.
   *
   * @param node node the expression is evaluated on
   * @param XPathExpression expression selecting the description elements
   * @return the collected texts; empty when nothing matched or the evaluation failed (the stack
   *     trace is printed in that case)
   */
  public static ArrayList<String> dobi_opis(TagNode node, String XPathExpression) {
    ArrayList<String> descriptions = new ArrayList<String>();
    try {
      // Evaluate the expression once instead of twice per loop iteration.
      Object[] matches = node.evaluateXPath(XPathExpression);
      if (matches != null) {
        for (Object match : matches) {
          if (match instanceof TagNode) {
            descriptions.add(((TagNode) match).getText().toString());
          }
        }
      }
    } catch (XPatherException e) {
      e.printStackTrace();
    }
    return descriptions;
  }
  /**
   * Cleans the given HTML file with HtmlCleaner and writes the result back over the same file.
   *
   * <p>The first child of the second top-level element (treated as the body) is removed before
   * the tree is converted to a JDOM document and written through {@code output}. The file's
   * absolute path is added to {@code results} on success. An {@link IOException} is logged and
   * swallowed.
   *
   * @param file the file to rewrite
   * @param depth not used by this method
   * @param results collection that receives the processed file
   */
  @SuppressWarnings("unchecked")
  protected void handleFile(File file, int depth, Collection results) {
    File f = new File(FilenameUtils.normalize(file.getAbsolutePath()));
    logger.debug(f.getAbsoluteFile());
    try {
      HtmlCleaner cleaner = new HtmlCleaner();
      cleaner.setTransformations(ct);

      CleanerProperties props = cleaner.getProperties();
      props.setAdvancedXmlEscape(false);
      //			props.setTranslateSpecialEntities(false);
      //			props.setRecognizeUnicodeChars(false);

      TagNode node = cleaner.clean(f);

      TagNode tnBody = node.getAllElements(false)[1];
      List l = tnBody.getChildren();
      if (l != null
          && l.size() > 0) { // This is a hack to remove the <?xml in the beginning of body
        tnBody.removeChild(l.get(0));
      }

      Document myJDom = new JDomSerializer(props, true).createJDom(node);

      // The stream is closed in all cases so the file handle is not leaked.
      // NOTE(review): assumes `output` has flushed everything by the time write() returns --
      // confirm against the writer's configuration.
      OutputStream os = new FileOutputStream(f);
      try {
        output.setOutputStream(os);
        output.write(myJDom);
      } finally {
        os.close();
      }
      results.add(f.getAbsoluteFile());
    } catch (IOException e) {
      logger.error("", e);
    }
  }
Beispiel #24
0
  /**
   * Returns the first element below this node that satisfies {@code condition}.
   *
   * <p>Children are visited in order; with {@code isRecursive} set, each non-matching child's
   * subtree is searched before the next sibling is examined.
   *
   * @param condition test applied to each candidate; {@code null} yields no result
   * @param isRecursive whether to descend into child elements
   * @return the first matching TagNode, or {@code null} if there is none
   */
  private TagNode findElement(ITagNodeCondition condition, boolean isRecursive) {
    if (condition == null) {
      return null;
    }

    for (int idx = 0; idx < children.size(); idx++) {
      Object child = children.get(idx);
      if (!(child instanceof TagNode)) {
        continue;
      }
      TagNode candidate = (TagNode) child;
      if (condition.satisfy(candidate)) {
        return candidate;
      }
      if (!isRecursive) {
        continue;
      }
      TagNode nested = candidate.findElement(condition, isRecursive);
      if (nested != null) {
        return nested;
      }
    }

    return null;
  }
Beispiel #25
0
 /**
  * Cleans {@code text} with HtmlCleaner, evaluates {@code xpathStr} on the resulting tree and
  * returns the first match as a string.
  *
  * <p>A matched {@link TagNode} is returned as its inner HTML; any other match as its {@code
  * toString()} value.
  *
  * @param text the markup to search
  * @return the first match, or {@code null} when the cleaner returned no root node, nothing
  *     matched, or the XPath evaluation failed
  */
 @Override
 public String select(String text) {
   HtmlCleaner cleaner = new HtmlCleaner();
   TagNode root = cleaner.clean(text);
   if (root == null) {
     return null;
   }

   Object[] hits;
   try {
     hits = root.evaluateXPath(xpathStr);
   } catch (XPatherException e) {
     e.printStackTrace();
     return null;
   }
   if (hits == null || hits.length < 1) {
     return null;
   }

   Object first = hits[0];
   return first instanceof TagNode ? cleaner.getInnerHtml((TagNode) first) : first.toString();
 }
Beispiel #26
0
  /**
   * Scrapes the data rows for {@code year} from the page at {@code URL_PATTERN}.
   *
   * <p>Each row selected by {@code DATA_ROW_XPATH} becomes one item. The column-to-attribute
   * mapping depends on the year (2008 layout by default, then the 2010, 2016 and 2017 layouts);
   * columns mapped to {@code IGNORE} are skipped. Cell text is trimmed and stripped of commas,
   * and the year itself is stored under the key {@code "year"}.
   *
   * @param year the year whose page is read
   * @return one item per data row, in page order
   */
  public List<InstitutionDataItem> getData(int year)
      throws MalformedURLException, IOException, XPatherException {
    String url = String.format(URL_PATTERN, year);
    System.out.println("reading from " + url);

    List<InstitutionDataItem> items = new ArrayList<InstitutionDataItem>();
    TagNode page = ScraperUtils.getCleanedHtml(url);
    Object[] rows = page.evaluateXPath(DATA_ROW_XPATH);

    // The column layout only depends on the year, so it is chosen once.
    String[] columns;
    if (year >= 2017) {
      columns = attributes2017;
    } else if (year >= 2016) {
      columns = attributes2016;
    } else if (year >= 2010) {
      columns = attributes2010;
    } else {
      columns = attributes2008;
    }

    for (Object row : rows) {
      TagNode tr = (TagNode) row;
      InstitutionDataItem item = new InstitutionDataItem();
      item.data.put("year", String.valueOf(year));

      for (int col = 0; col < columns.length; col++) {
        String key = columns[col];
        if (key.equals(IGNORE)) {
          continue;
        }
        String cellText = tr.getChildTags()[col].getText().toString().trim().replace(",", "");
        item.data.put(key, cellText);
      }

      items.add(item);
    }

    return items;
  }
 /**
  * Records image links: for every {@code a} element with a non-empty {@code href} and a direct
  * child {@code img} that has a {@code src}, the pair is passed to {@code
  * getLinks().setLinkByServer}.
  *
  * @param parentNode not used by this visitor
  * @param htmlNode the node being visited
  * @return always {@code true}
  */
 @Override
 public boolean visit(TagNode parentNode, HtmlNode htmlNode) {
   if (!(htmlNode instanceof TagNode)) {
     return true;
   }
   TagNode anchor = (TagNode) htmlNode;
   if (!anchor.getName().equalsIgnoreCase("a")) {
     return true;
   }

   String href = anchor.getAttributeByName("href");
   if (href == null || href.isEmpty() || !anchor.hasChildren()) {
     return true;
   }

   TagNode image = anchor.findElementByName("img", false);
   if (image != null && image.hasAttribute("src")) {
     getLinks().setLinkByServer(href, image.getAttributeByName("src"));
   }
   return true;
 }
Beispiel #28
0
  /**
   * Collects every element below this node that satisfies {@code condition}.
   *
   * <p>Children are visited in order; with {@code isRecursive} set, the matches found in a
   * child's subtree are appended right after that child.
   *
   * @param condition test applied to each candidate; {@code null} yields an empty list
   * @param isRecursive whether to descend into child elements
   * @return list of the matching TagNode instances, never {@code null}
   */
  private List getElementList(ITagNodeCondition condition, boolean isRecursive) {
    List matches = new LinkedList();
    if (condition == null) {
      return matches;
    }

    for (int idx = 0; idx < children.size(); idx++) {
      Object child = children.get(idx);
      if (!(child instanceof TagNode)) {
        continue;
      }
      TagNode candidate = (TagNode) child;
      if (condition.satisfy(candidate)) {
        matches.add(candidate);
      }
      if (!isRecursive) {
        continue;
      }
      List nested = candidate.getElementList(condition, isRecursive);
      if (nested != null && nested.size() > 0) {
        matches.addAll(nested);
      }
    }

    return matches;
  }
Beispiel #29
0
  /**
   * Downloads the version history page of one app and prints, for each version block selected by
   * the first XPath, its version name, version code, file size and download link.
   *
   * <p>Any exception is caught and its stack trace printed.
   *
   * @param args not used
   */
  public static void main(String[] args) throws Exception {
    try {
      HtmlCleaner cleaner = new HtmlCleaner();
      nameList = new ArrayList<String>();
      URL url =
          new URL(
              "http://apps.wandoujia.com/apps/com.eg.android.AlipayGphone/versions?pos=w/popup");
      TagNode page = cleaner.clean(url);

      String blocksPath = "/body/div//div[@class='version-block']/div";
      Object[] tags = page.evaluateXPath(blocksPath + "[position()<4]");

      // Blocks are addressed by their 1-based XPath position.
      for (int i = 1; i <= tags.length; i++) {
        String blockPath = blocksPath + "[" + i + "]";

        Object[] versionHits = page.evaluateXPath(blockPath + "//i[@itemprop='softwareVersion']");
        String appVersion = ((TagNode) versionHits[0]).getText() + "";
        System.out.println(appVersion);

        Object[] codeHits = page.evaluateXPath(blockPath + "//span[@class='version-code']");
        String appVersionCode = ((TagNode) codeHits[0]).getText() + "";
        System.out.println(appVersionCode);

        Object[] sizeHits = page.evaluateXPath(blockPath + "//span[@class='apk-size']");
        String appSize = ((TagNode) sizeHits[0]).getText() + "";
        System.out.println(appSize);

        Object[] downloadHits = page.evaluateXPath(blockPath + "//a[@download]");
        TagNode downloadLink = (TagNode) downloadHits[0];
        String appUrl = downloadLink.getAttributeByName("href");
        System.out.println(appUrl);
        String appName = downloadLink.getAttributeByName("download");

        // *** write to the database -- to be written tomorrow ***

      }
    } catch (Exception exception) {
      exception.printStackTrace();
    }
  }
Beispiel #30
0
  /**
   * Replaces the source attribute of every matching {@code input} with a proxy path.
   *
   * <p>For each {@code input} under {@code formNode} that has the {@code
   * Constants.SELECT_SOURCE_ATTR} attribute, a {@link FieldSourceProxy} is created from the
   * current path, the field name and the value of {@code Constants.INPUT_SOURCE_ATTR}. The proxy
   * is registered on {@code formFlow}, the {@code INPUT_SOURCE_ATTR} attribute is removed, and
   * {@code rf.source} is set to the proxy's path under {@code rhinoforms/proxy/}.
   *
   * <p>NOTE(review): the selection uses {@code SELECT_SOURCE_ATTR} while the value is read from
   * {@code INPUT_SOURCE_ATTR} -- confirm that this difference is intended.
   *
   * @param formNode root of the form markup to process
   * @param currentPath path handed to the proxy factory
   * @param formFlow flow that receives the created proxies
   * @throws XPatherException if the XPath expression cannot be evaluated
   */
  private void processInputSourceFields(TagNode formNode, String currentPath, FormFlow formFlow)
      throws XPatherException {
    String selector = "//input[@" + Constants.SELECT_SOURCE_ATTR + "]";
    for (Object match : formNode.evaluateXPath(selector)) {
      TagNode input = (TagNode) match;
      String fieldName = input.getAttributeByName(Constants.NAME_ATTR);
      String source = input.getAttributeByName(Constants.INPUT_SOURCE_ATTR);

      FieldSourceProxy proxy = proxyFactory.createFlowProxy(currentPath, fieldName, source);
      formFlow.addFieldSourceProxy(proxy);

      input.removeAttribute(Constants.INPUT_SOURCE_ATTR);
      input.setAttribute("rf.source", "rhinoforms/proxy/" + proxy.getProxyPath());
    }
  }