Java SHelper Examples

Programming Language: Java

Class/Type: SHelper

Examples at hotexamples.com: 3

Java SHelper - 3 examples found. These are the top rated real world Java examples of SHelper extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

countLetters(1)

encodingCleanup(1)

innerTrim(1)

Example #1

Show file

File: Converter.java Project: didagu/clickngo

  /**
   * Detects the charset announced inside the leading bytes of a stream, even if the first read
   * call only returns some bytes. Data is read from {@code in} and copied into {@code bos} until
   * at least {@code K2} bytes were read or the stream ends; the buffered bytes are then decoded
   * with {@code enc} and searched for {@code key}. The text following the key, up to a closing
   * quote or a space, is taken as the encoding name.
   *
   * <p>When an encoding name is found, {@code in} and {@code bos} are both reset so the caller can
   * re-read the data with the new encoding (this presumably requires the caller to have marked
   * {@code in} beforehand; verify against the caller). In every other case {@code bos} keeps the
   * bytes read so far.
   *
   * @param key marker text that directly precedes the encoding name; a match at index 0 of the
   *     decoded text is ignored
   * @param bos buffer that receives the bytes read from {@code in}
   * @param in stream to read from
   * @param enc encoding used to decode the buffered bytes while searching for {@code key}
   * @return the cleaned-up encoding name, or {@code null} if none was found or the stream could
   *     not be reset
   * @throws IOException if reading from {@code in} fails or {@code enc} is not supported
   */
  protected String detectCharset(
      String key, ByteArrayOutputStream bos, BufferedInputStream in, String enc)
      throws IOException {

    // Grab better encoding from stream
    byte[] arr = new byte[K2];
    int nSum = 0;
    while (nSum < K2) {
      int n = in.read(arr);
      if (n < 0) break;

      nSum += n;
      bos.write(arr, 0, n);
    }

    String str = bos.toString(enc);
    int encIndex = str.indexOf(key);
    int clength = key.length();
    // The second condition guards the charAt below: when the buffered data ends right after the
    // key (e.g. it was cut off at the read limit) there is no value character to inspect.
    if (encIndex > 0 && encIndex + clength < str.length()) {
      char startChar = str.charAt(encIndex + clength);
      int lastEncIndex;
      if (startChar == '\'')
        // if we have charset='something'
        // (++encIndex skips the opening quote so the value starts after it)
        lastEncIndex = str.indexOf("'", ++encIndex + clength);
      else if (startChar == '\"')
        // if we have charset="something"
        lastEncIndex = str.indexOf("\"", ++encIndex + clength);
      else {
        // if we have "text/html; charset=utf-8"
        int first = str.indexOf("\"", encIndex + clength);
        if (first < 0) first = Integer.MAX_VALUE;

        // or "text/html; charset=utf-8 "
        int sec = str.indexOf(" ", encIndex + clength);
        if (sec < 0) sec = Integer.MAX_VALUE;
        lastEncIndex = Math.min(first, sec);

        // or "text/html; charset=utf-8 '
        int third = str.indexOf("'", encIndex + clength);
        if (third > 0) lastEncIndex = Math.min(lastEncIndex, third);
      }

      // re-read byte array with different encoding
      // assume that the encoding string cannot be greater than 40 chars
      if (lastEncIndex > encIndex + clength && lastEncIndex < encIndex + clength + 40) {
        String tmpEnc = SHelper.encodingCleanup(str.substring(encIndex + clength, lastEncIndex));
        try {
          in.reset();
          bos.reset();
          return tmpEnc;
        } catch (IOException ex) {
          Log.e(
              Constants.TAG,
              "Couldn't reset stream to re-read with new encoding " + tmpEnc + " " + ex.toString());
        }
      }
    }
    return null;
  }

Example #2

Show file

File: OutputFormatter.java Project: soyoungboy/lightning-browser-gsy

  /**
   * Builds the formatted text of {@code topNode}, with kept paragraphs separated by blank lines.
   *
   * <p>Nodes with negative scores are removed first, then the text of the elements matching
   * {@code nodesToKeepCssSelector} is collected and inner-trimmed. A result longer than 100
   * characters is returned directly. Otherwise the node's complete text is used instead when
   * nothing was collected, or when the node has text and the collected text is not longer than
   * the node's own text; the chosen text is run through jsoup once more before it is returned.
   *
   * @param topNode element whose text should be extracted
   * @return the formatted text
   */
  public String getFormattedText(Element topNode) {
    removeNodesWithNegativeScores(topNode);

    StringBuilder collected = new StringBuilder();
    append(topNode, collected, nodesToKeepCssSelector);
    String text = SHelper.innerTrim(collected.toString());
    if (text.length() > 100) {
      return text;
    }

    // no subelements: decide whether the node's complete text is the better source
    boolean useNodeText = text.isEmpty();
    if (!useNodeText) {
      useNodeText = !topNode.text().isEmpty() && text.length() <= topNode.ownText().length();
    }
    if (useNodeText) {
      text = topNode.text();
    }

    // if jsoup failed to parse the whole html, parse this smaller snippet
    // again so that leftover html tags do not disturb our text
    return Jsoup.parse(text).text();
  }

Example #3

Show file

File: OutputFormatter.java Project: soyoungboy/lightning-browser-gsy

  /**
   * Appends the text of every usable element below {@code node} that matches {@code tagName} to
   * {@code sb}, each followed by a blank line.
   *
   * <p>An element is skipped when it, or any element on the parent chain up to (but excluding)
   * {@code node}, is flagged by {@code unlikely}. It is also skipped when its text is empty,
   * shorter than {@code minParagraphText}, or longer than twice its letter count as reported by
   * {@code SHelper.countLetters}.
   *
   * @param node root element to search in
   * @param sb builder that receives the accepted paragraph texts
   * @param tagName selector passed to {@code node.select}
   */
  protected void append(Element node, StringBuilder sb, String tagName) {
    // is select more costly than getElementsByTag?
    for (Element candidate : node.select(tagName)) {
      // check the candidate and all of its parents until 'node'
      boolean rejected = false;
      for (Element current = candidate;
          current != null && !current.equals(node);
          current = current.parent()) {
        if (unlikely(current)) {
          rejected = true;
          break;
        }
      }
      if (rejected) {
        continue;
      }

      String text = node2Text(candidate);
      boolean keep =
          !text.isEmpty()
              && text.length() >= minParagraphText
              && text.length() <= SHelper.countLetters(text) * 2;
      if (keep) {
        sb.append(text);
        sb.append("\n\n");
      }
    }
  }