Esempio n. 1
0
 /**
  * Filters {@code elements} by their own text, using the terms carried in a contains-selector
  * part.
  *
  * <p>The value extracted from {@code query} is split on {@code AND_PLACEHOLDER} when that
  * placeholder is present, otherwise on {@code OR_PLACEHOLDER}. With the AND placeholder an
  * element is kept only if its own text contains every term; otherwise it is kept if its own text
  * contains at least one term. Each matching element appears once in the result.
  *
  * @param query the selector part to parse
  * @return the elements whose own text satisfies the terms; empty if none do
  * @throws IllegalArgumentException if the value fails the {@code isUniqueValue} check for a
  *     placeholder it contains, or if it contains both placeholders
  */
 private Elements parseContains(String query) {
   String value = getValue(query, CONTAINS_TAG);
   boolean existsAndTag = value.contains(AND_PLACEHOLDER);
   boolean existsOrTag = value.contains(OR_PLACEHOLDER);
   if (existsAndTag && !isUniqueValue(value, AND_PLACEHOLDER)) {
     throw new IllegalArgumentException(
         "Argument selector part: " + query + " is illegal, #and tag can only exist one");
   }
   if (existsOrTag && !isUniqueValue(value, OR_PLACEHOLDER)) {
     throw new IllegalArgumentException(
         "Argument selector part: " + query + " is illegal, #or tag can only exist one");
   }
   if (existsAndTag && existsOrTag) {
     throw new IllegalArgumentException(
         "Argument selector part: " + query + " is illegal, #and and #or tag can only exist one");
   }

   String tag = existsAndTag ? AND_PLACEHOLDER : OR_PLACEHOLDER;
   // The placeholder is matched literally by contains() above, so split on it literally too
   // instead of letting split() interpret it as a regular expression.
   String[] values = value.split(java.util.regex.Pattern.quote(tag));

   Elements result = new Elements();
   for (Element ele : elements) {
     String ownText = ele.ownText();
     // AND: start from "matches" and fail on the first missing term.
     // OR: start from "no match" and succeed on the first term found.
     // A value made only of the separator yields no terms and never matches.
     boolean matched = existsAndTag && values.length > 0;
     for (String v : values) {
       boolean found = ownText.contains(v);
       if (existsAndTag && !found) {
         matched = false;
         break;
       }
       if (!existsAndTag && found) {
         matched = true;
         break;
       }
     }
     if (matched) {
       result.add(ele);
     }
   }
   return result;
 }
Esempio n. 2
0
 /**
  * Appends every ancestor of {@code el} to {@code parents}, nearest ancestor first.
  *
  * <p>The walk stops when there is no further parent or when the next parent's tag name is
  * {@code "#root"}; that parent is not added.
  *
  * @param el the element whose ancestors are collected
  * @param parents the collection receiving the ancestors
  */
 private static void accumulateParents(Element el, Elements parents) {
   Element ancestor = el.parent();
   while (ancestor != null) {
     if (ancestor.tagName().equals("#root")) {
       break;
     }
     parents.add(ancestor);
     ancestor = ancestor.parent();
   }
 }
Esempio n. 3
0
  /**
   * Collects the other child elements of this element's parent.
   *
   * <p>This element itself is never part of the result. When this element has no parent node, an
   * empty list is returned.
   *
   * @return the sibling elements, in the order the parent reports its children
   */
  public Elements siblingElements() {
    if (parentNode == null) {
      return new Elements(0);
    }

    List<Element> family = parent().children();
    // Every child except this one ends up in the result, hence the capacity.
    Elements siblings = new Elements(family.size() - 1);
    for (Element candidate : family) {
      if (candidate == this) {
        continue;
      }
      siblings.add(candidate);
    }
    return siblings;
  }
Esempio n. 4
0
 /**
  * Resolves the "last" selector part to the last of the current {@code elements}.
  *
  * @param query the selector part; must equal {@code LAST_TAG}
  * @return a set holding the last element, or an empty set when {@code elements} is empty
  * @throws IllegalArgumentException if {@code query} is not {@code LAST_TAG}
  */
 private Elements parseLast(String query) {
   if (!LAST_TAG.equals(query)) {
     throw new IllegalArgumentException("Argument selector part: " + query + " is illegal");
   }
   Elements eles = new Elements();
   // last() yields null for an empty set; never put a null entry into the result.
   if (!elements.isEmpty()) {
     eles.add(elements.last());
   }
   return eles;
 }
Esempio n. 5
0
 /**
  * Downloads the TV listing page, collects its rows marked with the {@code odd} and {@code even}
  * CSS classes in alternating order, converts them with {@code KATToTvShowEpisode} and hands the
  * result to {@code DBActions.insertTvEpisodes}.
  *
  * <p>Any failure is reported with {@code printStackTrace()} and otherwise ignored, so this method
  * never throws to its caller. The HTTP client and response are always closed.
  */
 public void getKATorrents() {
   final String listingUrl = "https://kat.cr/tv/?field=time_add&sorder=desc";
   try {
     HttpGet httpGet = new HttpGet(listingUrl);
     httpGet.addHeader(
         "accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
     httpGet.addHeader("accept-encoding", "gzip, deflate, sdch");
     httpGet.addHeader("accept-language", "en-US,en;q=0.8");
     httpGet.addHeader("dnt", "1");
     httpGet.addHeader("upgrade-insecure-requests", "1");
     httpGet.addHeader(
         "user-agent",
         "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36");

     Elements allshows = new Elements();
     try (CloseableHttpClient httpClient = HttpClientBuilder.create().build();
         CloseableHttpResponse response = httpClient.execute(httpGet)) {
       HttpEntity httpEntity = response.getEntity();
       if (httpEntity == null) {
         throw new IllegalStateException("No response body received from " + listingUrl);
       }
       String charset = charsetFromContentType(response.getFirstHeader("Content-Type"), "UTF-8");
       // NOTE(review): only the path is passed as base URI, as before; relative links in the
       // document will not resolve to absolute URLs - confirm whether that is intended.
       Document pageDoc =
           Jsoup.parse(httpEntity.getContent(), charset, httpGet.getURI().getPath());
       Elements oddResults = pageDoc.getElementsByClass("odd");
       Elements evenResults = pageDoc.getElementsByClass("even");

       // Interleave odd/even rows without assuming either list is the longer one, so no row
       // is duplicated, no null is added and no index runs out of bounds.
       int rowPairs = Math.max(oddResults.size(), evenResults.size());
       for (int i = 0; i < rowPairs; i++) {
         if (i < oddResults.size()) {
           allshows.add(oddResults.get(i));
         }
         if (i < evenResults.size()) {
           allshows.add(evenResults.get(i));
         }
       }
     }

     KATToTvShowEpisode kat = new KATToTvShowEpisode();
     List<TvShowEpisode> theShows = kat.makeKATBeans(allshows);
     DBActions.insertTvEpisodes(theShows, listingUrl);
   } catch (Exception e) {
     // Best-effort job: report the failure and carry on, as the original did.
     e.printStackTrace();
   }
 }

 /** Extracts the {@code charset} parameter from a Content-Type header, or returns the fallback. */
 private static String charsetFromContentType(Header contentType, String fallback) {
   if (contentType == null || contentType.getValue() == null) {
     return fallback;
   }
   final String key = "charset=";
   for (String part : contentType.getValue().split(";")) {
     String param = part.trim();
     if (!param.regionMatches(true, 0, key, 0, key.length())) {
       continue;
     }
     String name = param.substring(key.length()).trim();
     // The parameter value may be quoted, e.g. charset="utf-8".
     if (name.length() >= 2 && name.startsWith("\"") && name.endsWith("\"")) {
       name = name.substring(1, name.length() - 1).trim();
     }
     if (!name.isEmpty()) {
       return name;
     }
   }
   return fallback;
 }
Esempio n. 6
0
  /**
   * Splits the elements of {@code elementHandler} by whether they contain a match for
   * {@code DATA_TABLE_MARKUP_CSS_LIKE_QUERY}.
   *
   * <p>Elements with a match are kept: after the scan, {@code elementHandler.clean()} is called
   * and the kept elements are added to what it returns (presumably the emptied handler - confirm
   * against {@code ElementHandler}). Elements without a match are added to
   * {@code elementHandlerWithoutDataTableMarkup} when that handler is not null, and dropped
   * otherwise.
   *
   * @param elementHandler handler providing the candidates and receiving the elements with markup
   * @param elementHandlerWithoutDataTableMarkup optional handler receiving the elements without
   *     markup; may be null
   */
  private void extractTableWithDataTableMarkup(
      ElementHandler<Element> elementHandler,
      ElementHandler<Element> elementHandlerWithoutDataTableMarkup) {

    Elements marked = new Elements();

    for (Element candidate : elementHandler.get()) {
      boolean hasMarkup = !candidate.select(DATA_TABLE_MARKUP_CSS_LIKE_QUERY).isEmpty();
      if (hasMarkup) {
        marked.add(candidate);
        continue;
      }
      if (elementHandlerWithoutDataTableMarkup != null) {
        elementHandlerWithoutDataTableMarkup.add(candidate);
      }
    }
    elementHandler.clean().addAll(marked);
  }
Esempio n. 7
0
 /**
  * Resolves the "next element" selector part.
  *
  * <p>When {@code elements} holds exactly one element, the result holds that element's next
  * element sibling, or is empty if there is none. For any other size the current
  * {@code elements} instance is returned unchanged.
  *
  * @param query the selector part; must equal {@code NEXT_ELEMENT_TAG}
  * @return the resolved elements as described above
  * @throws IllegalArgumentException if {@code query} is not {@code NEXT_ELEMENT_TAG}
  */
 private Elements parseNextElement(String query) {
   if (!NEXT_ELEMENT_TAG.equals(query)) {
     throw new IllegalArgumentException("Argument selector part: " + query + " is illegal");
   }
   if (elements.size() != 1) {
     return elements;
   }

   Elements next = new Elements();
   Element sibling = elements.first().nextElementSibling();
   if (sibling != null) {
     next.add(sibling);
   }
   return next;
 }
Esempio n. 8
0
 /**
  * Filters {@code elements} to those carrying an attribute with an exact value.
  *
  * <p>The value extracted from {@code query} must have the form {@code name=value} with exactly
  * one {@code =}. An empty right-hand side (for example {@code name=}) selects elements whose
  * attribute is present with an empty value.
  *
  * @param query the selector part to parse
  * @return the elements that have the attribute and whose attribute value equals the expected one
  * @throws IllegalArgumentException if the extracted value does not contain exactly one {@code =}
  */
 private Elements parseAttr(String query) {
   String value = getValue(query, ATTR_TAG);
   int separator = value.indexOf('=');
   if (separator < 0 || separator != value.lastIndexOf('=')) {
     throw new IllegalArgumentException("Argument selector part: " + query + " is illegal");
   }
   // Cut around the separator instead of using split("="): split drops trailing empty strings,
   // so "name=" would produce a one-element array and indexing the value would fail.
   String name = value.substring(0, separator);
   String expected = value.substring(separator + 1);

   Elements eles = new Elements();
   for (Element element : elements) {
     if (element.hasAttr(name) && element.attr(name).equals(expected)) {
       eles.add(element);
     }
   }
   return eles;
 }
Esempio n. 9
0
 /**
  * Resolves the "next node" selector part.
  *
  * <p>When {@code elements} holds exactly one element, its next sibling node is wrapped in a
  * synthetic {@code nextnode} element whose {@code value} attribute carries the node's string
  * form; the result is empty if there is no next sibling. For any other size the current
  * {@code elements} instance is returned unchanged.
  *
  * @param query the selector part; must equal {@code NEXT_NODE_TAG}
  * @return the resolved elements as described above
  * @throws IllegalArgumentException if {@code query} is not {@code NEXT_NODE_TAG}
  */
 private Elements parseNextNode(String query) {
   if (!NEXT_NODE_TAG.equals(query)) {
     throw new IllegalArgumentException("Argument selector part: " + query + " is illegal");
   }
   if (elements.size() != 1) {
     return elements;
   }

   Elements wrapped = new Elements();
   Node sibling = elements.first().nextSibling();
   if (sibling != null) {
     Attributes holder = new Attributes();
     holder.put("value", sibling.toString());
     wrapped.add(new Element(Tag.valueOf("nextnode"), "", holder));
   }
   return wrapped;
 }
Esempio n. 10
0
  /**
   * Groups {@code spans} into lines and appends each line to {@code table}.
   *
   * <p>For every span, four integer coordinates are read from the whitespace-separated tokens 1
   * to 4 of its {@code title} attribute (presumably an hOCR-style {@code bbox x1 y1 x2 y2} -
   * confirm against the input format). A new line starts when a span, other than the first one,
   * begins at or left of the previous span's right edge, begins below the previous span's bottom
   * edge, or lies further from it vertically than
   * {@code averageLineDistance * verticalThresholdModifier}.
   *
   * <p>Lines are numbered consecutively from 0. The spans left over after the loop are added as a
   * final line only if there are more than four of them.
   *
   * @param charLengthThreshold This is the average characterLength as calculated in the Page
   *     class.
   */
  public void createLines(double charLengthThreshold) {
    int lastX2 = 0;
    int lastY2 = 0;
    Elements currentLine = new Elements();
    int lineNumber = 0;
    // Track the first span with a flag: looking the span up with indexOf on every iteration
    // is a linear scan and could misfire if a later span compares equal to the first one.
    boolean firstSpan = true;

    for (Element span : spans) {
      String[] positions = span.attr("title").split("\\s+");
      int x1 = Integer.parseInt(positions[1]);
      int y1 = Integer.parseInt(positions[2]);
      int x2 = Integer.parseInt(positions[3]);
      int y2 = Integer.parseInt(positions[4]);

      // This is where the modifier can be placed for the 1,2,3 parameter as described in the
      // version test:
      boolean startsNewLine =
          !firstSpan
              && (x1 <= lastX2
                  || y1 > lastY2
                  || CommonMethods.calcDistance(lastY2, y1)
                      > (averageLineDistance * verticalThresholdModifier));
      if (startsNewLine) {
        Line line = new Line(currentLine, charLengthThreshold, horizontalThresholdModifier);
        line.setLineNumber(lineNumber);
        table.add(line);
        currentLine = new Elements();
        lineNumber += 1;
      }
      firstSpan = false;
      lastX2 = x2;
      lastY2 = y2;
      currentLine.add(span);
    }
    if (currentLine.size() > 4) { // For in case the last line is part of the table
      Line line = new Line(currentLine, charLengthThreshold, horizontalThresholdModifier);
      // Number the trailing line like every other line instead of leaving it unnumbered.
      line.setLineNumber(lineNumber);
      table.add(line);
    }
  }