/**
 * Filters the current elements by their own text, using the value extracted from a
 * contains-selector part.
 *
 * <p>The value may hold at most one AND placeholder or at most one OR placeholder (never both).
 * With the AND placeholder an element is kept only when its own text contains every fragment;
 * otherwise an element is kept when its own text contains at least one fragment. Each element
 * appears at most once in the result.
 *
 * @param query the selector part to parse
 * @return the elements whose own text satisfies the condition
 * @throws IllegalArgumentException if a placeholder is repeated or both placeholders are present
 */
private Elements parseContains(String query) {
    String value = getValue(query, CONTAINS_TAG);
    boolean existsAndTag = value.contains(AND_PLACEHOLDER);
    boolean existsOrTag = value.contains(OR_PLACEHOLDER);

    if (existsAndTag && !isUniqueValue(value, AND_PLACEHOLDER)) {
        throw new IllegalArgumentException(
                "Argument selector part: " + query + " is illegal, #and tag can only exist one");
    }
    if (existsOrTag && !isUniqueValue(value, OR_PLACEHOLDER)) {
        throw new IllegalArgumentException(
                "Argument selector part: " + query + " is illegal, #or tag can only exist one");
    }
    if (existsAndTag && existsOrTag) {
        throw new IllegalArgumentException(
                "Argument selector part: "
                        + query
                        + " is illegal, #and and #or tag can only exist one");
    }

    // Without any placeholder the split yields the whole value as a single fragment.
    String tag = existsAndTag ? AND_PLACEHOLDER : OR_PLACEHOLDER;
    String[] values = value.split(tag);

    Elements result = new Elements();
    for (Element ele : elements) {
        String ownText = ele.ownText();
        // AND starts satisfied and is refuted by the first miss (an empty fragment list never
        // matches); OR starts unsatisfied and is confirmed by the first hit.
        boolean matches = existsAndTag && values.length > 0;
        for (String v : values) {
            boolean contained = ownText.contains(v);
            if (existsAndTag && !contained) {
                matches = false;
                break;
            }
            if (!existsAndTag && contained) {
                matches = true;
                break;
            }
        }
        if (matches) {
            // Added once per element, even when several fragments match.
            result.add(ele);
        }
    }
    return result;
}
/**
 * Appends the ancestors of {@code el} to {@code parents}, nearest ancestor first.
 *
 * <p>The walk stops when an element has no parent or when the parent's tag name is
 * {@code "#root"}; that parent is not added.
 *
 * @param el the element whose ancestors are collected
 * @param parents the collection receiving the ancestors
 */
private static void accumulateParents(Element el, Elements parents) {
    Element ancestor = el.parent();
    while (ancestor != null) {
        if (ancestor.tagName().equals("#root")) {
            break;
        }
        parents.add(ancestor);
        ancestor = ancestor.parent();
    }
}
/**
 * Returns every other child element of this element's parent.
 *
 * <p>This element itself is never part of the result. When this element has no parent node, an
 * empty list is returned.
 *
 * @return sibling elements
 */
public Elements siblingElements() {
    if (parentNode == null) {
        return new Elements(0);
    }

    List<Element> children = parent().children();
    // This element is one of the children, so reserve room for all but one.
    Elements others = new Elements(children.size() - 1);
    for (Element child : children) {
        if (child == this) {
            continue;
        }
        others.add(child);
    }
    return others;
}
/**
 * Selects the last of the current elements.
 *
 * @param query the selector part; must equal the last-element tag
 * @return a list holding the last current element, or an empty list when there is none
 * @throws IllegalArgumentException if {@code query} is not the last-element tag
 */
private Elements parseLast(String query) {
    if (!LAST_TAG.equals(query)) {
        throw new IllegalArgumentException("Argument selector part: " + query + " is illegal");
    }
    Elements eles = new Elements();
    Element last = elements.last();
    // last() yields null for an empty selection; never hand a null entry to callers.
    if (last != null) {
        eles.add(last);
    }
    return eles;
}
/**
 * Downloads the TV listing page, interleaves its "odd" and "even" rows, converts them to
 * episode beans via {@code KATToTvShowEpisode} and passes them to
 * {@code DBActions.insertTvEpisodes}.
 *
 * <p>Any failure is printed to standard error and the method returns normally. The HTTP client
 * and response are always closed.
 */
public void getKATorrents() {
    final String url = "https://kat.cr/tv/?field=time_add&sorder=desc";
    try (CloseableHttpClient httpClient = HttpClientBuilder.create().build()) {
        HttpGet httpGet = new HttpGet(url);
        httpGet.addHeader(
                "accept",
                "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
        httpGet.addHeader("accept-encoding", "gzip, deflate, sdch");
        httpGet.addHeader("accept-language", "en-US,en;q=0.8");
        httpGet.addHeader("dnt", "1");
        httpGet.addHeader("upgrade-insecure-requests", "1");
        httpGet.addHeader(
                "user-agent",
                "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36");

        Elements allshows = new Elements();
        try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
            // Fall back to UTF-8 when the header is absent or carries no usable parameter.
            String charset = "UTF-8";
            Header contentType = response.getFirstHeader("Content-Type");
            if (contentType != null && contentType.getValue() != null) {
                String[] contentArray = contentType.getValue().split(";");
                if (contentArray.length > 1 && contentArray[1].contains("=")) {
                    String[] keyValue = contentArray[1].trim().split("=");
                    if (keyValue.length > 1) {
                        charset = keyValue[1];
                    }
                }
            }

            HttpEntity httpEntity = response.getEntity();
            Document pageDoc =
                    Jsoup.parse(httpEntity.getContent(), charset, httpGet.getURI().getPath());
            Elements oddResults = pageDoc.getElementsByClass("odd");
            Elements evenResults = pageDoc.getElementsByClass("even");

            // Interleave odd/even rows without assuming which list is longer, so no row is
            // duplicated, skipped, or replaced by null.
            int rows = Math.max(oddResults.size(), evenResults.size());
            for (int i = 0; i < rows; i++) {
                if (i < oddResults.size()) {
                    allshows.add(oddResults.get(i));
                }
                if (i < evenResults.size()) {
                    allshows.add(evenResults.get(i));
                }
            }
        }

        KATToTvShowEpisode kat = new KATToTvShowEpisode();
        List<TvShowEpisode> theShows = kat.makeKATBeans(allshows);
        DBActions.insertTvEpisodes(theShows, url);
    } catch (Exception e) {
        // Best-effort job: report the failure and carry on, as before.
        e.printStackTrace();
    }
}
/**
 * Splits the elements held by {@code elementHandler} according to whether they contain a match
 * for {@code DATA_TABLE_MARKUP_CSS_LIKE_QUERY}.
 *
 * <p>Elements with at least one match are collected; after the scan, {@code clean()} is called
 * on {@code elementHandler} and the collected elements are added to what it returns. Elements
 * without a match are added to {@code elementHandlerWithoutDataTableMarkup} when that handler
 * is not null, and are otherwise dropped.
 *
 * @param elementHandler the handler whose elements are inspected and then replaced
 * @param elementHandlerWithoutDataTableMarkup receiver for elements without a match; may be
 *     null
 */
private void extractTableWithDataTableMarkup(
        ElementHandler<Element> elementHandler,
        ElementHandler<Element> elementHandlerWithoutDataTableMarkup) {
    Elements withMarkup = new Elements();
    boolean hasFallbackHandler = elementHandlerWithoutDataTableMarkup != null;

    for (Element candidate : elementHandler.get()) {
        boolean containsMarkup = !candidate.select(DATA_TABLE_MARKUP_CSS_LIKE_QUERY).isEmpty();
        if (containsMarkup) {
            withMarkup.add(candidate);
            continue;
        }
        if (hasFallbackHandler) {
            elementHandlerWithoutDataTableMarkup.add(candidate);
        }
    }

    elementHandler.clean().addAll(withMarkup);
}
/**
 * Resolves the next-element selector part.
 *
 * <p>When exactly one element is currently selected, the result holds its next element sibling,
 * or is empty if there is none. For any other selection size the current elements are returned
 * as they are.
 *
 * @param query the selector part; must equal the next-element tag
 * @return the resulting elements
 * @throws IllegalArgumentException if {@code query} is not the next-element tag
 */
private Elements parseNextElement(String query) {
    if (!NEXT_ELEMENT_TAG.equals(query)) {
        throw new IllegalArgumentException("Argument selector part: " + query + " is illegal");
    }
    if (elements.size() != 1) {
        return elements;
    }

    Elements next = new Elements();
    Element sibling = elements.first().nextElementSibling();
    if (sibling != null) {
        next.add(sibling);
    }
    return next;
}
/**
 * Filters the current elements by an attribute condition of the form {@code name=value}.
 *
 * <p>The value extracted from the selector part must contain exactly one {@code '='}. The text
 * before it is the attribute name and the text after it is the expected attribute value, which
 * may be empty (for example {@code name=}).
 *
 * @param query the selector part to parse
 * @return the elements that have the attribute with exactly the expected value
 * @throws IllegalArgumentException if the value does not contain exactly one {@code '='}
 */
private Elements parseAttr(String query) {
    String value = getValue(query, ATTR_TAG);
    int separator = value.indexOf('=');
    if (separator < 0 || separator != value.lastIndexOf('=')) {
        throw new IllegalArgumentException("Argument selector part: " + query + " is illegal");
    }

    // Split by index rather than String.split, which drops a trailing empty value.
    String attrName = value.substring(0, separator);
    String expected = value.substring(separator + 1);

    Elements eles = new Elements();
    for (Element element : elements) {
        if (element.hasAttr(attrName) && element.attr(attrName).equals(expected)) {
            eles.add(element);
        }
    }
    return eles;
}
/**
 * Resolves the next-node selector part.
 *
 * <p>When exactly one element is currently selected, its next sibling node is rendered with
 * {@code toString()} and stored in the {@code value} attribute of a new {@code nextnode}
 * element, which becomes the only result. If there is no next sibling, the result is empty. For
 * any other selection size the current elements are returned as they are.
 *
 * @param query the selector part; must equal the next-node tag
 * @return the resulting elements
 * @throws IllegalArgumentException if {@code query} is not the next-node tag
 */
private Elements parseNextNode(String query) {
    if (!NEXT_NODE_TAG.equals(query)) {
        throw new IllegalArgumentException("Argument selector part: " + query + " is illegal");
    }
    if (elements.size() != 1) {
        return elements;
    }

    Elements wrapped = new Elements();
    Node sibling = elements.first().nextSibling();
    if (sibling == null) {
        return wrapped;
    }

    Attributes holder = new Attributes();
    holder.put("value", sibling.toString());
    wrapped.add(new Element(Tag.valueOf("nextnode"), "", holder));
    return wrapped;
}
/** * This method creates the lines by reading trough the file. * * @param charLengthThreshold This is the average characterLength as calculated in the Page class. */ public void createLines(double charLengthThreshold) { String pos; String[] positions; int lastX2 = 0; int lastY2 = 0; Elements currentLine = new Elements(); int lineNumber = 0; for (Element span : spans) { pos = span.attr("title"); positions = pos.split("\\s+"); int x1 = Integer.parseInt(positions[1]); int y1 = Integer.parseInt(positions[2]); int x2 = Integer.parseInt(positions[3]); int y2 = Integer.parseInt(positions[4]); // This is where the modifier can be placed for the 1,2,3 parameter as described in the // version test: if (((x1 <= lastX2) || y1 > lastY2 || CommonMethods.calcDistance(lastY2, y1) > (averageLineDistance * verticalThresholdModifier)) && spans.indexOf(span) != 0) { Line line = new Line(currentLine, charLengthThreshold, horizontalThresholdModifier); // System.out.println(line); line.setLineNumber(lineNumber); table.add(line); currentLine = new Elements(); lineNumber += 1; } lastX2 = x2; lastY2 = y2; currentLine.add(span); } if (currentLine.size() > 4) { // For in case the last line is part of the table Line line = new Line(currentLine, charLengthThreshold, horizontalThresholdModifier); table.add(line); } }