public static boolean getFormFields( ResponseWrapper rw, List<NameValuePairString> hiddenFormFields, String formSelector) { // --- analisi della pagina contente la form, specifica al sito Document doc = rw.getJSoupDocument(); Elements els = doc.select(formSelector); // per debug, dovrebbe essere uo if (els == null || els.size() <= 0) { log.error("unable to find form at selector: " + formSelector); System.exit(1); return false; } Element loginForm = els.get(0); if (loginForm == null) { log.error("failed to get form to analyze at: " + rw.dump()); System.exit(1); } // log.info("login form OUTER HTML\n" + loginForm.outerHtml()); Elements inputFields = loginForm.select("input"); // display all for (Element e : inputFields) { String type = e.attr("type"); if (type.equals("submit")) { continue; } String attrName = e.attr("name"); hiddenFormFields.add(new NameValuePairString(attrName, e.val())); log.debug("captured form input: " + attrName + " = " + e.val()); } return false; }
public static boolean isSubPathOf(String sub, String path, boolean acceptEqual) { if (!(isPathCorrect(sub) && isPathCorrect(path))) { log.error("exiting invalid path(s): " + sub + " altro " + path); isPathCorrect(path); isPathCorrect(sub); System.exit(1); return false; } if (path.endsWith("/") && path.length() > 1) path = path.substring(0, path.length() - 1); if (sub.endsWith("/") && sub.length() > 1) sub = sub.substring(0, sub.length() - 1); // da qui path corretti ed eventualmente normalizzati if (acceptEqual) { if (sub.equals(path)) return true; } if (sub.length() <= path.length()) return false; String[] subElems = sub.split("/"); String[] pathElems = path.split("/"); for (int i = 0; i < subElems.length && i < pathElems.length; i++) { if (!subElems[i].equals(pathElems[i])) return false; } return true; }
/**
 * Tells whether {@code sub} is a sub-domain of {@code dom}.
 *
 * <p>Both arguments are validated with {@code isDomainCorrect}; if either is
 * rejected the error is logged and {@code System.exit(1)} is called.
 *
 * @param sub         candidate sub-domain
 * @param dom         candidate parent domain
 * @param acceptEqual when {@code true}, identical domains count as a match
 * @return {@code true} if the domains are equal and {@code acceptEqual} is set, or if
 *         {@code sub} ends with {@code dom} and the whole of {@code sub} matches
 *         {@code subDomainLeftRegex} followed by {@code dom}
 */
public static boolean isSubDomainOf(String sub, String dom, boolean acceptEqual) {
    boolean bothValid = isDomainCorrect(sub) && isDomainCorrect(dom);
    if (!bothValid) {
        log.error("exiting invalid domain(s): " + dom + " altro " + sub);
        System.exit(1);
    }

    if (acceptEqual && sub.equals(dom)) {
        return true;
    }

    // dom is appended to the regex verbatim (its dots are regex wildcards); the
    // endsWith test pins the literal suffix before the pattern is applied.
    return sub.endsWith(dom) && sub.matches(subDomainLeftRegex + dom);
}
/**
 * Applies a chain of selectors, starting from {@code elsPar}, and returns the
 * text of the single element they end up selecting.
 *
 * <p>The first selector is applied to {@code elsPar}; each following selector is
 * applied to the result of the previous one. If the final selection does not
 * contain exactly one element the error is logged and {@code System.exit(1)}
 * is called.
 *
 * <p>The selector list is only read: unlike the previous implementation, which
 * removed its first entry, the caller's list is left untouched and can be reused.
 *
 * @param elsPar         element the first selector is applied to
 * @param jsoupSelectors selectors to apply in order; must contain at least one entry
 * @return a {@code Result} carrying the selected element's text via {@code setRetStr},
 *         or one with {@code setContinua(false)} on the failure path
 * @throws IndexOutOfBoundsException if {@code jsoupSelectors} is empty
 */
public static Result textSelList(Element elsPar, ArrayList<String> jsoupSelectors) {
    Result res = new Result();

    Elements els = elsPar.select(jsoupSelectors.get(0));
    // Remaining selectors narrow down the previous selection.
    for (String sel : jsoupSelectors.subList(1, jsoupSelectors.size())) {
        Elements narrowed = els.select(sel);
        if (narrowed != null) {
            els = narrowed;
        }
    }

    if (els == null || els.size() != 1) {
        log.error("jsoup selector on elements does not match 1");
        System.exit(1);
        return res.setContinua(false);
    }
    return res.setRetStr(els.get(0).text());
}
public static boolean isRelativeURL(String url) { boolean ret = false; if (!WEBUtils.isCorrectURL(url)) { if (!WEBUtils.isCorrectURL("http://google.com" + url)) { // rifiuta URL relative come /a log.error("esco, not a correct URL: " + url); System.exit(1); } } if (url.contains("http:") || url.contains("https:")) { return false; } // probably wrong // check if it contains an hostname return !url.matches("[\\w]+\\.?[\\w]+/[^/].*"); }