示例#1
0
 /**
  * Crawls {@code Settings.HOMEPAGE} and builds one {@code News} entry per {@code <article>}
  * element found on the page.
  *
  * <p>Entries are numbered from 1 in document order. For each article the teaser link
  * ({@code a.teaser__link}) supplies the title, the under-title, the target link and, when an
  * {@code <img>} is present, the first URL of its {@code data-srcset} attribute.
  *
  * @return the collected news, or {@code null} when the page could not be fetched or parsed
  */
 @Override
 public Collection<News> crawl() {
   HashSet<News> news = new HashSet<>();
   try {
     Document doc = Jsoup.connect(Settings.HOMEPAGE).get();
     long counter = 1;
     for (Element content : doc.select("article")) {
       Elements articleLink = content.select("a.teaser__link");

       // Explicit guards instead of a swallowed exception: a missing <img> leaves the image
       // source null, and a degenerate attribute (e.g. only separators) keeps the raw value.
       String imageSrc = null;
       Element img = articleLink.select("img").first();
       if (img != null) {
         imageSrc = img.attr("data-srcset");
         String[] candidates = imageSrc.split(",");
         if (candidates.length > 0) {
           String[] parts = candidates[0].split(" ");
           if (parts.length > 0) {
             imageSrc = parts[0];
           }
         }
       }

       String title = articleLink.select("div.title__catchline").text();
       String undertitle = articleLink.select("div.title__name").text();
       String link = articleLink.select("[href]").attr("href");
       news.add(new News(counter, title, undertitle, link, imageSrc, "DE"));
       counter++;
     }
   } catch (Exception ex) {
     // Callers receive null as the failure signal; kept for backward compatibility.
     System.out.println("Website not parsed!!");
     return null;
   }
   return news;
 }
示例#2
0
  /**
   * Builds a {@code ParsedToken} from one product's markup and appends it to
   * {@code productinfoList}.
   *
   * <p>The text comes from {@code strong.title}; the sales amount is every digit found in the
   * text of {@code span.condition}, parsed as an int.
   *
   * @param productInfo elements belonging to a single product
   * @throws NumberFormatException if the condition text contains no digits (or too many)
   */
  private void buildResultList(Elements productInfo) {
    ParsedToken token = new ParsedToken();

    // NOTE(review): Elements.tagName(String) appears to rename the matched tags rather than
    // filter by them - confirm this mutation of the document is intended.
    String title = productInfo.select("strong.title").tagName("em").text();
    token.setText(title);

    String salesDigits =
        productInfo.select("span.condition").tagName("em").text().replaceAll("[^0-9]", "");
    token.setSalesamount(Integer.parseInt(salesDigits));

    this.productinfoList.add(token);
  }
  /**
   * Wires the two button handlers of this view.
   *
   * <p>{@code addOption} enables {@code addTask} once every combo box in {@code cmb} has a
   * value. {@code addTask} re-fetches the page currently shown in {@code webView}, collects the
   * form parameters and hands them to a {@code BuyTask}, then hides the window.
   *
   * @param location unused
   * @param resources unused
   */
  @Override
  public void initialize(URL location, ResourceBundle resources) {
    addOption.setOnAction(
        event -> {
          // Enable only when there is at least one combo box and none is left unselected.
          boolean allChosen = cmb.size() > 0;
          for (ComboBox box : cmb) {
            if (box.getValue() == null) {
              allChosen = false;
              break;
            }
          }
          if (allChosen) {
            addTask.setDisable(false);
          }
        });

    addTask.setOnAction(
        event -> {
          try {
            String pageUrl = webView.getEngine().getLocation();
            System.out.println(pageUrl);
            Document page = Jsoup.connect(pageUrl).get();
            Elements inputs = page.select("input");

            // Selected option of every combo box becomes one form parameter.
            Map formParams = new HashMap<String, String>();
            for (ComboBox choice : cmb) {
              ValuePair selected = (ValuePair) choice.getValue();
              formParams.put(selected.getName(), selected.getvalue());
            }
            // Hidden inputs copied verbatim from the page.
            for (String hidden : new String[] {"shop_bid", "item_id", "__event"}) {
              formParams.put(hidden, inputs.select("[name=" + hidden + "]").first().val());
            }
            formParams.put("units", "1");

            // The time window is only passed on when the page carries a #stime element.
            Map saleWindow = null;
            Elements startMarker = page.select("#stime");
            if (startMarker.size() != 0) {
              System.out.println(startMarker);
              saleWindow = new HashMap<String, Long>();
              saleWindow.put("stime", Long.parseLong(inputs.select("#stime").first().val()));
              saleWindow.put("etime", Long.parseLong(inputs.select("#etime").first().val()));
            }

            BuyTask task = new BuyTask(pageUrl, formParams, saleWindow);
            task.call();
            this.getScene().getWindow().hide();
          } catch (Exception e) {
            e.printStackTrace();
          }
        });
  }
 /**
  * Returns the outer HTML of the page's {@code div.mw-content-ltr} content after removing
  * {@code div.noprint} and {@code div#stub} children and passing each content element through
  * {@code convertImagesToLocal}.
  *
  * @param postExtractionDetails not read by this implementation
  * @param doc the parsed page
  * @return the cleaned content markup
  */
 @Override
 public String getPostSectionString(PostExtractionDetails postExtractionDetails, Document doc) {
   Elements content = doc.select("div.mw-content-ltr");
   content.select("div.noprint").remove();
   content.select("div#stub").remove();
   for (int i = 0; i < content.size(); i++) {
     convertImagesToLocal(content.get(i));
   }
   return content.outerHtml();
 }
  /**
   * Parses one search-result page and appends a {@code PersistentEntity} per result item to
   * {@code pEntities}.
   *
   * <p>For every {@code div[class*=map-list-item]} the category, name/link, telephone, address
   * and (when both {@code data-lng} and {@code data-lat} are present) coordinates are read.
   *
   * @param pUrl URL of the search result page
   */
  private void parseSearchResults(String pUrl) {
    LOGGER.info("Started parsing: " + pUrl);

    Document page = ParserUtils.connectGetUrl(ParserUtils.getUri(pUrl).toASCIIString());
    page.setBaseUri(DEFAULT_VSP_URL);

    for (Element item : page.select("div[class*=map-list-item]")) {
      PersistentEntity entity = new PersistentEntity();
      Elements info = item.select("div[class*=info-content]");

      // Category text doubles as label; its part before the first '/' is the industry.
      String category = info.select("p[class*=establishment-category]").first().ownText();
      LOGGER.debug(category);
      entity.setIndustry(new Utf8(category.split("/")[0]));
      entity.setLabel(new Utf8(category));

      // The establishment link provides both the "same as" reference and the name.
      EylloLink nameLink =
          ParserUtils.detectUrl(info.select("p[class*=establishment-name]").select("a").first());
      if (link(nameLink)) {
        String absoluteHref = DEFAULT_VSP_URL + nameLink.getLinkHref();
        LOGGER.debug(absoluteHref);
        entity.putToSameAs(new Utf8(absoluteHref), new Utf8(nameLink.getLinkText()));
        entity.setName(new Utf8(nameLink.getLinkText()));
      }

      // Telephone and address are both taken from the first details paragraph.
      Elements details = item.select("div[class*=establishment-details]").select("p");
      entity.addToTelephones(new Utf8(details.get(0).ownText()));
      PersistentPoint location = new PersistentPoint();
      location.setAddress(new Utf8(details.get(0).text()));

      String lng = item.attr("data-lng");
      String lat = item.attr("data-lat");
      if (!lng.isEmpty() && !lat.isEmpty()) {
        // Stored as [lon, lat] - this order conforms with GeoJSON.
        location.addToCoordinates(Double.parseDouble(lng));
        location.addToCoordinates(Double.parseDouble(lat));
        location.setAccuracy(EylloLocation.GEOCODER_VERIF_ACC_HIGH);
      }
      entity.setPersistentpoint(location);
      entity.addToScenarioId(getScenarioId());

      this.pEntities.add(entity);
    }
    LOGGER.info("Completed getting basic information from entities.");
  }

  /** Tells whether a detected link is present. */
  private static boolean link(EylloLink candidate) {
    return candidate != null;
  }
示例#6
0
  /**
   * Fetches the news from "http://enib.net/" and appends them to {@code this.news}.
   *
   * <p>Each element with class {@code news} yields one {@code News}: its {@code h1} text is the
   * name, its {@code h2} text the information, and the paragraphs below its {@code markdown}
   * elements form the body, one line per paragraph.
   *
   * @return {@code this.news}; left unchanged when the page cannot be loaded
   */
  public List<News> getNews() {
    Document doc;
    try {
      doc = Jsoup.connect("http://enib.net/").get();
    } catch (IOException e) {
      System.out.println("Can't load news");
      e.printStackTrace();
      // Without a document there is nothing to parse; previously this fell through to an NPE.
      return this.news;
    }

    String lineSeparator = System.getProperty("line.separator");
    for (Element item : doc.getElementsByClass("news")) {
      String name = item.select("h1").text();
      String information = item.select("h2").text();

      StringBuilder body = new StringBuilder();
      for (Element paragraph : item.getElementsByClass("markdown").select("p")) {
        body.append(paragraph.text()).append(lineSeparator);
      }
      this.news.add(new News(name, information, body.toString()));
    }
    return this.news;
  }
  /**
   * Extracts the number that follows "成员" in the link text of the page's
   * {@code div[id="friend"]} section (presumably the count of people this user follows).
   *
   * @param doc the parsed profile page
   * @return the digits as a string, or {@code null} when no "成员&lt;digits&gt;" text is found
   */
  private String getFellowPeopleNum(Document doc) {
    // select() yields an empty Elements (never null) when nothing matches, so the text is
    // simply empty in that case and the regex lookup below decides the outcome.
    String linkText = doc.select("div[id=\"friend\"]").select("a").text();

    String match = UtilsMethod.findFirstStringByRegex("成员[0-9]+", linkText);
    if (match == null) {
      return null;
    }
    // Strip everything but the digits.
    return match.replaceAll("[\\D]+", "");
  }
  /**
   * Finds the header column whose text contains today's date written as
   * {@code <month>月<two-digit day>日} (month unpadded, day zero-padded).
   *
   * <p>The date is obtained by parsing {@code today} with {@code DB_DATETIME_FORMATTER4}. The
   * first {@code th} (index 0) is never considered.
   *
   * @param dates elements containing the {@code th} header cells
   * @return the index of the first matching header cell, or 0 when none matches
   * @throws ParseException if {@code today} cannot be parsed
   */
  private static int parseDates(Elements dates) throws ParseException {
    Date parsedToday = DB_DATETIME_FORMATTER4.parse(today);
    Calendar cal = Calendar.getInstance();
    cal.setTime(parsedToday);

    int month = cal.get(Calendar.MONTH) + 1; // Calendar months are zero-based
    int day = cal.get(Calendar.DAY_OF_MONTH);
    String needle = month + "月" + (day < 10 ? "0" : "") + day + "日";

    Elements headers = dates.select("th");
    int idx = 1;
    while (idx < headers.size()) {
      String cellText = DBclass.xmlFilte(headers.get(idx).text());
      if (cellText.contains(needle)) {
        return idx;
      }
      idx++;
    }
    return 0;
  }
 /**
  * Fetches the Investopedia IBM page and prints the text of the second {@code td} found in the
  * tables of the {@code MarketsSummary} element.
  *
  * @throws IOException if the page cannot be fetched
  */
 @Test
 public void getStockQuoteFromWebsite() throws IOException {
   Document page = Jsoup.connect("http://www.investopedia.com/markets/stocks/ibm").get();
   Elements cells =
       page.getElementById("MarketsSummary").getElementsByTag("table").select("td");
   System.out.println("---Start---");
   String quote = cells.get(1).text();
   System.out.println(quote);
 }
示例#10
0
  /**
   * Extracts label/instance pairs from the local test page and prints them.
   *
   * <p>Step 1 collects every element with {@code id="label"} and {@code id="instance"}. Step 2
   * pairs them through their shared {@code tag} attribute value ({@code 0..labels-1}); a label
   * whose text is "date" (case-insensitive) is paired with today's date instead of page values.
   * The remaining steps (ontology creation, slaves, merge) are not implemented here.
   *
   * @param args unused
   * @throws IOException if the page cannot be fetched
   */
  public static void main(String args[]) throws IOException {
    // Step 1: extract all labels and instances.
    Document doc = Jsoup.connect("http://127.0.0.1/master%20project/websites/home.php").get();
    Elements labelElements = doc.getElementsByAttributeValue("id", "label");
    Elements instanceElements = doc.getElementsByAttributeValue("id", "instance");

    // Step 2: pair each label with its instances via the shared "tag" attribute.
    HashMap<String, String[]> singleLinkClusterMap = new HashMap<String, String[]>();
    for (int i = 0; i < labelElements.size(); i++) {
      String tagSelector = "[tag=" + i + "]";
      String key = labelElements.select(tagSelector).text();

      String[] values;
      if (key.equalsIgnoreCase("date")) {
        // Special date case: use the current day rather than the page content.
        values = new String[] {new SimpleDateFormat("yyyy-MM-dd").format(new Date())};
      } else {
        Elements matches = instanceElements.select(tagSelector);
        values = new String[matches.size()];
        // Read by index; the former remove(0) shrank the list while the loop bound was its
        // size, so only the first half of the values was ever filled in.
        for (int j = 0; j < values.length; j++) {
          values[j] = matches.get(j).text();
        }
      }
      singleLinkClusterMap.put(key, values);
    }

    // Print array contents explicitly; concatenating the map would print array identities.
    StringBuilder printable = new StringBuilder("label:{");
    boolean first = true;
    for (String key : singleLinkClusterMap.keySet()) {
      if (!first) {
        printable.append(", ");
      }
      first = false;
      printable
          .append(key)
          .append('=')
          .append(java.util.Arrays.toString(singleLinkClusterMap.get(key)));
    }
    System.out.println(printable.append('}'));

    // Step 3: To create base Ontology

    // TEST: to fire the source page with a request query then extract the data from resulting
    // page...

    // Step 4: To create one(or more) slave to which the base ontology and interface website address
    // is sent to.
    // This(These) slaves will then repeat steps 1 and 2 then create their own Ontology O'

    // Step 5: The new ontology O' will then be sent back to the Master to merge with original O.
  }
示例#11
0
  /**
   * Gives every {@code td} inside {@code pcont} a fixed inline border style and drops its
   * {@code class} attribute.
   *
   * @param pcont elements to restyle in place
   * @return the same {@code pcont} instance
   */
  private Elements addStyleForTable(Elements pcont) {
    // Bulk setters apply to every matched cell.
    pcont.select("td").attr("style", "border: 1px solid #aaa;width:auto").removeAttr("class");
    return pcont;
  }
示例#12
0
 /**
  * Collects the story links of the page at {@code url} and appends them to
  * {@code News/target<node_id>.txt}, unless a batch with the same hour token and count was
  * already recorded there.
  *
  * <p>Each batch starts with a header line {@code <yyMMddHH>count = <count>} followed by one
  * link per line; a link equal to the one directly before it is skipped.
  *
  * @throws IOException if the page cannot be fetched (file I/O errors are reported on stdout)
  */
 public void parseWebLink() throws IOException {
   Document doc = Jsoup.connect(url).get();
   // The attribute selector was missing its closing bracket.
   Elements links = doc.select("div[id*=MediaStoryList]").select("a[href]");
   String targetPath = "News/target" + node_id + ".txt";
   try {
     // Header identifying this batch: hour-resolution timestamp plus the current count.
     String batchHeader = new SimpleDateFormat("yyMMddHH").format(new Date()) + "count = " + count;

     // Scan the existing file for the same header to decide whether to write.
     boolean alreadyWritten = false;
     File file = new File(targetPath);
     if (file.exists()) {
       try (BufferedReader br =
           new BufferedReader(new InputStreamReader(new FileInputStream(file)))) {
         String strLine;
         while ((strLine = br.readLine()) != null) {
           if (strLine.equals(batchHeader)) {
             alreadyWritten = true;
             System.out.println(
                 "The set of urls have already written in the file:target" + node_id + ".txt");
             break;
           }
         }
       }
     }

     if (!alreadyWritten) {
       // try-with-resources closes (and flushes) the writer even when a write fails.
       try (BufferedWriter writeToFile = new BufferedWriter(new FileWriter(targetPath, true))) {
         writeToFile.write(batchHeader);
         writeToFile.newLine();
         String previousUrl = null;
         for (int i = 0; i < links.size(); i++) {
           String levelTwoUrl = links.get(i).attr("href");
           // Skip a link that merely repeats its predecessor.
           if (i == 0 || !levelTwoUrl.equals(previousUrl)) {
             writeToFile.write(levelTwoUrl);
             writeToFile.newLine();
           }
           previousUrl = levelTwoUrl;
         }
       }
       System.out.println("The file : target" + node_id + ".txt is written!");
     }
   } catch (IOException e) {
     System.out.println("The IO Error msg is:" + e.getMessage());
   }
 }
示例#13
0
  /**
   * Looks up addresses by postal code (CEP).
   *
   * <p>Posts the query to {@code Utils.adressCorreios} and reads the rows of the third table in
   * the response: columns 0-4 are street, district, city, state and postal code. The result is
   * also kept in {@code listEnderecos}.
   *
   * @param cep the postal code to search for
   * @return one {@code Address} per table row
   * @throws IOException if the request fails
   */
  public List<Address> getByCep(String cep) throws IOException {
    listEnderecos = new ArrayList<Address>();

    // Parameters sent with the POST request.
    Map<String, String> query = new HashMap<String, String>();
    query.put("CEP", cep);
    query.put("Metodo", "listaLogradouro");
    query.put("TipoConsulta", "cep");
    query.put("StartRow", "1");
    query.put("EndRow", "10");

    Document doc =
        Jsoup.connect(Utils.adressCorreios)
            .data(query)
            .header("Origin", "http://www.buscacep.correios.com.br")
            .header("Referer", "http://www.buscacep.correios.com.br")
            .post();

    // Walk the rows of the third table and map each row's cells onto an Address.
    for (Element row : doc.select("table").eq(2).select("tr")) {
      Elements cells = row.children().select("td");

      Address address = new Address();
      address.setLogradouro(cells.eq(0).text());
      address.setBairro(cells.eq(1).text());
      address.setLocalidade(cells.eq(2).text() + "/" + cells.eq(3).text());
      address.setCEP(cells.eq(4).text());

      listEnderecos.add(address);
    }

    return listEnderecos;
  }
示例#14
0
  /**
   * Reads the product ids listed in the {@code data-products} attribute of
   * {@code #productList} and feeds each product's elements to {@code buildResultList}.
   *
   * <p>The ids are the comma-separated values following the first {@code [} of the attribute,
   * up to the next bracket. A missing attribute, a missing bracket or an empty list yields no
   * additions instead of an exception, and blank ids are skipped.
   *
   * @param elements the search result markup; may be {@code null}
   * @return {@code this.productinfoList}, including any entries added by this call
   */
  @Override
  public List<ParsedToken> collectSearchResult(Elements elements) {
    if (elements == null) {
      return this.productinfoList;
    }

    String products = elements.select("#productList").attr("data-products");
    int open = products.indexOf('[');
    if (open < 0) {
      // No id list present; the previous split-based parsing threw here.
      return this.productinfoList;
    }

    // The list ends at the next bracket of either kind (or at the end of the attribute).
    int close = open + 1;
    while (close < products.length()
        && products.charAt(close) != '['
        && products.charAt(close) != ']') {
      close++;
    }

    for (String id : products.substring(open + 1, close).split(",")) {
      String trimmed = id.trim();
      if (trimmed.isEmpty()) {
        continue; // "#" alone is not a valid id selector
      }
      buildResultList(elements.select("#".concat(trimmed)));
    }

    return this.productinfoList;
  }
示例#15
0
文件: TestJsoup.java 项目: idsoy/test
  /**
   * Downloads the xe.com USD-to-CNY converter page and prints the own text of the first
   * {@code .rightCol} element inside {@code ucc-result-table}, with spaces removed.
   *
   * @param args unused
   */
  public static void main(String[] args) {
    String html =
        HttpClientUtil.doGet(
            "http://www.xe.com/currencyconverter/convert/?Amount=1&From=USD&To=CNY");

    Elements resultTable = Jsoup.parse(html).getElementsByClass("ucc-result-table");
    System.out.println(resultTable.select(".rightCol").first().ownText().replace(" ", ""));
  }
示例#16
0
 /**
  * Determines from the page whether a user is logged in, i.e. whether a
  * {@code .zu-top-nav-userinfo} element exists, and records the outcome in {@code isLogin}.
  * On success {@code userName} is set from the nested {@code .name} text.
  *
  * @param doc the parsed page
  * @return {@code true} when logged in
  */
 public boolean checkLogin(Document doc) {
   Elements userInfo = doc.select(".zu-top-nav-userinfo");
   if (userInfo.isEmpty()) {
     log.info("未登录");
     isLogin = false;
     return false;
   }

   userName = userInfo.select(".name").text();
   log.info("登录成功!" + "登录用户为:" + userName);
   isLogin = true;
   return true;
 }
示例#17
0
 /**
  * Returns the text of the document's {@code pre} blocks with JXR line-number anchors,
  * comments and Javadoc comments removed.
  *
  * @return the source code text without that clutter
  * @throws IOException declared by the signature; not thrown by the visible code
  */
 public String leituraJxr() throws IOException {
   Elements preBlocks = document.getElementsByTag("pre");
   preBlocks.select("a.jxr_linenumber").remove();

   String[] commentClasses = {"jxr_comment", "jxr_javadoccomment"};
   for (int i = 0; i < preBlocks.size(); i++) {
     Element block = preBlocks.get(i);
     for (String commentClass : commentClasses) {
       block.getElementsByClass(commentClass).remove();
     }
   }
   return preBlocks.text();
 }
示例#18
0
  /**
   * Looks up postal codes (CEP) by street address.
   *
   * <p>Posts the query to {@code Utils.adressCorreios} with a 20 second timeout and collects
   * the fifth cell of every row of the third table in the response. The result is also kept in
   * {@code listAddress}.
   *
   * @param address the street text to search for
   * @return one postal code string per table row
   * @throws IOException if the request fails
   */
  public List<String> getByAdress(String address) throws IOException {
    listAddress = new ArrayList<String>();

    // Parameters sent with the POST request.
    Map<String, String> query = new HashMap<String, String>();
    query.put("relaxation", address);
    query.put("TipoCep", "ALL");
    query.put("semelhante", "N");
    query.put("cfm", "1");
    query.put("Metodo", "listaLogradouro");
    query.put("TipoConsulta", "relaxation");
    query.put("StartRow", "1");
    query.put("EndRow", "10");

    Document doc =
        Jsoup.connect(Utils.adressCorreios)
            .timeout(20000)
            .data(query)
            .header("Origin", "http://www.buscacep.correios.com.br")
            .header("Referer", "http://www.buscacep.correios.com.br")
            .post();

    // The postal code travels through an Address so its getter decides the final form.
    for (Element row : doc.select("table").eq(2).select("tr")) {
      Address entry = new Address();
      entry.setCEP(row.children().select("td").eq(4).text());
      listAddress.add(entry.getCEP());
    }

    return listAddress;
  }
示例#19
0
  /**
   * Downloads a product page and maps it onto a {@code Product}.
   *
   * <p>The name is the HTML of the second cell up to its first {@code <a style=} (minus the
   * "new" badge image), extended by the description text that follows "Kolor: ". Price,
   * description and image come from the remaining cells.
   *
   * @param url the product page; any CR-LF sequences are stripped before use
   * @return the populated product, or {@code null} when fetching or parsing fails
   * @throws IOException declared by the signature; failures are caught and reported as null
   */
  @Override
  Product parseProductUrl(String url) throws IOException {
    try {
      Product product = new Product();
      String cleanUrl = url.replaceAll("\r\n", "");
      // timeout(0) - presumably "no timeout"; confirm against the Jsoup version in use.
      Elements cells =
          Jsoup.connect(cleanUrl)
              .timeout(0)
              .get()
              .select("table[style=padding-left:10px;]")
              .select("td");

      String nameCellHtml = cells.get(1).html();
      String baseName =
          nameCellHtml
              .substring(0, nameCellHtml.indexOf("<a style="))
              .replace(
                  "&nbsp;<img style=\"border: 2px solid #fff; box-shadow: rgba(0, 0, 0, 0.6) 0px 2px 2px;\" src=\"./images/new.jpg \" />",
                  "");
      product.setName(baseName);
      product.setCena(cells.select("p").text().replace("z³", ""));
      product.setOpis(cells.select("td[style=width:800px]").text().replace("Opis: ", ""));

      // Append the colour: everything after "Kolor: " except the last character.
      String currentName = product.getName();
      String opis = product.getOpis();
      String colour = opis.substring(opis.indexOf("Kolor: ") + 7, opis.length() - 1);
      product.setName(currentName + " " + colour);

      product.setImg(cells.select("td").select("a").select("img[width=450px]").attr("abs:src"));
      return product;
    } catch (Exception e) {
      e.printStackTrace();
      return null;
    }
  }
  /**
   * Rewrites the {@code cfu} rule of a simservs document: replaces its conditions with an
   * active or deactivated variant and sets the forward target.
   *
   * @param isActivate {@code true} for empty conditions, {@code false} to insert
   *     {@code <ss:rule-deactivated/>}
   * @param target new content of {@code ss:forward-to > ss:target}
   * @param ectXml the document to modify
   * @return an XML declaration followed by the serialized {@code ss:simservs} element
   */
  public static String updateAFGXml(boolean isActivate, String target, String ectXml) {
    String conditions;
    if (isActivate) {
      conditions = "<cp:conditions/>";
    } else {
      conditions = "<cp:conditions><ss:rule-deactivated/></cp:conditions>";
    }

    Document doc = Jsoup.parse(ectXml, "UTF-8");
    Elements cfuRule = doc.select("cp|rule[id=cfu] ");

    // The existing conditions element is removed and a fresh one prepended, because it
    // cannot be changed to "<cp:conditions/>" directly.
    cfuRule.select("cp|conditions").remove();
    cfuRule.prepend(conditions);

    cfuRule.select("ss|forward-to>ss|target").html(target);

    String xml =
        "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
            + doc.getElementsByTag("ss:simservs").outerHtml();

    // Work around Jsoup output: restore the element name's case, then drop the whitespace
    // Jsoup puts around text content.
    return xml.replaceAll("noreplytimer", "NoReplyTimer").replaceAll(">\\s+(.+)\\s+<", ">$1<");
  }
示例#21
0
 /**
  * Logs in to iptorrents.com, downloads the listing page {@code /t?5}, converts its torrent
  * rows into {@code TvShowEpisode} beans and stores them through {@code DBActions}.
  *
  * <p>Three requests are made in sequence: a GET of the home page (its headers are reused for
  * the login), the login POST (its {@code set-cookie} headers are reused for the listing),
  * and the GET of the listing itself. The client and every response are closed even when a
  * step fails. Any failure is printed and otherwise ignored.
  */
 public void getIPTShows() {
   String pageURL = "https://www.iptorrents.com";
   String listingURL = "https://www.iptorrents.com/t?5";
   try (CloseableHttpClient httpClient = HttpClientBuilder.create().build()) {
     HttpGet httpGet = new HttpGet(pageURL);
     httpGet.addHeader(
         "User-Agent",
         "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36");

     // SECURITY(review): the account name and password are hard-coded and sent in the query
     // string. Left unchanged to preserve behavior - move them to configuration.
     HttpPost thePost = new HttpPost(pageURL + "?username=mcpchelper81&password=ru68ce48&php=");
     try (CloseableHttpResponse landing = httpClient.execute(httpGet)) {
       landing.removeHeaders("Transfer-Encoding");
       thePost.setHeaders(landing.getAllHeaders());
     }

     httpGet = new HttpGet(listingURL);
     try (CloseableHttpResponse login = httpClient.execute(thePost)) {
       // Carry the session cookies over to the listing request.
       httpGet.setHeaders(login.getHeaders("set-cookie"));
     }
     httpGet.addHeader(
         "accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
     httpGet.addHeader("accept-encoding", "gzip, deflate, sdch");
     httpGet.addHeader("accept-language", "en-US,en;q=0.8");
     httpGet.addHeader("dnt", "1");
     httpGet.addHeader("upgrade-insecure-requests", "1");
     httpGet.addHeader(
         "user-agent",
         "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36");

     Elements results;
     try (CloseableHttpResponse listing = httpClient.execute(httpGet)) {
       // Use the charset announced in Content-Type, defaulting to UTF-8 when the header or
       // its charset parameter is absent.
       String charset = "UTF-8";
       Header contentType = listing.getFirstHeader("Content-Type");
       if (contentType != null) {
         String[] contentArray = contentType.getValue().split(";");
         if (contentArray.length > 1 && contentArray[1].contains("=")) {
           charset = contentArray[1].trim().split("=")[1];
         }
       }
       Document pageDoc =
           Jsoup.parse(listing.getEntity().getContent(), charset, httpGet.getURI().getPath());
       results = pageDoc.getElementsByClass("torrents");
     }

     Elements rawShowObjects = results.select("tr");
     IPTToTvShowEpisode makeShows = new IPTToTvShowEpisode();
     List<TvShowEpisode> theShows = makeShows.makeTSEBeans(rawShowObjects);
     DBActions.insertIPTTvEpisodes(theShows, listingURL);
   } catch (Exception e) {
     e.printStackTrace();
   }
 }
  /**
   * Second login step: builds the login POST from the page held in {@code context} and queues
   * it on {@code spider}.
   *
   * <p>Does nothing when the context has no content. When the page text contains a
   * {@code strEnc(username, ...)} call, its arguments are used to transform the phone number
   * and password through the {@code strEnc} function of {@code des/tel_com_des.js}; otherwise
   * both are sent as they are. The response is handed to {@code parseLoginStep3}.
   *
   * @param context the response of the previous step
   */
  protected void parseLoginStep2(SimpleObject context) {
    String text = ContextUtil.getContent(context);
    if (text == null) {
      return;
    }
    // Defaults used when the page carries no strEnc(...) call.
    String phone1 = phoneNo;
    String password1 = password;

    // Presumably the text between "strEnc(username," and ");" - verify StringUtil.subStr.
    String n = StringUtil.subStr("strEnc(username,", ");", text).trim();
    if (!StringUtils.isBlank(n)) {
      // Strip the quotes and split into the individual arguments.
      String[] stra = n.trim().replaceAll("\'", "").split(",");
      // pwd, digit, f, s
      phone1 = executeJsFunc("des/tel_com_des.js", "strEnc", phoneNo, stra[0], stra[1], stra[2]);
      password1 =
          executeJsFunc("des/tel_com_des.js", "strEnc", password, stra[0], stra[1], stra[2]);
    }
    Document doc = ContextUtil.getDocumentOfContent(context);

    // The login form supplies both the target URL and the "lt" token.
    Elements form = doc.select("form#c2000004");

    Request req = new Request(fixedFullUrl(form.attr("action")));
    req.setMethod("POST");
    req.initNameValuePairs(12);

    req.addNameValuePairs("lt", form.select("input[name=lt]").attr("value"));
    req.addNameValuePairs("_eventId", "submit");
    req.addNameValuePairs("forbidpass", "null");

    req.addNameValuePairs("areaname", areaName);
    req.addNameValuePairs("password", password1);
    req.addNameValuePairs("authtype", "c2000004");

    req.addNameValuePairs("customFileld01", customField1);

    req.addNameValuePairs("customFileld02", customField2);
    req.addNameValuePairs("forbidaccounts", "null");
    req.addNameValuePairs("open_no", "c2000004");
    req.addNameValuePairs("username", phone1);
    // An absent auth code is sent as an empty string.
    req.addNameValuePairs("randomId", authCode == null ? "" : authCode);
    req.setCharset(UAM_CHAR_SET);
    // Continue with step 3 once the response arrives.
    req.addObjservers(
        new AbstractProcessorObserver(util, WaringConstaint.ZGDX_3) {
          @Override
          public void afterRequest(SimpleObject context) {
            parseLoginStep3(context);
          }
        });
    spider.addRequest(req);
  }
  /**
   * Fourth login step: decides from the response whether the login succeeded.
   *
   * <p>A page that still shows the credential form ({@code form#c2000004}) or the login form
   * ({@code form#login_form}) is a failure with an error message. Otherwise, when the page
   * contains a {@code location.replace('...')} script, that URL is fetched and the SSO login
   * follows; without such a script the login only counts as successful when the page title is
   * "IBM HTTP Server".
   *
   * @param context the response of the previous step
   */
  private void parseLoginStep4(SimpleObject context) {
    Document page = ContextUtil.getDocumentOfContent(context);

    Elements credentialForm = page.select("form#c2000004");
    if (!credentialForm.isEmpty()) {
      data.put("errMsg", credentialForm.select("td#status2").text());
      setStatus(STAT_STOPPED_FAIL);
      notifyStatus();
      return;
    }

    if (!page.select("form#login_form").isEmpty()) {
      data.put("errMsg", "登录失败,请重试!");
      setStatus(STAT_STOPPED_FAIL);
      notifyStatus();
      return;
    }

    String redirectUrl =
        StringUtil.subStr(
            "<script type='text/javascript'>location.replace('",
            "');</script>",
            ContextUtil.getContent(context));

    if (!StringUtils.isBlank(redirectUrl.trim())) {
      // Follow the script redirect, then continue with the SSO login.
      getUrl(
          redirectUrl,
          null,
          new Object[] {UAM_CHAR_SET},
          new AbstractProcessorObserver(util, WaringConstaint.ZGDX_5) {
            @Override
            public void afterRequest(SimpleObject context) {
              setStatus(STAT_LOGIN_SUC);
              ssoLogin(context);
            }
          });
      return;
    }

    boolean serverLandingPage = "IBM HTTP Server".equalsIgnoreCase(page.select("title").text());
    if (!serverLandingPage) {
      data.put("fail", true);
      setStatus(STAT_STOPPED_FAIL);
      notifyStatus();
      logger.error("Login Fail.....");
      return;
    }
    setStatus(STAT_LOGIN_SUC);
    ssoLogin(context);
  }
  /**
   * Wires the URL field, the web view's load listener and the entry button.
   *
   * <p>After a page finishes loading, the URL field shows its location. For Rakuten item pages
   * the entry button is enabled unless the page carries an {@code #etime} input whose value
   * lies before the current time in milliseconds.
   *
   * @param location unused
   * @param resources unused
   */
  @Override
  public void initialize(URL location, ResourceBundle resources) {
    urlField.setOnAction(
        event -> {
          String requested = urlField.getText();
          urlField.setText("tetetetetetetete");
          webView.getEngine().load(requested);
        });

    webView
        .getEngine()
        .getLoadWorker()
        .stateProperty()
        .addListener(
            (ov, oldState, newState) -> {
              if (newState != State.SUCCEEDED) {
                return;
              }
              String loadedUrl = webView.getEngine().getLocation();
              urlField.setText(loadedUrl);
              if (!Pattern.compile("http://item.rakuten.co.jp/.*").matcher(loadedUrl).find()) {
                return;
              }
              try {
                Elements endTime = Jsoup.connect(loadedUrl).get().select("input").select("#etime");
                // No end time at all, or one that has not passed yet, allows an entry.
                boolean stillOpen =
                    endTime.size() == 0
                        || Long.parseLong(endTime.first().val()) >= new Date().getTime();
                if (stillOpen) {
                  entryButton.setDisable(false);
                }
              } catch (Exception e) {
                e.printStackTrace();
              }
            });

    entryButton.setOnAction(
        event -> {
          urlField.setText("webView disable");
          sendEntryTaskController();
        });
  }
 /**
  * Connects Jsoup to a search/gallery page and collects the link of each painting.
  *
  * <p>The links are the {@code href} values of the anchors inside elements with class
  * {@code grid-results-thumbnail}, in document order.
  *
  * @param galleryURL URL of the page to read
  * @return the collected links; empty when the page cannot be fetched
  */
 public ArrayList<String> connector(String galleryURL) {
   ArrayList<String> linkList = new ArrayList<String>();

   Document doc;
   try {
     doc = Jsoup.connect(galleryURL).get();
   } catch (IOException e) {
     e.printStackTrace();
     // Nothing to parse; previously the null document caused an NPE right below.
     return linkList;
   }

   for (Element link : doc.getElementsByClass("grid-results-thumbnail").select("a[href]")) {
     linkList.add(link.attr("href"));
   }
   return linkList;
 }
    /**
     * Fills {@code dailyNews} with the question title(s) and discussion URL(s) found in the
     * page.
     *
     * <p>Each {@code view-more} element must contain a link whose text is "查看知乎讨论". With
     * one such element the single-question setters are used; with several, the multi flag is
     * set and titles/URLs are added pairwise. An empty question title falls back to
     * {@code dailyTitle}.
     *
     * @param doc the parsed story page
     * @param dailyTitle fallback for empty question titles
     * @param dailyNews the object to populate; may be partially filled when false is returned
     * @return {@code false} when there is no {@code view-more} element or a link text differs
     * @throws JSONException declared by the signature
     */
    private boolean updateDailyNews(Document doc, String dailyTitle, DailyNews dailyNews)
        throws JSONException {
      final String discussionLabel = "查看知乎讨论";
      Elements viewMore = doc.getElementsByClass("view-more");
      int questionCount = viewMore.size();

      if (questionCount == 0) {
        return false;
      }

      if (questionCount == 1) {
        dailyNews.setMulti(false);

        Elements discussionLink = viewMore.select("a");
        if (!discussionLabel.equals(discussionLink.text())) {
          return false;
        }
        dailyNews.setQuestionUrl(discussionLink.attr("href"));

        // An empty question title means it is the same as the daily title.
        String questionTitle = doc.getElementsByClass("question-title").text();
        dailyNews.setQuestionTitle(questionTitle.isEmpty() ? dailyTitle : questionTitle);
        return true;
      }

      dailyNews.setMulti(true);
      Elements questionTitles = doc.getElementsByClass("question-title");
      for (int idx = 0; idx < questionCount; idx++) {
        String questionTitle = questionTitles.get(idx).text();
        dailyNews.addQuestionTitle(questionTitle.isEmpty() ? dailyTitle : questionTitle);

        Elements discussionLink = viewMore.get(idx).select("a");
        if (!discussionLabel.equals(discussionLink.text())) {
          return false;
        }
        dailyNews.addQuestionUrl(discussionLink.attr("href"));
      }
      return true;
    }
示例#27
0
  /**
   * Applies a chain of Jsoup selectors, each to the result of the previous one, and returns
   * the text of the single element that remains.
   *
   * <p>The selector list is only read; it is no longer consumed, so the same list can be
   * reused for further calls.
   *
   * @param elsPar element the first selector is applied to
   * @param jsoupSelectors selectors in application order; must contain at least one
   * @return a {@code Result} carrying the matched element's text
   * @throws IndexOutOfBoundsException if {@code jsoupSelectors} is empty
   */
  public static Result textSelList(Element elsPar, ArrayList<String> jsoupSelectors) {
    Result res = new Result();

    Elements els = elsPar.select(jsoupSelectors.get(0));
    for (int i = 1; i < jsoupSelectors.size(); i++) {
      els = els.select(jsoupSelectors.get(i));
    }

    if (els.size() != 1) {
      log.error("jsoup selector on elements does not match 1");
      // NOTE(review): terminating the JVM here is drastic; kept because callers may rely on
      // it. The return only matters if exit is prevented.
      System.exit(1);
      return res.setContinua(false);
    }
    return res.setRetStr(els.get(0).text());
  }
示例#28
0
 /**
  * Parses {@code inputContent} as HTML and applies the CSS selectors in {@code args} one after
  * another, each narrowing the previous result.
  *
  * @param inputContent HTML to search
  * @param args CSS selectors, checked first by {@code validateCSSSelectorRuleArgs}
  * @return the trimmed inner HTML of the final selection, or {@code null} when nothing matched
  *     or no selector was given
  * @throws Exception if validation or selection fails
  */
 @Override
 public String fire(String inputContent, String[] args) throws Exception {
   validateCSSSelectorRuleArgs(args);
   Document document = Jsoup.parse(inputContent);

   // The first selector runs against the document, every later one against the selection.
   Elements selection = null;
   for (String selector : args) {
     selection = (selection == null) ? document.select(selector) : selection.select(selector);
   }

   if (selection == null || selection.size() == 0) {
     return null;
   }
   return selection.html().trim();
 }
  /**
   * Downloads a news article and extracts its title and body.
   *
   * <p>The title is the page title up to its first "|". The body is the text of the paragraphs
   * inside {@code div[class=content]} after links, images, scripts and form controls have been
   * removed from them.
   *
   * @param newsURL address of the article
   * @return the parsed article, or {@code null} when the page cannot be fetched
   */
  @Override
  public NewsEntity parseNewsPerCategory(String newsURL) {
    NewsEntity parsed = null;

    try {
      Document page = Jsoup.connect(newsURL).timeout(Constants.MAX_DELAY_TIME * 1000).get();
      page.outputSettings().charset(Charset.forName("UTF-8"));
      page.normalise();

      String title = page.select("title").text();
      int separator = title.indexOf("|");
      if (separator >= 0) {
        title = title.substring(0, separator).trim();
      }

      Elements paragraphs = page.select("div[class=content]").select("p");
      paragraphs.select("a, img, script, xml, input, label, textarea").remove();

      try {
        parsed = new NewsEntity();
        parsed.setNewsTitle(title);
        parsed.setNewsBody(paragraphs.text());
        parsed.setNewsURL(newsURL);
      } catch (Exception e) {
        // A failing setter leaves a partially filled entity behind.
        e.printStackTrace();
      }
    } catch (IOException e) {
      e.printStackTrace();
    }

    return parsed;
  }
  /**
   * Renders the search form input template for a many-to-one property and checks that the
   * generated {@code select} carries the expected id and {@code ng-model}.
   *
   * @throws Exception if template processing fails
   */
  @Test
  public void testGenerateManyToOneProperty() throws Exception {
    Map<String, Object> model =
        TestHelpers.createInspectionResultWrapper(ENTITY_NAME, MANY_TO_ONE_PROP);

    URL templateUrl =
        getClass().getResource(Deployments.BASE_PACKAGE_PATH + Deployments.SEARCH_FORM_INPUT);
    Resource<URL> templateResource = resourceFactory.create(templateUrl);
    TemplateProcessor processor =
        processorFactory.fromTemplate(new FreemarkerTemplate(templateResource));

    String rendered = processor.process(model);
    Document html = Jsoup.parseBodyFragment(rendered);
    assertThat(rendered.trim(), not(equalTo("")));

    Elements formGroup = html.select("div.form-group");
    assertThat(formGroup, notNullValue());

    Elements selectInput = formGroup.select("div.col-sm-10 > select");
    assertThat(selectInput.attr("id"), equalTo("customer"));
    assertThat(selectInput.attr("ng-model"), equalTo("search.customer"));
  }