/**
 * Returns the stop sequence stored under {@code key}, building and caching it on first use.
 *
 * <p>A sequence already present in {@code stopSequences} is returned as is. Otherwise the request
 * URL is assembled from the route short name and the direction, the response document is parsed,
 * and one {@code Stop} is pushed per {@code poi} element found under the first {@code data}
 * element. A stop whose code is not contained in the {@code stop_code} entry of
 * {@code StopSiteCrawler.stopsFile} is written out before being pushed. The resulting stack
 * (possibly empty) is cached under {@code key}.
 *
 * @param key cache key of the requested sequence
 * @return the cached or freshly built stack of stops
 * @throws IOException propagated from the helpers this method calls
 */
private Stack<Stop> fetchStopSequence(String key) throws IOException {
   if (stopSequences.containsKey(key)) {
     return stopSequences.get(key);
   }

   url =
       DBC.URL
           + '/'
           + DBC.GOOGLE_MAPS_EXTENSION
           + DBC.STOP_SEQUENCE_EXTENSION
           + routeShortName()
           + "&direction="
           + direction.toChar();
   Elements dataElements = makeTmpJSoupDoc(url).getElementsByTag("data");

   Stack<Stop> sequence = new Stack<Stop>();
   if (!dataElements.isEmpty()) {
     for (Element poi : dataElements.first().getElementsByTag("poi")) {
       String street = poi.getElementsByTag("address").get(0).html();
       String locality = poi.getElementsByTag("location").get(0).html();
       double lat = Double.parseDouble(poi.getElementsByTag("lat").get(0).html());
       double lon = Double.parseDouble(poi.getElementsByTag("lng").get(0).html());
       int code = Integer.parseInt(poi.getElementsByTag("stopnumber").get(0).html());

       Stop stop = new Stop(code, lat, lon, street + ", " + locality, agency);
       boolean alreadyListed = StopSiteCrawler.stopsFile.get("stop_code").contains(code);
       if (!alreadyListed) {
         stop.write();
       }
       sequence.push(stop);
     }
   }

   stopSequences.put(key, sequence);
   return sequence;
  }
Beispiel #2
0
 /**
  * Collects the user names found in the subscription tables of an HTML page.
  *
  * <p>Every {@code table} accepted by {@code isSubscriptTable} is scanned row by row; the text of
  * the first {@code td} of a row is kept when it is longer than one character.
  *
  * @param htmlContent HTML markup to scan
  * @return the collected names, in document order
  */
 public Collection<String> extractSubscribedUser(final String htmlContent) {
   final List<String> users = new ArrayList<String>();
   for (final Element table : Jsoup.parse(htmlContent).getElementsByTag("table")) {
     if (!isSubscriptTable(table)) {
       continue;
     }
     for (final Element row : table.getElementsByTag("tr")) {
       final Elements cells = row.getElementsByTag("td");
       if (cells.isEmpty()) {
         continue; // header rows carry no td cells
       }
       final String name = cells.get(0).text();
       if (name == null || name.length() <= 1) {
         continue;
       }
       logger.debug("found subscription for user: '******'");
       users.add(name);
     }
   }
   logger.debug("found " + users.size() + " subscribed users in htmlcontent");
   return users;
 }
    /**
     * Downloads the mess menu page and appends the item texts to the {@code breakfast},
     * {@code lunch} and {@code dinner} lists.
     *
     * @param mess only the first entry is read: {@code "dh1"} selects the first {@code tbody} of
     *     the page, any other value the second
     * @return {@code true} when the page was fetched and parsed; {@code false} when the download
     *     failed or an expected index was out of range
     */
    @Override
    protected Boolean doInBackground(String... mess) {
      try {
        Document page = Jsoup.connect("http://messmenu.snu.in/messMenu.php").get();

        int menuIndex = mess[0].equals("dh1") ? 0 : 1;
        Elements cells = page.getElementsByTag("tbody").get(menuIndex).getElementsByTag("td");

        // Resolve all three cells before touching the lists, so a short table adds nothing.
        Elements breakfastItems = cells.get(1).children();
        Elements lunchItems = cells.get(2).children();
        Elements dinnerItems = cells.get(3).children();

        for (Element item : breakfastItems) {
          breakfast.add(item.text());
        }
        for (Element item : lunchItems) {
          lunch.add(item.text());
        }
        for (Element item : dinnerItems) {
          dinner.add(item.text());
        }
        return true;
      } catch (IOException | IndexOutOfBoundsException e) {
        e.printStackTrace();
        return false;
      }
    }
Beispiel #4
0
 /**
  * Converts one result row into a {@code Name} and appends it to {@code results}.
  *
  * <p>The thumbnail comes from the {@code img} inside the {@code primary_photo} cell; URL and name
  * come from the first link of the {@code result_text} cell. An optional {@code small} element
  * supplies the job and/or a reference: surrounding parentheses are stripped, text before the
  * first comma is the job and text after it the reference title. Without a comma, a description
  * ending in a parenthesised number is treated as a reference, anything else as the job.
  *
  * @param query the search query (not used here)
  * @param options search options (not used here)
  * @param tr the table row to parse
  * @param results list receiving the parsed entry
  */
 @Override
 protected void parseRow(
     final String query, final int options, final Element tr, final List<Name> results) {
   final Element photoCell = tr.getElementsByAttributeValue("class", "primary_photo").first();
   final String thumbnailUrl = photoCell.getElementsByTag("img").first().attr("src");

   final Element textCell = tr.getElementsByAttributeValue("class", "result_text").first();
   final Element nameLink = textCell.getElementsByTag("a").first();
   final String url = Imdb.BASE_URL + nameLink.attr("href");
   final String name = nameLink.ownText();

   String job = "";
   Reference ref = null;
   final Element small = textCell.getElementsByTag("small").first();
   if (small != null) {
     final String refUrl = Imdb.BASE_URL + small.getElementsByTag("a").first().attr("href");
     String desc = small.text();
     if (desc.startsWith("(") && desc.endsWith(")")) {
       desc = desc.substring(1, desc.length() - 1);
     }
     final int comma = desc.indexOf(',');
     if (comma >= 0) {
       job = desc.substring(0, comma).trim();
       ref = new Reference(refUrl, desc.substring(comma + 1).trim());
     } else if (desc.matches(".+\\(\\d+\\)")) {
       // No comma: the whole description is the reference title.
       ref = new Reference(refUrl, desc.trim());
     } else {
       job = desc;
     }
   }
   results.add(new Name(url, thumbnailUrl, name, job, ref));
 }
  /**
   * Creates a CAS for a single comment element and runs the analysis engines over it.
   *
   * <p>The document text is the normalized aggregation of the first {@code CSubject} and first
   * {@code CBody} child texts; the document language is set to {@code "en"}.
   *
   * @param comment the comment element; must contain at least one {@code CSubject} and one
   *     {@code CBody} element
   * @return the populated and analysed CAS
   * @throws UIMAException if the CAS cannot be created or the pipeline fails
   */
  private JCas computeCommentCas(Element comment) throws UIMAException {
    JCas cCas = JCasFactory.createJCas();
    String csubject = comment.getElementsByTag("CSubject").get(0).text();
    String cbody = comment.getElementsByTag("CBody").get(0).text();

    // Set up the comment CAS.
    cCas.reset();
    cCas.setDocumentLanguage("en");
    String commentText =
        TextNormalizer.normalize(SubjectBodyAggregator.getCommentText(csubject, cbody));
    cCas.setDocumentText(commentText);

    // Run the UIMA pipeline.
    SimplePipeline.runPipeline(cCas, this.analysisEngineList);

    return cCas;
  }
    /**
     * Scrapes the audiobook author index and returns one map per author.
     *
     * <p>Each map holds the keys {@code "author"} (list item text) and {@code "url"} (absolute
     * {@code href} of the node left after unwrapping the list item). Any exception is printed and
     * the authors collected so far are returned.
     *
     * <p>NOTE(review): the letter loop currently covers only {@code 'a'}; confirm whether the
     * whole alphabet was intended.
     *
     * @param params unused
     * @return the collected author entries, possibly empty
     */
    @Override
    protected ArrayList<HashMap<String, String>> doInBackground(Void... params) {
      ArrayList<HashMap<String, String>> authors = new ArrayList<HashMap<String, String>>();
      try {
        for (char letter = 'a'; letter <= 'a'; letter++) {
          URL url = new URL("http://www.liberliber.it/audiolibri/" + letter + "/index.htm");

          // Walk down the fixed page layout to the author list.
          Element authorList =
              Jsoup.parse(url, 5000)
                  .getElementById("riga02_colonna02")
                  .getElementsByClass("contenuto_cornice")
                  .first()
                  .getElementsByTag("tbody")
                  .first()
                  .getElementsByTag("tr")
                  .get(1)
                  .getElementsByTag("td")
                  .get(1)
                  .getElementsByTag("ul")
                  .first();

          for (Element item : authorList.getElementsByTag("li")) {
            Element first = item.getAllElements().first();

            HashMap<String, String> entry = new HashMap<String, String>();
            // Read the text before unwrap() detaches the element from the document.
            entry.put("author", first.text());
            entry.put("url", first.unwrap().absUrl("href"));
            authors.add(entry);
          }
        }
      } catch (Exception e) {
        e.printStackTrace();
      }

      return authors;
    }
Beispiel #7
0
 /**
  * Fetches the home page on a worker thread and returns the entries of its
  * {@code home_portada_bg} blocks.
  *
  * <p>The calling thread blocks until the worker finishes. Blocks without a link or an image are
  * skipped instead of terminating the worker with a {@code NullPointerException}. A download
  * failure is printed and yields the entries collected so far.
  *
  * @return the parsed entries, or {@code null} if the calling thread was interrupted while
  *     waiting (the interrupt status is restored in that case)
  */
 public static ArrayList<EntryModel> getPopularContent() {
   final ArrayList<EntryModel> result = new ArrayList<>();
   Thread thread =
       new Thread(
           () -> {
             try {
               Document document = Jsoup.connect("http://jkanime.net/").get();
               for (Element element : document.getElementsByClass("home_portada_bg")) {
                 Element link = element.getElementsByTag("a").first();
                 Element image = element.getElementsByTag("img").first();
                 if (link == null || image == null) {
                   continue; // incomplete block: nothing usable to report
                 }
                 result.add(
                     new EntryModel(
                         Constants.TYPE_SHOW,
                         link.text(),
                         link.attr("abs:href"),
                         image.attr("src")));
               }
             } catch (IOException e) {
               e.printStackTrace();
             }
           });
   thread.start();
   try {
     // join() makes the worker's writes to result visible to this thread.
     thread.join();
     return result;
   } catch (InterruptedException e) {
     // Preserve the interrupt for callers further up the stack.
     Thread.currentThread().interrupt();
     e.printStackTrace();
     return null;
   }
 }
  /**
   * Requests the inbound timetable for the bus of {@code arrival}, starting at its stop and the
   * current date and time, and returns one {@code Arrival} per listed stop.
   *
   * <p>The response is parsed as HTML: every {@code li} of the first {@code busroutelist} element
   * yields a time (first five characters of the {@code routelist-time} text) and a destination
   * whose code is cut out of the link target and whose name is the link cell's text.
   *
   * @param arrival supplies the bus (operator, route) and the stop code for the request
   * @return the arrivals along the route, in page order
   * @throws Exception if the request fails, the status is not 200 (as {@code IOException}), or
   *     the page does not have the expected structure
   */
  public List<Arrival> busTimetable(final Arrival arrival) throws Exception {
    final Calendar now = Calendar.getInstance(Locale.UK);
    final String timetablePath =
        String.format(
            "v3/uk/bus/route/%s/%s/inbound/%s/%s/%s/timetable",
            arrival.bus.operator,
            arrival.bus.route,
            arrival.stop.atcocode,
            dateFormat.format(now.getTime()),
            timeFormat.format(now.getTime()));

    final Uri url =
        Uri.parse("http://transportapi.com")
            .buildUpon()
            .path(timetablePath)
            .appendQueryParameter("api_key", apiKey)
            .appendQueryParameter("app_id", appId)
            .appendQueryParameter("group", "no")
            .build();
    Log.d("JSON API", String.format("Requesting %s", url));

    final HttpResponse response = http.execute(new HttpGet(url.toString()));
    final StatusLine status = response.getStatusLine();
    if (status.getStatusCode() != HttpStatus.SC_OK) {
      // Close the body stream before reporting the failure.
      response.getEntity().getContent().close();
      throw new IOException(status.getReasonPhrase());
    }

    final Document doc = Jsoup.parse(EntityUtils.toString(response.getEntity()), url.toString());
    final Element stopList = doc.getElementsByClass("busroutelist").first();

    final String stopPathPrefix = "/v3/uk/bus/stop/";
    ArrayList<Arrival> arrivals = new ArrayList<Arrival>();
    for (Element item : stopList.getElementsByTag("li")) {
      Element timeCell = item.getElementsByClass("routelist-time").first();
      Time stopTime = parseSimpleTime(timeCell.text().substring(0, 5));

      Element destinationCell = item.getElementsByClass("routelist-destination").first();
      String stopCode = destinationCell.getElementsByTag("a").first().attr("href");
      if (stopCode.startsWith(stopPathPrefix)) {
        stopCode = stopCode.substring(stopPathPrefix.length());
      }
      // Keep only the first path segment of what remains.
      int slash = stopCode.indexOf('/');
      if (slash > 0) {
        stopCode = stopCode.substring(0, slash);
      }

      String stopName = destinationCell.text();
      arrivals.add(new Arrival(arrival.bus, new Stop(stopCode, stopName), stopTime));
    }
    return arrivals;
  }
Beispiel #9
0
  /**
   * Downloads the hospital table and converts each data row into a {@code MersHPVO}.
   *
   * <p>A row with exactly five {@code td} cells has one extra leading cell, which is ignored; the
   * following four cells are stored as gugun, name, duration and num (raw inner HTML). Rows with
   * too few cells (for example header rows) are skipped rather than aborting the whole parse.
   *
   * @return the parsed rows; on a download or parse error the message is printed and the rows
   *     collected so far are returned
   */
  public List<MersHPVO> mersHPData() {
    List<MersHPVO> list = new ArrayList<MersHPVO>();
    try {
      Document doc = Jsoup.connect("http://www.cdc.go.kr/CDC/cms/content/16/63316_view.html").get();
      for (Element tr : doc.select("table tbody tr")) {
        Elements cells = tr.getElementsByTag("td");
        // Five-cell rows start with a cell that is not part of the record.
        int offset = cells.size() == 5 ? 1 : 0;
        if (cells.size() < offset + 4) {
          continue; // not a data row
        }
        MersHPVO vo = new MersHPVO();
        vo.setGugun(cells.get(offset).html());
        vo.setName(cells.get(offset + 1).html());
        vo.setDuration(cells.get(offset + 2).html());
        vo.setNum(cells.get(offset + 3).html());
        list.add(vo);
      }
    } catch (Exception ex) {
      System.out.println(ex.getMessage());
    }
    return list;
  }
Beispiel #10
0
 /**
  * Tells whether a table is a subscription table.
  *
  * @param table the table element to inspect
  * @return {@code true} when the first {@code th} of the table's first row contains the text
  *     "Teilnehmer"; {@code false} when there is no row, no header cell, or no match
  */
 private boolean isSubscriptTable(final Element table) {
   final Elements rows = table.getElementsByTag("tr");
   if (rows == null || rows.isEmpty()) {
     return false;
   }
   final Elements headerCells = rows.get(0).getElementsByTag("th");
   if (headerCells == null || headerCells.isEmpty()) {
     return false;
   }
   final String caption = headerCells.get(0).text();
   return caption != null && caption.contains("Teilnehmer");
 }
  /**
   * Builds the text of one column: the cell's own text, followed by the own text of its first
   * {@code a} descendant and of its first {@code span} descendant when present.
   *
   * @param rowElements the cells of a row
   * @param i index of the cell to read
   * @return the concatenated text, without separators
   */
  private String getColumnTextContent(Elements rowElements, int i) {
    Element cell = rowElements.get(i);
    StringBuilder text = new StringBuilder(cell.ownText());

    Element anchor = cell.getElementsByTag("a").first();
    if (anchor != null) {
      text.append(anchor.ownText());
    }

    Element span = cell.getElementsByTag("span").first();
    if (span != null) {
      text.append(span.ownText());
    }

    return text.toString();
  }
Beispiel #12
0
  /**
   * Parses a study-type page into a {@code StudyType} with its programmes and specializations.
   *
   * <p>The name is read from the first table (fourth row, second cell). The second table lists
   * programmes and the third table specializations; in both, reading stops at the first row whose
   * first cell has a {@code width} attribute. For each programme a follow-up task is started
   * through {@code runTask} that fills in the programme's fields.
   *
   * @param doc the parsed study-type page
   * @return the populated study type
   * @throws Exception if the page does not have the expected layout or a follow-up task fails
   */
  private VSEStructureElement.StudyType handleProgramData(Document doc) throws Exception {
    final VSEStructureElement.StudyType studyType = new VSEStructureElement.StudyType();
    final Elements tables = doc.body().select("table");
    studyType.name = tables.get(0).select("tbody tr").get(3).select("td").get(1).text();

    for (Element programmeRow : tables.get(1).select("tbody tr")) {
      final Elements cells = programmeRow.getElementsByTag("td");
      if (cells.get(0).hasAttr("width")) {
        break;
      }

      final String href = cells.get(5).getElementsByTag("a").get(0).attr("href");
      final Map<String, String> args = HttpRequestBuilder.getGetArguments(href, '=', ';');
      final String label = cells.get(1).text();

      final VSEStructureElement.Programme programme = new VSEStructureElement.Programme();
      // The label is "<code parts joined by '-'> <name>".
      final int firstSpace = label.indexOf(" ");
      final String[] codes = label.substring(0, firstSpace).split("-");
      programme.name = label.substring(firstSpace + 1);
      programme.addCode(codes[codes.length - 1]);
      studyType.addCode(codes[0]);

      runTask(
          args,
          new OnDocumentLoaded() {
            @Override
            public void loaded(Document document) throws Exception {
              programme.fields = handleFieldsData(document);
            }
          });

      studyType.programmes.add(programme);
    }

    for (Element specializationRow : tables.get(2).select("tbody tr")) {
      final Elements cells = specializationRow.getElementsByTag("td");
      if (cells.get(0).hasAttr("width")) {
        break;
      }

      // "<code> <name>": split at the first space only.
      final String[] parts = cells.get(1).text().split(" ", 2);
      final VSEStructureElement specialization = new VSEStructureElement();
      specialization.addCode(parts[0]);
      specialization.name = parts[1];
      studyType.specializations.add(specialization);
    }

    return studyType;
  }
  /**
   * Fetches the NASDAQ historical-data page for a symbol, parses its table and stores the result
   * in HBase.
   *
   * <p>Nothing is done when HBase already holds data for the symbol. An empty response or an
   * empty table is recorded through {@code WriteError}. A parsing failure is either collected in
   * {@code errors} (when {@code handleError} is set) or recorded through {@code WriteError}.
   *
   * @param symbol the ticker symbol to download
   */
  public static void parseUSSymbols(String symbol) {
    if (!Hbase.getData(symbol).equals("")) {
      return; // already stored
    }

    String response =
        HttpRequest.sendPost(
            "http://www.nasdaq.com/symbol/" + symbol.toLowerCase() + "/historical",
            length + "|false|" + symbol);
    if (response.equals("")) {
      WriteError(symbol);
      System.out.println(symbol + " result error");
      return;
    }

    Document doc = Jsoup.parse(response);
    JSONArray historicalData = new JSONArray();
    try {
      Elements rows = doc.getElementsByTag("tbody").get(0).getElementsByTag("tr");
      if (rows.isEmpty()) {
        WriteError(symbol);
        System.out.println(symbol + " size 0");
        return;
      }
      for (Element row : rows) {
        // One JSON array per table row, holding its non-empty cell texts.
        JSONArray dailyData = new JSONArray();
        for (Element cell : row.getElementsByTag("td")) {
          String cellText = cell.text();
          if (!cellText.equals("")) {
            dailyData.put(cellText);
          }
        }
        if (dailyData.length() > 0) {
          historicalData.put(dailyData);
        }
      }
      Hbase.addData(symbol, type, historicalData.toString());
    } catch (Exception e) {
      if (handleError) {
        errors.add(symbol);
      } else {
        WriteError(symbol);
        System.out.println(symbol + " parsing error");
      }
    }
  }
Beispiel #14
0
 /**
  * Populates this object's declared fields from the child elements of an XML/HTML element.
  *
  * <p>For every declared field of type {@code String}, {@code Integer} or {@code Float} the first
  * descendant element whose tag equals the field name supplies the value (parsed for the numeric
  * types). For {@code List<String>} fields the texts of all matching elements are collected.
  * Fields without a matching element, and fields of any other type, are left untouched. An
  * {@code IllegalAccessException} while assigning a field is printed and that field is skipped.
  *
  * @param element the element whose descendants provide the field values
  */
 public void dealelement(Element element) {
   for (Field field : this.getClass().getDeclaredFields()) {
     Object value = null;
     switch (field.getGenericType().toString()) {
       case "class java.lang.String":
       case "class java.lang.Integer":
       case "class java.lang.Float":
         {
           Element match = element.getElementsByTag(field.getName()).first();
           if (match != null) {
             value = convertScalar(field.getGenericType().toString(), match.text());
           }
           break;
         }
       case "java.util.List<java.lang.String>":
         {
           Elements matches = element.getElementsByTag(field.getName());
           if (matches.size() > 0) {
             List<String> texts = new ArrayList<>();
             for (Element match : matches) {
               texts.add(match.text());
             }
             value = texts;
           }
           break;
         }
       default:
         break;
     }
     if (value == null) {
       continue; // nothing found for this field
     }
     try {
       field.set(this, value);
     } catch (IllegalAccessException e) {
       e.printStackTrace();
     }
   }
 }

 /** Converts element text to the boxed value matching the given scalar type name. */
 private static Object convertScalar(String typeName, String text) {
   if (typeName.equals("class java.lang.Integer")) {
     return Integer.parseInt(text);
   }
   if (typeName.equals("class java.lang.Float")) {
     return Float.parseFloat(text);
   }
   return text;
 }
Beispiel #15
0
  /**
   * Downloads a topic page, stores its heading in {@code title} and appends one
   * {@code DetailEntity} per {@code tbody} that has reply text to {@code entities}.
   *
   * <p>For each {@code tbody} the avatar is taken from the {@code img} source when it starts with
   * {@code //cdn.}, the reply text from the {@code reply_content} elements, and the entity title
   * from the link texts when the first link points to {@code /member/}. An I/O failure is printed
   * and leaves the state as it was at that point.
   *
   * @param url address of the topic page
   */
  private void getDatafromJsoup(String url) {
    try {
      Document doc = Jsoup.connect(url).get();

      String topic = doc.getElementsByClass("topic_content").text();
      Log.e("topic_content", topic);
      title = topic;

      for (Element block : doc.getElementsByTag("tbody")) {
        DetailEntity entity = new DetailEntity();

        String avatarSrc = block.getElementsByTag("img").attr("src");
        if (avatarSrc.startsWith("//cdn.")) {
          // Protocol-relative source: give it a scheme.
          entity.setAvater("http:" + avatarSrc);
        }

        String reply = block.getElementsByClass("reply_content").text();
        Log.e("reply_content", reply);
        entity.setReply_count(reply);

        Elements anchors = block.getElementsByTag("a");
        if (anchors.attr("href").startsWith("/member/")) {
          String anchorText = anchors.text();
          Log.e("title", anchorText);
          entity.setTitle(anchorText);
        }

        Log.e(
            "other",
            block.getElementsByClass("fade small").text()
                + block.getElementsByClass("small fade").text());

        // Only blocks that actually contain a reply are kept.
        if (!TextUtils.isEmpty(reply)) {
          entities.add(entity);
        }
      }
    } catch (IOException e) {
      e.printStackTrace();
    }
  }
Beispiel #16
0
 /**
  * Extracts the article link from a teaser element.
  *
  * <p>For an element with class {@code m-hero__slot} the link is the first
  * {@code m-hero__slot-link} descendant; for class {@code m-entry-slot} it is the first
  * {@code a} inside the first {@code h3}. Missing pieces are detected with explicit checks
  * instead of throwing and catching {@code NullPointerException}; no stack trace is printed.
  *
  * @param element the teaser element, may be {@code null}
  * @return the link's {@code href} value, or {@code null} when the element is {@code null}, has
  *     neither class, or lacks the expected link
  */
 String parseArticleLink(Element element) {
   if (element == null) {
     return null;
   }
   Element anchor = null;
   if (element.classNames().contains("m-hero__slot")) {
     anchor = element.getElementsByClass("m-hero__slot-link").first();
   } else if (element.classNames().contains("m-entry-slot")) {
     Element heading = element.getElementsByTag("h3").first();
     if (heading != null) {
       anchor = heading.getElementsByTag("a").first();
     }
   }
   return anchor == null ? null : anchor.attr("href");
 }
Beispiel #17
0
 /**
  * Extracts the title from a teaser element.
  *
  * <p>For an element with class {@code m-hero__slot} the title is the first {@code h2} inside the
  * first {@code m-hero__slot-link} descendant; for class {@code m-entry-slot} it is the first
  * {@code h3}. Missing pieces are detected with explicit checks instead of throwing and catching
  * {@code NullPointerException}; no stack trace is printed.
  *
  * @param element the teaser element, may be {@code null}
  * @return the heading text, or {@code "Unknown title"} when the element is {@code null}, has
  *     neither class, or lacks the expected heading
  */
 String parseTitle(Element element) {
   Element heading = null;
   if (element != null) {
     if (element.classNames().contains("m-hero__slot")) {
       Element link = element.getElementsByClass("m-hero__slot-link").first();
       if (link != null) {
         heading = link.getElementsByTag("h2").first();
       }
     } else if (element.classNames().contains("m-entry-slot")) {
       heading = element.getElementsByTag("h3").first();
     }
   }
   return heading == null ? "Unknown title" : heading.text();
 }
Beispiel #18
0
 /**
  * Loads the currency feed and updates the buy and sell rates of every listed currency.
  *
  * <p>The rates are read from the {@code c} elements inside the first {@code currencies} element
  * of the bank container: attribute {@code ar} becomes the buy rate and {@code br} the sell rate.
  * An I/O failure while downloading is logged.
  */
 // @PostConstruct
 public void init() {
   try {
     Element bankContainer =
         Jsoup.connect(CURRENCY_POINT).get().select("#7oiylpmiow8iy1sma9a").first(); // Oshadbank id
     Elements currencyEntries =
         bankContainer.getElementsByTag("currencies").first().getElementsByTag("c");
     for (Element entry : currencyEntries) {
       // The element id names the Currency constant to update.
       Currency currency = Currency.valueOf(entry.id());
       currency.setBuyRate(new BigDecimal(entry.attributes().get("ar")));
       currency.setSellRate(new BigDecimal(entry.attributes().get("br")));
       LOGGER.info(currency.string() + " was inited");
     }
   } catch (IOException e) {
     LOGGER.error(e);
   }
 }
Beispiel #19
0
 /**
  * Submits the login form with the given credentials.
  *
  * <p>The login page is fetched first and the name/value pairs of all inputs in
  * {@code loginForm} are used as the base form data. Timezone, user name, password and the
  * current time in seconds are then set. When captcha data is supplied its first three entries
  * are sent as solution, token and fid; otherwise all captcha fields are removed from the form
  * data.
  *
  * @param username account name
  * @param password account password
  * @param captchaData empty for no captcha, otherwise solution, token and fid in that order
  * @return the response of the POST request
  * @throws ConnectionException if fetching the page or posting the form fails with an I/O error
  */
 private Response postToLogin(String username, String password, String[] captchaData)
     throws ConnectionException {
   try {
     Map<String, String> form = new HashMap<>();
     Element loginForm = Jsoup.connect(Endpoints.LOGIN_URL.url()).get().getElementById("loginForm");
     for (Element input : loginForm.getElementsByTag("input")) {
       form.put(input.attr("name"), input.attr("value"));
     }

     // These entries override whatever the form inputs supplied.
     Date now = new Date();
     form.put("timezone_field", new SimpleDateFormat("XXX").format(now).replace(':', '|'));
     form.put("username", username);
     form.put("password", password);
     form.put("js_time", String.valueOf(now.getTime() / 1000));

     if (captchaData.length == 0) {
       String[] captchaFields = {"hip_solution", "hip_token", "fid", "hip_type", "captcha_provider"};
       for (String field : captchaFields) {
         form.remove(field);
       }
     } else {
       form.put("hip_solution", captchaData[0]);
       form.put("hip_token", captchaData[1]);
       form.put("fid", captchaData[2]);
       form.put("hip_type", "visual");
       form.put("captcha_provider", "Hip");
     }

     return Jsoup.connect(Endpoints.LOGIN_URL.url()).data(form).method(Method.POST).execute();
   } catch (IOException e) {
     throw ExceptionHandler.generateException("While submitting credentials", e);
   }
 }
  /**
   * Initializes this entry from a result row.
   *
   * <p>The show name is cut out of the {@code data-sc-params} attribute in the first cell and
   * passed on as {@code ShowData}; seeds and peers are read from the fifth and sixth cells; the
   * download link is the torrent link's {@code href} without its query string, with
   * {@code .torrent} replaced by {@code /temp.torrent} and {@code http:} prepended to
   * protocol-relative URLs.
   *
   * @param source the row element containing the {@code td} cells
   */
  @Override
  protected void initialize(Element source) {
    Elements cells = source.getElementsByTag("td");

    String params = cells.get(0).select("[data-sc-params]").get(0).attr("data-sc-params");
    String name =
        params
            .replaceAll("\\{ 'name': '", "")
            .replaceAll("', 'magnet':.*", "")
            .replaceAll("%20", "\\.")
            .replaceAll("%5B.*", "");
    ShowData showData = ShowData.fromFilename(name);
    initialize(showData);

    seeds = Integer.parseInt(cells.get(4).text());
    peers = Integer.parseInt(cells.get(5).text());

    Element torrentLink = cells.get(0).select("div a[title=Download torrent file]").get(0);
    // Drop the query string, then rewrite the file name.
    String href = torrentLink.attr("href").split("\\?")[0];
    String link = href.replaceAll("\\.torrent", "/temp\\.torrent");
    downloadLink = link.startsWith("//") ? "http:" + link : link;
  }
Beispiel #21
0
 /**
  * Runs a search on a worker thread and returns the entries of the result page's
  * {@code search} blocks.
  *
  * <p>Spaces in the query are replaced by underscores before it is appended to the search URL.
  * The calling thread blocks until the worker finishes. Blocks without a {@code titl} element or
  * an image are skipped instead of terminating the worker with a {@code NullPointerException}.
  * A download failure is printed and yields the entries collected so far.
  *
  * @param query the search text
  * @return the parsed entries, or {@code null} if the calling thread was interrupted while
  *     waiting (the interrupt status is restored in that case)
  */
 public static ArrayList<EntryModel> getSearchResults(final String query) {
   final ArrayList<EntryModel> result = new ArrayList<>();
   Thread thread =
       new Thread(
           () -> {
             try {
               Document document =
                   Jsoup.connect("http://jkanime.net/buscar/" + query.replace(" ", "_")).get();
               for (Element element : document.getElementsByClass("search")) {
                 Element titleLink = element.getElementsByClass("titl").first();
                 Element image = element.getElementsByTag("img").first();
                 if (titleLink == null || image == null) {
                   continue; // incomplete block: nothing usable to report
                 }
                 result.add(
                     new EntryModel(
                         Constants.TYPE_SHOW,
                         titleLink.text(),
                         titleLink.attr("abs:href"),
                         image.attr("src")));
               }
             } catch (IOException e) {
               e.printStackTrace();
             }
           });
   thread.start();
   try {
     // join() makes the worker's writes to result visible to this thread.
     thread.join();
     return result;
   } catch (InterruptedException e) {
     // Preserve the interrupt for callers further up the stack.
     Thread.currentThread().interrupt();
     e.printStackTrace();
     return null;
   }
 }
  /**
   * Loads the sample report and fills {@code summary_table} and {@code summary_error_table}.
   *
   * <p>Each {@code rTableHead} text is mapped to the {@code rTableCell} text at the same position.
   * Each {@code rTableHeadFailed} text is mapped to the matching {@code rTableCellFailed} text
   * and, in the error table, to the {@code href} of the link at the same position.
   */
  @BeforeClass
  public static void setUp() {
    File input =
        new File("src/test/java/org/jenkinsci/plugins/marketfeaturereport/market_features.html");
    Document doc = null;
    try {
      doc = Jsoup.parse(input, "UTF-8");
    } catch (IOException e) {
      e.printStackTrace();
    }
    assert doc != null;
    Element content = doc.getElementById("market-feature-header");

    Elements header = content.getElementsByClass("rTableHead");
    Elements failedHeader = content.getElementsByClass("rTableHeadFailed");
    Elements rows = content.getElementsByClass("rTableCell");
    Elements failedRows = content.getElementsByClass("rTableCellFailed");

    for (int i = 0; i < header.size(); i++) {
      summary_table.put(header.get(i).text(), rows.get(i).text());
    }

    Elements errorLinks = content.getElementsByTag("a");
    for (int i = 0; i < failedHeader.size(); i++) {
      Element failed = failedHeader.get(i);
      summary_table.put(failed.text(), failedRows.get(i).text());
      String linkHref = errorLinks.get(i).attr("href");
      summary_error_table.put(failed.text(), linkHref);
    }
  }
  /**
   * Converts every {@code img} below the given element into a node, adjusting the elements on the
   * way.
   *
   * <p>Images with class {@code float-left} / {@code float-right} additionally receive
   * {@code alignleft} / {@code alignright}. All images except the first lose their
   * {@code width} and {@code height} attributes. The returned nodes are clones of the converted
   * images.
   *
   * @param aInContent element whose descendant images are extracted
   * @return the cloned image nodes in document order, empty when there is no image
   */
  private static Collection<Node> extractImageNodes(Element aInContent) {
    Collection<Node> lImageNodes = new LinkedList<>();

    boolean lIsFirstImage = true;
    for (Element lImage : aInContent.getElementsByTag("img")) {
      if (lImage.hasClass("float-left")) {
        if (!lImage.hasClass("alignleft")) {
          lImage.addClass("alignleft");
        }
      } else if (lImage.hasClass("float-right")) {
        if (!lImage.hasClass("alignright")) {
          lImage.addClass("alignright");
        }
      }

      // Only the leading image keeps its explicit dimensions.
      if (!lIsFirstImage) {
        lImage.removeAttr("width");
        lImage.removeAttr("height");
      }
      lIsFirstImage = false;

      lImageNodes.add(toNode(lImage).clone());
    }

    return lImageNodes;
  }
 /**
  * Parses the fund holdings table from the stream and stores its rows in {@code map}.
  *
  * <p>Every row of the table body except the first is stored under the first space-separated
  * token of its text, with the full row text as value.
  *
  * @param is stream containing the HTML page
  * @return {@code true} when the table was found and processed; {@code false} when the page has
  *     no element with id {@code FundHoldSharesTable} or that element has no {@code tbody}
  * @throws Exception if reading the stream fails
  */
 private boolean jsoupImpl(InputStream is) throws Exception {
   Document doc = Jsoup.parse(inputStreamToStringBuilder(is).toString());
   Element table = doc.getElementById("FundHoldSharesTable");
   if (table == null) {
     return false;
   }
   Element body = table.getElementsByTag("tbody").first();
   if (body == null) {
     // Treat a table without a body like a missing table instead of failing with an NPE.
     return false;
   }
   Elements rows = body.getElementsByTag("tr");
   // Row 0 is skipped.
   for (int i = 1; i < rows.size(); i++) {
     String text = rows.get(i).text();
     map.put(text.split(" ")[0], text);
   }
   return true;
 }
 /**
  * Copies title, description and link of the primary call-to-action from the web element into
  * the {@code primary_cta_v2} child of the given node.
  *
  * <p>Every piece that cannot be migrated (missing source element, missing component node,
  * missing {@code h3}/{@code p}/{@code a}, missing {@code linkurl} child) is reported by
  * appending the matching constant to {@code sb}. The link URL is the absolute {@code href},
  * falling back to the raw attribute when that is empty, passed through
  * {@code FrameworkUtils.getLocaleReference}.
  *
  * @param upperRightElement source element, may be {@code null}
  * @param midSizeUpperRightNode node expected to contain {@code primary_cta_v2}
  * @param locale locale used to resolve the link
  * @param urlMap URL mapping used to resolve the link
  */
 private void migratePrimaryCta(
     Element upperRightElement,
     Node midSizeUpperRightNode,
     String locale,
     Map<String, String> urlMap)
     throws PathNotFoundException, ValueFormatException, VersionException, LockException,
         ConstraintViolationException, RepositoryException {
   if (upperRightElement == null) {
     sb.append(Constants.PRIMARY_CTA_COMPONENT_INWEB_NOT_FOUND);
     return;
   }
   if (!midSizeUpperRightNode.hasNode("primary_cta_v2")) {
     sb.append(Constants.PRIMARY_CTA_COMPONENT_NOT_FOUND);
     return;
   }

   Element title = upperRightElement.getElementsByTag("h3").first();
   Element description = upperRightElement.getElementsByTag("p").first();
   Element link = upperRightElement.getElementsByTag("a").first();
   Node ctaNode = midSizeUpperRightNode.getNode("primary_cta_v2");

   if (title == null) {
     sb.append(Constants.PRIMARY_CTA_TITLE_ELEMENT_NOT_FOUND);
   } else {
     ctaNode.setProperty("title", title.text());
   }

   if (description == null) {
     sb.append(Constants.PRIMARY_CTA_DESCRIPTION_ELEMENT_NOT_FOUND);
   } else {
     ctaNode.setProperty("description", description.text());
   }

   if (link == null) {
     sb.append(Constants.PRIMARY_CTA_ANCHOR_ELEMENT_NOT_FOUND);
     return;
   }
   ctaNode.setProperty("linktext", link.text());

   if (!ctaNode.hasNode("linkurl")) {
     sb.append(Constants.PRIMARY_CTA_LINK_URL_NODE_NOT_FOUND);
     return;
   }
   String aUrl = link.absUrl("href");
   if (aUrl.equals("")) {
     // No absolute form available: use the attribute as written.
     aUrl = link.attr("href");
   }
   aUrl = FrameworkUtils.getLocaleReference(aUrl, urlMap, locale, sb);
   ctaNode.getNode("linkurl").setProperty("url", aUrl);
 }
Beispiel #26
0
  /**
   * Checks whether the title overview links to an episode guide for this title.
   *
   * @return {@code true} if a link inside the {@code titleOverview} element has the text
   *     "Episode Guide" (ignoring case) and a target containing this object's {@code id}
   */
  private boolean hasSeason() {
    Element overview = getDoc().getElementById("titleOverview");
    for (Element link : overview.getElementsByTag("a")) {
      if (!link.text().equalsIgnoreCase("Episode Guide")) {
        continue;
      }
      if (link.attr("href").contains(id + "")) {
        return true;
      }
    }
    return false;
  }
Beispiel #27
0
  /**
   * Verifies that parsing a fragment without explicit {@code head}/{@code body} tags sorts the
   * metadata elements into the head and the paragraph into the body.
   */
  @Test
  public void createsDocumentStructure() {
    Document doc =
        Jsoup.parse(
            "<meta name=keywords /><link rel=stylesheet /><title>jsoup</title><p>Hello world</p>");
    Element head = doc.getHead();
    Element body = doc.getBody();

    // Element counts: document, head (meta, link, title) and body (p).
    assertEquals(2, doc.children().size());
    assertEquals(3, head.children().size());
    assertEquals(1, body.children().size());

    // The meta element lives in the head only.
    Element meta = head.getElementsByTag("meta").get(0);
    assertEquals("keywords", meta.attr("name"));
    assertEquals(0, body.getElementsByTag("meta").size());

    // Title and paragraph text.
    assertEquals("jsoup", doc.getTitle());
    assertEquals("Hello world", body.text());
    Element paragraph = body.children().get(0);
    assertEquals("Hello world", paragraph.text());
  }
Beispiel #28
0
  /**
   * Collects the articles of a columnist from the archive pages 1 to {@code aySayisi * 2 - 1}.
   *
   * <p>Every link inside {@code ul#article-list} becomes one article, except links pointing to
   * the {@code /haber/turkiye}, {@code /haber/secim_2015} and {@code /haber/diger} sections. The
   * link text is split on spaces: the first three tokens form the date and the tokens from index
   * four onward form the plot (the token at index three is dropped, as before). Both strings are
   * now actually trimmed; the previous code called {@code trim()} and discarded the result.
   * Pages without an article list and links with fewer than three tokens are skipped instead of
   * failing with an unchecked exception.
   *
   * @param koseYazari the columnist whose articles are fetched
   * @param aySayisi number of months; determines how many archive pages are read
   * @return the collected articles; pages that fail to download are reported on stderr and
   *     skipped
   */
  public static List<KoseYazisi> getKoseYazisi(KoseYazari koseYazari, int aySayisi) {
    List<KoseYazisi> koseYazilari = new ArrayList<KoseYazisi>();
    String linkHref = "";

    for (int i = 1; i < aySayisi * 2; i++) {
      String url = getolderUrl(i, koseYazari.getTumYazilariLink(), koseYazari.getId());

      try {
        Document doc = Jsoup.connect(url).timeout(CUMHURIYET.timeout).get();

        Element articleList = doc.select("ul#article-list").first();
        if (articleList == null) {
          continue; // page without an article list
        }

        for (Element link : articleList.getElementsByTag("a")) {
          linkHref = link.attr("href");
          if (linkHref.contains("/haber/turkiye")
              || linkHref.contains("/haber/secim_2015")
              || linkHref.contains("/haber/diger")) {
            continue;
          }

          String[] items = link.text().split(" ");
          if (items.length < 3) {
            continue; // no complete date in the link text
          }

          String dateString = joinTokens(items, 0, 3);
          String plot = joinTokens(items, 4, items.length);

          KoseYazisi koseYazisi =
              new KoseYazisi(Utils.getIdFromLink(linkHref), dateString, plot, linkHref);
          koseYazisi.setYazarAdi(koseYazari.getKoseYazariAdi());
          koseYazilari.add(koseYazisi);
        }

      } catch (IOException e) {
        System.err.println("Yazar id : " + koseYazari.getId() + "Link = " + linkHref);
        e.printStackTrace();
      }
    }

    return koseYazilari;
  }

  /** Joins {@code items[from..to)} with single spaces and trims the result. */
  private static String joinTokens(String[] items, int from, int to) {
    StringBuilder joined = new StringBuilder();
    for (int j = from; j < to; j++) {
      joined.append(items[j]).append(' ');
    }
    return joined.toString().trim();
  }
Beispiel #29
0
  /**
   * Downloads the status table and converts its cells into {@code MersVO} records.
   *
   * <p>Cells are consumed strictly in sequence through an iterator, and each record is tagged with
   * the next entry of a three-element style array and with today's date. Any exception raised
   * while reading (running out of cells, running past the third style entry, download errors) is
   * caught at the end: its message is printed and the records built so far are returned.
   *
   * @return the records parsed before the table or the style array was exhausted
   */
  public List<MersVO> mersData() {
    List<MersVO> list = new ArrayList<MersVO>();
    // Split today's date, formatted as "yyyy-M-d", into its numeric parts.
    Date date = new Date();
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-M-d");
    StringTokenizer st = new StringTokenizer(sdf.format(date), "-");
    int year = Integer.parseInt(st.nextToken());
    int month = Integer.parseInt(st.nextToken());
    int day = Integer.parseInt(st.nextToken());
    try {
      Document doc = Jsoup.connect("http://www.cdc.go.kr/CDC/cms/content/15/63315_view.html").get();
      // System.out.println(doc);
      Elements trs = doc.select("table tbody tr");
      // System.out.println(trs);
      // Value of the "ing" cell; read once and then reused for every later record.
      String data = "";
      // Style class assigned to the first, second and third record respectively.
      String[] temp = {"panel panel-primary", "panel panel-green", "panel panel-yellow"};
      // Number of records built so far, across all rows; also the index into temp.
      int i = 0;
      for (Element tr : trs) {
        Iterator<Element> it = tr.getElementsByTag("td").iterator();

        // if(i==2) break;
        // Each pass consumes the cells of one record; it.next() throws when the row runs short.
        while (it.hasNext()) {

          MersVO vo = new MersVO();
          vo.setType(it.next().text());
          vo.setMers(it.next().text().replace("*", ""));
          /*vo.setYsum(it.next().text().replace(",", ""));
          vo.setPlus(it.next().text());
          vo.setMinus(it.next().text());*/

          // Only the first record consumes a cell here; later records reuse the stored value.
          if (data.equals("")) {
            data = it.next().text();
            vo.setIng(data);
          } else {
            vo.setIng(data);
          }
          vo.setNsum(it.next().text().replace(",", ""));
          vo.setHouse(it.next().text());
          vo.setOffice(it.next().text());
          vo.setDis(it.next().text().replace(",", ""));
          // A fourth record makes temp[i] throw, which ends parsing via the catch below.
          vo.setDiv1(temp[i]);
          vo.setYear(year);
          vo.setMonth(month);
          // The third record is dated one day earlier.
          // NOTE(review): day - 1 is 0 on the first day of a month — confirm this is acceptable.
          if (i == 2) {
            vo.setDay(day - 1);
          } else {
            vo.setDay(day);
          }
          list.add(vo);
          i++;
        }
      }
    } catch (Exception ex) {
      // Also the normal exit path once cells or style entries run out.
      System.out.println(ex.getMessage());
    }
    return list;
  }
  /**
   * Downloads a news page and flattens the paragraphs of its first {@code textbox-content}
   * element into a list of strings.
   *
   * <p>A paragraph containing an image contributes the image's {@code src}; any other paragraph
   * contributes its text prefixed with {@code FOUR_BLANK_SPACE}, with a leading bold part wrapped
   * in {@code $} markers. Entries containing "readmore.gif" and empty entries are left out.
   *
   * @param newsUrl address of the news page
   * @return the extracted entries, or {@code null} when the page has no {@code textbox-content}
   *     element
   * @throws IOException if the page cannot be downloaded
   */
  public static LinkedList<String> getHuXiuNewsDataList(String newsUrl) throws IOException {
    LinkedList<String> data = null;
    Elements majorElements;
    Element majorElement = null;
    // Carried across loop iterations: the non-image branch inspects the previous entry.
    String content = "";
    Document document = Jsoup.connect(newsUrl).timeout(TIME_OUT).get();
    majorElements = document.getElementsByClass("textbox-content");

    if (!majorElements.isEmpty()) {
      data = new LinkedList<String>();
      majorElement = majorElements.get(0);
      // From here on majorElements holds the paragraphs of the content box.
      majorElements = majorElement.getElementsByTag("p");
      if (!majorElements.isEmpty()) {
        for (int i = 0; i < majorElements.size(); i++) {
          majorElement = majorElements.get(i);
          Elements imgElements = majorElement.getElementsByTag("img");
          if (!imgElements.isEmpty()) {
            // Image paragraph: the entry is the first image's source.
            content = imgElements.get(0).attr("src");
          } else {
            // If the previous entry contained an http(s) URL, this paragraph's text is dropped
            // and the entry is cleared instead.
            // NOTE(review): presumably meant to skip the caption after an image — confirm.
            if (content.contains("http://") || (content.contains("https://"))) {
              content = "";
            } else {
              content = majorElement.text();
              Elements bElements = majorElement.getElementsByTag("b");
              if (!bElements.isEmpty()) {
                // Cuts as many leading characters as the first bold text is long, then re-adds
                // the bold text between '$' markers.
                String strongString = bElements.get(0).text();
                content = content.substring(strongString.length());
                content = "$" + strongString + "  $" + content;
              }
              content = FOUR_BLANK_SPACE + content;
            }
            // System.out.println("text =" + majorElement.text());
          }
          if (!TextUtils.isEmpty(content)) {
            if (!content.contains("readmore.gif")) {
              data.add(content);
            }
          }
        }
      }
    }
    return data;
  }