Example #1
0
  /**
   * Runs the Swing HTML parser over {@code reader} and gathers the text it reports.
   *
   * <p>Each text run delivered by the parser becomes one list element, in document order. Every
   * simple tag whose name is {@code img} (compared case-insensitively) contributes the
   * placeholder {@code "###"} at its position.
   *
   * @param reader source of the HTML markup; it is not closed by this method
   * @return the collected text runs and image placeholders
   * @throws IOException if the parser fails while reading from {@code reader}
   */
  public static List<String> extractText(Reader reader) throws IOException {
    final List<String> pieces = new ArrayList<String>();

    // Only the two hooks that produce output are overridden; the remaining
    // ParserCallback hooks keep their empty base-class implementations.
    final HTMLEditorKit.ParserCallback collector =
        new HTMLEditorKit.ParserCallback() {
          @Override
          public void handleText(char[] data, int pos) {
            pieces.add(String.valueOf(data));
          }

          @Override
          public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
            final boolean isImage = "img".equalsIgnoreCase(t.toString());
            if (isImage) {
              pieces.add("###");
            }
          }
        };

    // The final argument is ignoreCharSet = true.
    new ParserDelegator().parse(reader, collector, true);
    return pieces;
  }
  /**
   * Downloads {@code http://menu.2ch.net/bbsmenu.html} over HTTP, decodes the response body as
   * {@code SJIS} and feeds it to the Swing HTML parser using the callback {@code cb}.
   *
   * <p>Any {@link IOException} raised while connecting, reading or parsing is printed to standard
   * error and otherwise ignored, so construction never fails because of it. The reader and the
   * HTTP connection are released whether or not the download succeeds.
   */
  public BoardList() {
    HttpURLConnection connection = null;
    BufferedReader reader = null;
    try {
      // Fetch the HTML file over HTTP.
      URL url = new URL("http://menu.2ch.net/bbsmenu.html");
      connection = (HttpURLConnection) url.openConnection();
      connection.setRequestMethod("GET");
      connection.setInstanceFollowRedirects(false);
      connection.setRequestProperty("User-Agent", "Monazilla/1.00");

      // getInputStream() performs the actual request.
      reader = new BufferedReader(new InputStreamReader(connection.getInputStream(), "SJIS"));

      // The final argument is ignoreCharSet = true.
      new ParserDelegator().parse(reader, cb, true);
    } catch (IOException e1) {
      e1.printStackTrace();
    } finally {
      // Release resources on the failure path too; previously an exception
      // skipped the close()/disconnect() calls entirely.
      if (reader != null) {
        try {
          // Closing the BufferedReader also closes the wrapped InputStreamReader.
          reader.close();
        } catch (IOException closeFailure) {
          closeFailure.printStackTrace();
        }
      }
      if (connection != null) {
        connection.disconnect();
      }
    }
  }
Example #3
0
 /**
  * Parses HTML from {@code in} with the Swing {@link ParserDelegator}, using this object as the
  * parser callback.
  *
  * <p>The buffer {@code s} is replaced with a fresh, empty {@link StringBuffer} before parsing
  * starts. (Presumably the callback methods of this class append the page text to it; they are
  * not shown here.)
  *
  * @param in reader supplying the HTML markup
  * @throws IOException if the parser fails while reading from {@code in}
  */
 public void parse(Reader in) throws IOException {
   // Start every parse from an empty buffer.
   this.s = new StringBuffer();
   // Passing true for the third argument makes the parser ignore the charset directive.
   new ParserDelegator().parse(in, this, true);
 }
Example #4
0
 /**
  * Downloads the page at {@code "http://www.onemanga.com" + htmlLink} and parses it with an
  * {@code OneMangaChapterSpiderCallback}.
  *
  * @param htmlLink path appended to the site's base address
  * @return the series information produced by the callback, or {@code null} if the URL is
  *     malformed or the page cannot be read
  */
 public static SeriesInfo getAllChapters(String htmlLink) {
   try {
     URL url = new URL("http://www.onemanga.com" + htmlLink);
     InputStreamReader reader = new InputStreamReader(url.openStream());
     try {
       OneMangaChapterSpiderCallback spider = new OneMangaChapterSpiderCallback();
       // The final argument is ignoreCharSet = true.
       new ParserDelegator().parse(reader, spider, true);
       return spider.getSeriesInfo();
     } finally {
       // The stream used to be left open; always release it.
       try {
         reader.close();
       } catch (IOException ignored) {
         // A failed close must not replace the parse outcome.
       }
     }
   } catch (IOException e) {
     // MalformedURLException is an IOException, so one handler covers both cases.
     return null;
   }
 }
Example #5
0
 /**
  * Downloads {@code http://www.onemanga.com/directory/} and parses it with an
  * {@code OneMangaSpiderCallback}.
  *
  * @return the map built by the callback, or {@code null} if the page cannot be read
  */
 public static Map<String, String> getAllManagas() {
   try {
     URL url = new URL("http://www.onemanga.com/directory/");
     InputStreamReader reader = new InputStreamReader(url.openStream());
     try {
       OneMangaSpiderCallback spider = new OneMangaSpiderCallback();
       // The final argument is ignoreCharSet = true.
       new ParserDelegator().parse(reader, spider, true);
       return spider.getMap();
     } finally {
       // The stream used to be left open; always release it.
       try {
         reader.close();
       } catch (IOException ignored) {
         // A failed close must not replace the parse outcome.
       }
     }
   } catch (IOException e) {
     // MalformedURLException is an IOException, so one handler covers both cases.
     return null;
   }
 }
Example #6
0
  /**
   * Fetches the history page for {@code judgeName} / {@code gameName} from floc.net and returns
   * its text content.
   *
   * <p>The page is run through the Swing HTML parser; every text run it reports is appended to
   * the result followed by a newline. If {@code isInProgress} is false once the stream has been
   * opened, an empty string is returned without parsing. If it is false when a text run arrives,
   * the text accumulated so far is discarded.
   *
   * @return the collected text
   * @throws IOException if the page cannot be opened or read
   */
  private String getGameInfo() throws IOException {
    final StringBuffer collected = new StringBuffer(16384);
    final String address =
        "http://www.floc.net/observer.py?judge="
            + judgeName
            + "&game="
            + gameName
            + "&page=history&history_from=0&history_to=999999";

    BufferedReader htmlReader = null;
    try {
      final URL historyUrl = new URL(address);

      fic.flocImportMessage(Utils.getLocalString(READING_CONTACT));

      // The response is HTML; letting the HTML editor kit parser walk it
      // strips the markup and leaves only the text.
      htmlReader = new BufferedReader(new InputStreamReader(historyUrl.openStream()));

      if (!isInProgress) {
        return "";
      }

      final HTMLEditorKit.ParserCallback textCollector =
          new HTMLEditorKit.ParserCallback() {
            public void handleText(char[] text, int pos) {
              if (isInProgress) {
                fic.flocImportMessage(Utils.getLocalString(READING_FROM_NET));
                collected.append(text).append("\n");
              } else {
                // Abort: throw away everything gathered so far.
                collected.setLength(0);
              }
            }
          };

      // The final argument is ignoreCharSet = false.
      new ParserDelegator().parse(htmlReader, textCollector, false);
    } finally {
      if (htmlReader != null) {
        htmlReader.close();
      }
    }

    return collected.toString();
  } // getGameInfo()
Example #7
0
 /**
  * Builds the {@code "html32"} DTD on top of a new {@code DTDEx} and registers an additional
  * {@code apos} general entity that maps to the single-quote character.
  *
  * @return the populated DTD
  */
 static DTD createDTD() {
   final DTD html32 = ParserDelegator.createDTD(new DTDEx(), "html32");
   // add apos entity. see bug# 24533
   html32.defEntity("apos", DTD.GENERAL, '\'');
   return html32;
 }
 /**
  * Parses HTML from {@code in} with the Swing {@link ParserDelegator}, using this object as the
  * parser callback. The buffer {@code s} is replaced with a new, empty {@link StringBuffer}
  * before parsing starts.
  *
  * @param in reader supplying the HTML markup
  * @throws IOException if the parser fails while reading from {@code in}
  */
 public void parse(Reader in) throws IOException {
   this.s = new StringBuffer();
   // The final argument is ignoreCharSet = true.
   new ParserDelegator().parse(in, this, true);
 }