/**
 * Parses HTML from {@code reader} and collects its text fragments in the order the
 * parser reports them.
 *
 * <p>Each text run reported by the parser becomes one list element. Each simple tag
 * whose name equals {@code img} (ignoring case) is recorded as the placeholder
 * {@code "###"}. All other parser events are ignored. The parser is told to ignore
 * any charset directive in the document.
 *
 * @param reader source of the HTML markup
 * @return the collected fragments, in parser order
 * @throws IOException if the parser fails while reading from {@code reader}
 */
public static List<String> extractText(Reader reader) throws IOException {
    final ArrayList<String> fragments = new ArrayList<String>();

    // Only text and simple-tag events are of interest; every other handler keeps the
    // empty implementation inherited from HTMLEditorKit.ParserCallback.
    class FragmentCollector extends HTMLEditorKit.ParserCallback {
        @Override
        public void handleText(char[] data, int pos) {
            fragments.add(String.valueOf(data));
        }

        @Override
        public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
            boolean isImage = "img".equalsIgnoreCase(t.toString());
            if (isImage) {
                fragments.add("###");
            }
        }
    }

    new ParserDelegator().parse(reader, new FragmentCollector(), true);
    return fragments;
}
/**
 * Downloads the board menu page and feeds it to the HTML parser.
 *
 * <p>The page is requested from {@code http://menu.2ch.net/bbsmenu.html} with a GET,
 * redirects disabled and a {@code Monazilla/1.00} user agent. The response body is
 * decoded as {@code SJIS} and parsed with the {@code cb} callback declared elsewhere
 * in this class. Any {@link IOException} is printed via {@code printStackTrace()} and
 * otherwise swallowed, so a failed download does not abort construction.
 */
public BoardList() {
    HttpURLConnection connection = null;
    BufferedReader reader = null;
    try {
        // Fetch the HTML file over HTTP.
        URL url = new URL("http://menu.2ch.net/bbsmenu.html");
        connection = (HttpURLConnection) url.openConnection();
        connection.setRequestMethod("GET");
        connection.setInstanceFollowRedirects(false);
        connection.setRequestProperty("User-Agent", "Monazilla/1.00");

        // getInputStream() performs the request; it throws IOException on failure.
        reader = new BufferedReader(new InputStreamReader(connection.getInputStream(), "SJIS"));

        // The third argument tells the parser to ignore any charset directive.
        new ParserDelegator().parse(reader, cb, true);
    } catch (IOException e1) {
        e1.printStackTrace();
    } finally {
        // Release the stream and the connection even when connecting or parsing failed.
        if (reader != null) {
            try {
                // Closing the BufferedReader also closes the wrapped InputStreamReader.
                reader.close();
            } catch (IOException closeFailure) {
                closeFailure.printStackTrace();
            }
        }
        if (connection != null) {
            connection.disconnect();
        }
    }
}
/** * Fuction is used by the parser function. This function actually does parsing of the webpage.It * then removes the tags from the page returning only the text contained on the page. * * @param in Is an instance of the Reader class. * @throws IOException Throws IOException. */ public void parse(Reader in) throws IOException { // Instance of StringBuffere is created to store consecutive lines. this.s = new StringBuffer(); // ParseDelegator is instantiated to parse the content. ParserDelegator delegator = new ParserDelegator(); // the third parameter is TRUE to ignore charset directive delegator.parse(in, this, true); }
/**
 * Fetches a page from onemanga.com and extracts its series information.
 *
 * <p>The page at {@code "http://www.onemanga.com" + htmlLink} is read with the
 * platform default charset and parsed with an {@code OneMangaChapterSpiderCallback}.
 * The network stream is always closed before this method returns.
 *
 * @param htmlLink link text appended to {@code http://www.onemanga.com}
 * @return the series info gathered by the spider callback, or {@code null} if the URL
 *     is malformed or the page cannot be read
 */
public static SeriesInfo getAllChapters(String htmlLink) {
    InputStreamReader reader = null;
    try {
        URL url = new URL("http://www.onemanga.com" + htmlLink);
        reader = new InputStreamReader(url.openStream());

        OneMangaChapterSpiderCallback spider = new OneMangaChapterSpiderCallback();
        // The third argument tells the parser to ignore any charset directive.
        new ParserDelegator().parse(reader, spider, true);
        return spider.getSeriesInfo();
    } catch (IOException e) {
        // Also covers MalformedURLException, which is an IOException subclass.
        return null;
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException ignored) {
                // The result (or null) is already decided; a close failure changes nothing.
            }
        }
    }
}
/**
 * Fetches the onemanga.com directory page and returns the map built from it.
 *
 * <p>The page at {@code http://www.onemanga.com/directory/} is read with the platform
 * default charset and parsed with an {@code OneMangaSpiderCallback}. The network
 * stream is always closed before this method returns.
 *
 * @return the map produced by the spider callback, or {@code null} if the page cannot
 *     be read
 */
public static Map<String, String> getAllManagas() {
    InputStreamReader reader = null;
    try {
        URL url = new URL("http://www.onemanga.com/directory/");
        reader = new InputStreamReader(url.openStream());

        OneMangaSpiderCallback spider = new OneMangaSpiderCallback();
        // The third argument tells the parser to ignore any charset directive.
        new ParserDelegator().parse(reader, spider, true);
        return spider.getMap();
    } catch (IOException e) {
        // Also covers MalformedURLException, which is an IOException subclass.
        return null;
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException ignored) {
                // The result (or null) is already decided; a close failure changes nothing.
            }
        }
    }
}
/** * Get the game information. * * <p>Returns the imported text. */ private String getGameInfo() throws IOException { final StringBuffer gameInformation = new StringBuffer(16384); URL u = null; BufferedReader reader = null; try { u = new URL( "http://www.floc.net/observer.py?judge=" + judgeName + "&game=" + gameName + "&page=history&history_from=0&history_to=999999"); fic.flocImportMessage(Utils.getLocalString(READING_CONTACT)); // output is in HTML, so using the HTML editor kit parser removes // HTML cruft. // reader = new BufferedReader(new InputStreamReader(u.openStream())); if (!isInProgress) { return ""; } ParserDelegator parser = new ParserDelegator(); parser.parse( reader, new HTMLEditorKit.ParserCallback() { public void handleText(char[] text, int pos) { if (!isInProgress) { gameInformation.setLength(0); // abort! return; } fic.flocImportMessage(Utils.getLocalString(READING_FROM_NET)); gameInformation.append(text); gameInformation.append("\n"); } // handleText() }, false); } finally { if (reader != null) { reader.close(); } } return gameInformation.toString(); } // getGameInfo()
/**
 * Parses the HTML supplied by {@code in}, using this object as the parser callback.
 *
 * <p>The buffer {@code s} is reset to a new, empty {@link StringBuffer} first.
 *
 * @param in reader supplying the HTML to parse
 * @throws IOException if the parser fails while reading from {@code in}
 */
public void parse(Reader in) throws IOException {
    this.s = new StringBuffer();

    // true: the parser ignores any charset directive found in the document.
    final boolean ignoreCharSet = true;
    new ParserDelegator().parse(in, this, ignoreCharSet);
}