/** * Creates a new connection. * * @param url url to be handled; cannot be null. * @param configuration protocol configuration; cannot be null * @throws MalformedURLException - If url path is empty * @throws IOException - If cache name cannot be generated * @throws NullArgumentException - If url or configuration is null */ protected Connection(final URL url, final Configuration configuration) throws IOException { super(url); NullArgumentException.validateNotNull(url, "URL"); NullArgumentException.validateNotNull(configuration, "Configuration"); m_parser = new Parser(url.getPath()); m_configuration = configuration; m_cacheName = generateCacheName(m_parser.getUrl()); }
/** * 网页内容元素分离 * * @param usefulUrl 有效的URL * @return 返回数据模型实例 * @throws IOException */ public DataModle elementFilter(String usefulUrl) throws IOException { String url = ""; String title = ""; String author = ""; String source = ""; String releaseTime = ""; String finishTime = ""; String bodyAbstruct = ""; String classfication = ""; String tag = ""; String iconUrl = ""; int isFinished = 0; // 未处理,即未打标签,未归类 long clickTimes = 0; Parser ps = new Parser(); ps.getInformation(usefulUrl); url = ps.getUrl(); title = ps.getTitle(); source = ps.getAuthor(); releaseTime = formatDateTime(ps.getDate()); bodyAbstruct = ps.getContent().toString(); SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); finishTime = df.format(new Date()); if (releaseTime == "") { releaseTime = finishTime; } DataModle dm = new DataModle( url, title, author, source, releaseTime, finishTime, bodyAbstruct, classfication, tag, iconUrl, isFinished, clickTimes); // 构造一个数据模型对象dm return dm; }
/** * Returns the input stream denoted by the url. * * @return the input stream for the resource denoted by url * @throws java.io.IOException in case of an exception during accessing the resource * @see java.net.URLConnection#getInputStream() */ @Override public InputStream getInputStream() throws IOException { connect(); final File workingDir = m_configuration.getWorkingDirectory(); final File cacheMetaFile = new File(workingDir, m_cacheName + EXT_META); final File cacheDateFile = new File(workingDir, m_cacheName + EXT_DATA); final Properties cacheMeta = new Properties(); try { InputStream in = new FileInputStream(cacheMetaFile); try { cacheMeta.load(in); } finally { in.close(); } } catch (FileNotFoundException ignore) { // ignore } final String cacheUrl = cacheMeta.getProperty(META_URL); if (cacheUrl == null) { cacheMeta.setProperty(META_URL, url.getPath()); } final String cacheTime = cacheMeta.getProperty(META_CACHED_ON); if (cacheTime == null || !cacheDateFile.exists()) { StreamUtils.copyStream( m_parser.getUrl().openStream(), new BufferedOutputStream(new FileOutputStream(cacheDateFile)), true); cacheMeta.setProperty(META_CACHED_ON, String.valueOf(System.currentTimeMillis())); } OutputStream out = new FileOutputStream(cacheMetaFile); try { cacheMeta.store(out, null); } finally { out.close(); } return new BufferedInputStream(new FileInputStream(cacheDateFile)); }