/**
 * Dumps a crawled page to standard output.
 *
 * <p>Prints the URL of the request that produced {@code resultItems}, followed by one
 * line per extracted field in the form {@code key:<TAB>value}.
 *
 * @param resultItems the extracted fields together with the originating request
 * @param task        the task this result belongs to (not used here)
 */
@Override
public void process(ResultItems resultItems, Task task) {
    String pageUrl = resultItems.getRequest().getUrl();
    System.out.println("get page: " + pageUrl);

    Map<String, Object> fields = resultItems.getAll();
    for (Map.Entry<String, Object> field : fields.entrySet()) {
        String fieldName = field.getKey();
        Object fieldValue = field.getValue();
        System.out.println(fieldName + ":\t" + fieldValue);
    }
}
@Override public void process(ResultItems items, Task task) { // TODO Auto-generated method stub System.out.println("对应的页面: " + items.getRequest().getUrl()); /* * 抓取到的name和id * */ List<String> name = new ArrayList<String>(); List<String> aid = new ArrayList<String>(); List<String> url = new ArrayList<String>(); FileWriter fw = null; FileWriter fwPlayUrl = null; for (Map.Entry<String, Object> entry : items.getAll().entrySet()) { String key = (String) entry.getKey(); if (key.equals(CrawlerIqiyi.VIDEO_NAME)) { name = (List<String>) entry.getValue(); } else if (key.equals(CrawlerIqiyi.VIDEO_ID)) { aid = (List<String>) entry.getValue(); } else if (key.equals(CrawlerIqiyi.VIDEO_URL)) { url = (List<String>) entry.getValue(); } } try { fw = new FileWriter("./urls.txt", true); fwPlayUrl = new FileWriter("./playUrls.txt", true); for (int i = 0; i < name.size(); i++) { /* System.out.println(name.get(i)+" 指数页面为: "+ "http://index.iqiyi.com/q/?aid="+aid.get(i)+"&name=" + name.get(i));*/ fw.write( "http://uaa.iqiyi.com/video_index/v1/get_user_profile?album_id=" + aid.get(i) + "&album_name=" + name.get(i) + "&callback=window.Q.__callbacks__.cbgg1cdr"); fw.write("\n"); fwPlayUrl.write(url.get(i)); fwPlayUrl.write("\n"); } fw.close(); fwPlayUrl.close(); } catch (Exception e) { e.printStackTrace(); } }
public static void main(String[] args) { // single download Spider spider = Spider.create(new BaiduBaikePageProcessor()).thread(2); String urlTemplate = "http://baike.baidu.com/search/word?word=%s&pic=1&sug=1&enc=utf8"; ResultItems resultItems = spider.<ResultItems>get(String.format(urlTemplate, "水力发电")); System.out.println(resultItems); // multidownload List<String> list = new ArrayList<String>(); list.add(String.format(urlTemplate, "风力发电")); list.add(String.format(urlTemplate, "太阳能")); list.add(String.format(urlTemplate, "地热发电")); list.add(String.format(urlTemplate, "地热发电")); List<ResultItems> resultItemses = spider.<ResultItems>getAll(list); for (ResultItems resultItemse : resultItemses) { System.out.println(resultItemse.getAll()); } spider.close(); }
@Override public void process(ResultItems resultItems, Task task) { // TODO Auto-generated method stub List<Good> goodList = resultItems.get("goodList"); dao.addGoods(goodList); }