/**
 * Entry point: loads the Spring application context, looks up the
 * {@code ScreeningWeixinPipeline} bean and runs a single-threaded
 * {@code OOSpider} for {@code ScreeningWeixinModel} against one schedule URL.
 *
 * @param args command-line arguments (not used)
 */
public static void main(String[] args) {
    final String scheduleUrl =
            "http://m.wepiao.com/data/v5/cinemas/cities/10/sched_city_cinema_10_1003249.json?cityId=10&cinemaId=1003249";

    // Bootstrap Spring from every applicationContext*.xml on the classpath.
    ApplicationContext springContext =
            new ClassPathXmlApplicationContext("classpath:/applicationContext*.xml");
    final ScreeningWeixinPipeline screeningPipeline =
            springContext.getBean(ScreeningWeixinPipeline.class);

    // Crawl settings: sleep time of 1000 and up to 30 cycle retries.
    Site crawlSite = Site.me().setSleepTime(1000).setCycleRetryTimes(30);

    OOSpider.create(crawlSite, screeningPipeline, ScreeningWeixinModel.class)
            .addUrl(scheduleUrl)
            .thread(1)
            .run();
}
/** * @author [email protected] <br> * @since 0.4.0 */ public class BaiduBaikePageProcessor implements PageProcessor { private Site site = Site.me() // .setHttpProxy(new HttpHost("127.0.0.1",8888)) .setRetryTimes(3) .setSleepTime(1000) .setUseGzip(true); public static void main(String[] args) { // single download Spider spider = Spider.create(new BaiduBaikePageProcessor()).thread(2); String urlTemplate = "http://baike.baidu.com/search/word?word=%s&pic=1&sug=1&enc=utf8"; ResultItems resultItems = spider.<ResultItems>get(String.format(urlTemplate, "水力发电")); System.out.println(resultItems); // multidownload List<String> list = new ArrayList<String>(); list.add(String.format(urlTemplate, "风力发电")); list.add(String.format(urlTemplate, "太阳能")); list.add(String.format(urlTemplate, "地热发电")); list.add(String.format(urlTemplate, "地热发电")); List<ResultItems> resultItemses = spider.<ResultItems>getAll(list); for (ResultItems resultItemse : resultItemses) { System.out.println(resultItemse.getAll()); } spider.close(); } @Override public void process(Page page) { page.putField("name", page.getHtml().css("h1.title div.lemmaTitleH1", "text").toString()); page.putField( "description", page.getHtml().xpath("//div[@id='lemmaContent-0']//div[@class='para']/allText()")); } @Override public Site getSite() { return site; } }