@Test public void addSpider() { JobInfo jobInfo = job(); jobInfo.setJobModel(Constants.JobModel.addSpider); // jobManager.addSpider(jobInfo, "1"); jobManager.addSpider(jobInfo, "1"); }
public static JobInfo job() { JobInfo jobInfo = JobInfo.create("movie.mtime.com"); jobInfo.setMaxDepth(1); jobInfo.setJobThreadNum(5); jobInfo.setSleep(200L); jobInfo.setReset(true); jobInfo.getSeed().add("http://theater.mtime.com/China_Beijing/"); Orders orders = new Orders(); orders.setPathRegx("http://movie.mtime.com/[0-9]+/"); orders.setTargetRegx("^http://movie.mtime.com/[0-9]+/$"); orders.setFields( ImmutableMap.of( "title", new ExtratField("title", "<title>([^<]+)</title>", 1, Constants.FmtType.str) // "html", new ExtratField("html", ".*(<body>.*?</body>).*", 1), // <p class="mt15 ml25 newstime ">2015-08-12 15:44:03 <span class="ml15"> // "infoTime_dt", // new ExtratField("infoTime", // "<p\\s*class=\".*newstime\\s*\">([^<]+).*</p>", 1, Constants.FmtType.date) )); jobInfo.setOrders(orders); return jobInfo; }