private void initializeOnStartOrResume(String type, String crawlerId) { if (!CrawlerEvent.CRAWLER_STARTED.equals(type) && !CrawlerEvent.CRAWLER_RESUMED.equals(type)) { return; } // Create new file on crawler start/resume outputFile = new File( outputDir, fileNamePrefix + FileUtil.toSafeFileName(crawlerId) + "-" + System.currentTimeMillis() + ".tsv"); try { FileUtil.createDirsForFile(outputFile); } catch (IOException e) { throw new CollectorException("Cannot create output directory for file: " + outputFile, e); } writeLine("Referrer", "URL", "Status", "Reason", false); // Parse status codes if (StringUtils.isBlank(statusCodes)) { parsedCodes.clear(); return; } String[] ranges = statusCodes.split("\\s*,\\s*"); for (String range : ranges) { String[] endPoints = range.split("\\s*-\\s*"); if (endPoints.length == 1) { parsedCodes.add(toInt(endPoints[0])); } else if (endPoints.length == 2) { int start = toInt(endPoints[0]); int end = toInt(endPoints[1]); if (start >= end) { throw new IllegalArgumentException( "Invalid statusCode range: " + range + ". Start value must be higher than end value."); } while (start <= end) { parsedCodes.add(start); start++; } } else { throw new IllegalArgumentException("Invalid statusCode range: " + range); } } }
/**
 * Creates an {@code MVStoreCrawlDataStore} whose storage directory is
 * derived from the crawler configuration: the work directory path,
 * followed by {@code /crawlstore/mvstore/}, the file-system-safe crawler
 * id and a trailing slash.
 *
 * @param config crawler configuration supplying the work directory and
 *     the crawler id
 * @param resume passed through unchanged to the store constructor
 * @return a new crawl data store for the computed directory
 */
@Override
public ICrawlDataStore createCrawlDataStore(
        ICrawlerConfig config, boolean resume) {
    final String workDirPath = config.getWorkDir().getPath();
    final String safeCrawlerId = FileUtil.toSafeFileName(config.getId());
    final String storeDir =
            workDirPath + "/crawlstore/mvstore/" + safeCrawlerId + "/";
    return new MVStoreCrawlDataStore(storeDir, resume);
}