/** * Handle the crawl-delay: directive * * @param state current parsing state * @param token data for directive * @return true to keep going, false if we're done */ private boolean handleCrawlDelay(ParseState state, RobotToken token) { state.setFinishedAgentFields(true); if (!state.isAddingRules()) { return true; } String delayString = token.getData(); if (delayString.length() > 0) { try { // Some sites use values like 0.5 for the delay. if (delayString.indexOf('.') != -1) { double delayValue = Double.parseDouble(delayString) * 1000.0; state.setCrawlDelay(Math.round(delayValue)); } else { long delayValue = Integer.parseInt(delayString) * 1000L; // sec to millisec state.setCrawlDelay(delayValue); } } catch (Exception e) { reportWarning( "Error parsing robots rules - can't decode crawl delay: " + delayString, state.getUrl()); } } return true; }
/** * Handle the allow: directive * * @param state current parsing state * @param token data for directive * @return true to keep going, false if we're done */ private boolean handleAllow(ParseState state, RobotToken token) { state.setFinishedAgentFields(true); if (!state.isAddingRules()) { return true; } String path = token.getData(); try { path = URLDecoder.decode(path, "UTF-8"); } catch (Exception e) { reportWarning("Error parsing robots rules - can't decode path: " + path, state.getUrl()); } if (path.length() == 0) { // Allow: <nothing> => allow all. state.clearRules(); } else { state.addRule(path, true); } return true; }