public SearcherResult findRuleMatchesOnIndex(PatternRule rule, Language language) throws IOException, UnsupportedPatternRuleException { // it seems wasteful to re-open the index every time, but I had strange problems (OOM, Array out // of bounds, ...) // when not doing so... open(); try { final PatternRuleQueryBuilder patternRuleQueryBuilder = new PatternRuleQueryBuilder(language); final Query query = patternRuleQueryBuilder.buildRelaxedQuery(rule); if (query == null) { throw new NullPointerException("Cannot search on null query for rule: " + rule.getId()); } final SearchRunnable runnable = new SearchRunnable(indexSearcher, query, language, rule); final Thread searchThread = new Thread(runnable); searchThread.start(); try { // using a TimeLimitingCollector is not enough, as it doesn't cover all time required to // search for a complicated regex, so interrupt the whole thread instead: if (limitSearch) { // FIXME: I don't know a simpler way to achieve this searchThread.join(maxSearchTimeMillis); } else { searchThread.join(Integer.MAX_VALUE); } searchThread.interrupt(); } catch (InterruptedException e) { throw new RuntimeException("Search thread got interrupted for query " + query, e); } if (searchThread.isInterrupted()) { throw new SearchTimeoutException( "Search timeout of " + maxSearchTimeMillis + "ms reached for query " + query); } final Exception exception = runnable.getException(); if (exception != null) { if (exception instanceof SearchTimeoutException) { throw (SearchTimeoutException) exception; } throw new RuntimeException( "Exception during search for query " + query + " on rule " + rule.getId(), exception); } final List<MatchingSentence> matchingSentences = runnable.getMatchingSentences(); final int sentencesChecked = getSentenceCheckCount(query, indexSearcher); final SearcherResult searcherResult = new SearcherResult(matchingSentences, sentencesChecked, query); searcherResult.setHasTooManyLuceneMatches(runnable.hasTooManyLuceneMatches()); 
searcherResult.setLuceneMatchCount(runnable.getLuceneMatchCount()); if (runnable.hasTooManyLuceneMatches()) { // more potential matches than we can check in an acceptable time :-( searcherResult.setDocCount(maxHits); } else { searcherResult.setDocCount(getDocCount(indexSearcher)); } // TODO: the search itself could also timeout, don't just ignore that: // searcherResult.setResultIsTimeLimited(limitedTopDocs.resultIsTimeLimited); return searcherResult; } finally { close(); } }
/**
 * Command line entry point: runs one or more rules against an index and prints the
 * matching sentences to standard output.
 *
 * <p>Arguments, as read by this method:
 * <ol>
 *   <li>{@code args[0]} - comma-separated rule ids</li>
 *   <li>{@code args[1]} - path of the rule file</li>
 *   <li>{@code args[2]} - short language code</li>
 *   <li>{@code args[3]} - path of the index directory</li>
 *   <li>{@code args[4]} - optional; the literal {@code --no_limit} switches off the search
 *       limit and raises the maximum number of hits to 100000</li>
 * </ol>
 *
 * @param args the command line arguments described above (checked up front by
 *     {@code ensureCorrectUsageOrExit})
 * @throws Exception if rule loading or searching fails
 */
public static void main(String[] args) throws Exception {
  ensureCorrectUsageOrExit(args);
  final long overallStart = System.currentTimeMillis();

  final String[] requestedRuleIds = args[0].split(",");
  final File rulesXml = new File(args[1]);
  final Language lang = Language.getLanguageForShortName(args[2]);
  final File indexLocation = new File(args[3]);
  final boolean noLimitRequested = args.length > 4 && "--no_limit".equals(args[4]);
  if (noLimitRequested) {
    limitSearch = false;
  }

  final Searcher searcher = new Searcher(new SimpleFSDirectory(indexLocation));
  if (!limitSearch) {
    searcher.setMaxHits(100000);
  }

  for (String requestedId : requestedRuleIds) {
    // The clock starts once per rule id, so if an id resolves to several rules the printed
    // time of the later ones includes the earlier ones.
    final long idStart = System.currentTimeMillis();
    for (PatternRule patternRule : searcher.getRuleById(requestedId, ruleFileOf(rulesXml))) {
      final SearcherResult result = searcher.findRuleMatchesOnIndex(patternRule, lang);
      final List<MatchingSentence> hits = result.getMatchingSentences();
      if (hits.isEmpty()) {
        System.out.println("[no matches]");
      } else {
        for (int pos = 0; pos < hits.size(); pos++) {
          final MatchingSentence hit = hits.get(pos);
          // output is numbered starting at 1
          System.out.println(
              (pos + 1) + ": " + hit.getSentence() + " (Source: " + hit.getSource() + ")");
        }
      }
      final long elapsedForId = System.currentTimeMillis() - idStart;
      System.out.println("Time: " + elapsedForId + "ms");
      System.out.println("==============================================================");
    }
  }

  final long elapsedOverall = System.currentTimeMillis() - overallStart;
  System.out.println("Total time: " + elapsedOverall + "ms");
}

/** Returns the given rule file unchanged; only names the argument at the call site. */
private static File ruleFileOf(File rulesXml) {
  return rulesXml;
}