/** * Initializes the service for new ingest run Sets up threads, timers, retrieves settings, keyword * lists to run on * * @param managerProxy */ @Override public void init(IngestManagerProxy managerProxy) { logger.log(Level.INFO, "init()"); initialized = false; caseHandle = Case.getCurrentCase().getSleuthkitCase(); this.managerProxy = managerProxy; Server solrServer = KeywordSearch.getServer(); ingester = solrServer.getIngester(); ingestStatus = new HashMap<Long, IngestStatus>(); keywords = new ArrayList<Keyword>(); keywordLists = new ArrayList<String>(); keywordToList = new HashMap<String, KeywordSearchList>(); initKeywords(); if (keywords.isEmpty() || keywordLists.isEmpty()) { managerProxy.postMessage( IngestMessage.createWarningMessage( ++messageID, instance, "No keywords in keyword list.", "Only indexing will be done and and keyword search will be skipped (it can be executed later again as ingest or using toolbar search feature).")); } processedFiles = false; finalSearcherDone = false; searcherDone = true; // make sure to start the initial currentSearcher // keeps track of all results per run not to repeat reporting the same hits currentResults = new HashMap<Keyword, List<ContentHit>>(); indexer = new Indexer(); final int updateIntervalMs = managerProxy.getUpdateFrequency() * 60 * 1000; logger.log(Level.INFO, "Using commit interval (ms): " + updateIntervalMs); logger.log(Level.INFO, "Using searcher interval (ms): " + updateIntervalMs); commitTimer = new Timer(updateIntervalMs, new CommitTimerAction()); searchTimer = new Timer(updateIntervalMs, new SearchTimerAction()); initialized = true; commitTimer.start(); searchTimer.start(); managerProxy.postMessage( IngestMessage.createMessage(++messageID, MessageType.INFO, this, "Started")); }
/** Helper method to notify listeners on index update */ private void indexChangeNotify() { // signal a potential change in number of indexed files try { final int numIndexedFiles = KeywordSearch.getServer().queryNumIndexedFiles(); KeywordSearch.changeSupport.firePropertyChange( KeywordSearch.NUM_FILES_CHANGE_EVT, null, new Integer(numIndexedFiles)); } catch (NoOpenCoreException ex) { logger.log( Level.WARNING, "Error executing Solr query to check number of indexed files: ", ex); } catch (SolrServerException se) { logger.log( Level.WARNING, "Error executing Solr query to check number of indexed files: ", se); } }
/**
 * Get extracted content for a node from Solr.
 *
 * @param node a node that has extracted content in Solr (check with solrHasContent(Node))
 * @param currentPage the page/chunk number to fetch when the content is chunked
 * @param hasChunks whether the indexed content is split into chunks
 * @return the extracted content, or an empty string if no Solr core is open
 * @throws SolrServerException if the Solr query fails
 */
private String getSolrContent(Node node, int currentPage, boolean hasChunks)
    throws SolrServerException {
    Content contentObj = node.getLookup().lookup(Content.class);

    final Server solrServer = KeywordSearch.getServer();

    // Chunk id 0 requests the whole (un-chunked) document.
    final int chunkId = hasChunks ? currentPage : 0;

    try {
        return (String) solrServer.getSolrContent(contentObj, chunkId);
    } catch (NoOpenCoreException ex) {
        logger.log(Level.WARNING, "Couldn't get text content.", ex);
        return "";
    }
}
/**
 * Check if Solr has extracted content for a given node.
 *
 * @param node the node whose associated Content to look up
 * @return true if Solr has content indexed for the node, else false (including on query failure)
 */
private boolean solrHasContent(Node node) {
    Content content = node.getLookup().lookup(Content.class);
    if (content == null) {
        return false;
    }

    final Server solrServer = KeywordSearch.getServer();
    final long contentID = content.getId();

    try {
        return solrServer.queryIsIndexed(contentID);
    } catch (NoOpenCoreException | SolrServerException ex) {
        // Identical handling for both failure modes; treat a failed query as "not indexed".
        logger.log(Level.WARNING, "Couldn't determine whether content is supported.", ex);
        return false;
    }
}
@Override public QueryResults performQuery() throws NoOpenCoreException { /* * Execute the regex query to get a list of terms that match the regex. * Note that the field that is being searched is tokenized based on * whitespace. */ // create the query final SolrQuery q = new SolrQuery(); q.setRequestHandler(TERMS_HANDLER); q.setTerms(true); q.setTermsRegexFlag(CASE_INSENSITIVE); q.setTermsRegex(escapedQuery); q.addTermsField(TERMS_SEARCH_FIELD); q.setTimeAllowed(TERMS_TIMEOUT); q.setShowDebugInfo(DEBUG); q.setTermsLimit(MAX_TERMS_RESULTS); LOGGER.log(Level.INFO, "Query: {0}", q.toString()); // NON-NLS // execute the query List<Term> terms = null; try { terms = KeywordSearch.getServer().queryTerms(q).getTerms(TERMS_SEARCH_FIELD); } catch (KeywordSearchModuleException ex) { LOGGER.log( Level.SEVERE, "Error executing the regex terms query: " + keyword.getQuery(), ex); // NON-NLS // TODO: this is almost certainly wrong and guaranteed to throw a NPE at some point!!!! } /* * For each term that matched the regex, query for full set of document * hits for that term. */ QueryResults results = new QueryResults(this, keywordList); int resultSize = 0; for (Term term : terms) { final String termStr = KeywordSearchUtil.escapeLuceneQuery(term.getTerm()); if (keyword.getType() == ATTRIBUTE_TYPE.TSK_CARD_NUMBER) { // If the keyword is a credit card number, pass it through luhn validator Matcher matcher = CCN_PATTERN.matcher(term.getTerm()); matcher.find(); final String ccn = CharMatcher.anyOf(" -").removeFrom(matcher.group("ccn")); if (false == LUHN_CHECK.isValid(ccn)) { continue; // if the hit does not pass the luhn check, skip it. 
} } /* * Note: we can't set filter query on terms query but setting filter * query on fileResults query will yield the same result */ LuceneQuery filesQuery = new LuceneQuery(keywordList, new Keyword(termStr, true)); filters.forEach(filesQuery::addFilter); try { QueryResults fileQueryResults = filesQuery.performQuery(); Set<KeywordHit> filesResults = new HashSet<>(); for (Keyword key : fileQueryResults.getKeywords()) { // flatten results into a single list List<KeywordHit> keyRes = fileQueryResults.getResults(key); resultSize += keyRes.size(); filesResults.addAll(keyRes); } results.addResult(new Keyword(term.getTerm(), false), new ArrayList<>(filesResults)); } catch (NoOpenCoreException | RuntimeException e) { LOGGER.log(Level.WARNING, "Error executing Solr query,", e); // NON-NLS throw e; } } // TODO limit how many results we store, not to hit memory limits LOGGER.log(Level.INFO, "Regex # results: {0}", resultSize); // NON-NLS return results; }