/**
   * Initializes the service for a new ingest run: sets up the indexer, timers, per-run state, and
   * retrieves the keyword lists to run on.
   *
   * @param managerProxy proxy to the ingest manager, used to post user-visible messages and to
   *     query the user-configured update frequency for the commit/search timers
   */
  @Override
  public void init(IngestManagerProxy managerProxy) {
    logger.log(Level.INFO, "init()");
    initialized = false;

    caseHandle = Case.getCurrentCase().getSleuthkitCase();

    this.managerProxy = managerProxy;

    Server solrServer = KeywordSearch.getServer();

    ingester = solrServer.getIngester();

    ingestStatus = new HashMap<>();

    keywords = new ArrayList<>();
    keywordLists = new ArrayList<>();
    keywordToList = new HashMap<>();

    initKeywords();

    // Warn the user up front if there is nothing to search for; indexing still proceeds.
    if (keywords.isEmpty() || keywordLists.isEmpty()) {
      managerProxy.postMessage(
          IngestMessage.createWarningMessage(
              ++messageID,
              instance,
              "No keywords in keyword list.",
              "Only indexing will be done and keyword search will be skipped (it can be executed later again as ingest or using toolbar search feature)."));
    }

    processedFiles = false;
    finalSearcherDone = false;
    searcherDone = true; // make sure to start the initial currentSearcher
    // keeps track of all results per run not to repeat reporting the same hits
    currentResults = new HashMap<>();

    indexer = new Indexer();

    // Both timers fire at the manager-configured frequency (minutes -> ms).
    final int updateIntervalMs = managerProxy.getUpdateFrequency() * 60 * 1000;
    logger.log(Level.INFO, "Using commit interval (ms): " + updateIntervalMs);
    logger.log(Level.INFO, "Using searcher interval (ms): " + updateIntervalMs);

    commitTimer = new Timer(updateIntervalMs, new CommitTimerAction());
    searchTimer = new Timer(updateIntervalMs, new SearchTimerAction());

    // Mark initialized before starting timers so their callbacks see a ready service.
    initialized = true;

    commitTimer.start();
    searchTimer.start();

    managerProxy.postMessage(
        IngestMessage.createMessage(++messageID, MessageType.INFO, this, "Started"));
  }
 /**
  * Notifies registered listeners of a potential change in the number of indexed files by firing a
  * {@code NUM_FILES_CHANGE_EVT} property change with the current count queried from Solr. Query
  * failures are logged and swallowed (best-effort notification).
  */
 private void indexChangeNotify() {
   // signal a potential change in number of indexed files
   try {
     final int numIndexedFiles = KeywordSearch.getServer().queryNumIndexedFiles();
     // Autoboxing replaces the deprecated new Integer(int) constructor.
     KeywordSearch.changeSupport.firePropertyChange(
         KeywordSearch.NUM_FILES_CHANGE_EVT, null, numIndexedFiles);
   } catch (NoOpenCoreException | SolrServerException ex) {
     logger.log(
         Level.WARNING, "Error executing Solr query to check number of indexed files: ", ex);
   }
 }
  /**
   * Gets the extracted text content for a node from Solr.
   *
   * @param node a node that has extracted content in Solr (check with {@code solrHasContent(Node)})
   * @param currentPage the page (chunk) number to retrieve when the content is chunked
   * @param hasChunks true if the content is stored as chunks in the index; when false, chunk id 0
   *     (the whole, un-chunked document) is fetched
   * @return the extracted content, or an empty string if no Solr core is open
   * @throws SolrServerException if the Solr query fails
   */
  private String getSolrContent(Node node, int currentPage, boolean hasChunks)
      throws SolrServerException {
    final Content contentObj = node.getLookup().lookup(Content.class);

    final Server solrServer = KeywordSearch.getServer();

    // Chunk id 0 addresses the whole (un-chunked) document.
    final int chunkId = hasChunks ? currentPage : 0;

    try {
      return (String) solrServer.getSolrContent(contentObj, chunkId);
    } catch (NoOpenCoreException ex) {
      logger.log(Level.WARNING, "Couldn't get text content.", ex);
      return "";
    }
  }
  /**
   * Checks whether Solr has extracted content indexed for the given node.
   *
   * @param node the node to check
   * @return true if Solr has content for the node's Content object; false if the node carries no
   *     Content, no core is open, or the Solr query fails
   */
  private boolean solrHasContent(Node node) {
    final Content nodeContent = node.getLookup().lookup(Content.class);
    if (nodeContent == null) {
      return false;
    }

    try {
      return KeywordSearch.getServer().queryIsIndexed(nodeContent.getId());
    } catch (NoOpenCoreException ex) {
      logger.log(Level.WARNING, "Couldn't determine whether content is supported.", ex);
    } catch (SolrServerException ex) {
      logger.log(Level.WARNING, "Couldn't determine whether content is supported.", ex);
    }
    // Treat any query failure as "no content available".
    return false;
  }
// NOTE(review): stray non-code text ("示例#5" / "0") removed here — extraction artifact
// separating two unrelated excerpts; it would not compile as Java.
  /**
   * Performs a regular-expression terms query against the Solr index, then runs a follow-up
   * document query for each matching term.
   *
   * @return the aggregated query results, keyed by matched term; empty if the terms query fails
   * @throws NoOpenCoreException if a per-term document query finds no open core
   */
  @Override
  public QueryResults performQuery() throws NoOpenCoreException {
    /*
     * Execute the regex query to get a list of terms that match the regex.
     * Note that the field that is being searched is tokenized based on
     * whitespace.
     */
    // create the query
    final SolrQuery q = new SolrQuery();
    q.setRequestHandler(TERMS_HANDLER);
    q.setTerms(true);
    q.setTermsRegexFlag(CASE_INSENSITIVE);
    q.setTermsRegex(escapedQuery);
    q.addTermsField(TERMS_SEARCH_FIELD);
    q.setTimeAllowed(TERMS_TIMEOUT);
    q.setShowDebugInfo(DEBUG);
    q.setTermsLimit(MAX_TERMS_RESULTS);
    LOGGER.log(Level.INFO, "Query: {0}", q.toString()); // NON-NLS

    // execute the query
    final List<Term> terms;
    try {
      terms = KeywordSearch.getServer().queryTerms(q).getTerms(TERMS_SEARCH_FIELD);
    } catch (KeywordSearchModuleException ex) {
      LOGGER.log(
          Level.SEVERE,
          "Error executing the regex terms query: " + keyword.getQuery(),
          ex); // NON-NLS
      // BUGFIX: previously execution fell through with terms == null, guaranteeing an NPE in
      // the loop below. Return an empty result set instead of crashing the search.
      return new QueryResults(this, keywordList);
    }

    /*
     * For each term that matched the regex, query for full set of document
     * hits for that term.
     */
    QueryResults results = new QueryResults(this, keywordList);
    int resultSize = 0;

    for (Term term : terms) {
      final String termStr = KeywordSearchUtil.escapeLuceneQuery(term.getTerm());

      if (keyword.getType() == ATTRIBUTE_TYPE.TSK_CARD_NUMBER) {
        // If the keyword is a credit card number, pass it through luhn validator
        Matcher matcher = CCN_PATTERN.matcher(term.getTerm());
        if (!matcher.find()) {
          // Defensive: group("ccn") would throw IllegalStateException without a match.
          continue;
        }
        final String ccn = CharMatcher.anyOf(" -").removeFrom(matcher.group("ccn"));
        if (false == LUHN_CHECK.isValid(ccn)) {
          continue; // if the hit does not pass the luhn check, skip it.
        }
      }

      /*
       * Note: we can't set filter query on terms query but setting filter
       * query on fileResults query will yield the same result
       */
      LuceneQuery filesQuery = new LuceneQuery(keywordList, new Keyword(termStr, true));
      filters.forEach(filesQuery::addFilter);

      try {
        QueryResults fileQueryResults = filesQuery.performQuery();
        Set<KeywordHit> filesResults = new HashSet<>();
        for (Keyword key : fileQueryResults.getKeywords()) { // flatten results into a single list
          List<KeywordHit> keyRes = fileQueryResults.getResults(key);
          resultSize += keyRes.size();
          filesResults.addAll(keyRes);
        }
        results.addResult(new Keyword(term.getTerm(), false), new ArrayList<>(filesResults));
      } catch (NoOpenCoreException | RuntimeException e) {
        LOGGER.log(Level.WARNING, "Error executing Solr query,", e); // NON-NLS
        throw e;
      }
    }

    // TODO limit how many results we store, not to hit memory limits
    LOGGER.log(Level.INFO, "Regex # results: {0}", resultSize); // NON-NLS

    return results;
  }