/** Performs a regular expression query to the SOLR/Lucene instance. */ final class TermComponentQuery implements KeywordSearchQuery { private static final Logger LOGGER = Logger.getLogger(TermComponentQuery.class.getName()); private static final boolean DEBUG = Version.Type.DEVELOPMENT.equals(Version.getBuildType()); private static final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName(); private static final BlackboardAttribute.Type KEYWORD_SEARCH_DOCUMENT_ID = new BlackboardAttribute.Type(ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_DOCUMENT_ID); // TODO: move these regex and the luhn check to a new class, something like: // CreditCardNumberValidator /* * Track 2 is numeric plus six punctuation symbolls :;<=>? * * This regex matches 12-19 digit ccns embeded in a track 2 formated string. * This regex matches (and extracts groups) even if the entire track is not * present as long as the part that is conforms to the track format. * */ private static final Pattern TRACK2_PATTERN = Pattern.compile( "[:;<=>?]?" // (optional)start sentinel //NON-NLS + "(?<accountNumber>[3456]([ -]?\\d){11,18})" // 12-19 digits, with possible single // spaces or dashes in between. first // digit is 3,4,5, or 6 //NON-NLS + "(?:[:;<=>?]" // separator //NON-NLS + "(?:(?<expiration>\\d{4})" // 4 digit expiration date YYMM //NON-NLS + "(?:(?<serviceCode>\\d{3})" // 3 digit service code //NON-NLS + "(?:(?<discretionary>[^:;<=>?]*)" // discretionary data, not containing punctuation // marks //NON-NLS + "(?:[:;<=>?]" // end sentinel //NON-NLS + "(?<LRC>.)" // longitudinal redundancy check //NON-NLS + "?)?)?)?)?)?"); // close nested optional groups //NON-NLS /* * Track 1 is alphanumeric. * * This regex matches 12-19 digit ccns embeded in a track 1 formated string. * This regex matches (and extracts groups) even if the entire track is not * present as long as the part that is conforms to the track format. */ private static final Pattern TRACK1_PATTERN = Pattern.compile( "(?:" // begin nested optinal group //NON-NLS + "%?" // optional start sentinal: % //NON-NLS + "B)?" // format code //NON-NLS + "(?<accountNumber>[3456]([ -]?\\d){11,18})" // 12-19 digits, with possible single // spaces or dashes in between. first // digit is 3,4,5, or 6 //NON-NLS + "\\^" // separator //NON-NLS + "(?<name>[^^]{2,26})" // 2-26 charachter name, not containing ^ //NON-NLS + "(?:\\^" // separator //NON-NLS + "(?:(?:\\^|(?<expiration>\\d{4}))" // separator or 4 digit expiration YYMM //NON-NLS + "(?:(?:\\^|(?<serviceCode>\\d{3}))" // separator or 3 digit service code //NON-NLS + "(?:(?<discretionary>[^?]*)" // discretionary data not containing separator // //NON-NLS + "(?:\\?" // end sentinal: ? //NON-NLS + "(?<LRC>.)" // longitudinal redundancy check //NON-NLS + "?)?)?)?)?)?"); // close nested optional groups //NON-NLS private static final Pattern CCN_PATTERN = Pattern.compile( "(?<ccn>[3456]([ -]?\\d){11,18})"); // 12-19 digits, with possible single spaces or dashes // in between. first digit is 3,4,5, or 6 //NON-NLS private static final LuhnCheckDigit LUHN_CHECK = new LuhnCheckDigit(); // corresponds to field in Solr schema, analyzed with white-space tokenizer only private static final String TERMS_SEARCH_FIELD = Server.Schema.CONTENT_WS.toString(); private static final String TERMS_HANDLER = "/terms"; // NON-NLS private static final int TERMS_TIMEOUT = 90 * 1000; // in ms private static final String CASE_INSENSITIVE = "case_insensitive"; // NON-NLS private static final int MAX_TERMS_RESULTS = 20000; private String escapedQuery; private final KeywordList keywordList; private final Keyword keyword; private boolean isEscaped; private final List<KeywordQueryFilter> filters = new ArrayList<>(); TermComponentQuery(KeywordList keywordList, Keyword keyword) { this.keyword = keyword; this.keywordList = keywordList; this.escapedQuery = keyword.getQuery(); } @Override public void addFilter(KeywordQueryFilter filter) { this.filters.add(filter); } /** * @param field * @deprecated This method is unused and no-op */ @Override @Deprecated public void setField(String field) {} @Override public void setSubstringQuery() { escapedQuery = ".*" + escapedQuery + ".*"; } @Override public void escape() { escapedQuery = Pattern.quote(keyword.getQuery()); isEscaped = true; } @Override public boolean validate() { if (escapedQuery.isEmpty()) { return false; } try { Pattern.compile(escapedQuery); return true; } catch (IllegalArgumentException ex) { return false; } } @Override public boolean isEscaped() { return isEscaped; } @Override public boolean isLiteral() { return false; } @Override public String getEscapedQueryString() { return this.escapedQuery; } @Override public String getQueryString() { return keyword.getQuery(); } @Override public KeywordCachedArtifact writeSingleFileHitsToBlackBoard( String termHit, KeywordHit hit, String snippet, String listName) { BlackboardArtifact newArtifact; Collection<BlackboardAttribute> attributes = new ArrayList<>(); if (keyword.getType() == ATTRIBUTE_TYPE.TSK_CARD_NUMBER) { attributes.add( new BlackboardAttribute( ATTRIBUTE_TYPE.TSK_ACCOUNT_TYPE, MODULE_NAME, Account.Type.CREDIT_CARD.name())); Map<BlackboardAttribute.Type, BlackboardAttribute> parsedTrackAttributeMap = new HashMap<>(); // try to match it against the track 1 regex Matcher matcher = TRACK1_PATTERN.matcher(hit.getSnippet()); if (matcher.find()) { parseTrack1Data(parsedTrackAttributeMap, matcher); } // then try to match it against the track 2 regex matcher = TRACK2_PATTERN.matcher(hit.getSnippet()); if (matcher.find()) { parseTrack2Data(parsedTrackAttributeMap, matcher); } // if we couldn't parse the CCN abort this artifact final BlackboardAttribute ccnAttribute = parsedTrackAttributeMap.get(new BlackboardAttribute.Type(ATTRIBUTE_TYPE.TSK_CARD_NUMBER)); if (ccnAttribute == null || StringUtils.isBlank(ccnAttribute.getValueString())) { if (hit.isArtifactHit()) { LOGGER.log( Level.SEVERE, String.format( "Failed to parse credit card account number for artifact keyword hit: term = %s, snippet = '%s', artifact id = %d", termHit, hit.getSnippet(), hit.getArtifact().getArtifactID())); } else { LOGGER.log( Level.SEVERE, String.format( "Failed to parse credit card account number for content keyword hit: term = %s, snippet = '%s', object id = %d", termHit, hit.getSnippet(), hit.getContent().getId())); } return null; } attributes.addAll(parsedTrackAttributeMap.values()); // look up the bank name, schem, etc from the BIN final int bin = Integer.parseInt(ccnAttribute.getValueString().substring(0, 8)); CreditCards.BankIdentificationNumber binInfo = CreditCards.getBINInfo(bin); if (binInfo != null) { binInfo .getScheme() .ifPresent( scheme -> attributes.add( new BlackboardAttribute( ATTRIBUTE_TYPE.TSK_CARD_SCHEME, MODULE_NAME, scheme))); binInfo .getCardType() .ifPresent( cardType -> attributes.add( new BlackboardAttribute( ATTRIBUTE_TYPE.TSK_CARD_TYPE, MODULE_NAME, cardType))); binInfo .getBrand() .ifPresent( brand -> attributes.add( new BlackboardAttribute( ATTRIBUTE_TYPE.TSK_BRAND_NAME, MODULE_NAME, brand))); binInfo .getBankName() .ifPresent( bankName -> attributes.add( new BlackboardAttribute( ATTRIBUTE_TYPE.TSK_BANK_NAME, MODULE_NAME, bankName))); binInfo .getBankPhoneNumber() .ifPresent( phoneNumber -> attributes.add( new BlackboardAttribute( ATTRIBUTE_TYPE.TSK_PHONE_NUMBER, MODULE_NAME, phoneNumber))); binInfo .getBankURL() .ifPresent( url -> attributes.add( new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_URL, MODULE_NAME, url))); binInfo .getCountry() .ifPresent( country -> attributes.add( new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_COUNTRY, MODULE_NAME, country))); binInfo .getBankCity() .ifPresent( city -> attributes.add( new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_CITY, MODULE_NAME, city))); } /* if the hit is from unused or unalocated blocks, record the * KEYWORD_SEARCH_DOCUMENT_ID, so we can show just that chunk in the * UI */ if (hit.getContent() instanceof AbstractFile) { AbstractFile file = (AbstractFile) hit.getContent(); if (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS || file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS) { attributes.add( new BlackboardAttribute( KEYWORD_SEARCH_DOCUMENT_ID, MODULE_NAME, hit.getSolrDocumentId())); } } // make account artifact try { newArtifact = hit.getContent().newArtifact(ARTIFACT_TYPE.TSK_ACCOUNT); } catch (TskCoreException tskCoreException) { LOGGER.log( Level.SEVERE, "Error adding bb artifact for account", tskCoreException); // NON-NLS return null; } } else { // regex match attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD, MODULE_NAME, termHit)); // regex keyword attributes.add( new BlackboardAttribute( ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP, MODULE_NAME, keyword.getQuery())); // make keyword hit artifact try { newArtifact = hit.getContent().newArtifact(ARTIFACT_TYPE.TSK_KEYWORD_HIT); } catch (TskCoreException tskCoreException) { LOGGER.log( Level.SEVERE, "Error adding bb artifact for keyword hit", tskCoreException); // NON-NLS return null; } } if (StringUtils.isNotBlank(listName)) { attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SET_NAME, MODULE_NAME, listName)); } // preview if (snippet != null) { attributes.add( new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW, MODULE_NAME, snippet)); } if (hit.isArtifactHit()) { attributes.add( new BlackboardAttribute( ATTRIBUTE_TYPE.TSK_ASSOCIATED_ARTIFACT, MODULE_NAME, hit.getArtifact().getArtifactID())); } try { // TODO: do we still/really need this KeywordCachedArtifact class? newArtifact.addAttributes(attributes); KeywordCachedArtifact writeResult = new KeywordCachedArtifact(newArtifact); writeResult.add(attributes); return writeResult; } catch (TskCoreException e) { LOGGER.log( Level.SEVERE, "Error adding bb attributes for terms search artifact", e); // NON-NLS return null; } } @Override public QueryResults performQuery() throws NoOpenCoreException { /* * Execute the regex query to get a list of terms that match the regex. * Note that the field that is being searched is tokenized based on * whitespace. */ // create the query final SolrQuery q = new SolrQuery(); q.setRequestHandler(TERMS_HANDLER); q.setTerms(true); q.setTermsRegexFlag(CASE_INSENSITIVE); q.setTermsRegex(escapedQuery); q.addTermsField(TERMS_SEARCH_FIELD); q.setTimeAllowed(TERMS_TIMEOUT); q.setShowDebugInfo(DEBUG); q.setTermsLimit(MAX_TERMS_RESULTS); LOGGER.log(Level.INFO, "Query: {0}", q.toString()); // NON-NLS // execute the query List<Term> terms = null; try { terms = KeywordSearch.getServer().queryTerms(q).getTerms(TERMS_SEARCH_FIELD); } catch (KeywordSearchModuleException ex) { LOGGER.log( Level.SEVERE, "Error executing the regex terms query: " + keyword.getQuery(), ex); // NON-NLS // TODO: this is almost certainly wrong and guaranteed to throw a NPE at some point!!!! } /* * For each term that matched the regex, query for full set of document * hits for that term. */ QueryResults results = new QueryResults(this, keywordList); int resultSize = 0; for (Term term : terms) { final String termStr = KeywordSearchUtil.escapeLuceneQuery(term.getTerm()); if (keyword.getType() == ATTRIBUTE_TYPE.TSK_CARD_NUMBER) { // If the keyword is a credit card number, pass it through luhn validator Matcher matcher = CCN_PATTERN.matcher(term.getTerm()); matcher.find(); final String ccn = CharMatcher.anyOf(" -").removeFrom(matcher.group("ccn")); if (false == LUHN_CHECK.isValid(ccn)) { continue; // if the hit does not pass the luhn check, skip it. } } /* * Note: we can't set filter query on terms query but setting filter * query on fileResults query will yield the same result */ LuceneQuery filesQuery = new LuceneQuery(keywordList, new Keyword(termStr, true)); filters.forEach(filesQuery::addFilter); try { QueryResults fileQueryResults = filesQuery.performQuery(); Set<KeywordHit> filesResults = new HashSet<>(); for (Keyword key : fileQueryResults.getKeywords()) { // flatten results into a single list List<KeywordHit> keyRes = fileQueryResults.getResults(key); resultSize += keyRes.size(); filesResults.addAll(keyRes); } results.addResult(new Keyword(term.getTerm(), false), new ArrayList<>(filesResults)); } catch (NoOpenCoreException | RuntimeException e) { LOGGER.log(Level.WARNING, "Error executing Solr query,", e); // NON-NLS throw e; } } // TODO limit how many results we store, not to hit memory limits LOGGER.log(Level.INFO, "Regex # results: {0}", resultSize); // NON-NLS return results; } @Override public KeywordList getKeywordList() { return keywordList; } /** * Add an attribute of the the given type to the given artifact with the value taken from the * matcher. If an attribute of the given type already exists on the artifact or if the value is * null, no attribute is added. * * @param attributeMap * @param attrType * @param groupName * @param matcher * */ private static void addAttributeIfNotAlreadyCaptured( Map<BlackboardAttribute.Type, BlackboardAttribute> attributeMap, ATTRIBUTE_TYPE attrType, String groupName, Matcher matcher) { BlackboardAttribute.Type type = new BlackboardAttribute.Type(attrType); attributeMap.computeIfAbsent( type, (BlackboardAttribute.Type t) -> { String value = matcher.group(groupName); if (attrType.equals(ATTRIBUTE_TYPE.TSK_CARD_NUMBER)) { value = CharMatcher.anyOf(" -").removeFrom(value); } if (StringUtils.isNotBlank(value)) { return new BlackboardAttribute(attrType, MODULE_NAME, value); } return null; }); } /** * Parse the track 2 data from a KeywordHit and add it to the given artifact. * * @param attributeMAp * @param matcher */ private static void parseTrack2Data( Map<BlackboardAttribute.Type, BlackboardAttribute> attributeMAp, Matcher matcher) { // try to add all the attrributes common to track 1 and 2 addAttributeIfNotAlreadyCaptured( attributeMAp, ATTRIBUTE_TYPE.TSK_CARD_NUMBER, "accountNumber", matcher); addAttributeIfNotAlreadyCaptured( attributeMAp, ATTRIBUTE_TYPE.TSK_CARD_EXPIRATION, "expiration", matcher); addAttributeIfNotAlreadyCaptured( attributeMAp, ATTRIBUTE_TYPE.TSK_CARD_SERVICE_CODE, "serviceCode", matcher); addAttributeIfNotAlreadyCaptured( attributeMAp, ATTRIBUTE_TYPE.TSK_CARD_DISCRETIONARY, "discretionary", matcher); addAttributeIfNotAlreadyCaptured(attributeMAp, ATTRIBUTE_TYPE.TSK_CARD_LRC, "LRC", matcher); } /** * Parse the track 1 data from a KeywordHit and add it to the given artifact. * * @param attributeMap * @param matcher */ private static void parseTrack1Data( Map<BlackboardAttribute.Type, BlackboardAttribute> attributeMap, Matcher matcher) { // track 1 has all the fields present in track 2 parseTrack2Data(attributeMap, matcher); // plus it also has the account holders name addAttributeIfNotAlreadyCaptured(attributeMap, ATTRIBUTE_TYPE.TSK_NAME_PERSON, "name", matcher); } }