@Override public void index(Document document) { if (log.isTraceEnabled()) { log.trace( "Indexing " + sql + " id=" + JdbcIndexDefinition.this.identifier + ", key = " + JdbcIndexDefinition.this.key); } String id = getIdentifier(); if (id != null) { document.add( new Field( "builder", "VIRTUAL BUILDER", Field.Store.YES, Field.Index.NOT_ANALYZED)); // keyword document.add( new Field( "number", getIdentifier(), Field.Store.YES, Field.Index.NOT_ANALYZED)); // keyword } try { for (int i = 1; i <= meta.getColumnCount(); i++) { String value = org.mmbase.util.Casting.toString(results.getString(i)); if (log.isTraceEnabled()) { log.trace( "Indexing " + value + " for " + meta.getColumnName(i) + " on " + getIdentifier()); } String fieldName = meta.getColumnName(i); if (keyWords.contains(fieldName)) { Indexer.addField( document, new Field(fieldName, value, Field.Store.YES, Field.Index.NOT_ANALYZED), nonDefaultMultiples.get(fieldName)); // keyword } else { Field field = new Field(fieldName, value, Field.Store.YES, Field.Index.ANALYZED); Float boost = boosts.get(fieldName); if (boost != null) { field.setBoost(boost); } Indexer.addField(document, field, nonDefaultMultiples.get(fieldName)); Field fullText = new Field("fulltext", value, Field.Store.YES, Field.Index.ANALYZED); if (boost != null) { fullText.setBoost(boost); } document.add(fullText); } } } catch (SQLException sqe) { log.error(sqe.getMessage(), sqe); } }
/**
 * Collects, for every caption in {@code captionToExpandedQuery}, the total number of hits its
 * expanded query has in the given indexer.
 *
 * <p>Captions whose query is {@code null} are skipped with a warning. If the hit count lookup
 * throws, the exception is reported through {@code Util.print_exception} and the caption is
 * recorded with a count of 0.
 *
 * @param indexer the indexer whose {@code getTotalHits} is queried
 * @param originalContentOnly {@code true} to query with {@code Indexer.QueryType.ORIGINAL},
 *     {@code false} to query with {@code Indexer.QueryType.FULL}
 * @return a map from caption to hit count, in the iteration order of the caption key set
 */
public Map<String, Integer> getLexiconCounts(Indexer indexer, boolean originalContentOnly) {
  Map<String, Integer> countsByCaption = new LinkedHashMap<String, Integer>();

  // Work on a snapshot of the keys rather than iterating the live key set.
  String[] captionSnapshot =
      captionToExpandedQuery.keySet().toArray(new String[captionToExpandedQuery.size()]);

  for (int i = 0; i < captionSnapshot.length; i++) {
    String caption = captionSnapshot[i];
    String expandedQuery = captionToExpandedQuery.get(caption);
    if (expandedQuery != null) {
      // Stays 0 when the lookup below fails.
      Integer hits = 0;
      try {
        hits =
            indexer.getTotalHits(
                expandedQuery,
                false,
                originalContentOnly ? Indexer.QueryType.ORIGINAL : Indexer.QueryType.FULL);
      } catch (Exception e) {
        Util.print_exception("Exception while collecting lexicon counts", e, log);
      }
      countsByCaption.put(caption, hits);
    } else {
      log.warn("Skipping unknown caption '" + caption + "'");
    }
  }

  return countsByCaption;
}
/** * main entry point: returns a category -> docs map for each (non-zero) category in the current * captionToQueryMap. * * @indexer must already have run * @docs results are restrictes to these docs. assumes all docs if docs is null or empty. * @captions (null/none = all) * <p>vihari This is a weird name for a method that returns documents with emotions instead * of emotions. */ public Map<String, Collection<Document>> getEmotions( Indexer indexer, Collection<Document> docs, boolean originalContentOnly, String... captions) { Map<String, Collection<Document>> result = new LinkedHashMap<String, Collection<Document>>(); Set<Document> docs_set = Util.castOrCloneAsSet(docs); // for (String[] emotion: emotionsData) String[] selected_captions = captions.length > 0 ? captions : captionToExpandedQuery.keySet().toArray(new String[0]); for (String caption : selected_captions) { String query = captionToExpandedQuery.get(caption); if (query == null) { log.warn("Skipping unknown caption '" + caption + "'"); continue; } // query is simply word1|word2|word3 etc for that sentiment // the -1 indicates that we want all docs in the indexer that match the query int threshold = 1; Indexer.QueryOptions options = new Indexer.QueryOptions(); options.setThreshold(threshold); options.setQueryType(Indexer.QueryType.ORIGINAL); Collection<Document> docsForCaption = indexer.docsForQuery(query, options); /* log.info (docsForCaption.size() + " before"); threshold = 2; docsForCaption = indexer.docsForQuery(query, -1, threshold); log.info (docsForCaption.size() + " after"); */ // Set<Document> docs = indexer.docsWithPhraseThreshold(query, -1, 2); // in future, we // may have a higher threshold for sentiment matching // if @param docs is present, retain only those docs that match, otherwise retain all if (!Util.nullOrEmpty(docs_set)) // docsForCaption.retainAll(docs_set); docsForCaption = Util.listIntersection(docsForCaption, docs_set); // put it in the result only if at least 1 doc matches if 
(docsForCaption.size() > 0) result.put(caption, docsForCaption); } return result; }