/**
 * Fills the given document with fields taken from the current row of {@code results}.
 *
 * <p>If {@code getIdentifier()} returns a value, stored, non-analyzed "builder" and "number"
 * fields are added first. Then every column of the row is added as a stored field named after
 * the column: columns listed in {@code keyWords} are indexed without analysis; all other
 * columns are analyzed, receive the boost configured in {@code boosts} (if any), and are
 * additionally copied into the "fulltext" field with that same boost.
 *
 * <p>An {@link SQLException} raised while reading the row is logged at error level and not
 * rethrown; fields added up to that point stay in the document.
 *
 * @param document the document to add the fields to
 */
@Override
 public void index(Document document) {
   if (log.isTraceEnabled()) {
     String message =
         "Indexing "
             + sql
             + " id="
             + JdbcIndexDefinition.this.identifier
             + ", key = "
             + JdbcIndexDefinition.this.key;
     log.trace(message);
   }

   final String entryId = getIdentifier();
   if (entryId != null) {
     // Both fields are stored verbatim and indexed as keywords (no analysis).
     Field builderField =
         new Field("builder", "VIRTUAL BUILDER", Field.Store.YES, Field.Index.NOT_ANALYZED);
     document.add(builderField);
     Field numberField =
         new Field("number", getIdentifier(), Field.Store.YES, Field.Index.NOT_ANALYZED);
     document.add(numberField);
   }

   try {
     // Column indices start at 1.
     for (int column = 1; column <= meta.getColumnCount(); column++) {
       final String text = org.mmbase.util.Casting.toString(results.getString(column));
       if (log.isTraceEnabled()) {
         log.trace(
             "Indexing "
                 + text
                 + " for "
                 + meta.getColumnName(column)
                 + " on "
                 + getIdentifier());
       }
       final String columnName = meta.getColumnName(column);

       if (keyWords.contains(columnName)) {
         // Keyword column: indexed as-is, no boost and no contribution to "fulltext".
         Field keyword = new Field(columnName, text, Field.Store.YES, Field.Index.NOT_ANALYZED);
         Indexer.addField(document, keyword, nonDefaultMultiples.get(columnName));
         continue;
       }

       // Regular column: analyzed, optionally boosted.
       Field analyzed = new Field(columnName, text, Field.Store.YES, Field.Index.ANALYZED);
       final Float boost = boosts.get(columnName);
       final boolean boosted = boost != null;
       if (boosted) {
         analyzed.setBoost(boost);
       }
       Indexer.addField(document, analyzed, nonDefaultMultiples.get(columnName));

       // The same text also feeds the catch-all "fulltext" field, with the same boost.
       Field fullText = new Field("fulltext", text, Field.Store.YES, Field.Index.ANALYZED);
       if (boosted) {
         fullText.setBoost(boost);
       }
       document.add(fullText);
     }
   } catch (SQLException sqlException) {
     log.error(sqlException.getMessage(), sqlException);
   }
 }
Exemple #2
0
 /**
  * Counts, for every caption in {@code captionToExpandedQuery}, the total number of hits its
  * expanded query produces in the given indexer.
  *
  * <p>Captions mapped to a {@code null} query are skipped with a warning. If the hit lookup
  * throws, the exception is reported through {@code Util.print_exception} and the caption is
  * recorded with a count of 0.
  *
  * @param indexer the indexer to query
  * @param originalContentOnly if true, query with {@code Indexer.QueryType.ORIGINAL}; otherwise
  *     with {@code Indexer.QueryType.FULL}
  * @return caption -> hit count, in the iteration order of the caption keys
  */
 public Map<String, Integer> getLexiconCounts(Indexer indexer, boolean originalContentOnly) {
   final Map<String, Integer> countsByCaption = new LinkedHashMap<String, Integer>();

   // Iterate over a snapshot of the keys rather than the live key set.
   final String[] allCaptions =
       captionToExpandedQuery.keySet().toArray(new String[captionToExpandedQuery.size()]);

   for (int i = 0; i < allCaptions.length; i++) {
     final String caption = allCaptions[i];
     final String expandedQuery = captionToExpandedQuery.get(caption);

     if (expandedQuery != null) {
       Integer hits = 0; // stays 0 when the lookup below fails
       try {
         hits =
             indexer.getTotalHits(
                 expandedQuery,
                 false,
                 originalContentOnly ? Indexer.QueryType.ORIGINAL : Indexer.QueryType.FULL);
       } catch (Exception e) {
         Util.print_exception("Exception while collecting lexicon counts", e, log);
       }
       countsByCaption.put(caption, hits);
     } else {
       log.warn("Skipping unknown caption '" + caption + "'");
     }
   }
   return countsByCaption;
 }
Exemple #3
0
    /**
     * Main entry point: returns a caption -> documents map for every caption in the current
     * {@code captionToExpandedQuery} map whose query matches at least one document.
     *
     * <p>Despite its name, this method returns the documents that match each emotion caption,
     * not the emotions themselves.
     *
     * @param indexer the indexer to query; it must already have run
     * @param docs results are restricted to these docs; all docs are assumed if this is null or
     *     empty
     * @param originalContentOnly currently not consulted: the query type is always {@code
     *     Indexer.QueryType.ORIGINAL}
     * @param captions the captions to look up; null or none selects all known captions
     * @return caption -> matching documents, in the order the captions were processed; captions
     *     without any matching document are omitted
     */
    public Map<String, Collection<Document>> getEmotions(
        Indexer indexer,
        Collection<Document> docs,
        boolean originalContentOnly,
        String... captions) {
      Map<String, Collection<Document>> result = new LinkedHashMap<String, Collection<Document>>();
      Set<Document> docs_set = Util.castOrCloneAsSet(docs);

      // A varargs parameter is null when the caller passes an explicit null array; treat that
      // the same as "no captions given" so that all known captions are used, as documented.
      String[] selected_captions =
          (captions != null && captions.length > 0)
              ? captions
              : captionToExpandedQuery.keySet().toArray(new String[0]);

      for (String caption : selected_captions) {
        String query = captionToExpandedQuery.get(caption);
        if (query == null) {
          log.warn("Skipping unknown caption '" + caption + "'");
          continue;
        }

        // query is simply word1|word2|word3 etc for that sentiment.
        // The threshold is fixed at 1 for now; a higher threshold may be used for sentiment
        // matching in the future.
        int threshold = 1;
        Indexer.QueryOptions options = new Indexer.QueryOptions();
        options.setThreshold(threshold);
        // NOTE(review): originalContentOnly is ignored here; the query type is always ORIGINAL.
        // Confirm whether FULL should be used when it is false.
        options.setQueryType(Indexer.QueryType.ORIGINAL);
        Collection<Document> docsForCaption = indexer.docsForQuery(query, options);

        // if docs were supplied, retain only those docs that match, otherwise retain all
        if (!Util.nullOrEmpty(docs_set)) {
          docsForCaption = Util.listIntersection(docsForCaption, docs_set);
        }

        // put it in the result only if at least 1 doc matches
        if (docsForCaption.size() > 0) {
          result.put(caption, docsForCaption);
        }
      }
      return result;
    }