Пример #1
0
  /**
   * Return a query that will return docs like the passed lucene document ID.
   *
   * @param docNum the documentID of the lucene doc to generate the 'More Like This" query for.
   * @return a query that will return docs like the passed lucene document ID.
   */
  public Query like(int docNum) throws IOException {
    if (fieldNames == null) {
      // gather list of valid fields from lucene
      Collection<String> fields = MultiFields.getIndexedFields(ir);
      fieldNames = fields.toArray(new String[fields.size()]);
    }

    return createQuery(retrieveTerms(docNum));
  }
Пример #2
0
  private void createRetrieveDocs(ResponseBuilder rb) {

    // TODO: in a system with nTiers > 2, we could be passed "ids" here
    // unless those requests always go to the final destination shard

    // for each shard, collect the documents for that shard.
    HashMap<String, Collection<ShardDoc>> shardMap = new HashMap<String, Collection<ShardDoc>>();
    for (ShardDoc sdoc : rb.resultIds.values()) {
      Collection<ShardDoc> shardDocs = shardMap.get(sdoc.shard);
      if (shardDocs == null) {
        shardDocs = new ArrayList<ShardDoc>();
        shardMap.put(sdoc.shard, shardDocs);
      }
      shardDocs.add(sdoc);
    }

    SchemaField uniqueField = rb.req.getSchema().getUniqueKeyField();

    // Now create a request for each shard to retrieve the stored fields
    for (Collection<ShardDoc> shardDocs : shardMap.values()) {
      ShardRequest sreq = new ShardRequest();
      sreq.purpose = ShardRequest.PURPOSE_GET_FIELDS;

      sreq.shards = new String[] {shardDocs.iterator().next().shard};

      sreq.params = new ModifiableSolrParams();

      // add original params
      sreq.params.add(rb.req.getParams());

      // no need for a sort, we already have order
      sreq.params.remove(CommonParams.SORT);

      // we already have the field sort values
      sreq.params.remove(ResponseBuilder.FIELD_SORT_VALUES);

      // make sure that the id is returned for correlation.
      String fl = sreq.params.get(CommonParams.FL);
      if (fl != null) {
        fl = fl.trim();
        // currently, "score" is synonymous with "*,score" so
        // don't add "id" if the fl is empty or "score" or it would change the meaning.
        if (fl.length() != 0 && !"score".equals(fl) && !"*".equals(fl)) {
          sreq.params.set(CommonParams.FL, fl + ',' + uniqueField.getName());
        }
      }

      ArrayList<String> ids = new ArrayList<String>(shardDocs.size());
      for (ShardDoc shardDoc : shardDocs) {
        // TODO: depending on the type, we may need more tha a simple toString()?
        ids.add(shardDoc.id.toString());
      }
      sreq.params.add(ShardParams.IDS, StrUtils.join(ids, ','));

      rb.addRequest(this, sreq);
    }
  }
 private static Object getReferencedId(DataRecord next, ReferenceFieldMetadata field) {
   DataRecord record = (DataRecord) next.get(field);
   if (record != null) {
     Collection<FieldMetadata> keyFields = record.getType().getKeyFields();
     if (keyFields.size() == 1) {
       return record.get(keyFields.iterator().next());
     } else {
       List<Object> compositeKeyValues = new ArrayList<Object>(keyFields.size());
       for (FieldMetadata keyField : keyFields) {
         compositeKeyValues.add(record.get(keyField));
       }
       return compositeKeyValues;
     }
   } else {
     return StringUtils.EMPTY;
   }
 }
Пример #4
0
 @SuppressWarnings("unchecked")
 private static BytesRef[] getBytesRefs(Collection values, TermBuilder termBuilder) {
   BytesRef[] terms = new BytesRef[values.size()];
   int i = 0;
   for (Object value : values) {
     terms[i] = termBuilder.term(value);
     i++;
   }
   return terms;
 }
 @Override
 public StorageResults visit(Select select) {
   // TMDM-4654: Checks if entity has a composite PK.
   Set<ComplexTypeMetadata> compositeKeyTypes = new HashSet<ComplexTypeMetadata>();
   // TMDM-7496: Search should include references to reused types
   Collection<ComplexTypeMetadata> types =
       new HashSet<ComplexTypeMetadata>(select.accept(new SearchTransitiveClosure()));
   for (ComplexTypeMetadata type : types) {
     if (type.getKeyFields().size() > 1) {
       compositeKeyTypes.add(type);
     }
   }
   if (!compositeKeyTypes.isEmpty()) {
     StringBuilder message = new StringBuilder();
     Iterator it = compositeKeyTypes.iterator();
     while (it.hasNext()) {
       ComplexTypeMetadata compositeKeyType = (ComplexTypeMetadata) it.next();
       message.append(compositeKeyType.getName());
       if (it.hasNext()) {
         message.append(',');
       }
     }
     throw new FullTextQueryCompositeKeyException(message.toString());
   }
   // Removes Joins and joined fields.
   List<Join> joins = select.getJoins();
   if (!joins.isEmpty()) {
     Set<ComplexTypeMetadata> joinedTypes = new HashSet<ComplexTypeMetadata>();
     for (Join join : joins) {
       joinedTypes.add(join.getRightField().getFieldMetadata().getContainingType());
     }
     for (ComplexTypeMetadata joinedType : joinedTypes) {
       types.remove(joinedType);
     }
     List<TypedExpression> filteredFields = new LinkedList<TypedExpression>();
     for (TypedExpression expression : select.getSelectedFields()) {
       if (expression instanceof Field) {
         FieldMetadata fieldMetadata = ((Field) expression).getFieldMetadata();
         if (joinedTypes.contains(fieldMetadata.getContainingType())) {
           TypeMapping mapping =
               mappings.getMappingFromDatabase(fieldMetadata.getContainingType());
           filteredFields.add(
               new Alias(
                   new StringConstant(StringUtils.EMPTY),
                   mapping.getUser(fieldMetadata).getName()));
         } else {
           filteredFields.add(expression);
         }
       } else {
         filteredFields.add(expression);
       }
     }
     selectedFields.clear();
     selectedFields.addAll(filteredFields);
   }
   // Handle condition
   Condition condition = select.getCondition();
   if (condition == null) {
     throw new IllegalArgumentException("Expected a condition in select clause but got 0.");
   }
   // Create Lucene query (concatenates all sub queries together).
   FullTextSession fullTextSession = Search.getFullTextSession(session);
   Query parsedQuery = select.getCondition().accept(new LuceneQueryGenerator(types));
   // Create Hibernate Search query
   Set<Class> classes = new HashSet<Class>();
   for (ComplexTypeMetadata type : types) {
     String className = ClassCreator.getClassName(type.getName());
     try {
       ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
       classes.add(contextClassLoader.loadClass(className));
     } catch (ClassNotFoundException e) {
       throw new RuntimeException("Could not find class '" + className + "'.", e);
     }
   }
   FullTextQuery fullTextQuery =
       fullTextSession.createFullTextQuery(
           parsedQuery, classes.toArray(new Class<?>[classes.size()]));
   // Very important to leave this null (would disable ability to search across different types)
   fullTextQuery.setCriteriaQuery(null);
   fullTextQuery.setSort(Sort.RELEVANCE); // Default sort (if no order by specified).
   query =
       EntityFinder.wrap(
           fullTextQuery,
           (HibernateStorage) storage,
           session); // ensures only MDM entity objects are returned.
   // Order by
   for (OrderBy current : select.getOrderBy()) {
     current.accept(this);
   }
   // Paging
   Paging paging = select.getPaging();
   paging.accept(this);
   pageSize = paging.getLimit();
   boolean hasPaging = pageSize < Integer.MAX_VALUE;
   if (!hasPaging) {
     return createResults(query.scroll(ScrollMode.FORWARD_ONLY));
   } else {
     return createResults(query.list());
   }
 }
Пример #6
0
  /**
   * Indexes page using the given IndexWriter.
   *
   * @param page WikiPage
   * @param text Page text to index
   * @param writer The Lucene IndexWriter to use for indexing
   * @return the created index Document
   * @throws IOException If there's an indexing problem
   */
  protected Document luceneIndexPage(WikiPage page, String text, IndexWriter writer)
      throws IOException {
    if (log.isDebugEnabled()) log.debug("Indexing " + page.getName() + "...");

    // make a new, empty document
    Document doc = new Document();

    if (text == null) return doc;

    // Raw name is the keyword we'll use to refer to this document for updates.
    Field field = new Field(LUCENE_ID, page.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED);
    doc.add(field);

    // Body text.  It is stored in the doc for search contexts.
    field =
        new Field(
            LUCENE_PAGE_CONTENTS, text, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO);
    doc.add(field);

    // Allow searching by page name. Both beautified and raw
    String unTokenizedTitle =
        StringUtils.replaceChars(
            page.getName(), MarkupParser.PUNCTUATION_CHARS_ALLOWED, c_punctuationSpaces);

    field =
        new Field(
            LUCENE_PAGE_NAME,
            TextUtil.beautifyString(page.getName()) + " " + unTokenizedTitle,
            Field.Store.YES,
            Field.Index.ANALYZED,
            Field.TermVector.NO);
    doc.add(field);

    // Allow searching by authorname

    if (page.getAuthor() != null) {
      field =
          new Field(
              LUCENE_AUTHOR,
              page.getAuthor(),
              Field.Store.YES,
              Field.Index.ANALYZED,
              Field.TermVector.NO);
      doc.add(field);
    }

    // Now add the names of the attachments of this page
    try {
      Collection attachments = m_engine.getAttachmentManager().listAttachments(page);
      String attachmentNames = "";

      for (Iterator it = attachments.iterator(); it.hasNext(); ) {
        Attachment att = (Attachment) it.next();
        attachmentNames += att.getName() + ";";
      }
      field =
          new Field(
              LUCENE_ATTACHMENTS,
              attachmentNames,
              Field.Store.YES,
              Field.Index.ANALYZED,
              Field.TermVector.NO);
      doc.add(field);

    } catch (ProviderException e) {
      // Unable to read attachments
      log.error("Failed to get attachments for page", e);
    }
    writer.addDocument(doc);

    return doc;
  }
Пример #7
0
  /**
   * Performs a full Lucene reindex, if necessary.
   *
   * @throws IOException If there's a problem during indexing
   */
  protected void doFullLuceneReindex() throws IOException {
    File dir = new File(m_luceneDirectory);

    String[] filelist = dir.list();

    if (filelist == null) {
      throw new IOException(
          "Invalid Lucene directory: cannot produce listing: " + dir.getAbsolutePath());
    }

    try {
      if (filelist.length == 0) {
        //
        //  No files? Reindex!
        //
        Date start = new Date();
        IndexWriter writer = null;

        log.info("Starting Lucene reindexing, this can take a couple minutes...");

        Directory luceneDir = new SimpleFSDirectory(dir, null);

        try {
          writer = getIndexWriter(luceneDir);
          Collection allPages = m_engine.getPageManager().getAllPages();

          for (Iterator iterator = allPages.iterator(); iterator.hasNext(); ) {
            WikiPage page = (WikiPage) iterator.next();

            try {
              String text =
                  m_engine
                      .getPageManager()
                      .getPageText(page.getName(), WikiProvider.LATEST_VERSION);
              luceneIndexPage(page, text, writer);
            } catch (IOException e) {
              log.warn("Unable to index page " + page.getName() + ", continuing to next ", e);
            }
          }

          Collection allAttachments = m_engine.getAttachmentManager().getAllAttachments();
          for (Iterator iterator = allAttachments.iterator(); iterator.hasNext(); ) {
            Attachment att = (Attachment) iterator.next();

            try {
              String text = getAttachmentContent(att.getName(), WikiProvider.LATEST_VERSION);
              luceneIndexPage(att, text, writer);
            } catch (IOException e) {
              log.warn("Unable to index attachment " + att.getName() + ", continuing to next", e);
            }
          }

        } finally {
          close(writer);
        }

        Date end = new Date();
        log.info(
            "Full Lucene index finished in "
                + (end.getTime() - start.getTime())
                + " milliseconds.");
      } else {
        log.info("Files found in Lucene directory, not reindexing.");
      }
    } catch (NoClassDefFoundError e) {
      log.info("Lucene libraries do not exist - not using Lucene.");
    } catch (IOException e) {
      log.error("Problem while creating Lucene index - not using Lucene.", e);
    } catch (ProviderException e) {
      log.error("Problem reading pages while creating Lucene index (JSPWiki won't start.)", e);
      throw new IllegalArgumentException("unable to create Lucene index");
    } catch (Exception e) {
      log.error("Unable to start lucene", e);
    }
  }