/**
 * Return a query that will return docs like the passed Lucene document ID.
 *
 * @param docNum the document ID of the Lucene doc to generate the "More Like This" query for.
 * @return a query that will return docs like the passed Lucene document ID.
 */
public Query like(int docNum) throws IOException {
    if (fieldNames == null) {
        // Gather the list of valid fields from Lucene.
        Collection<String> fields = MultiFields.getIndexedFields(ir);
        fieldNames = fields.toArray(new String[fields.size()]);
    }
    return createQuery(retrieveTerms(docNum));
}
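A minimal usage sketch, assuming `mlt` is an instance of the enclosing class and `searcher` is an IndexSearcher opened over the same IndexReader (`ir`); both names are hypothetical and not part of the original code.

// Hypothetical usage: find the 10 documents most similar to Lucene doc 42.
// "mlt" and "searcher" are assumed to exist; they are not defined in this listing.
Query similarityQuery = mlt.like(42);
TopDocs hits = searcher.search(similarityQuery, 10);
for (ScoreDoc hit : hits.scoreDocs) {
    System.out.println("doc=" + hit.doc + " score=" + hit.score);
}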
private void createRetrieveDocs(ResponseBuilder rb) {
    // TODO: in a system with nTiers > 2, we could be passed "ids" here,
    // unless those requests always go to the final destination shard.

    // For each shard, collect the documents for that shard.
    HashMap<String, Collection<ShardDoc>> shardMap = new HashMap<String, Collection<ShardDoc>>();
    for (ShardDoc sdoc : rb.resultIds.values()) {
        Collection<ShardDoc> shardDocs = shardMap.get(sdoc.shard);
        if (shardDocs == null) {
            shardDocs = new ArrayList<ShardDoc>();
            shardMap.put(sdoc.shard, shardDocs);
        }
        shardDocs.add(sdoc);
    }

    SchemaField uniqueField = rb.req.getSchema().getUniqueKeyField();

    // Now create a request for each shard to retrieve the stored fields.
    for (Collection<ShardDoc> shardDocs : shardMap.values()) {
        ShardRequest sreq = new ShardRequest();
        sreq.purpose = ShardRequest.PURPOSE_GET_FIELDS;
        sreq.shards = new String[] {shardDocs.iterator().next().shard};
        sreq.params = new ModifiableSolrParams();

        // Add the original params.
        sreq.params.add(rb.req.getParams());

        // No need for a sort: we already have the order.
        sreq.params.remove(CommonParams.SORT);

        // We already have the field sort values.
        sreq.params.remove(ResponseBuilder.FIELD_SORT_VALUES);

        // Make sure that the id is returned for correlation.
        String fl = sreq.params.get(CommonParams.FL);
        if (fl != null) {
            fl = fl.trim();
            // Currently, "score" is synonymous with "*,score", so don't add "id"
            // if the fl is empty, "score", or "*" -- that would change the meaning.
            if (fl.length() != 0 && !"score".equals(fl) && !"*".equals(fl)) {
                sreq.params.set(CommonParams.FL, fl + ',' + uniqueField.getName());
            }
        }

        ArrayList<String> ids = new ArrayList<String>(shardDocs.size());
        for (ShardDoc shardDoc : shardDocs) {
            // TODO: depending on the type, we may need more than a simple toString()?
            ids.add(shardDoc.id.toString());
        }
        sreq.params.add(ShardParams.IDS, StrUtils.join(ids, ','));

        rb.addRequest(this, sreq);
    }
}
private static Object getReferencedId(DataRecord next, ReferenceFieldMetadata field) {
    DataRecord record = (DataRecord) next.get(field);
    if (record != null) {
        Collection<FieldMetadata> keyFields = record.getType().getKeyFields();
        if (keyFields.size() == 1) {
            // Simple key: return the single key value directly.
            return record.get(keyFields.iterator().next());
        } else {
            // Composite key: return all key values as a list.
            List<Object> compositeKeyValues = new ArrayList<Object>(keyFields.size());
            for (FieldMetadata keyField : keyFields) {
                compositeKeyValues.add(record.get(keyField));
            }
            return compositeKeyValues;
        }
    } else {
        return StringUtils.EMPTY;
    }
}
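The return type is intentionally loose: a single key value, a List<Object> for composite keys, or the empty string when the reference is unset. A hedged caller-side sketch that branches on those three shapes; `record`, `referenceField`, and `process(...)` are hypothetical names, not part of the original code.

// Hypothetical caller: "record" and "referenceField" are assumed to exist.
Object referencedId = getReferencedId(record, referenceField);
if (referencedId instanceof List) {
    // Composite key: one value per key field, in key-field order.
    for (Object keyValue : (List<?>) referencedId) {
        process(keyValue); // process(...) is a placeholder
    }
} else if (!StringUtils.EMPTY.equals(referencedId)) {
    process(referencedId); // simple, single-valued key
}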
@SuppressWarnings("unchecked")
private static BytesRef[] getBytesRefs(Collection values, TermBuilder termBuilder) {
    BytesRef[] terms = new BytesRef[values.size()];
    int i = 0;
    for (Object value : values) {
        terms[i] = termBuilder.term(value);
        i++;
    }
    return terms;
}
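The TermBuilder contract is only inferred from the usage above: a single term(Object) method producing a BytesRef. A minimal sketch of that assumed interface and a call site follows; the interface shape and the UTF-8 conversion are assumptions, and since getBytesRefs is private, the call is assumed to happen inside the declaring class.

// Assumed shape of TermBuilder, inferred from getBytesRefs() above.
interface TermBuilder {
    BytesRef term(Object value);
}

// Hypothetical call site: convert string values to their indexed term bytes.
List<String> values = Arrays.asList("alpha", "beta");
BytesRef[] terms = getBytesRefs(values, new TermBuilder() {
    @Override
    public BytesRef term(Object value) {
        return new BytesRef(value.toString()); // BytesRef(String) encodes as UTF-8
    }
});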
@Override
public StorageResults visit(Select select) {
    // TMDM-4654: Checks if entity has a composite PK.
    Set<ComplexTypeMetadata> compositeKeyTypes = new HashSet<ComplexTypeMetadata>();
    // TMDM-7496: Search should include references to reused types.
    Collection<ComplexTypeMetadata> types =
            new HashSet<ComplexTypeMetadata>(select.accept(new SearchTransitiveClosure()));
    for (ComplexTypeMetadata type : types) {
        if (type.getKeyFields().size() > 1) {
            compositeKeyTypes.add(type);
        }
    }
    if (!compositeKeyTypes.isEmpty()) {
        StringBuilder message = new StringBuilder();
        Iterator<ComplexTypeMetadata> it = compositeKeyTypes.iterator();
        while (it.hasNext()) {
            message.append(it.next().getName());
            if (it.hasNext()) {
                message.append(',');
            }
        }
        throw new FullTextQueryCompositeKeyException(message.toString());
    }
    // Removes joins and joined fields.
    List<Join> joins = select.getJoins();
    if (!joins.isEmpty()) {
        Set<ComplexTypeMetadata> joinedTypes = new HashSet<ComplexTypeMetadata>();
        for (Join join : joins) {
            joinedTypes.add(join.getRightField().getFieldMetadata().getContainingType());
        }
        for (ComplexTypeMetadata joinedType : joinedTypes) {
            types.remove(joinedType);
        }
        List<TypedExpression> filteredFields = new LinkedList<TypedExpression>();
        for (TypedExpression expression : select.getSelectedFields()) {
            if (expression instanceof Field) {
                FieldMetadata fieldMetadata = ((Field) expression).getFieldMetadata();
                if (joinedTypes.contains(fieldMetadata.getContainingType())) {
                    TypeMapping mapping = mappings.getMappingFromDatabase(fieldMetadata.getContainingType());
                    filteredFields.add(new Alias(new StringConstant(StringUtils.EMPTY),
                            mapping.getUser(fieldMetadata).getName()));
                } else {
                    filteredFields.add(expression);
                }
            } else {
                filteredFields.add(expression);
            }
        }
        selectedFields.clear();
        selectedFields.addAll(filteredFields);
    }
    // Handle condition.
    Condition condition = select.getCondition();
    if (condition == null) {
        throw new IllegalArgumentException("Expected a condition in select clause but got none.");
    }
    // Create Lucene query (concatenates all sub queries together).
    FullTextSession fullTextSession = Search.getFullTextSession(session);
    Query parsedQuery = condition.accept(new LuceneQueryGenerator(types));
    // Create Hibernate Search query.
    Set<Class<?>> classes = new HashSet<Class<?>>();
    for (ComplexTypeMetadata type : types) {
        String className = ClassCreator.getClassName(type.getName());
        try {
            ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
            classes.add(contextClassLoader.loadClass(className));
        } catch (ClassNotFoundException e) {
            throw new RuntimeException("Could not find class '" + className + "'.", e);
        }
    }
    FullTextQuery fullTextQuery = fullTextSession.createFullTextQuery(
            parsedQuery, classes.toArray(new Class<?>[classes.size()]));
    // Very important to leave this null (a criteria query would disable the
    // ability to search across different types).
    fullTextQuery.setCriteriaQuery(null);
    fullTextQuery.setSort(Sort.RELEVANCE); // Default sort (if no order by specified).
    // Ensures only MDM entity objects are returned.
    query = EntityFinder.wrap(fullTextQuery, (HibernateStorage) storage, session);
    // Order by
    for (OrderBy current : select.getOrderBy()) {
        current.accept(this);
    }
    // Paging
    Paging paging = select.getPaging();
    paging.accept(this);
    pageSize = paging.getLimit();
    boolean hasPaging = pageSize < Integer.MAX_VALUE;
    if (!hasPaging) {
        return createResults(query.scroll(ScrollMode.FORWARD_ONLY));
    } else {
        return createResults(query.list());
    }
}
/**
 * Indexes a page using the given IndexWriter.
 *
 * @param page the WikiPage to index
 * @param text the page text to index
 * @param writer the Lucene IndexWriter to use for indexing
 * @return the created index Document
 * @throws IOException if there's an indexing problem
 */
protected Document luceneIndexPage(WikiPage page, String text, IndexWriter writer)
        throws IOException {
    if (log.isDebugEnabled()) log.debug("Indexing " + page.getName() + "...");

    // Make a new, empty document.
    Document doc = new Document();

    if (text == null) return doc;

    // Raw name is the keyword we'll use to refer to this document for updates.
    Field field = new Field(LUCENE_ID, page.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED);
    doc.add(field);

    // Body text. It is stored in the doc for search contexts.
    field = new Field(LUCENE_PAGE_CONTENTS, text,
            Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO);
    doc.add(field);

    // Allow searching by page name, both beautified and raw.
    String unTokenizedTitle = StringUtils.replaceChars(
            page.getName(), MarkupParser.PUNCTUATION_CHARS_ALLOWED, c_punctuationSpaces);
    field = new Field(LUCENE_PAGE_NAME,
            TextUtil.beautifyString(page.getName()) + " " + unTokenizedTitle,
            Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO);
    doc.add(field);

    // Allow searching by author name.
    if (page.getAuthor() != null) {
        field = new Field(LUCENE_AUTHOR, page.getAuthor(),
                Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO);
        doc.add(field);
    }

    // Now add the names of the attachments of this page.
    try {
        Collection attachments = m_engine.getAttachmentManager().listAttachments(page);
        StringBuilder attachmentNames = new StringBuilder();
        for (Iterator it = attachments.iterator(); it.hasNext(); ) {
            Attachment att = (Attachment) it.next();
            attachmentNames.append(att.getName()).append(';');
        }
        field = new Field(LUCENE_ATTACHMENTS, attachmentNames.toString(),
                Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO);
        doc.add(field);
    } catch (ProviderException e) {
        // Unable to read attachments.
        log.error("Failed to get attachments for page", e);
    }

    writer.addDocument(doc);
    return doc;
}
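For context, a hedged sketch of how the fields indexed above could be queried. It uses the Lucene 3.x API matching the Field.Index.* constants in luceneIndexPage; the StandardAnalyzer choice is an assumption (JSPWiki's actual analyzer is configurable), and "searcher" is a hypothetical IndexSearcher over this index.

// Hypothetical search over the fields indexed above (Lucene 3.x API).
// The analyzer is an assumption; "searcher" is not defined in this listing.
QueryParser parser = new QueryParser(Version.LUCENE_36, LUCENE_PAGE_CONTENTS,
        new StandardAnalyzer(Version.LUCENE_36));
Query query = parser.parse("wiki"); // throws ParseException
TopDocs results = searcher.search(query, 10);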
/**
 * Performs a full Lucene reindex, if necessary.
 *
 * @throws IOException if there's a problem during indexing
 */
protected void doFullLuceneReindex() throws IOException {
    File dir = new File(m_luceneDirectory);
    String[] filelist = dir.list();
    if (filelist == null) {
        throw new IOException(
                "Invalid Lucene directory: cannot produce listing: " + dir.getAbsolutePath());
    }

    try {
        if (filelist.length == 0) {
            //
            // No files? Reindex!
            //
            Date start = new Date();
            IndexWriter writer = null;

            log.info("Starting Lucene reindexing, this can take a couple of minutes...");

            Directory luceneDir = new SimpleFSDirectory(dir, null);
            try {
                writer = getIndexWriter(luceneDir);
                Collection allPages = m_engine.getPageManager().getAllPages();
                for (Iterator iterator = allPages.iterator(); iterator.hasNext(); ) {
                    WikiPage page = (WikiPage) iterator.next();
                    try {
                        String text = m_engine.getPageManager()
                                .getPageText(page.getName(), WikiProvider.LATEST_VERSION);
                        luceneIndexPage(page, text, writer);
                    } catch (IOException e) {
                        log.warn("Unable to index page " + page.getName() + ", continuing to next", e);
                    }
                }

                Collection allAttachments = m_engine.getAttachmentManager().getAllAttachments();
                for (Iterator iterator = allAttachments.iterator(); iterator.hasNext(); ) {
                    Attachment att = (Attachment) iterator.next();
                    try {
                        String text = getAttachmentContent(att.getName(), WikiProvider.LATEST_VERSION);
                        luceneIndexPage(att, text, writer);
                    } catch (IOException e) {
                        log.warn("Unable to index attachment " + att.getName() + ", continuing to next", e);
                    }
                }
            } finally {
                close(writer);
            }

            Date end = new Date();
            log.info("Full Lucene index finished in " + (end.getTime() - start.getTime()) + " milliseconds.");
        } else {
            log.info("Files found in Lucene directory, not reindexing.");
        }
    } catch (NoClassDefFoundError e) {
        log.info("Lucene libraries do not exist - not using Lucene.");
    } catch (IOException e) {
        log.error("Problem while creating Lucene index - not using Lucene.", e);
    } catch (ProviderException e) {
        log.error("Problem reading pages while creating Lucene index (JSPWiki won't start.)", e);
        throw new IllegalArgumentException("unable to create Lucene index");
    } catch (Exception e) {
        log.error("Unable to start lucene", e);
    }
}