/**
 * Add a file to the Lucene index (and generate an xref file).
 *
 * @param file the file to add
 * @param path the path to the file (from the source root)
 * @throws java.io.IOException if an error occurs
 */
private void addFile(File file, String path) throws IOException {
    try (InputStream in = new BufferedInputStream(new FileInputStream(file))) {
        FileAnalyzer fa = AnalyzerGuru.getAnalyzer(in, path);
        for (IndexChangedListener listener : listeners) {
            listener.fileAdd(path, fa.getClass().getSimpleName());
        }
        fa.setCtags(ctags);
        fa.setProject(Project.getProject(path));

        Document d;
        try {
            d = analyzerGuru.getDocument(file, in, path, fa);
        } catch (Exception e) {
            // '' is an escaped single quote in java.util.logging's MessageFormat syntax.
            log.log(Level.INFO,
                    "Skipped file ''{0}'' because the analyzer didn''t understand it.",
                    path);
            StringBuilder stack = new StringBuilder();
            for (StackTraceElement ste : e.getStackTrace()) {
                stack.append(ste.toString()).append(System.lineSeparator());
            }
            StringBuilder sstack = new StringBuilder();
            for (Throwable t : e.getSuppressed()) {
                for (StackTraceElement ste : t.getStackTrace()) {
                    sstack.append(ste.toString()).append(System.lineSeparator());
                }
            }
            log.log(Level.FINE, "Exception from analyzer {0}: {1} {2}{3}{4}{5}{6}",
                    new String[]{fa.getClass().getName(), e.toString(),
                            System.lineSeparator(), stack.toString(),
                            System.lineSeparator(), sstack.toString()});
            return;
        }

        writer.addDocument(d, fa);
        Genre g = fa.getFactory().getGenre();
        if (xrefDir != null && (g == Genre.PLAIN || g == Genre.XREFABLE)) {
            File xrefFile = new File(xrefDir, path);
            // If mkdirs() returns false, the failure is most likely because the
            // directory already exists. Checking for the directory first and only
            // creating it if it doesn't exist would just add file I/O.
            if (!xrefFile.getParentFile().mkdirs()) {
                assert xrefFile.getParentFile().exists();
            }
            fa.writeXref(xrefDir, path);
        }
        setDirty();
        for (IndexChangedListener listener : listeners) {
            listener.fileAdded(path, fa.getClass().getSimpleName());
        }
    }
}
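// A minimal sketch of an IndexChangedListener, assuming the interface declares at least the
// fileAdd/fileAdded callbacks used by addFile() above (the real interface may declare more,
// e.g. for file removal); the class name here is hypothetical. It simply logs each notification.
public class LoggingIndexChangedListener implements IndexChangedListener {
    private static final Logger LOG =
            Logger.getLogger(LoggingIndexChangedListener.class.getName());

    @Override
    public void fileAdd(String path, String analyzer) {
        LOG.log(Level.FINE, "Adding {0} using analyzer {1}", new Object[]{path, analyzer});
    }

    @Override
    public void fileAdded(String path, String analyzer) {
        LOG.log(Level.FINE, "Added {0} using analyzer {1}", new Object[]{path, analyzer});
    }
}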
static String reason(FixedBitSet actual, FixedBitSet expected, IndexSearcher indexSearcher)
        throws IOException {
    StringBuilder builder = new StringBuilder();
    builder.append("expected cardinality:").append(expected.cardinality()).append('\n');
    DocIdSetIterator iterator = expected.iterator();
    for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
        builder.append("Expected doc[").append(doc).append("] with id value ")
                .append(indexSearcher.doc(doc).get(UidFieldMapper.NAME)).append('\n');
    }
    builder.append("actual cardinality: ").append(actual.cardinality()).append('\n');
    iterator = actual.iterator();
    for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
        builder.append("Actual doc[").append(doc).append("] with id value ")
                .append(indexSearcher.doc(doc).get(UidFieldMapper.NAME)).append('\n');
    }
    return builder.toString();
}
public String toString(String field) {
    StringBuilder sb = new StringBuilder();
    sb.append("score_child[").append(childType).append("/").append(parentType).append("](")
            .append(originalChildQuery.toString(field)).append(')');
    sb.append(ToStringUtils.boost(getBoost()));
    return sb.toString();
}
/**
 * Searches the index for documents similar to the given image, using the
 * reference-object index ("<indexPath>-ro") to build the query.
 *
 * @param img       the query image
 * @param indexPath the path to the main index
 * @return the ranked result documents
 * @throws IOException if the index cannot be read
 */
public TopDocs search(BufferedImage img, String indexPath) throws IOException {
    ImageSearcher searcher =
            new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
    ImageSearchHits hits =
            searcher.search(img, DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro"))));
    StringBuilder sb = new StringBuilder(numReferenceObjectsUsed * 4);
    for (int j = 0; j < numReferenceObjectsUsed; j++) {
        sb.append(hits.doc(j).getValues("ro-id")[0]);
        sb.append(' ');
    }
    return scoreDocs(sb.toString(), DirectoryReader.open(FSDirectory.open(new File(indexPath))));
}
/**
 * Assumes that the initial indexing has been done and a set of reference objects has been
 * found and indexed in the separate directory. Further documents have since been added and
 * now need a ranked list of reference objects, so we (i) fetch all new documents missing
 * the field "ro-order" and (ii) add that field.
 *
 * @param indexPath the index to update
 * @throws IOException if one of the indexes cannot be read or written
 */
public void updateIndex(String indexPath) throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    int numDocs = reader.numDocs();
    boolean hasDeletions = reader.hasDeletions();
    IndexReader readerRo = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro")));
    ImageSearcher searcher =
            new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
    Map<String, Analyzer> perField = new HashMap<String, Analyzer>(1);
    perField.put("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
    PerFieldAnalyzerWrapper aWrapper =
            new PerFieldAnalyzerWrapper(new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), perField);
    // Open in CREATE_OR_APPEND mode: with OpenMode.CREATE the existing index would be
    // wiped, and every document that is not rewritten below would be lost.
    IndexWriter iw = new IndexWriter(FSDirectory.open(new File(indexPath)),
            new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper)
                    .setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND));
    StringBuilder sb = new StringBuilder(256);
    // Needed to check whether a document has been deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    for (int i = 0; i < numDocs; i++) {
        if (hasDeletions && !liveDocs.get(i)) continue; // deleted, so just ignore it.
        Document document = reader.document(i);
        if (document.getField("ro-order") == null) { // if the field is not there, create it.
            ImageSearchHits hits = searcher.search(document, readerRo);
            sb.delete(0, sb.length());
            for (int j = 0; j < numReferenceObjectsUsed; j++) {
                sb.append(hits.doc(j).getValues("ro-id")[0]);
                sb.append(' ');
            }
            document.add(new TextField("ro-order", sb.toString(), Field.Store.YES));
            iw.updateDocument(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER,
                    document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), document);
        }
        // progress report
        progress.setNumDocsProcessed(progress.getNumDocsProcessed() + 1);
    }
    iw.commit();
    iw.close();
}
@Override public String toString() { StringBuilder sb = new StringBuilder("name: ").append(name).append(" tv_with:"); if (storedPayloads) { sb.append("payloads,"); } if (storedOffset) { sb.append("offsets,"); } if (storedPositions) { sb.append("positions,"); } return sb.toString(); }
@Override
public String toString(String field) {
    StringBuilder buffer = new StringBuilder();
    buffer.append(hashFieldName);
    buffer.append(",");
    buffer.append(Arrays.toString(hashes));
    buffer.append(",");
    buffer.append(maxResult);
    buffer.append(",");
    buffer.append(luceneFieldName);
    buffer.append(",");
    buffer.append(lireFeature.getClass().getSimpleName());
    buffer.append(ToStringUtils.boost(getBoost()));
    return buffer.toString();
}
/** Describe the parameters that control how the "more like this" query is formed. */
public String describeParams() {
    StringBuilder sb = new StringBuilder();
    sb.append("\t").append("maxQueryTerms : ").append(maxQueryTerms).append("\n");
    sb.append("\t").append("minWordLen : ").append(minWordLen).append("\n");
    sb.append("\t").append("maxWordLen : ").append(maxWordLen).append("\n");
    sb.append("\t").append("fieldNames : ");
    String delim = "";
    for (String fieldName : fieldNames) {
        sb.append(delim).append(fieldName);
        delim = ", ";
    }
    sb.append("\n");
    sb.append("\t").append("boost : ").append(boost).append("\n");
    sb.append("\t").append("minTermFreq : ").append(minTermFreq).append("\n");
    sb.append("\t").append("minDocFreq : ").append(minDocFreq).append("\n");
    return sb.toString();
}
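// Hedged usage sketch, assuming the method above belongs to Lucene's MoreLikeThis
// (org.apache.lucene.queries.mlt.MoreLikeThis): print the effective parameters before
// building a query, e.g. when debugging why a "more like this" query matches too little.
MoreLikeThis mlt = new MoreLikeThis(reader); // reader: an open IndexReader, assumed in scope
mlt.setFieldNames(new String[]{"body"});
mlt.setMinTermFreq(2);
mlt.setMinDocFreq(5);
System.out.println(mlt.describeParams());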
@Override public String toString() { StringBuilder sb = new StringBuilder("index:") .append(index) .append(" type:") .append(type) .append(" id:") .append(id); for (int i = 0; i < fieldSettings.length; i++) { TestFieldSetting f = fieldSettings[i]; sb.append("\n").append("Field: ").append(f).append("\n content:").append(fieldContent[i]); } sb.append("\n"); return sb.toString(); }
/**
 * Searches the index for documents similar to the given document. If the document already
 * carries the "ro-order" field (the reference-object neighbourhood), it is used directly;
 * otherwise the field value is computed first.
 *
 * @param d         the query document
 * @param indexPath the path to the main index
 * @return the ranked result documents
 * @throws IOException if the index cannot be read
 */
public TopDocs search(Document d, String indexPath) throws IOException {
    if (d.getField("ro-order") != null) {
        // The document already contains the information on the reference-object neighbourhood.
        return scoreDocs(d.getValues("ro-order")[0],
                DirectoryReader.open(FSDirectory.open(new File(indexPath))));
    } else {
        // If not, we just create it :)
        ImageSearcher searcher =
                new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
        ImageSearchHits hits =
                searcher.search(d, DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro"))));
        StringBuilder sb = new StringBuilder(numReferenceObjectsUsed * 4);
        for (int j = 0; j < numReferenceObjectsUsed; j++) {
            sb.append(hits.doc(j).getValues("ro-id")[0]);
            sb.append(' ');
        }
        return scoreDocs(sb.toString(), DirectoryReader.open(FSDirectory.open(new File(indexPath))));
    }
}
public static void commit(SolrClient solr, Logger logger, String... messages) {
    try {
        solr.commit();
    } catch (SolrServerException | IOException e) {
        StringBuilder message = new StringBuilder("FAILED TO COMMIT TO SOLR: ");
        message.append(Arrays.toString(messages)).append("\n")
                .append(ExceptionUtils.getStackTrace(e)).append("\n");
        logger.severe(message.toString());
    }
}
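// Usage sketch for the helper above: solr, logger and batchId are assumed to exist in the
// caller; the varargs strings only add context to the log entry if the commit fails.
commit(solr, logger, "post-import", "batch=" + batchId);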
public static Terms getTermVector(int docId, String fieldname, SolrIndexSearcher solrIndexSearcher)
        throws JATEException {
    try {
        Terms vector = solrIndexSearcher.getLeafReader().getTermVector(docId, fieldname);
        if (vector == null) {
            throw new JATEException("Cannot find expected field: " + fieldname);
        }
        return vector;
    } catch (IOException ioe) {
        StringBuilder sb = new StringBuilder(
                String.format("Cannot find expected field: %s. Error stacktrace:\n", fieldname));
        sb.append(org.apache.commons.lang.exception.ExceptionUtils.getFullStackTrace(ioe));
        throw new JATEException(sb.toString());
    }
}
/**
 * Creates a set of reference objects and stores them in a new index (named "<indexPath>-ro").
 * Then creates an ordered list of reference-object positions for each data item in the index
 * with the given feature. Finally the index is rewritten so that each original document also
 * stores this new data in the field "ro-order".
 *
 * @param indexPath the path to the original index
 * @throws IOException if one of the indexes cannot be read or written
 */
public void createIndex(String indexPath) throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    int numDocs = reader.numDocs();
    if (numDocs < numReferenceObjects) {
        throw new UnsupportedOperationException("Too few documents in index.");
    }
    // progress report
    progress.setNumDocsAll(numDocs);
    progress.setCurrentState(State.RoSelection);

    boolean hasDeletions = reader.hasDeletions();
    if (hasDeletions) {
        System.err.println("WARNING: There are deleted docs in your index. You should "
                + "optimize your index before using this method.");
    }

    // init reference objects: pick numReferenceObjects random document ids
    IndexWriter iw = LuceneUtils.createIndexWriter(indexPath + "-ro", true);
    HashSet<Integer> referenceObjsIds = new HashSet<Integer>(numReferenceObjects);
    double numDocsDouble = (double) numDocs;
    while (referenceObjsIds.size() < numReferenceObjects) {
        referenceObjsIds.add((int) (numDocsDouble * Math.random()));
    }

    // progress report
    progress.setCurrentState(State.RoIndexing);

    // find the reference objects in the index and put them into a separate index:
    int count = 0;
    for (int i : referenceObjsIds) {
        count++;
        Document document = reader.document(i);
        document.add(new Field("ro-id", count + "", StringField.TYPE_STORED));
        iw.addDocument(document);
    }
    iw.commit();
    iw.close();

    // progress report
    progress.setCurrentState(State.Indexing);

    // now find the reference objects for each entry:
    IndexReader readerRo = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro")));
    ImageSearcher searcher =
            new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
    Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>();
    analyzerPerField.put("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
    PerFieldAnalyzerWrapper aWrapper = new PerFieldAnalyzerWrapper(
            new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), analyzerPerField);
    iw = new IndexWriter(FSDirectory.open(new File(indexPath)),
            new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper)
                    .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
    StringBuilder sb = new StringBuilder(256);
    // Needed to check whether a document has been deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    for (int i = 0; i < numDocs; i++) {
        if (hasDeletions && !liveDocs.get(i)) continue; // deleted, so just ignore it.
        Document document = reader.document(i);
        ImageSearchHits hits = searcher.search(document, readerRo);
        sb.delete(0, sb.length());
        for (int j = 0; j < numReferenceObjectsUsed; j++) {
            sb.append(hits.doc(j).getValues("ro-id")[0]);
            sb.append(' ');
        }
        document.add(new TextField("ro-order", sb.toString(), Field.Store.YES));
        iw.updateDocument(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER,
                document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), document);
        // progress report
        progress.setNumDocsProcessed(progress.getNumDocsProcessed() + 1);
    }
    iw.commit();
    iw.close();

    // progress report
    progress.setCurrentState(State.Idle);
}
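// Hedged end-to-end sketch, assuming the indexing and search methods above (createIndex,
// updateIndex, search) live on one indexer class; the name Indexer and its no-arg constructor
// are hypothetical. Build the reference-object data once, refresh it after new documents
// arrive, then query with an image.
Indexer indexer = new Indexer();        // hypothetical class holding the methods above
indexer.createIndex("/path/to/index");  // creates "/path/to/index-ro" and adds "ro-order"
indexer.updateIndex("/path/to/index");  // fills "ro-order" for newly added documents
TopDocs results = indexer.search(ImageIO.read(new File("query.jpg")), "/path/to/index");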