Example #1
  /**
   * Add a file to the Lucene index (and generate an xref file)
   *
   * @param file The file to add
   * @param path The path to the file (from source root)
   * @throws java.io.IOException if an error occurs
   */
  private void addFile(File file, String path) throws IOException {
    try (InputStream in = new BufferedInputStream(new FileInputStream(file))) {
      FileAnalyzer fa = AnalyzerGuru.getAnalyzer(in, path);
      for (IndexChangedListener listener : listeners) {
        listener.fileAdd(path, fa.getClass().getSimpleName());
      }
      fa.setCtags(ctags);
      fa.setProject(Project.getProject(path));

      Document d;
      try {
        d = analyzerGuru.getDocument(file, in, path, fa);
      } catch (Exception e) {
        log.log(
            Level.INFO,
            "Skipped file ''{0}'' because the analyzer didn''t " + "understand it.",
            path);
        StringBuilder stack = new StringBuilder();
        for (StackTraceElement ste : e.getStackTrace()) {
          stack.append(ste.toString()).append(System.lineSeparator());
        }
        StringBuilder sstack = new StringBuilder();
        for (Throwable t : e.getSuppressed()) {
          for (StackTraceElement ste : t.getStackTrace()) {
            sstack.append(ste.toString()).append(System.lineSeparator());
          }
        }
        log.log(
            Level.FINE,
            "Exception from analyzer {0}: {1} {2}{3}{4}{5}{6}",
            new String[] {
              fa.getClass().getName(),
              e.toString(),
              System.lineSeparator(),
              stack.toString(),
              System.lineSeparator(),
              sstack.toString()
            });
        return;
      }

      writer.addDocument(d, fa);
      Genre g = fa.getFactory().getGenre();
      if (xrefDir != null && (g == Genre.PLAIN || g == Genre.XREFABLE)) {
        File xrefFile = new File(xrefDir, path);
        // If mkdirs() returns false, the failure is most likely
        // because the directory already exists. Checking for the
        // directory first and creating it only if it doesn't exist
        // would just add file I/O...
        if (!xrefFile.getParentFile().mkdirs()) {
          assert xrefFile.getParentFile().exists();
        }
        fa.writeXref(xrefDir, path);
      }
      setDirty();
      for (IndexChangedListener listener : listeners) {
        listener.fileAdded(path, fa.getClass().getSimpleName());
      }
    }
  }
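
addFile() is private to the indexer, but the analyzer selection it starts with can be exercised on its own. A minimal hypothetical sketch reusing only the OpenGrok calls seen above (the file path is a placeholder):

 import java.io.BufferedInputStream;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.InputStream;

 try (InputStream in = new BufferedInputStream(new FileInputStream(new File("src/main.c")))) {
   // Same call addFile() uses to pick an analyzer from the content plus the file name.
   FileAnalyzer fa = AnalyzerGuru.getAnalyzer(in, "/src/main.c");
   System.out.println("chosen analyzer: " + fa.getClass().getSimpleName());
 }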
 static String reason(FixedBitSet actual, FixedBitSet expected, IndexSearcher indexSearcher)
     throws IOException {
   StringBuilder builder = new StringBuilder();
   builder.append("expected cardinality:").append(expected.cardinality()).append('\n');
   DocIdSetIterator iterator = expected.iterator();
   for (int doc = iterator.nextDoc();
       doc != DocIdSetIterator.NO_MORE_DOCS;
       doc = iterator.nextDoc()) {
     builder
         .append("Expected doc[")
         .append(doc)
         .append("] with id value ")
         .append(indexSearcher.doc(doc).get(UidFieldMapper.NAME))
         .append('\n');
   }
   builder.append("actual cardinality: ").append(actual.cardinality()).append('\n');
   iterator = actual.iterator();
   for (int doc = iterator.nextDoc();
       doc != DocIdSetIterator.NO_MORE_DOCS;
       doc = iterator.nextDoc()) {
     builder
         .append("Actual doc[")
         .append(doc)
         .append("] with id value ")
         .append(indexSearcher.doc(doc).get(UidFieldMapper.NAME))
         .append('\n');
   }
   return builder.toString();
 }
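
In a test, the helper is typically consulted only when the two bit sets disagree; a minimal hypothetical sketch (the JUnit-style fail() is an assumption, not taken from the source):

 // Build the expensive per-document diff only on an actual mismatch.
 if (!actual.equals(expected)) {
   org.junit.Assert.fail(reason(actual, expected, indexSearcher));
 }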
 public String toString(String field) {
   StringBuilder sb = new StringBuilder();
   sb.append("score_child[")
       .append(childType)
       .append("/")
       .append(parentType)
       .append("](")
       .append(originalChildQuery.toString(field))
       .append(')');
   sb.append(ToStringUtils.boost(getBoost()));
   return sb.toString();
 }
Example #4
File: SolrUtil.java Project: mayr/jate
 public static void commit(SolrClient solr, Logger logger, String... messages) {
   try {
     solr.commit();
   } catch (SolrServerException | IOException e) {
     // Both failure modes are reported identically, so a single multi-catch suffices.
     StringBuilder message = new StringBuilder("FAILED TO COMMIT TO SOLR: ");
     message
         .append(Arrays.toString(messages))
         .append("\n")
         .append(ExceptionUtils.getStackTrace(e))
         .append("\n");
     logger.severe(message.toString());
   }
 }
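
A hypothetical call site, assuming a SolrJ 5.x HttpSolrClient pointing at a running core (the URL, logger name, and messages are placeholders):

 import java.util.logging.Logger;
 import org.apache.solr.client.solrj.SolrClient;
 import org.apache.solr.client.solrj.impl.HttpSolrClient;

 SolrClient solr = new HttpSolrClient("http://localhost:8983/solr/jate"); // placeholder URL
 // Commit failures are logged (messages plus stack trace) instead of being propagated.
 SolrUtil.commit(solr, Logger.getLogger("jate"), "finished indexing", "batch 1");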
 /**
  * Provides basic search functions ...
  *
  * @param img the query image
  * @param indexPath the path to the main index; the reference object index is expected at
  *     "<indexPath>-ro"
  * @return the ranked results from the main index
  * @throws IOException if the indexes cannot be opened or read
  */
 public TopDocs search(BufferedImage img, String indexPath) throws IOException {
   ImageSearcher searcher =
       new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
   ImageSearchHits hits =
       searcher.search(img, DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro"))));
   StringBuilder sb = new StringBuilder(numReferenceObjectsUsed * 4);
   for (int j = 0; j < numReferenceObjectsUsed; j++) {
     sb.append(hits.doc(j).getValues("ro-id")[0]);
     sb.append(' ');
   }
   return scoreDocs(sb.toString(), DirectoryReader.open(FSDirectory.open(new File(indexPath))));
 }
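
A hypothetical caller of the method above (the indexer instance, image path, and index location are placeholders; the "-ro" sibling index must already exist, e.g. from createIndex below):

 import java.awt.image.BufferedImage;
 import java.io.File;
 import javax.imageio.ImageIO;
 import org.apache.lucene.search.TopDocs;

 BufferedImage query = ImageIO.read(new File("query.jpg")); // placeholder image
 TopDocs results = indexer.search(query, "index"); // also opens "index-ro" internally
 System.out.println("total hits: " + results.totalHits);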
  /**
   * We assume that the initial indexing has been done and a set of reference objects has been found
   * and indexed in the separate directory. However, further documents have been added since, and
   * they still need their ranked lists of reference objects. So we (i) fetch all new documents
   * missing the field "ro-order" and (ii) add this field.
   *
   * @param indexPath the index to update
   * @throws IOException if the indexes cannot be opened or written
   */
  public void updateIndex(String indexPath) throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    int numDocs = reader.numDocs();
    boolean hasDeletions = reader.hasDeletions();
    int countUpdated = 0;

    IndexReader readerRo = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro")));
    ImageSearcher searcher =
        new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
    Map<String, Analyzer> perField = new HashMap<String, Analyzer>(1);
    perField.put("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
    PerFieldAnalyzerWrapper aWrapper =
        new PerFieldAnalyzerWrapper(new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), perField);

    IndexWriter iw =
        new IndexWriter(
            FSDirectory.open(new File(indexPath)),
            new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper)
                .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
    StringBuilder sb = new StringBuilder(256);
    // Needed to check whether a document has been deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    for (int i = 0; i < numDocs; i++) {
      if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it.
      Document document = reader.document(i);
      if (document.getField("ro-order") == null) { // if the field is not here we create it.
        ImageSearchHits hits = searcher.search(document, readerRo);
        sb.delete(0, sb.length());
        for (int j = 0; j < numReferenceObjectsUsed; j++) {
          sb.append(hits.doc(j).getValues("ro-id")[0]);
          sb.append(' ');
        }
        // System.out.println(sb.toString());
        document.add(new TextField("ro-order", sb.toString(), Field.Store.YES));
        iw.updateDocument(
            new Term(
                DocumentBuilder.FIELD_NAME_IDENTIFIER,
                document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]),
            document);
        countUpdated++;
      }

      // progress report
      progress.setNumDocsProcessed(progress.getNumDocsProcessed() + 1);

      // debug:
      System.out.println("countUpdated = " + countUpdated);
    }
    iw.commit();
    iw.close();
  }
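
Once new documents have been appended to the main index, usage is a single call; a hypothetical sketch (instance name and path are placeholders):

 // Backfills "ro-order" only for documents that do not carry the field yet.
 indexer.updateIndex("index"); // expects both "index" and "index-ro" to exist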
Example #7
File: SolrUtil.java Project: mayr/jate
 public static Terms getTermVector(
     int docId, String fieldname, SolrIndexSearcher solrIndexSearcher) throws JATEException {
   try {
     Terms vector = solrIndexSearcher.getLeafReader().getTermVector(docId, fieldname);
     if (vector == null) throw new JATEException("Cannot find expected field: " + fieldname);
     return vector;
   } catch (IOException ioe) {
     StringBuilder sb =
         new StringBuilder(
             String.format("Cannot find expected field: %s. Error stacktrack:\n", fieldname));
     sb.append(org.apache.commons.lang.exception.ExceptionUtils.getFullStackTrace(ioe));
     throw new JATEException(sb.toString());
   }
 }
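
A sketch of consuming the returned vector, assuming the field was indexed with term vectors enabled (the doc id and field name are hypothetical; the caller must handle JATEException and IOException):

 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.BytesRef;

 Terms vector = SolrUtil.getTermVector(42, "jate_text", solrIndexSearcher);
 TermsEnum te = vector.iterator();
 BytesRef term;
 while ((term = te.next()) != null) {
   System.out.println(term.utf8ToString() + " tf=" + te.totalTermFreq());
 }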
 @Override
 public String toString() {
   StringBuilder sb = new StringBuilder("name: ").append(name).append(" tv_with:");
   if (storedPayloads) {
     sb.append("payloads,");
   }
   if (storedOffset) {
     sb.append("offsets,");
   }
   if (storedPositions) {
     sb.append("positions,");
   }
   return sb.toString();
 }
 @Override
 public String toString(String field) {
   StringBuilder buffer = new StringBuilder();
   buffer.append(hashFieldName);
   buffer.append(",");
   buffer.append(Arrays.toString(hashes));
   buffer.append(",");
   buffer.append(maxResult);
   buffer.append(",");
   buffer.append(luceneFieldName);
   buffer.append(",");
   buffer.append(lireFeature.getClass().getSimpleName());
   buffer.append(ToStringUtils.boost(getBoost()));
   return buffer.toString();
 }
Example #10
 /** Describe the parameters that control how the "more like this" query is formed. */
 public String describeParams() {
   StringBuilder sb = new StringBuilder();
   sb.append("\t").append("maxQueryTerms  : ").append(maxQueryTerms).append("\n");
   sb.append("\t").append("minWordLen     : ").append(minWordLen).append("\n");
   sb.append("\t").append("maxWordLen     : ").append(maxWordLen).append("\n");
   sb.append("\t").append("fieldNames     : ");
   String delim = "";
   for (String fieldName : fieldNames) {
     sb.append(delim).append(fieldName);
     delim = ", ";
   }
   sb.append("\n");
   sb.append("\t").append("boost          : ").append(boost).append("\n");
   sb.append("\t").append("minTermFreq    : ").append(minTermFreq).append("\n");
   sb.append("\t").append("minDocFreq     : ").append(minDocFreq).append("\n");
   return sb.toString();
 }
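
describeParams() is handy when logging how a MoreLikeThis query was configured; a minimal sketch against the Lucene 4.x-era API used elsewhere on this page (index path and field names are placeholders):

 import java.io.File;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.queries.mlt.MoreLikeThis;
 import org.apache.lucene.store.FSDirectory;

 IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("index")));
 MoreLikeThis mlt = new MoreLikeThis(reader);
 mlt.setFieldNames(new String[] {"title", "body"}); // hypothetical fields
 mlt.setMinTermFreq(2);
 mlt.setMinDocFreq(5);
 System.out.println(mlt.describeParams()); // prints the tab-indented summary built above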
    @Override
    public String toString() {

      StringBuilder sb =
          new StringBuilder("index:")
              .append(index)
              .append(" type:")
              .append(type)
              .append(" id:")
              .append(id);
      for (int i = 0; i < fieldSettings.length; i++) {
        TestFieldSetting f = fieldSettings[i];
        sb.append("\n").append("Field: ").append(f).append("\n  content:").append(fieldContent[i]);
      }
      sb.append("\n");

      return sb.toString();
    }
 /**
  * Provides basic search functions ...
  *
  * @param d the query document; a stored "ro-order" field is reused when present
  * @param indexPath the path to the main index; the reference object index is expected at
  *     "<indexPath>-ro"
  * @return the ranked results from the main index
  * @throws IOException if the indexes cannot be opened or read
  */
 public TopDocs search(Document d, String indexPath) throws IOException {
   // If the document already contains its reference object neighbourhood, reuse it.
   if (d.getField("ro-order") != null) {
     return scoreDocs(
         d.getValues("ro-order")[0], DirectoryReader.open(FSDirectory.open(new File(indexPath))));
   } else { // if not we just create it :)
     ImageSearcher searcher =
         new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
     ImageSearchHits hits =
         searcher.search(d, DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro"))));
     StringBuilder sb = new StringBuilder(numReferenceObjectsUsed * 4);
     for (int j = 0; j < numReferenceObjectsUsed; j++) {
       sb.append(hits.doc(j).getValues("ro-id")[0]);
       sb.append(' ');
     }
     return scoreDocs(sb.toString(), DirectoryReader.open(FSDirectory.open(new File(indexPath))));
   }
 }
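
Because the fast path reuses the stored "ro-order" value, re-querying with a document fetched from the index skips the reference object search entirely; a hypothetical round trip (instance and path are placeholders):

 import java.io.File;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.FSDirectory;

 IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("index")));
 Document d = reader.document(0); // carries "ro-order" after createIndex/updateIndex
 TopDocs similar = indexer.search(d, "index"); // takes the fast path above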
  /**
   * Creates a set of reference objects and stores it in a new index (named "<indexPath>-ro"). Then
   * creates ordered lists of reference object positions for each data item in the index with the
   * given feature. Finally, a new index (named "<indexPath>-ms") is created where all the original
   * documents as well as the new data are stored.
   *
   * @param indexPath the path to the original index
   * @throws IOException if the indexes cannot be opened or written
   */
  public void createIndex(String indexPath) throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    int numDocs = reader.numDocs();

    if (numDocs < numReferenceObjects) {
      throw new UnsupportedOperationException("Too few documents in index.");
    }

    // progress report
    progress.setNumDocsAll(numDocs);
    progress.setCurrentState(State.RoSelection);

    boolean hasDeletions = reader.hasDeletions();

    // init reference objects:
    IndexWriter iw = LuceneUtils.createIndexWriter(indexPath + "-ro", true);
    HashSet<Integer> referenceObjsIds = new HashSet<Integer>(numReferenceObjects);

    double numDocsDouble = (double) numDocs;
    while (referenceObjsIds.size() < numReferenceObjects) {
      referenceObjsIds.add((int) (numDocsDouble * Math.random()));
    }
    int count = 0;

    if (hasDeletions) {
      System.err.println(
          "WARNING: There are deleted docs in your index. You should "
              + "optimize your index before using this method.");
    }

    // progress report
    progress.setCurrentState(State.RoIndexing);

    // find them in the index and put them into a separate index:
    for (int i : referenceObjsIds) {
      count++;
      Document document = reader.document(i);
      document.add(new Field("ro-id", count + "", StringField.TYPE_STORED));
      iw.addDocument(document);
    }
    iw.commit();
    iw.close();

    // progress report
    progress.setCurrentState(State.Indexing);

    // now find the reference objects for each entry ;)
    IndexReader readerRo = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro")));
    ImageSearcher searcher =
        new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
    Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>();
    analyzerPerField.put("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
    PerFieldAnalyzerWrapper aWrapper =
        new PerFieldAnalyzerWrapper(
            new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), analyzerPerField);

    iw =
        new IndexWriter(
            FSDirectory.open(new File(indexPath)),
            new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper)
                .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
    StringBuilder sb = new StringBuilder(256);
    // Needed to check whether a document has been deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    for (int i = 0; i < numDocs; i++) {
      if (reader.hasDeletions() && !liveDocs.get(i)) continue; // if it is deleted, just ignore it.
      Document document = reader.document(i);
      ImageSearchHits hits = searcher.search(document, readerRo);
      sb.delete(0, sb.length());
      for (int j = 0; j < numReferenceObjectsUsed; j++) {
        sb.append(hits.doc(j).getValues("ro-id")[0]);
        sb.append(' ');
      }
      // System.out.println(sb.toString());
      document.add(new TextField("ro-order", sb.toString(), Field.Store.YES));
      iw.updateDocument(
          new Term(
              DocumentBuilder.FIELD_NAME_IDENTIFIER,
              document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]),
          document);

      // progress report
      progress.setNumDocsProcessed(progress.getNumDocsProcessed() + 1);
    }
    iw.commit();
    iw.close();

    // progress report
    progress.setCurrentState(State.Idle);
  }
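
Putting the two passes together, a hypothetical end-to-end sequence (instance name and path are placeholders):

 indexer.createIndex("index"); // selects reference objects, writes "index-ro", adds "ro-order"
 // ... more images are indexed into "index" later ...
 indexer.updateIndex("index"); // backfills "ro-order" for the newly added documents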