/**
 * Gathers summary information about an index into an ordered map.
 *
 * <p>Entries are added in this order: {@code numDocs}, {@code maxDoc}, {@code numTerms} (only
 * when {@code countTerms} is true), {@code version}, {@code optimized}, {@code current}, {@code
 * hasDeletions}, {@code directory} and {@code lastModified}.
 *
 * @param reader the reader whose index is described
 * @param countTerms when true, every term of the index is enumerated to produce {@code numTerms}
 * @return the populated map
 * @throws IOException if the reader or its directory cannot be read
 */
public static SimpleOrderedMap<Object> getIndexInfo(IndexReader reader, boolean countTerms)
      throws IOException {
    // Resolve the directory up front, before any statistics are collected.
    final Directory directory = reader.directory();
    final SimpleOrderedMap<Object> info = new SimpleOrderedMap<Object>();

    info.add("numDocs", reader.numDocs());
    info.add("maxDoc", reader.maxDoc());

    if (countTerms) {
      // Counting requires walking the complete term enumeration.
      TermEnum terms = null;
      try {
        terms = reader.terms();
        int termCount = 0;
        while (terms.next()) {
          termCount += 1;
        }
        info.add("numTerms", termCount);
      } finally {
        if (terms != null) {
          terms.close();
        }
      }
    }

    // TODO? Is this different than: IndexReader.getCurrentVersion( dir )?
    info.add("version", reader.getVersion());
    info.add("optimized", reader.isOptimized());
    info.add("current", reader.isCurrent());
    info.add("hasDeletions", reader.hasDeletions());
    info.add("directory", directory);
    info.add("lastModified", new Date(IndexReader.lastModified(directory)));
    return info;
  }
Example #2
0
  /**
   * Builds a reader that wraps either the given reader itself or a {@link MultiReader} assembled
   * from the sub-readers produced by {@code createSubReaders(reader, workArea)}.
   *
   * <p>When {@code useSubReaders} is true, the directory of {@code reader} is recorded and, if the
   * wrapped reader exposes at least one sequential sub-reader, each sub-reader receives that
   * directory and (when non-null) the facet handlers. A table of starting document offsets is
   * also built: entry {@code i} is the sum of {@code maxDoc()} over the sub-readers before {@code
   * i}, and the final entry is the total.
   *
   * @param reader the underlying index reader
   * @param facetHandlers facet handlers kept by this reader and handed to every sub-reader; may be
   *     null, in which case sub-readers are left untouched
   * @param facetHandlerFactories runtime facet handler factories, additionally indexed by {@code
   *     getName()}; may be null, in which case the lookup map stays empty
   * @param workArea work area passed to {@code createSubReaders} and kept by this reader
   * @param useSubReaders true => we create a MultiReader of all the leaf sub-readers as the inner
   *     reader. false => we use the given reader as the inner reader.
   * @throws IOException if the sub-readers cannot be created or the directory cannot be obtained
   */
  protected BoboIndexReader(
      IndexReader reader,
      Collection<FacetHandler<?>> facetHandlers,
      Collection<RuntimeFacetHandlerFactory<?, ?>> facetHandlerFactories,
      WorkArea workArea,
      boolean useSubReaders)
      throws IOException {
    super(useSubReaders ? new MultiReader(createSubReaders(reader, workArea), false) : reader);

    if (useSubReaders) {
      _dir = reader.directory();
      // NOTE(review): the cast presumably relies on createSubReaders returning a
      // BoboIndexReader[] that the MultiReader hands back unchanged - confirm.
      final BoboIndexReader[] leaves = (BoboIndexReader[]) in.getSequentialSubReaders();
      if (leaves != null && leaves.length > 0) {
        _subReaders = leaves;

        final int leafCount = leaves.length;
        _starts = new int[leafCount + 1];
        int offset = 0;
        for (int idx = 0; idx < leafCount; idx++) {
          final BoboIndexReader leaf = leaves[idx];
          leaf._dir = _dir;
          if (facetHandlers != null) {
            leaf.setFacetHandlers(facetHandlers);
          }
          // Each leaf starts where the preceding leaves end.
          _starts[idx] = offset;
          offset += leaf.maxDoc();
        }
        _starts[leafCount] = offset;
      }
    }

    _runtimeFacetHandlerFactories = facetHandlerFactories;
    _runtimeFacetHandlerFactoryMap = new HashMap<String, RuntimeFacetHandlerFactory<?, ?>>();
    if (facetHandlerFactories != null) {
      for (RuntimeFacetHandlerFactory<?, ?> runtimeFactory : facetHandlerFactories) {
        _runtimeFacetHandlerFactoryMap.put(runtimeFactory.getName(), runtimeFactory);
      }
    }

    _facetHandlers = facetHandlers;
    _workArea = workArea;
  }
  /**
   * Searches the index for groups of documents that lie at exactly the same distance from a
   * reference document, and reports each group with more than one member as a set of duplicates.
   *
   * <p>The reference document is the first non-deleted document of the index. Its color layout,
   * scalable color and edge histogram descriptors are read when present and passed, together
   * with each live document, to {@code getDistance}.
   *
   * @param reader the reader for the index to scan
   * @return the duplicate groups (lists of document identifiers), or {@code null} if no two
   *     documents share a distance or the index holds no live document
   * @throws FileNotFoundException if no index exists in the reader's directory
   * @throws IOException if reading from the index fails
   */
  public ImageDuplicates findDuplicates(IndexReader reader) throws IOException {
    if (!IndexReader.indexExists(reader.directory()))
      throw new FileNotFoundException("No index found at this specific location.");

    final boolean hasDeletions = reader.hasDeletions();
    // Document ids span [0, maxDoc()); numDocs() only counts live documents, so using it as
    // the loop bound would skip trailing documents whenever the index has deletions.
    final int maxDoc = reader.maxDoc();

    // Use the first live document as reference: document 0 may be deleted, in which case it
    // cannot be loaded.
    int first = 0;
    if (hasDeletions) {
      while (first < maxDoc && reader.isDeleted(first)) {
        first++;
      }
    }
    if (first >= maxDoc) return null; // empty index or everything deleted: nothing to compare

    Document doc = reader.document(first);
    ScalableColor sc = null;
    ColorLayout cl = null;
    EdgeHistogram eh = null;

    String[] cls = doc.getValues(DocumentBuilder.FIELD_NAME_COLORLAYOUT);
    if (cls != null && cls.length > 0) {
      cl = new ColorLayout();
      cl.setStringRepresentation(cls[0]);
    }
    String[] scs = doc.getValues(DocumentBuilder.FIELD_NAME_SCALABLECOLOR);
    if (scs != null && scs.length > 0) {
      sc = new ScalableColor();
      sc.setStringRepresentation(scs[0]);
    }
    String[] ehs = doc.getValues(DocumentBuilder.FIELD_NAME_EDGEHISTOGRAM);
    if (ehs != null && ehs.length > 0) {
      eh = new EdgeHistogram();
      eh.setStringRepresentation(ehs[0]);
    }

    // Buckets of document identifiers keyed by their exact distance to the reference document.
    HashMap<Float, List<String>> duplicates = new HashMap<Float, List<String>>();

    int numDuplicates = 0;
    // Everything before 'first' is deleted, so the scan can start there.
    for (int i = first; i < maxDoc; i++) {
      if (hasDeletions && reader.isDeleted(i)) {
        continue;
      }
      Document d = reader.document(i);
      Float distance = Float.valueOf(getDistance(d, cl, sc, eh));

      List<String> bucket = duplicates.get(distance);
      if (bucket == null) {
        bucket = new LinkedList<String>();
        duplicates.put(distance, bucket);
      } else {
        // A second (or later) document at an already seen distance counts as a duplicate.
        numDuplicates++;
      }
      bucket.add(d.getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue());
    }

    if (numDuplicates == 0) return null;

    // Only buckets with more than one document are actual duplicate groups.
    LinkedList<List<String>> results = new LinkedList<List<String>>();
    for (List<String> bucket : duplicates.values()) {
      if (bucket.size() > 1) {
        results.add(bucket);
      }
    }
    return new SimpleImageDuplicates(results);
  }
 /**
  * Returns the directory of the wrapped index reader.
  *
  * @return whatever {@code indexReader.directory()} returns
  * @see LuceneIndexReader#directory()
  */
 public Directory directory() {
   final Directory wrappedDirectory = indexReader.directory();
   return wrappedDirectory;
 }