/**
   * Runs the image search once per requested feature and merges the hits into one sorted list.
   *
   * <p>The list built so far is passed into every {@code LireBuilder.search} call, and whatever
   * that call returns is appended to it. Presumably this lets the search fold repeated pictures
   * into entries that are already present instead of returning them again (verify against
   * {@code LireBuilder.search}).
   *
   * @param source bytes of the query image
   * @param maxHist maximum number of hits requested from each per-feature search
   * @param featureEnumerates features to search with, processed in iteration order
   * @return the merged hits, sorted by the natural ordering of {@code LirePictureSortable}
   * @throws IOException if thrown by {@code LireBuilder.search}
   */
  public static List<LirePictureSortable> searchAllFeatures(
      byte[] source, int maxHist, List<FeatureEnumerate> featureEnumerates) throws IOException {

    final List<LirePictureSortable> merged = new ArrayList<>();

    for (final FeatureEnumerate current : featureEnumerates) {
      /* hand the running list to the search, then append what it reports back */
      final List<LirePictureSortable> hitsForFeature =
          LireBuilder.search(source, maxHist, current, merged);
      merged.addAll(hitsForFeature);
    }

    /* natural ordering of the sortable pictures (by score, according to the original note) */
    Collections.sort(merged);
    return merged;
  }
// Example #2
// 0
 /**
  * Reads a UTF-8 text file and returns its lines as an array, one element per line.
  *
  * <p>A previous debug statement printed the element at index 159 and therefore threw an
  * {@code IndexOutOfBoundsException} for any file with fewer than 160 lines; it has been removed.
  *
  * @param path location of the stop-word file
  * @return the lines of the file in file order, or {@code null} if the file is missing or cannot
  *     be read (callers must check for {@code null})
  */
 private static String[] getStopWords(String path) {
   try {
     List<String> lines = FileUtils.readLines(new File(path), "utf-8");
     return lines.toArray(new String[0]);
   } catch (IOException e) {
     // FileNotFoundException is a subclass of IOException, so this single handler covers both
     // cases the original handled separately. The failure is reported and signalled with null.
     e.printStackTrace();
     return null;
   }
 }
  /**
   * Searches the index of a single feature for pictures similar to the given image.
   *
   * <p>Every hit becomes a {@code LirePictureSortable} built from the hit's identifier field, its
   * score and the feature. If an equal entry already exists in {@code pictures} (equality is
   * decided by {@code LirePictureSortable.equals}; the original note says by picture UUID), that
   * existing entry receives the feature descriptor and the score, and the hit is not returned.
   * Otherwise the new entry is added to the returned list.
   *
   * <p>The search is best-effort: any exception raised while opening the index or reading hits is
   * logged, and the hits collected up to that point are returned.
   *
   * @param imageBytes bytes of the query image
   * @param maxHits maximum number of hits requested from the searcher
   * @param feature feature whose index is searched and whose value class configures the searcher
   * @param pictures pictures found by earlier searches; matching entries are updated in place
   * @return hits that were not already present in {@code pictures}; never {@code null}
   * @throws IOException declared for callers; may be raised while resolving the index path
   */
  public static List<LirePictureSortable> search(
      byte[] imageBytes, int maxHits, FeatureEnumerate feature, List<LirePictureSortable> pictures)
      throws IOException {
    File path = getPath(feature.getText());
    log.debug("reading from indexed path " + path.getAbsolutePath());
    List<LirePictureSortable> result = new ArrayList<>();

    // try-with-resources closes the directory, the reader and the stream on every exit path;
    // previously the directory and reader were never closed and leaked file handles per search.
    try (FSDirectory directory = FSDirectory.open(path);
        IndexReader ir = DirectoryReader.open(directory);
        InputStream in = new ByteArrayInputStream(imageBytes)) {
      ImageSearcher searcher = new GenericFastImageSearcher(maxHits, feature.getValueClass());
      ImageSearchHits hits = searcher.search(in, ir);

      for (int i = 0; i < hits.length(); i++) {
        float score = hits.score(i);
        LirePictureSortable lp =
            new LirePictureSortable(
                UUID.fromString(hits.doc(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]),
                score,
                feature);

        /* one lookup decides whether this picture was already found by an earlier search */
        int known = pictures.indexOf(lp);
        if (known >= 0) {
          LirePictureSortable existing = pictures.get(known);
          existing.addDescriptor(feature, score);
          existing.addScore(score);
        } else {
          result.add(lp);
        }
      }
    } catch (Exception e) {
      // Keep the best-effort contract (partial results are still returned), but report the
      // failure through the logger instead of printing the stack trace to stderr.
      log.error("image search failed for indexed path " + path.getAbsolutePath(), e);
    }

    return result;
  }
  /**
   * Asserts that two geo-point field data implementations expose matching values for every
   * document of the given reader context.
   *
   * <p>For each document the value counts must be equal, and every point on one side must be found
   * on the other side according to {@code contains(point, points, precision)}.
   *
   * @param random source of the choice between {@code load} and {@code loadDirect} for each side
   * @param context reader context whose documents are compared
   * @param left first field data implementation
   * @param right second field data implementation
   * @param precision tolerance handed to the {@code contains} helper
   * @throws Exception if loading the field data fails
   */
  private static void duelFieldDataGeoPoint(
      Random random,
      AtomicReaderContext context,
      IndexGeoPointFieldData left,
      IndexGeoPointFieldData right,
      Distance precision)
      throws Exception {
    // One random draw per side, left first, decides which loading method is used.
    final AtomicGeoPointFieldData leftAtomic;
    if (random.nextBoolean()) {
      leftAtomic = left.load(context);
    } else {
      leftAtomic = left.loadDirect(context);
    }
    final AtomicGeoPointFieldData rightAtomic;
    if (random.nextBoolean()) {
      rightAtomic = right.load(context);
    } else {
      rightAtomic = right.loadDirect(context);
    }

    final int maxDoc = context.reader().maxDoc();
    final MultiGeoPointValues fromLeft = leftAtomic.getGeoPointValues();
    final MultiGeoPointValues fromRight = rightAtomic.getGeoPointValues();

    for (int docId = 0; docId < maxDoc; docId++) {
      fromLeft.setDocument(docId);
      final int expectedCount = fromLeft.count();
      fromRight.setDocument(docId);
      assertEquals(expectedCount, fromRight.count());

      // Coordinates are copied into fresh GeoPoint objects before comparing; presumably the
      // instance returned by valueAt may be reused between calls (TODO confirm).
      final List<GeoPoint> leftCopies = Lists.newArrayList();
      final List<GeoPoint> rightCopies = Lists.newArrayList();
      for (int idx = 0; idx < expectedCount; idx++) {
        final GeoPoint leftPoint = fromLeft.valueAt(idx);
        leftCopies.add(new GeoPoint(leftPoint.getLat(), leftPoint.getLon()));
        final GeoPoint rightPoint = fromRight.valueAt(idx);
        rightCopies.add(new GeoPoint(rightPoint.getLat(), rightPoint.getLon()));
      }

      // Check containment in both directions.
      for (GeoPoint candidate : leftCopies) {
        final String failure = "Couldn't find " + candidate + " among " + rightCopies;
        assertTrue(failure, contains(candidate, rightCopies, precision));
      }
      for (GeoPoint candidate : rightCopies) {
        final String failure = "Couldn't find " + candidate + " among " + leftCopies;
        assertTrue(failure, contains(candidate, leftCopies, precision));
      }
    }
  }
// Example #5
// 0
  /**
   * Exercises {@code Store.MetadataSnapshot.recoveryDiff} across several index states.
   *
   * <p>The same random documents are indexed twice, each time with a {@code Random} created from
   * the same seed. The test then checks the diff between the two snapshots, the diff of a snapshot
   * against itself, the diff after deleting a document, and the diff after one more commit.
   *
   * @throws IOException if the index or the store cannot be written or read
   * @throws InterruptedException if the sleep that waits for the clock to advance is interrupted
   */
  @Test
  public void testRecoveryDiff() throws IOException, InterruptedException {
    // Between 2 and 101 documents, each with an "id", a random "body" and a random "dv" field.
    int numDocs = 2 + random().nextInt(100);
    List<Document> docs = new ArrayList<>();
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      doc.add(
          new StringField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new TextField(
              "body",
              TestUtil.randomRealisticUnicodeString(random()),
              random().nextBoolean() ? Field.Store.YES : Field.Store.NO));
      doc.add(
          new SortedDocValuesField(
              "dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random()))));
      docs.add(doc);
    }
    // Both index builds below create their own Random from this seed, so they draw from
    // identical random sequences.
    long seed = random().nextLong();
    Store.MetadataSnapshot first;
    // First build: index the documents, snapshot the metadata, then wipe and close the store.
    {
      Random random = new Random(seed);
      IndexWriterConfig iwc =
          new IndexWriterConfig(new MockAnalyzer(random)).setCodec(actualDefaultCodec());
      iwc.setMergePolicy(NoMergePolicy.INSTANCE);
      iwc.setUseCompoundFile(random.nextBoolean());
      iwc.setMaxThreadStates(1);
      final ShardId shardId = new ShardId(new Index("index"), 1);
      DirectoryService directoryService = new LuceneManagedDirectoryService(random);
      Store store =
          new Store(
              shardId,
              ImmutableSettings.EMPTY,
              directoryService,
              randomDistributor(random, directoryService),
              new DummyShardLock(shardId));
      IndexWriter writer = new IndexWriter(store.directory(), iwc);
      final boolean lotsOfSegments = rarely(random);
      for (Document d : docs) {
        writer.addDocument(d);
        // Commit at random points while adding documents.
        if (lotsOfSegments && random.nextBoolean()) {
          writer.commit();
        } else if (rarely(random)) {
          writer.commit();
        }
      }
      writer.commit();
      writer.close();
      first = store.getMetadata();
      assertDeleteContent(store, directoryService);
      store.close();
    }
    // Wait until the wall clock has moved on before building the second index.
    long time = new Date().getTime();
    while (time == new Date().getTime()) {
      Thread.sleep(10); // bump the time
    }
    Store.MetadataSnapshot second;
    Store store;
    // Second build: same documents and seed as the first, but this store stays open because the
    // remaining steps of the test keep writing to it.
    {
      Random random = new Random(seed);
      IndexWriterConfig iwc =
          new IndexWriterConfig(new MockAnalyzer(random)).setCodec(actualDefaultCodec());
      iwc.setMergePolicy(NoMergePolicy.INSTANCE);
      iwc.setUseCompoundFile(random.nextBoolean());
      iwc.setMaxThreadStates(1);
      final ShardId shardId = new ShardId(new Index("index"), 1);
      DirectoryService directoryService = new LuceneManagedDirectoryService(random);
      store =
          new Store(
              shardId,
              ImmutableSettings.EMPTY,
              directoryService,
              randomDistributor(random, directoryService),
              new DummyShardLock(shardId));
      IndexWriter writer = new IndexWriter(store.directory(), iwc);
      final boolean lotsOfSegments = rarely(random);
      for (Document d : docs) {
        writer.addDocument(d);
        if (lotsOfSegments && random.nextBoolean()) {
          writer.commit();
        } else if (rarely(random)) {
          writer.commit();
        }
      }
      writer.commit();
      writer.close();
      second = store.getMetadata();
    }
    // The two snapshots must list the same file names, yet no file may be reported as the same.
    Store.RecoveryDiff diff = first.recoveryDiff(second);
    assertThat(first.size(), equalTo(second.size()));
    for (StoreFileMetaData md : first) {
      assertThat(second.get(md.name()), notNullValue());
      // si files are different - containing timestamps etc
      assertThat(second.get(md.name()).isSame(md), equalTo(false));
    }
    assertThat(diff.different.size(), equalTo(first.size()));
    assertThat(
        diff.identical.size(),
        equalTo(0)); // in lucene 5 nothing is identical - we use random ids in file headers
    assertThat(diff.missing, empty());

    // check the self diff
    Store.RecoveryDiff selfDiff = first.recoveryDiff(first);
    assertThat(selfDiff.identical.size(), equalTo(first.size()));
    assertThat(selfDiff.different, empty());
    assertThat(selfDiff.missing, empty());

    // lets add some deletes
    // The writer is reopened in APPEND mode on the second store. The id of the document to
    // delete comes from the test's random(), not from the seeded local Random.
    Random random = new Random(seed);
    IndexWriterConfig iwc =
        new IndexWriterConfig(new MockAnalyzer(random)).setCodec(actualDefaultCodec());
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    iwc.setUseCompoundFile(random.nextBoolean());
    iwc.setMaxThreadStates(1);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
    IndexWriter writer = new IndexWriter(store.directory(), iwc);
    writer.deleteDocuments(new Term("id", Integer.toString(random().nextInt(numDocs))));
    writer.commit();
    writer.close();
    Store.MetadataSnapshot metadata = store.getMetadata();
    // Look for a file ending in ".liv" in the new snapshot; the branches below treat it as the
    // deletes file and expect different counts depending on whether one exists.
    StoreFileMetaData delFile = null;
    for (StoreFileMetaData md : metadata) {
      if (md.name().endsWith(".liv")) {
        delFile = md;
        break;
      }
    }
    Store.RecoveryDiff afterDeleteDiff = metadata.recoveryDiff(second);
    if (delFile != null) {
      assertThat(
          afterDeleteDiff.identical.size(), equalTo(metadata.size() - 2)); // segments_N + del file
      assertThat(afterDeleteDiff.different.size(), equalTo(0));
      assertThat(afterDeleteDiff.missing.size(), equalTo(2));
    } else {
      // an entire segment must be missing (single doc segment got dropped)
      assertThat(afterDeleteDiff.identical.size(), greaterThan(0));
      assertThat(afterDeleteDiff.different.size(), equalTo(0));
      assertThat(afterDeleteDiff.missing.size(), equalTo(1)); // the commit file is different
    }

    // check the self diff
    selfDiff = metadata.recoveryDiff(metadata);
    assertThat(selfDiff.identical.size(), equalTo(metadata.size()));
    assertThat(selfDiff.different, empty());
    assertThat(selfDiff.missing, empty());

    // add a new commit
    // This continues with the same seeded Random instance used for the delete step above.
    iwc = new IndexWriterConfig(new MockAnalyzer(random)).setCodec(actualDefaultCodec());
    iwc.setMergePolicy(NoMergePolicy.INSTANCE);
    iwc.setUseCompoundFile(
        true); // force CFS - easier to test here since we know it will add 3 files
    iwc.setMaxThreadStates(1);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
    writer = new IndexWriter(store.directory(), iwc);
    writer.addDocument(docs.get(0));
    writer.close();

    // Compare the snapshot after the extra commit with the snapshot taken after the delete.
    Store.MetadataSnapshot newCommitMetaData = store.getMetadata();
    Store.RecoveryDiff newCommitDiff = newCommitMetaData.recoveryDiff(metadata);
    if (delFile != null) {
      assertThat(
          newCommitDiff.identical.size(),
          equalTo(
              newCommitMetaData.size()
                  - 5)); // segments_N, del file, cfs, cfe, si for the new segment
      assertThat(newCommitDiff.different.size(), equalTo(1)); // the del file must be different
      assertThat(newCommitDiff.different.get(0).name(), endsWith(".liv"));
      assertThat(
          newCommitDiff.missing.size(), equalTo(4)); // segments_N,cfs, cfe, si for the new segment
    } else {
      assertThat(
          newCommitDiff.identical.size(),
          equalTo(newCommitMetaData.size() - 4)); // segments_N, cfs, cfe, si for the new segment
      assertThat(newCommitDiff.different.size(), equalTo(0));
      assertThat(
          newCommitDiff.missing.size(),
          equalTo(
              4)); // an entire segment must be missing (single doc segment got dropped)  plus the
                   // commit is different
    }

    // Clean up: remove the files of the second store and close it.
    store.deleteContent();
    IOUtils.close(store);
  }