/** * Overwrite this method if you want to filter the input, apply hashing, etc. * * @param feature the current feature. * @param document the current document. * @param featureFieldName the field hashFunctionsFileName of the feature. */ protected void addToDocument(LireFeature feature, Document document, String featureFieldName) { if (run == 0) { } // just count documents else if (run == 1) { // Select the representatives ... if (representativesID.contains(docCount) && feature .getClass() .getCanonicalName() .equals(featureClass.getCanonicalName())) { // it's a representative. // put it into a temporary data structure ... representatives.add(feature); } } else if (run == 2) { // actual hashing: find the nearest representatives and put those as a hash into a // document. if (feature .getClass() .getCanonicalName() .equals(featureClass.getCanonicalName())) { // it's a feature to be hashed int[] hashes = getHashes(feature); document.add( new TextField( featureFieldName + "_hash", createDocumentString(hashes, hashes.length), Field.Store.YES)); document.add( new TextField( featureFieldName + "_hash_q", createDocumentString(hashes, 10), Field.Store.YES)); } document.add(new StoredField(featureFieldName, feature.getByteArrayRepresentation())); } }
private int[] getHashes(LireFeature feature) { // result = new int[maximumHits]; hashingResultScoreDocs.clear(); maxDistance = 0f; tmpScore = 0f; int rep = 0; LireFeature tmpFeature; for (Iterator<LireFeature> iterator = representatives.iterator(); iterator.hasNext(); ) { tmpFeature = iterator.next(); tmpScore = tmpFeature.getDistance(feature); if (hashingResultScoreDocs.size() < maximumHits) { hashingResultScoreDocs.add(new SimpleResult(tmpScore, null, rep)); maxDistance = Math.max(maxDistance, tmpScore); } else if (tmpScore < maxDistance) { hashingResultScoreDocs.add(new SimpleResult(tmpScore, null, rep)); } while (hashingResultScoreDocs.size() > maximumHits) { hashingResultScoreDocs.remove(hashingResultScoreDocs.last()); maxDistance = hashingResultScoreDocs.last().getDistance(); } rep++; } rep = 0; for (Iterator<SimpleResult> iterator = hashingResultScoreDocs.iterator(); iterator.hasNext(); ) { SimpleResult next = iterator.next(); result[rep] = next.getIndexNumber(); rep++; } return result; }
protected TestConfig[] generateTestConfigs( int numberOfTests, TestDoc[] testDocs, TestFieldSetting[] fieldSettings) { ArrayList<TestConfig> configs = new ArrayList<TestConfig>(); for (int i = 0; i < numberOfTests; i++) { ArrayList<String> selectedFields = null; if (randomBoolean()) { // used field selection selectedFields = new ArrayList<String>(); if (randomBoolean()) { selectedFields.add("Doesnt_exist"); // this will be ignored. } for (TestFieldSetting field : fieldSettings) if (randomBoolean()) { selectedFields.add(field.name); } if (selectedFields.size() == 0) { selectedFields = null; // 0 length set is not supported. } } TestConfig config = new TestConfig( testDocs[randomInt(testDocs.length - 1)], selectedFields == null ? null : selectedFields.toArray(new String[] {}), randomBoolean(), randomBoolean(), randomBoolean()); configs.add(config); } // always adds a test that fails configs.add( new TestConfig( new TestDoc("doesnt_exist", new TestFieldSetting[] {}, new String[] {}) .index("doesn't_exist"), new String[] {"doesnt_exist"}, true, true, true) .expectedException(IndexMissingException.class)); refresh(); return configs.toArray(new TestConfig[] {}); }