/** * Overwrite this method if you want to filter the input, apply hashing, etc. * * @param feature the current feature. * @param document the current document. * @param featureFieldName the field hashFunctionsFileName of the feature. */ protected void addToDocument(LireFeature feature, Document document, String featureFieldName) { if (run == 0) { } // just count documents else if (run == 1) { // Select the representatives ... if (representativesID.contains(docCount) && feature .getClass() .getCanonicalName() .equals(featureClass.getCanonicalName())) { // it's a representative. // put it into a temporary data structure ... representatives.add(feature); } } else if (run == 2) { // actual hashing: find the nearest representatives and put those as a hash into a // document. if (feature .getClass() .getCanonicalName() .equals(featureClass.getCanonicalName())) { // it's a feature to be hashed int[] hashes = getHashes(feature); document.add( new TextField( featureFieldName + "_hash", createDocumentString(hashes, hashes.length), Field.Store.YES)); document.add( new TextField( featureFieldName + "_hash_q", createDocumentString(hashes, 10), Field.Store.YES)); } document.add(new StoredField(featureFieldName, feature.getByteArrayRepresentation())); } }
/** * Reads data from a file and writes it to an index. * * @param indexWriter the index to write to. * @param inputFile the input data for the process. * @throws IOException * @throws InstantiationException * @throws IllegalAccessException * @throws ClassNotFoundException */ private void readFile(IndexWriter indexWriter, File inputFile) throws IOException, InstantiationException, IllegalAccessException, ClassNotFoundException { BufferedInputStream in = new BufferedInputStream(new FileInputStream(inputFile)); byte[] tempInt = new byte[4]; int tmp, tmpFeature, count = 0; byte[] temp = new byte[100 * 1024]; // read file hashFunctionsFileName length: while (in.read(tempInt, 0, 4) > 0) { Document d = new Document(); tmp = SerializationUtils.toInt(tempInt); // read file hashFunctionsFileName: in.read(temp, 0, tmp); String filename = new String(temp, 0, tmp); // normalize Filename to full path. filename = inputFile .getCanonicalPath() .substring(0, inputFile.getCanonicalPath().lastIndexOf(inputFile.getName())) + filename; d.add(new StringField(DocumentBuilder.FIELD_NAME_IDENTIFIER, filename, Field.Store.YES)); // System.out.print(filename); while ((tmpFeature = in.read()) < 255) { // System.out.print(", " + tmpFeature); LireFeature f = (LireFeature) Class.forName(Extractor.features[tmpFeature]).newInstance(); // byte[] length ... in.read(tempInt, 0, 4); tmp = SerializationUtils.toInt(tempInt); // read feature byte[] in.read(temp, 0, tmp); f.setByteArrayRepresentation(temp, 0, tmp); addToDocument(f, d, Extractor.featureFieldNames[tmpFeature]); // d.add(new StoredField(Extractor.featureFieldNames[tmpFeature], // f.getByteArrayRepresentation())); } if (run == 2) indexWriter.addDocument(d); docCount++; // if (count%1000==0) System.out.print('.'); // if (count%10000==0) System.out.println(" " + count); } in.close(); }