/** @see IndexReader#getFieldNames(boolean) */
public Collection getFieldNames(boolean indexed) throws IOException {
  // maintain a unique set of field names
  Set fieldSet = new HashSet();
  for (int i = 0; i < fieldInfos.size(); i++) {
    FieldInfo fi = fieldInfos.fieldInfo(i);
    if (fi.isIndexed == indexed)
      fieldSet.add(fi.name);
  }
  return fieldSet;
}
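As a hedged usage sketch (the index path, class name, and variable names below are illustrative and not taken from this code), callers typically reach this method through an open IndexReader; passing true and false partitions the field names by their indexed flag:

import java.util.Collection;
import org.apache.lucene.index.IndexReader;

public class ListFieldNames {
  public static void main(String[] args) throws Exception {
    IndexReader reader = IndexReader.open("/path/to/index"); // illustrative path
    Collection indexed = reader.getFieldNames(true);    // names of indexed fields
    Collection unindexed = reader.getFieldNames(false); // names of fields stored but not indexed
    System.out.println("indexed: " + indexed);
    System.out.println("unindexed: " + unindexed);
    reader.close();
  }
}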
private final void openNorms(Directory cfsDir) throws IOException {
  for (int i = 0; i < fieldInfos.size(); i++) {
    FieldInfo fi = fieldInfos.fieldInfo(i);
    if (fi.isIndexed) {
      String fileName = segment + ".f" + fi.number;
      // look first for re-written file, then in compound format
      Directory d = directory().fileExists(fileName) ? directory() : cfsDir;
      norms.put(fi.name, new Norm(d.openFile(fileName), fi.number));
    }
  }
}
/**
 * @param storedTermVector if true, returns only indexed fields that have term vector info;
 *                         otherwise, only indexed fields without term vector info
 * @return Collection of Strings indicating the names of the fields
 */
public Collection getIndexedFieldNames(boolean storedTermVector) {
  // maintain a unique set of field names
  Set fieldSet = new HashSet();
  for (int i = 0; i < fieldInfos.size(); i++) {
    FieldInfo fi = fieldInfos.fieldInfo(i);
    if (fi.isIndexed && fi.storeTermVector == storedTermVector) {
      fieldSet.add(fi.name);
    }
  }
  return fieldSet;
}
private void initialize(SegmentInfo si) throws IOException {
  segment = si.name;

  // Use compound file directory for some files, if it exists
  Directory cfsDir = directory();
  if (directory().fileExists(segment + ".cfs")) {
    cfsReader = new CompoundFileReader(directory(), segment + ".cfs");
    cfsDir = cfsReader;
  }

  // No compound file exists - use the multi-file format
  fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
  fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);

  tis = new TermInfosReader(cfsDir, segment, fieldInfos);

  // NOTE: the bitvector is stored using the regular directory, not cfs
  if (hasDeletions(si))
    deletedDocs = new BitVector(directory(), segment + ".del");

  // make sure that all index files have been read or are kept open
  // so that if an index update removes them we'll still have them
  freqStream = cfsDir.openFile(segment + ".frq");
  proxStream = cfsDir.openFile(segment + ".prx");
  openNorms(cfsDir);

  if (fieldInfos.hasVectors()) { // open term vector files only as needed
    termVectorsReader = new TermVectorsReader(cfsDir, segment, fieldInfos);
  }
}
/**
 * Return a term frequency vector for the specified document and field. The vector returned
 * contains terms and frequencies for all terms in the specified field of this document, if
 * the field had the storeTermVector flag set. If the flag was not set, the method returns null.
 */
public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException {
  // Check if this field is invalid or has no stored term vector
  FieldInfo fi = fieldInfos.fieldInfo(field);
  if (fi == null || !fi.storeTermVector)
    return null;

  return termVectorsReader.get(docNumber, field);
}
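A hedged sketch of how a caller might consume the returned vector (the reader variable, docNumber, and the field name "contents" are assumptions for illustration): getTerms() and getTermFrequencies() return parallel arrays, so iterating one index over both yields each term with its frequency.

// Hedged sketch: "reader" is an open IndexReader, "contents" an illustrative field name.
TermFreqVector vector = reader.getTermFreqVector(docNumber, "contents");
if (vector != null) {                        // null when the field stored no term vector
  String[] terms = vector.getTerms();        // parallel arrays: terms[i] occurred freqs[i] times
  int[] freqs = vector.getTermFrequencies();
  for (int i = 0; i < terms.length; i++)
    System.out.println(terms[i] + ": " + freqs[i]);
}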
/**
 * Start processing a field. This can be followed by a number of calls to addTerm, and a final
 * call to closeField to indicate the end of processing of this field. If a field was previously
 * open, it is closed automatically.
 */
public final void openField(String field) throws IOException {
  FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
  openField(fieldInfo.number, fieldInfo.storePositionWithTermVector,
      fieldInfo.storeOffsetWithTermVector);
}
/**
 * Add a complete document specified by all its term vectors. If the document has no term
 * vectors, an entry is still written for it in the tvx index file.
 *
 * @param vectors the term vectors of the document, or null if it has none
 * @throws IOException
 */
public final void addAllDocVectors(TermFreqVector[] vectors) throws IOException {
  openDocument();

  if (vectors != null) {
    for (int i = 0; i < vectors.length; i++) {
      boolean storePositionWithTermVector = false;
      boolean storeOffsetWithTermVector = false;

      try {
        // If the vector also carries positions (and possibly offsets), store them as well
        TermPositionVector tpVector = (TermPositionVector) vectors[i];

        if (tpVector.size() > 0 && tpVector.getTermPositions(0) != null)
          storePositionWithTermVector = true;
        if (tpVector.size() > 0 && tpVector.getOffsets(0) != null)
          storeOffsetWithTermVector = true;

        FieldInfo fieldInfo = fieldInfos.fieldInfo(tpVector.getField());
        openField(fieldInfo.number, storePositionWithTermVector, storeOffsetWithTermVector);

        for (int j = 0; j < tpVector.size(); j++)
          addTermInternal(tpVector.getTerms()[j], tpVector.getTermFrequencies()[j],
              tpVector.getTermPositions(j), tpVector.getOffsets(j));

        closeField();
      } catch (ClassCastException ignore) {
        // Plain TermFreqVector: no positions or offsets to store
        TermFreqVector tfVector = vectors[i];

        FieldInfo fieldInfo = fieldInfos.fieldInfo(tfVector.getField());
        openField(fieldInfo.number, storePositionWithTermVector, storeOffsetWithTermVector);

        for (int j = 0; j < tfVector.size(); j++)
          addTermInternal(tfVector.getTerms()[j], tfVector.getTermFrequencies()[j], null, null);

        closeField();
      }
    }
  }
  closeDocument();
}
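A hedged sketch of the typical call pattern, assuming the vectors of an existing segment are being copied (the reader and termVectorsWriter variables are set up elsewhere and are not part of this class): IndexReader.getTermFreqVectors(int) returns all stored vectors for a document, or null when none were stored, which matches what addAllDocVectors accepts.

// Hedged sketch: "reader" reads the source segment, "termVectorsWriter" writes the destination.
for (int docNum = 0; docNum < reader.maxDoc(); docNum++) {
  if (reader.isDeleted(docNum))
    continue;                                                   // skip deleted documents
  TermFreqVector[] vectors = reader.getTermFreqVectors(docNum); // may be null
  termVectorsWriter.addAllDocVectors(vectors);                  // still records a tvx entry when null
}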
final Vector files() throws IOException {
  Vector files = new Vector(16);
  final String ext[] = new String[] {
    "cfs", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "del", "tvx", "tvd", "tvf", "tvp"
  };

  for (int i = 0; i < ext.length; i++) {
    String name = segment + "." + ext[i];
    if (directory().fileExists(name))
      files.addElement(name);
  }

  for (int i = 0; i < fieldInfos.size(); i++) {
    FieldInfo fi = fieldInfos.fieldInfo(i);
    if (fi.isIndexed)
      files.addElement(segment + ".f" + i);
  }
  return files;
}
public TermVectorsWriter(Directory directory, String segment, FieldInfos fieldInfos)
    throws IOException {
  // Open files for TermVector storage
  tvx = directory.createOutput(segment + TVX_EXTENSION);
  tvx.writeInt(FORMAT_VERSION);
  tvd = directory.createOutput(segment + TVD_EXTENSION);
  tvd.writeInt(FORMAT_VERSION);
  tvf = directory.createOutput(segment + TVF_EXTENSION);
  tvf.writeInt(FORMAT_VERSION);

  this.fieldInfos = fieldInfos;
  fields = new Vector(fieldInfos.size());
  terms = new Vector();
}
final Document doc(int n) throws IOException {
  indexStream.seek(n * 8L);
  long position = indexStream.readLong();
  fieldsStream.seek(position);

  Document doc = new Document();
  int numFields = fieldsStream.readVInt();
  for (int i = 0; i < numFields; i++) {
    int fieldNumber = fieldsStream.readVInt();
    FieldInfo fi = fieldInfos.fieldInfo(fieldNumber);

    byte bits = fieldsStream.readByte();

    doc.add(new Field(fi.name,          // name
        fieldsStream.readString(),      // read value
        true,                           // stored
        fi.isIndexed,                   // indexed
        (bits & 1) != 0));              // tokenized
  }
  return doc;
}
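Each document occupies a fixed 8-byte (long) slot in the fields index stream, so the seek to n * 8 reads the pointer to document n's record in the fields data stream. A hedged sketch of the caller's view (the reader variable and the field name "title" are illustrative): stored fields come back through IndexReader.document(int), which reaches this method for the owning segment.

// Hedged sketch: "reader" is an open IndexReader, "title" an illustrative stored field.
Document doc = reader.document(docNumber);
String title = doc.get("title"); // value of a stored field, or null if the document has none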