예제 #1
0
 /**
  * Collects the names of all fields whose indexed flag equals {@code indexed}.
  *
  * @param indexed the value the field's {@code isIndexed} flag must have for its name to be
  *     included
  * @return a set of the matching field names
  * @see IndexReader#getFieldNames(boolean)
  */
 public Collection getFieldNames(boolean indexed) throws IOException {
   // a set keeps each field name at most once
   Set names = new HashSet();
   for (int fieldNumber = 0; fieldNumber < fieldInfos.size(); fieldNumber++) {
     FieldInfo info = fieldInfos.fieldInfo(fieldNumber);
     if (info.isIndexed != indexed) {
       continue; // flag does not match the requested kind
     }
     names.add(info.name);
   }
   return names;
 }
예제 #2
0
 /**
  * Opens the norms file of every indexed field and registers it in {@code norms} under the
  * field's name.
  *
  * @param cfsDir directory used for a norms file that is not present in {@code directory()}
  * @throws IOException if a norms file cannot be opened
  */
 private final void openNorms(Directory cfsDir) throws IOException {
   for (int n = 0; n < fieldInfos.size(); n++) {
     FieldInfo info = fieldInfos.fieldInfo(n);
     if (!info.isIndexed) {
       continue; // only indexed fields are given a norms entry here
     }

     String normsFile = segment + ".f" + info.number;

     // a re-written file in the main directory takes precedence over the compound format
     Directory source;
     if (directory().fileExists(normsFile)) {
       source = directory();
     } else {
       source = cfsDir;
     }

     norms.put(info.name, new Norm(source.openFile(normsFile), info.number));
   }
 }
예제 #3
0
 /**
  * Lists the indexed fields, filtered by whether they store term vector information.
  *
  * @param storedTermVector if true, only indexed fields that have term vector info are returned;
  *     otherwise only indexed fields without term vector info
  * @return Collection of Strings holding the names of the matching fields
  */
 public Collection getIndexedFieldNames(boolean storedTermVector) {
   // a set keeps each field name at most once
   Set names = new HashSet();
   for (int fieldNumber = 0; fieldNumber < fieldInfos.size(); fieldNumber++) {
     FieldInfo info = fieldInfos.fieldInfo(fieldNumber);
     if (!info.isIndexed || info.storeTermVector != storedTermVector) {
       continue; // not indexed, or term vector flag differs from the request
     }
     names.add(info.name);
   }
   return names;
 }
예제 #4
0
  /**
   * Sets up this reader for the segment described by {@code si}: records the segment name, picks
   * the directory to read from (the compound file reader when a {@code .cfs} file exists, the
   * plain directory otherwise) and opens the field infos, stored fields, term infos, deletions,
   * frequency/proximity streams, norms and, when present, term vectors.
   *
   * @param si the segment to open; its {@code name} is used as the file name prefix
   * @throws IOException if any of the segment's files cannot be opened or read
   */
  private void initialize(SegmentInfo si) throws IOException {
    segment = si.name;

    // Use compound file directory for some files, if it exists
    Directory cfsDir = directory();
    if (directory().fileExists(segment + ".cfs")) {
      cfsReader = new CompoundFileReader(directory(), segment + ".cfs");
      cfsDir = cfsReader;
    }

    // From here on cfsDir is either the compound file reader or, when no
    // compound file exists, the regular directory (multi-file format)
    fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
    fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);

    tis = new TermInfosReader(cfsDir, segment, fieldInfos);

    // NOTE: the bitvector is stored using the regular directory, not cfs
    if (hasDeletions(si)) deletedDocs = new BitVector(directory(), segment + ".del");

    // make sure that all index files have been read or are kept open
    // so that if an index update removes them we'll still have them
    freqStream = cfsDir.openFile(segment + ".frq");
    proxStream = cfsDir.openFile(segment + ".prx");
    openNorms(cfsDir);

    if (fieldInfos.hasVectors()) { // open term vector files only as needed
      termVectorsReader = new TermVectorsReader(cfsDir, segment, fieldInfos);
    }
  }
예제 #5
0
  /**
   * Return a term frequency vector for the specified document and field. The vector returned
   * contains term numbers and frequencies for all terms in the specified field of this document, if
   * the field had storeTermVector flag set. If the flag was not set, the method returns null.
   *
   * @param docNumber number of the document whose vector is requested
   * @param field name of the field whose vector is requested
   * @return the term frequency vector, or null if the field is unknown, does not store term
   *     vectors, or no term vector reader is open for this segment
   * @throws IOException if the term vector data cannot be read
   */
  public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException {
    // Check if this field is invalid or has no stored term vector
    FieldInfo fi = fieldInfos.fieldInfo(field);
    if (fi == null || !fi.storeTermVector) {
      return null;
    }

    // The reader is only created when the segment reports term vectors, so it may be
    // absent; answer "no vector" instead of failing with a NullPointerException.
    if (termVectorsReader == null) {
      return null;
    }

    return termVectorsReader.get(docNumber, field);
  }
예제 #6
0
 /**
  * Begin processing the named field. Afterwards any number of addTerm calls may follow, ended by
  * a closeField call that marks the end of this field. A field that was still open is closed
  * automatically.
  *
  * <p>The field's number and its position/offset storage flags are looked up in the field infos
  * and handed to the three-argument overload.
  *
  * @param field name of the field to open
  * @throws IOException if the underlying overload fails
  */
 public final void openField(String field) throws IOException {
   final FieldInfo info = fieldInfos.fieldInfo(field);

   final int number = info.number;
   final boolean withPositions = info.storePositionWithTermVector;
   final boolean withOffsets = info.storeOffsetWithTermVector;

   openField(number, withPositions, withOffsets);
 }
예제 #7
0
  /**
   * Add a complete document specified by all its term vectors. If document has no term vectors, add
   * value for tvx.
   *
   * <p>Each vector is written as one field. A vector that is a {@link TermPositionVector} is
   * written with its positions and/or offsets when the first term carries them; any other vector
   * is written with terms and frequencies only.
   *
   * @param vectors the term vectors of the document, one per field; may be null, in which case
   *     the document is opened and closed without any field
   * @throws IOException if writing the document or one of its fields fails
   */
  public final void addAllDocVectors(TermFreqVector[] vectors) throws IOException {
    openDocument();

    if (vectors != null) {
      for (int i = 0; i < vectors.length; i++) {
        // Decide by type test rather than by catching ClassCastException: a catch would
        // also intercept casts failing deeper in the write path and emit the field twice.
        if (vectors[i] instanceof TermPositionVector) {
          final TermPositionVector tpVector = (TermPositionVector) vectors[i];

          // positions/offsets are considered present when the first term has them
          final boolean storePositionWithTermVector =
              tpVector.size() > 0 && tpVector.getTermPositions(0) != null;
          final boolean storeOffsetWithTermVector =
              tpVector.size() > 0 && tpVector.getOffsets(0) != null;

          FieldInfo fieldInfo = fieldInfos.fieldInfo(tpVector.getField());
          openField(fieldInfo.number, storePositionWithTermVector, storeOffsetWithTermVector);

          for (int j = 0; j < tpVector.size(); j++) {
            addTermInternal(
                tpVector.getTerms()[j],
                tpVector.getTermFrequencies()[j],
                tpVector.getTermPositions(j),
                tpVector.getOffsets(j));
          }

          closeField();
        } else {
          final TermFreqVector tfVector = vectors[i];

          // a plain frequency vector carries neither positions nor offsets
          FieldInfo fieldInfo = fieldInfos.fieldInfo(tfVector.getField());
          openField(fieldInfo.number, false, false);

          for (int j = 0; j < tfVector.size(); j++) {
            addTermInternal(tfVector.getTerms()[j], tfVector.getTermFrequencies()[j], null, null);
          }

          closeField();
        }
      }
    }

    closeDocument();
  }
예제 #8
0
  /**
   * Lists the file names belonging to this segment: every known per-segment extension whose file
   * exists in {@code directory()}, followed by one {@code .f<i>} entry per indexed field.
   *
   * @return a Vector of file name Strings
   * @throws IOException if checking for a file's existence fails
   */
  final Vector files() throws IOException {
    final String[] extensions = {
      "cfs", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "del", "tvx", "tvd", "tvf", "tvp"
    };
    Vector result = new Vector(16);

    // fixed-extension files are listed only when they are actually present
    for (int e = 0; e < extensions.length; e++) {
      String candidate = segment + "." + extensions[e];
      if (directory().fileExists(candidate)) {
        result.addElement(candidate);
      }
    }

    // per-field files are listed for every indexed field, without an existence check
    // NOTE(review): the suffix is the loop index here, while openNorms builds the same
    // kind of name from fi.number -- confirm the two always agree.
    for (int n = 0; n < fieldInfos.size(); n++) {
      if (fieldInfos.fieldInfo(n).isIndexed) {
        result.addElement(segment + ".f" + n);
      }
    }

    return result;
  }
예제 #9
0
  /**
   * Creates a writer for the term vector files of one segment. Three outputs are created in
   * {@code directory}, named from {@code segment} plus the TVX, TVD and TVF extensions, and
   * {@code FORMAT_VERSION} is written as the first int of each.
   *
   * @param directory where the term vector files are created
   * @param segment name prefix shared by the three files
   * @param fieldInfos field metadata kept by the writer; its size is used as the initial capacity
   *     of the field list
   * @throws IOException if an output cannot be created or its header cannot be written
   */
  public TermVectorsWriter(Directory directory, String segment, FieldInfos fieldInfos)
      throws IOException {
    // Open files for TermVector storage
    // NOTE(review): if a later createOutput/writeInt throws, the outputs opened before it
    // are not closed here -- confirm whether callers clean up.
    tvx = directory.createOutput(segment + TVX_EXTENSION);
    tvx.writeInt(FORMAT_VERSION);
    tvd = directory.createOutput(segment + TVD_EXTENSION);
    tvd.writeInt(FORMAT_VERSION);
    tvf = directory.createOutput(segment + TVF_EXTENSION);
    tvf.writeInt(FORMAT_VERSION);

    this.fieldInfos = fieldInfos;
    // working buffers start out empty
    fields = new Vector(fieldInfos.size());
    terms = new Vector();
  }
예제 #10
0
  /**
   * Reads the stored fields of document {@code n} and returns them as a new Document.
   *
   * @param n number of the document to load
   * @return a Document holding one stored Field per entry read from the fields stream
   * @throws IOException if either stream cannot be positioned or read
   */
  final Document doc(int n) throws IOException {
    // the index stream is addressed in 8-byte steps; the long read there is the
    // position of the document's data in the fields stream
    indexStream.seek(n * 8L);
    fieldsStream.seek(indexStream.readLong());

    Document result = new Document();
    for (int remaining = fieldsStream.readVInt(); remaining > 0; remaining--) {
      FieldInfo info = fieldInfos.fieldInfo(fieldsStream.readVInt());
      byte flags = fieldsStream.readByte();

      String name = info.name;
      String value = fieldsStream.readString();
      boolean tokenized = (flags & 1) != 0; // lowest bit marks a tokenized field

      // every field read here is a stored field; the indexed flag comes from the field info
      result.add(new Field(name, value, true, info.isIndexed, tokenized));
    }

    return result;
  }