Ejemplo n.º 1
0
  /**
   * Creates an index writer for the given index directory.
   *
   * <p>Makes sure the analyzer factory and tokenizer are available, remembers {@code indexDir}, and
   * starts with empty term, author, category and place indexes and an empty set of seen document
   * ids.
   *
   * @param indexDir : The root directory to be used for indexing
   */
  public IndexWriter(String indexDir) {
    // Lazy setup: the tokenizer is created together with the analyzer factory, and only while the
    // factory is still unset.
    // NOTE(review): if tizer and factory do not share the same lifetime (e.g. one static, one
    // per-instance), tizer could stay null here — confirm against the field declarations.
    boolean analysisNotReady = (factory == null);
    if (analysisNotReady) {
      factory = AnalyzerFactory.getInstance();
      tizer = new Tokenizer();
    }

    idxDir = indexDir;
    seenDocs = new HashSet<>();

    // The numeric arguments are initial capacities (sizing hints), not limits.
    placeIdx = new HashMap<>(1000);
    categoryIdx = new HashMap<>(100);
    authorIdx = new HashMap<>(1000);
    termIdx = new HashMap<>(50000);
  }
Ejemplo n.º 2
0
  /**
   * Adds the given document to the index.
   *
   * <p>The first {@code FILEID} value of the document is parsed as an integer and used to detect
   * documents that were added before:
   *
   * <ul>
   *   <li>If the id has already been seen, only the first {@code CATEGORY} value is tokenized and
   *       added under {@code CATEGORY}; no analyzer is applied on this path. A document without a
   *       category value is left untouched.
   *   <li>Otherwise every field in {@code FieldNames} except {@code FILEID} is read. Each value is
   *       tokenized, passed through the analyzer the factory supplies for that field (when there
   *       is one) and handed to {@code addToIndex}. Afterwards the id is recorded as seen.
   * </ul>
   *
   * <p>Apart from {@code FILEID}, which must be present, fields that are missing or have no values
   * are skipped.
   *
   * @param d : The Document to be added
   * @throws IndexerException : In case any error occurs
   * @throws NumberFormatException if the document's file id is not a valid integer
   */
  public void addDocument(Document d) throws IndexerException {
    String[] values;
    TokenStream stream;
    Analyzer analyzer;
    String fileId = d.getField(FieldNames.FILEID)[0];
    int fid = Integer.parseInt(fileId);
    // Clear the author organisation left over from the previous call; it is set again below when
    // the AUTHOR field is reached.
    // NOTE(review): presumably read by addToIndex while AUTHOR values are indexed — confirm.
    authorOrg = null;

    try {
      if (seenDocs.contains(fid)) {
        // Same file id seen again: only add it to the category index.
        // getField can return null (see the checks below), so guard before taking element 0.
        values = d.getField(FieldNames.CATEGORY);
        if (values != null && values.length > 0) {
          stream = tizer.consume(values[0]);
          addToIndex(stream, FieldNames.CATEGORY, fileId);
        }

      } else {
        for (FieldNames fn : FieldNames.values()) {
          if (fn == FieldNames.FILEID) {
            continue;
          } else if (fn == FieldNames.AUTHOR) {
            // Capture the organisation before the author values themselves are indexed.
            values = d.getField(FieldNames.AUTHORORG);
            if (values != null && values.length > 0) {
              authorOrg = values[0];
            }
          }

          values = d.getField(fn);

          if (values != null) {
            for (String v : values) {
              stream = tizer.consume(v);
              analyzer = factory.getAnalyzerForField(fn, stream);

              if (analyzer != null) {
                // Drive the analyzer to completion, then index its resulting stream instead of
                // the raw tokens.
                while (analyzer.increment()) {}

                stream = analyzer.getStream();
              }

              addToIndex(stream, fn, fileId);
            }
          }
        }

        seenDocs.add(fid);
      }
    } catch (TokenizerException ignored) {
      // NOTE(review): tokenizer failures are dropped silently, which contradicts the documented
      // "@throws IndexerException : In case any error occurs". The document may be left partially
      // indexed and is not recorded in seenDocs. Kept as-is to preserve existing caller behaviour;
      // consider wrapping the failure in an IndexerException.
    }
  }