/**
 * Default constructor.
 *
 * @param indexDir : The root directory to be used for indexing
 */
public IndexWriter(String indexDir) {
    // Lazily initialize the shared analyzer factory and tokenizer only once
    // (factory/tizer appear to be shared across writer instances — the null
    // check guards repeated construction).
    if (factory == null) {
        factory = AnalyzerFactory.getInstance();
        tizer = new Tokenizer();
    }
    // Pre-size each index map for its expected number of distinct keys to
    // avoid rehashing; the term vocabulary is by far the largest.
    termIdx = new HashMap<String, List<String>>(50000);
    authorIdx = new HashMap<String, List<String>>(1000);
    categoryIdx = new HashMap<String, List<String>>(100);
    placeIdx = new HashMap<String, List<String>>(1000);
    idxDir = indexDir;
    // Tracks file ids already added so duplicates are handled specially in addDocument.
    seenDocs = new HashSet<Integer>();
}
/** * Method to add the given Document to the index This method should take care of reading the filed * values, passing them through corresponding analyzers and then indexing the results for each * indexable field within the document. * * @param d : The Document to be added * @throws IndexerException : In case any error occurs */ public void addDocument(Document d) throws IndexerException { String[] values; TokenStream stream; Analyzer analyzer; String fileId = d.getField(FieldNames.FILEID)[0]; int fid = Integer.parseInt(fileId); authorOrg = null; try { if (seenDocs.contains(fid)) { stream = tizer.consume(d.getField(FieldNames.CATEGORY)[0]); addToIndex(stream, FieldNames.CATEGORY, fileId); // only add to category idx } else { for (FieldNames fn : FieldNames.values()) { if (fn == FieldNames.FILEID) { continue; } else if (fn == FieldNames.AUTHOR) { values = d.getField(FieldNames.AUTHORORG); if (values != null) { authorOrg = values[0]; } } values = d.getField(fn); if (values != null) { for (String v : values) { stream = tizer.consume(v); analyzer = factory.getAnalyzerForField(fn, stream); if (analyzer != null) { while (analyzer.increment()) {} stream = analyzer.getStream(); } addToIndex(stream, fn, fileId); } } } seenDocs.add(fid); } } catch (TokenizerException e) { } }