public static Document Document(File f) throws IOException, InterruptedException { // make a new, empty document Document doc = new Document(); // Add the url as a field named "url". Use an UnIndexed field, so // that the url is just stored with the document, but is not searchable. doc.add(Field.UnIndexed("url", f.getPath().replace(dirSep, '/'))); // Add the last modified date of the file a field named "modified". Use a // Keyword field, so that it's searchable, but so that no attempt is made // to tokenize the field into words. doc.add(Field.Keyword("modified", DateField.timeToString(f.lastModified()))); // Add the uid as a field, so that index can be incrementally maintained. // This field is not stored with document, it is indexed, but it is not // tokenized prior to indexing. doc.add(new Field("uid", uid(f), false, true, false)); HTMLParser parser = new HTMLParser(f); // Add the tag-stripped contents as a Reader-valued Text field so it will // get tokenized and indexed. doc.add(Field.Text("contents", parser.getReader())); // Add the summary as an UnIndexed field, so that it is stored and returned // with hit documents for display. doc.add(Field.UnIndexed("summary", parser.getSummary())); // Add the title as a separate Text field, so that it can be searched // separately. doc.add(Field.Text("title", parser.getTitle())); // return the document return doc; }
/**
 * For testing DateTools support: formats {@code d} either with the legacy
 * {@code DateField} encoding or with {@code DateTools} at the given resolution.
 *
 * @param d the date to format
 * @param resolution the DateTools resolution, or {@code null} to use the legacy DateField format
 * @return the formatted date string
 */
private String getDate(Date d, DateTools.Resolution resolution) throws Exception {
  // No resolution requested: fall back to the legacy encoding.
  if (resolution == null) {
    return DateField.dateToString(d);
  }
  return DateTools.dateToString(d, resolution);
}
public static String uid(File f) { // Append path and date into a string in such a way that lexicographic // sorting gives the same results as a walk of the file hierarchy. Thus // null (\u0000) is used both to separate directory components and to // separate the path from the date. return f.getPath().replace(dirSep, '\u0000') + "\u0000" + DateField.timeToString(f.lastModified()); }
/** for testing legacy DateField support */
public void testLegacyDateRange() throws Exception {
  final String from = getLocalizedDate(2002, 1, 1, false);
  final String to = getLocalizedDate(2002, 1, 4, false);

  // The inclusive range is expected to end at the last millisecond of the end day.
  final Calendar lastInstant = new GregorianCalendar();
  lastInstant.set(2002, 1, 4, 23, 59, 59);
  lastInstant.set(Calendar.MILLISECOND, 999);

  // Inclusive range written with an explicit TO.
  final String inclusiveQuery =
      "[ " + escapeDateString(from) + " TO " + escapeDateString(to) + "]";
  final String inclusiveExpected =
      "[" + getLegacyDate(from) + " TO " + DateField.dateToString(lastInstant.getTime()) + "]";
  assertQueryEquals(inclusiveQuery, null, inclusiveExpected);

  // Exclusive range written without TO; the end date is expected unchanged.
  final String exclusiveQuery =
      "{ " + escapeDateString(from) + " " + escapeDateString(to) + " }";
  final String exclusiveExpected =
      "{" + getLegacyDate(from) + " TO " + getLegacyDate(to) + "}";
  assertQueryEquals(exclusiveQuery, null, exclusiveExpected);
}
/**
 * Adds a document with a text field {@code f} and a legacy-encoded {@code date} field
 * to the given index writer.
 *
 * <p>The date is built from the supplied components in the time zone of
 * {@code Calendar.getInstance(Locale.ENGLISH)}, with the millisecond part fixed at zero
 * so that identical arguments always produce the identical indexed value.
 *
 * @param content value of the stored, analyzed field {@code f}
 * @param year calendar year
 * @param month month of the year, 1-based (1 = January)
 * @param day day of the month
 * @param hour hour of the day (24-hour clock)
 * @param minute minute of the hour
 * @param second second of the minute
 * @param iw writer the new document is added to
 * @throws IOException declared by this method; presumably raised by the writer
 */
private static void addDateDoc(
    String content,
    int year,
    int month,
    int day,
    int hour,
    int minute,
    int second,
    IndexWriter iw)
    throws IOException {
  Document d = new Document();
  d.add(new Field("f", content, Field.Store.YES, Field.Index.ANALYZED));

  Calendar cal = Calendar.getInstance(Locale.ENGLISH);
  // Calendar months are 0-based, hence month - 1.
  cal.set(year, month - 1, day, hour, minute, second);
  // set(...) above keeps the MILLISECOND value taken from the current time;
  // zero it so the encoded date does not vary from run to run.
  cal.set(Calendar.MILLISECOND, 0);

  d.add(
      new Field(
          "date",
          DateField.dateToString(cal.getTime()),
          Field.Store.YES,
          Field.Index.NOT_ANALYZED));
  iw.addDocument(d);
}
/**
 * For testing legacy DateField support: parses {@code s} with the default
 * SHORT date format and re-encodes it in the legacy DateField form.
 *
 * @param s a date string in the SHORT date format
 * @return the DateField encoding of the parsed date
 * @throws Exception if {@code s} cannot be parsed
 */
private String getLegacyDate(String s) throws Exception {
  final DateFormat shortFormat = DateFormat.getDateInstance(DateFormat.SHORT);
  return DateField.dateToString(shortFormat.parse(s));
}