/**
 * Builds a stored, indexed {@link NumericField} holding a long value.
 *
 * <p>The textual {@code number} is converted with {@code GetterUtil.getLong} and the result is
 * set as the field's long value.
 *
 * @param field the name of the field to create
 * @param number the textual representation of the value to store
 * @return the newly created numeric field carrying the converted value
 */
public static NumericField getNumber(String field, String number) {
  // The field object is created before the text is converted, as in the original flow.
  final NumericField result = new NumericField(field, Field.Store.YES, true);
  final long parsedValue = GetterUtil.getLong(number);
  result.setLongValue(parsedValue);
  return result;
}
/**
 * Loads the stored document with the given id and converts it into a {@link SearchItem}.
 *
 * <p>The type, path and date fields are dereferenced unconditionally; only the version field is
 * optional, and a missing version is reported as {@code -1}.
 *
 * @param documentId the id of the document to load from the searcher
 * @return the search item built from the document's type, path, version and date fields
 * @throws IOException if the searcher fails to load the document
 */
public SearchItem toResult(int documentId) throws IOException {
  final Document doc = searcher.doc(documentId);

  // Look up all fields first, in a fixed order, before converting any of them.
  final String typeText = doc.getFieldable(FieldNames.TYPE).stringValue();
  final NumericField dateField = (NumericField) doc.getFieldable(FieldNames.DATE);
  final Fieldable pathField = doc.getFieldable(FieldNames.PATH);
  final NumericField versionField = (NumericField) doc.getFieldable(FieldNames.VERSION);

  final int itemType = Integer.parseInt(typeText);
  final String itemPath = pathField.stringValue();

  final int itemVersion;
  if (versionField == null) {
    itemVersion = -1;
  } else {
    itemVersion = versionField.getNumericValue().intValue();
  }

  final Date itemDate = new Date(dateField.getNumericValue().longValue());

  return new SearchItem(itemType, itemPath, itemVersion, itemDate);
}
@Before public void init() throws CorruptIndexException, LockObtainFailedException, IOException { IndexWriter indexWriter = getWriter(); for (int i = 0; i < ids.length; i++) { Document doc = new Document(); doc.add(new Field("ids", ids[i], Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("titles", titles[i], Field.Store.YES, Field.Index.ANALYZED)); doc.add(new Field("titles2", titles[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); doc.add( new Field( "contents", contents[i], Field.Store.YES, Field.Index.ANALYZED, TermVector.YES)); NumericField numField = new NumericField("price", Field.Store.YES, true); numField.setIntValue(prices[i]); doc.add(numField); indexWriter.addDocument(doc); } // indexWriter.commit(); indexWriter.close(); }
/**
 * Creates the numeric field for one external value of this field type.
 *
 * <p>The external value is parsed according to this instance's {@code type}; date values are
 * resolved through {@code dateField.parseMath} and stored as epoch milliseconds.
 *
 * @param field the schema field describing name, indexed/stored flags and norms handling
 * @param externalVal the external (textual) value to parse
 * @param boost the boost to apply to the created field
 * @return the configured numeric field, or {@code null} if the schema field is neither indexed
 *     nor stored
 * @throws SolrException if this instance's {@code type} is not one of the handled trie types
 */
@Override
public Fieldable createField(SchemaField field, String externalVal, float boost) {
  final boolean isIndexed = field.indexed();
  final boolean isStored = field.stored();

  // Nothing to produce for a field that is neither searchable nor retrievable.
  if (!(isIndexed || isStored)) {
    if (log.isTraceEnabled()) {
      log.trace("Ignoring unindexed/unstored field: " + field);
    }
    return null;
  }

  final Field.Store storeMode = isStored ? Field.Store.YES : Field.Store.NO;
  final NumericField numeric =
      new NumericField(field.getName(), precisionStep, storeMode, isIndexed);

  switch (type) {
    case INTEGER:
      numeric.setIntValue(Integer.parseInt(externalVal));
      break;
    case FLOAT:
      numeric.setFloatValue(Float.parseFloat(externalVal));
      break;
    case LONG:
      numeric.setLongValue(Long.parseLong(externalVal));
      break;
    case DOUBLE:
      numeric.setDoubleValue(Double.parseDouble(externalVal));
      break;
    case DATE:
      numeric.setLongValue(dateField.parseMath(null, externalVal).getTime());
      break;
    default:
      throw new SolrException(
          SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + type);
  }

  numeric.setOmitNorms(field.omitNorms());
  numeric.setIndexOptions(getIndexOptions(field, externalVal));
  numeric.setBoost(boost);
  return numeric;
}
/**
 * Creates a document from the given doc data.
 *
 * <p>Uses only part of the body and modifies {@code docData} to keep the rest (or uses all of it
 * if {@code size <= 0}). The properties of {@code docData} are reset after being added so they
 * are not added more than once.
 *
 * <p>Fields are added in this order: id, name, date string, date in milliseconds, time of day
 * in seconds, title, then (if a non-empty body exists) body and optionally its UTF-8 bytes, then
 * (if enabled) one field per property.
 *
 * @param docData source of id, name, date, title, body and properties; its body and properties
 *     are modified as described above
 * @param size number of body characters to use; {@code <= 0} or a value not smaller than the
 *     body length means the whole body
 * @param cnt when non-negative, appended to the name as {@code "_" + cnt}
 * @return the populated document (the doc state's shared instance when {@code reuseFields} is
 *     set, otherwise a new one)
 * @throws UnsupportedEncodingException declared because of the {@code getBytes("UTF-8")} call
 */
private Document createDocument(DocData docData, int size, int cnt)
    throws UnsupportedEncodingException {

  final DocState ds = getDocState();
  final Document doc = reuseFields ? ds.doc : new Document();
  // A reused document still holds the previous call's fields; start from an empty field list.
  doc.getFields().clear();

  // Set ID_FIELD
  Field idField = ds.getField(ID_FIELD, storeVal, Index.NOT_ANALYZED_NO_NORMS, termVecVal);

  int id;
  if (r != null) {
    // A random source is configured: draw the id from [0, updateDocIDLimit).
    id = r.nextInt(updateDocIDLimit);
  } else {
    id = docData.getID();
    if (id == -1) {
      // The doc data carries no id of its own: hand out the next value of the running counter.
      id = numDocsCreated.getAndIncrement();
    }
  }
  idField.setValue(Integer.toString(id));
  doc.add(idField);

  // Set NAME_FIELD
  String name = docData.getName();
  if (name == null) name = "";
  name = cnt < 0 ? name : name + "_" + cnt;
  Field nameField = ds.getField(NAME_FIELD, storeVal, indexVal, termVecVal);
  nameField.setValue(name);
  doc.add(nameField);

  // Set DATE_FIELD
  // Lazily create the date helper and remember it in dateParsers for later calls.
  DateUtil util = dateParsers.get();
  if (util == null) {
    util = new DateUtil();
    dateParsers.set(util);
  }
  Date date = null;
  String dateString = docData.getDate();
  if (dateString != null) {
    // Reset the shared parse position before each parse.
    util.pos.setIndex(0);
    date = util.parser.parse(dateString, util.pos);
    // System.out.println(dateString + " parsed to " + date);
  } else {
    dateString = "";
  }
  Field dateStringField = ds.getField(DATE_FIELD, storeVal, indexVal, termVecVal);
  dateStringField.setValue(dateString);
  doc.add(dateStringField);

  if (date == null) {
    // No date string, or parsing yielded null: just set to right now
    date = new Date();
  }

  NumericField dateField = ds.getNumericField(DATE_MSEC_FIELD);
  dateField.setLongValue(date.getTime());
  doc.add(dateField);

  // Seconds elapsed since midnight, according to the helper's calendar.
  util.cal.setTime(date);
  final int sec =
      util.cal.get(Calendar.HOUR_OF_DAY) * 3600
          + util.cal.get(Calendar.MINUTE) * 60
          + util.cal.get(Calendar.SECOND);

  NumericField timeSecField = ds.getNumericField(TIME_SEC_FIELD);
  timeSecField.setIntValue(sec);
  doc.add(timeSecField);

  // Set TITLE_FIELD
  String title = docData.getTitle();
  Field titleField = ds.getField(TITLE_FIELD, storeVal, indexVal, termVecVal);
  titleField.setValue(title == null ? "" : title);
  doc.add(titleField);

  // Set BODY_FIELD (skipped entirely when there is no body text)
  String body = docData.getBody();
  if (body != null && body.length() > 0) {
    String bdy;
    if (size <= 0 || size >= body.length()) {
      bdy = body; // use all
      docData.setBody(""); // nothing left
    } else {
      // attempt not to break words - if whitespace found within next 20 chars...
      // The scan starts at size - 1, so the cut can also move one character backwards.
      // NOTE(review): with size == 1 and whitespace at index 0 the cut becomes 0, i.e. an empty
      // part is used and the whole body is kept - confirm callers tolerate this.
      for (int n = size - 1; n < size + 20 && n < body.length(); n++) {
        if (Character.isWhitespace(body.charAt(n))) {
          size = n;
          break;
        }
      }
      bdy = body.substring(0, size); // use part
      docData.setBody(body.substring(size)); // some left
    }
    Field bodyField = ds.getField(BODY_FIELD, bodyStoreVal, bodyIndexVal, termVecVal);
    bodyField.setValue(bdy);
    doc.add(bodyField);

    if (storeBytes) {
      // Additionally store the used body part as UTF-8 bytes.
      Field bytesField =
          ds.getField(BYTES_FIELD, Store.YES, Index.NOT_ANALYZED_NO_NORMS, TermVector.NO);
      bytesField.setValue(bdy.getBytes("UTF-8"));
      doc.add(bytesField);
    }
  }

  if (indexProperties) {
    Properties props = docData.getProps();
    if (props != null) {
      // One field per property; keys and values are cast to String.
      for (final Map.Entry<Object, Object> entry : props.entrySet()) {
        Field f = ds.getField((String) entry.getKey(), storeVal, indexVal, termVecVal);
        f.setValue((String) entry.getValue());
        doc.add(f);
      }
      // Clear the properties so they are not added again for the next part of this doc data.
      docData.setProps(null);
    }
  }

  // System.out.println("============== Created doc "+numDocsCreated+" :\n"+doc+"\n==========");
  return doc;
}
static { try { // set the theoretical maximum term count for 8bit (see docs for the number) BooleanQuery.setMaxClauseCount(7 * 255 * 2 + 255); directory = new RAMDirectory(); IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, MaxFieldLength.UNLIMITED); NumericField field8 = new NumericField("field8", 8, Field.Store.YES, true), field6 = new NumericField("field6", 6, Field.Store.YES, true), field4 = new NumericField("field4", 4, Field.Store.YES, true), field2 = new NumericField("field2", 2, Field.Store.YES, true), fieldNoTrie = new NumericField( "field" + Integer.MAX_VALUE, Integer.MAX_VALUE, Field.Store.YES, true), ascfield8 = new NumericField("ascfield8", 8, Field.Store.NO, true), ascfield6 = new NumericField("ascfield6", 6, Field.Store.NO, true), ascfield4 = new NumericField("ascfield4", 4, Field.Store.NO, true), ascfield2 = new NumericField("ascfield2", 2, Field.Store.NO, true); Document doc = new Document(); // add fields, that have a distance to test general functionality doc.add(field8); doc.add(field6); doc.add(field4); doc.add(field2); doc.add(fieldNoTrie); // add ascending fields with a distance of 1, beginning at -noDocs/2 to test the correct // splitting of range and inclusive/exclusive doc.add(ascfield8); doc.add(ascfield6); doc.add(ascfield4); doc.add(ascfield2); // Add a series of noDocs docs with increasing long values, by updating the fields for (int l = 0; l < noDocs; l++) { long val = distance * l + startOffset; field8.setLongValue(val); field6.setLongValue(val); field4.setLongValue(val); field2.setLongValue(val); fieldNoTrie.setLongValue(val); val = l - (noDocs / 2); ascfield8.setLongValue(val); ascfield6.setLongValue(val); ascfield4.setLongValue(val); ascfield2.setLongValue(val); writer.addDocument(doc); } writer.optimize(); writer.close(); searcher = new IndexSearcher(directory, true); } catch (Exception e) { throw new Error(e); } }
/** * Indexes the given file using the given writer, or if a directory is given, recurses over files * and directories found under the given directory. * * <p>NOTE: This method indexes one document per input file. This is slow. For good throughput, * put multiple documents into your input file(s). An example of this is in the benchmark module, * which can create "line doc" files, one document per line, using the <a * href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html" * >WriteLineDocTask</a>. * * @param writer Writer to the index where the given file/dir info will be stored * @param file The file to index, or the directory to recurse into to find files to index * @throws IOException */ static void indexDocs(IndexWriter writer, File file) throws IOException { // do not try to index files that cannot be read if (file.canRead()) { if (file.isDirectory()) { String[] files = file.list(); // an IO error could occur if (files != null) { for (int i = 0; i < files.length; i++) { indexDocs(writer, new File(file, files[i])); } } } else { FileInputStream fis; try { fis = new FileInputStream(file); } catch (FileNotFoundException fnfe) { // at least on windows, some temporary files raise this exception with an "access denied" // message // checking if the file can be read doesn't help return; } try { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new Field("path", file.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); pathField.setIndexOptions(IndexOptions.DOCS_ONLY); doc.add(pathField); // Add the last modified date of the file a field named "modified". // Use a NumericField that is indexed (i.e. efficiently filterable with // NumericRangeFilter). 
This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. NumericField modifiedField = new NumericField("modified"); modifiedField.setLongValue(file.lastModified()); doc.add(modifiedField); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new Field("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8")))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.getPath()), doc); } } finally { fis.close(); } } } }
@BeforeClass public static void beforeClass() throws Exception { directory = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter( random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)) .setMaxBufferedDocs(_TestUtil.nextInt(random, 50, 1000))); NumericField field8 = new NumericField("field8", 8, Field.Store.YES, true), field6 = new NumericField("field6", 6, Field.Store.YES, true), field4 = new NumericField("field4", 4, Field.Store.YES, true), field2 = new NumericField("field2", 2, Field.Store.YES, true), fieldNoTrie = new NumericField("field" + Integer.MAX_VALUE, Integer.MAX_VALUE, Field.Store.YES, true), ascfield8 = new NumericField("ascfield8", 8, Field.Store.NO, true), ascfield6 = new NumericField("ascfield6", 6, Field.Store.NO, true), ascfield4 = new NumericField("ascfield4", 4, Field.Store.NO, true), ascfield2 = new NumericField("ascfield2", 2, Field.Store.NO, true); Document doc = new Document(); // add fields, that have a distance to test general functionality doc.add(field8); doc.add(field6); doc.add(field4); doc.add(field2); doc.add(fieldNoTrie); // add ascending fields with a distance of 1, beginning at -noDocs/2 to test the correct // splitting of range and inclusive/exclusive doc.add(ascfield8); doc.add(ascfield6); doc.add(ascfield4); doc.add(ascfield2); // Add a series of noDocs docs with increasing long values, by updating the fields for (int l = 0; l < noDocs; l++) { long val = distance * l + startOffset; field8.setLongValue(val); field6.setLongValue(val); field4.setLongValue(val); field2.setLongValue(val); fieldNoTrie.setLongValue(val); val = l - (noDocs / 2); ascfield8.setLongValue(val); ascfield6.setLongValue(val); ascfield4.setLongValue(val); ascfield2.setLongValue(val); writer.addDocument(doc); } reader = writer.getReader(); searcher = newSearcher(reader); writer.close(); }
/** * Create Field instances, according to the attribute configurations inside the fieldConfig * parameter * * @param strAttName the attributes name * @param strAttValue the attributes value * @param fieldConfig the field configuration. Here you can specify whether a specific field * should be analyzed, etc. You can also set default values. * @return the field, with Store, Index and TermVector configuration as given in fieldConfig. Null * in the case the field could'nt generated */ public static AbstractField createField( String strAttName, String strAttValue, FieldConfig fieldConfig) { // Der Store Store store = fieldConfig.defaultFieldMapping.store; FieldMapping fieldMapping4Att = fieldConfig.hsFieldName2FieldMapping.get(strAttName); Index index = fieldMapping4Att != null ? fieldMapping4Att.index : null; if (index == null) index = fieldConfig.defaultFieldMapping.index; TermVector termVector = fieldMapping4Att != null ? fieldMapping4Att.termVector : null; if (termVector == null) termVector = fieldConfig.defaultFieldMapping.termVector; // welches Field erzeugt wird, steht ebenfalls in der config FieldType fieldType = fieldMapping4Att != null ? 
fieldMapping4Att.fieldType : null; if (fieldType == null) fieldType = fieldConfig.defaultFieldMapping.fieldType; AbstractField newField = null; if (strAttValue == null) strAttValue = ""; if (FieldType.STRING.equals(fieldType)) { newField = new Field(strAttName, strAttValue, store, index, termVector); } else if (StringUtils.nullOrWhitespace(strAttValue)) { // wir können keine leeren numericValues eintragen - wir probieren, ob man string und number // mit einem attributnamen mischen kann^^ // scheint zu gehen // wenn das value leer ist und wir ein numerisches Field haben, dann indexieren wir dieses // value nicht - ansonsten gibt es // Probleme beim Sortieren nach diesem numerischen Field newField = new Field(strAttName, strAttValue, store, Index.NO, TermVector.NO); } else if (m_llNumberTypes.contains(fieldType)) { boolean bIndex = false; if (index == Index.ANALYZED || index == Index.ANALYZED_NO_NORMS) bIndex = true; newField = new NumericField(strAttName, store, bIndex); if (FieldType.INTEGER.equals(fieldType)) ((NumericField) newField).setIntValue(Integer.parseInt(strAttValue)); else if (FieldType.LONG.equals(fieldType)) ((NumericField) newField).setLongValue(Long.parseLong(strAttValue)); else if (FieldType.FLOAT.equals(fieldType)) ((NumericField) newField).setFloatValue(Float.parseFloat(strAttValue)); else if (FieldType.DOUBLE.equals(fieldType)) ((NumericField) newField).setDoubleValue(Double.parseDouble(strAttValue)); } else if (FieldType.DATE.equals(fieldType)) { boolean bIndex = false; if (index == Index.ANALYZED || index == Index.ANALYZED_NO_NORMS) bIndex = true; newField = new NumericField(strAttName, store, bIndex); Date parsedDate = DateParser.parseDateString(strAttValue); if (parsedDate == null) { return null; } ((NumericField) newField).setLongValue(DateUtils.date2Number(parsedDate)); } else if (FieldType.TIME.equals(fieldType)) { boolean bIndex = false; if (index == Index.ANALYZED || index == Index.ANALYZED_NO_NORMS) bIndex = true; newField = new 
NumericField(strAttName, store, bIndex); Date parsedDate = DateParser.parseDateString(strAttValue); ((NumericField) newField) .setIntValue(Integer.parseInt(new SimpleDateFormat("HHmmssSSS").format(parsedDate))); } return newField; }