/**
 * Builds a per-field info map for every stored field of a single document, reporting the
 * schema type, schema/index flags, external and internal values, boost, docFreq and (when
 * stored) the term vector.
 *
 * @param doc the stored document whose fields are inspected
 * @param docId the document's id, used to fetch its term vectors from the reader
 * @param reader reader used for docFreq and term-vector lookups
 * @param schema schema used to resolve each field's type (may have no entry for a field)
 * @return ordered map keyed by field name, one sub-map of properties per field
 * @throws IOException on index access failure
 */
private static SimpleOrderedMap<Object> getDocumentFieldsInfo(
    Document doc, int docId, IndexReader reader, IndexSchema schema) throws IOException {
  SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<Object>();
  for (Object o : doc.getFields()) {
    Fieldable fieldable = (Fieldable) o;
    SimpleOrderedMap<Object> f = new SimpleOrderedMap<Object>();
    // The schema entry may be absent (e.g. unknown field): ftype stays null then and the
    // type/value entries below are reported as null.
    SchemaField sfield = schema.getFieldOrNull(fieldable.name());
    FieldType ftype = (sfield == null) ? null : sfield.getType();
    f.add("type", (ftype == null) ? null : ftype.getTypeName());
    f.add("schema", getFieldFlags(sfield));
    f.add("flags", getFieldFlags(fieldable));
    // Build the term in its indexed form so the docFreq lookup below matches what was
    // actually written to the index.
    Term t =
        new Term(
            fieldable.name(),
            ftype != null ? ftype.storedToIndexed(fieldable) : fieldable.stringValue());
    f.add("value", (ftype == null) ? null : ftype.toExternal(fieldable)); // TODO: this really should be "stored"
    f.add("internal", fieldable.stringValue()); // may be a binary number
    byte[] arr = fieldable.getBinaryValue();
    if (arr != null) {
      f.add("binary", Base64.byteArrayToBase64(arr, 0, arr.length));
    }
    f.add("boost", fieldable.getBoost());
    f.add(
        "docFreq",
        t.text() == null ? 0 : reader.docFreq(t)); // this can be 0 for non-indexed fields
    // If we have a term vector, return that
    if (fieldable.isTermVectorStored()) {
      try {
        TermFreqVector v = reader.getTermFreqVector(docId, fieldable.name());
        if (v != null) {
          SimpleOrderedMap<Integer> tfv = new SimpleOrderedMap<Integer>();
          for (int i = 0; i < v.size(); i++) {
            tfv.add(v.getTerms()[i], v.getTermFrequencies()[i]);
          }
          f.add("termVector", tfv);
        }
      } catch (Exception ex) {
        // Term-vector retrieval is best effort: log and keep the rest of the field info.
        log.warn("error writing term vector", ex);
      }
    }
    finfo.add(fieldable.name(), f);
  }
  return finfo;
}
@Override public void validate(ParseContext context) throws MapperParsingException { String routing = context.sourceToParse().routing(); if (path != null && routing != null) { // we have a path, check if we can validate we have the same routing value as the one in the // doc... String value = null; Fieldable field = context.doc().getFieldable(path); if (field != null) { value = field.stringValue(); if (value == null) { // maybe its a numeric field... if (field instanceof NumberFieldMapper.CustomNumericField) { value = ((NumberFieldMapper.CustomNumericField) field).numericAsString(); } } } if (value == null) { value = context.ignoredValue(path); } if (value == null) { // maybe its a numeric field } if (!routing.equals(value)) { throw new MapperParsingException( "External routing [" + routing + "] and document path routing [" + value + "] mismatch"); } } }
/**
 * Loads each requested document through the field selector and checks that every field's
 * stored value splits into two tokens that are both present in the dataset.
 */
public void doTest(int[] docs) throws Exception {
  Directory dir = makeIndex();
  IndexReader reader = IndexReader.open(dir, true);
  for (int docNum : docs) {
    Document doc = reader.document(docNum, SELECTOR);
    doc.get(MAGIC_FIELD);
    // Keep the explicit iterator so any failure inside the loop body can be re-thrown
    // with the offending field's name attached.
    Iterator<Fieldable> it = doc.getFields().iterator();
    while (it.hasNext()) {
      Fieldable field = null;
      try {
        field = it.next();
        String fieldName = field.name();
        String fieldValue = field.stringValue();
        assertNotNull(docNum + " FIELD: " + fieldName, fieldValue);
        String[] parts = fieldValue.split("#");
        if (!dataset.contains(parts[0]) || !dataset.contains(parts[1])) {
          fail("FIELD:" + fieldName + ",VAL:" + fieldValue);
        }
      } catch (Exception e) {
        throw new Exception(docNum + " WTF: " + field.name(), e);
      }
    }
  }
  reader.close();
  dir.close();
}
/**
 * Decodes a stored field value: binary fields are delegated to the wrapped decoder, all
 * other fields are decoded from their stored term text.
 */
@Override
public String decodeStored(String fieldName, Fieldable value) throws Exception {
  return value.isBinary()
      ? b.decodeStored(fieldName, value)
      : decodeTerm(fieldName, value.stringValue());
}
/**
 * Loads the stored field names, flags, norms and values of document {@code docNum} from the
 * named collection's index and publishes them into {@code result}.
 *
 * <p>Fixes a resource leak in the previous version: the {@code IndexReader} and
 * {@code Directory} are now always closed, including on the early-return error paths.
 *
 * @param collectionName name of the crescent collection to read from
 * @param docNum lucene document number to load
 * @return {@code true} when the document was loaded, {@code false} on any error
 */
@Override
public boolean reload(String collectionName, int docNum) {
  if (collectionName == null) return false;

  CrescentCollectionHandler collectionHandler =
      SpringApplicationContext.getBean(
          "crescentCollectionHandler", CrescentCollectionHandler.class);
  CrescentCollection collection =
      collectionHandler.getCrescentCollections().getCrescentCollection(collectionName);

  if (collection == null) {
    logger.debug("doesn't Collection Info => {}", collectionName);
    return false;
  }

  List<String> fieldName = new ArrayList<String>();
  List<String> flag = new ArrayList<String>();
  List<String> norm = new ArrayList<String>();
  List<String> value = new ArrayList<String>();

  Directory directory = null;
  IndexReader reader = null;
  try {
    directory = FSDirectory.open(new File(collection.getIndexingDirectory()));
    reader = IndexReader.open(directory);

    Document document = null;
    try {
      document = reader.document(docNum);
    } catch (IllegalArgumentException e) {
      // docNum is out of range for this index
      logger.error("invalid document number [" + docNum + "]", e);
      return false;
    }

    String fName = null;
    for (Fieldable field : document.getFields()) {
      fName = field.name();
      fieldName.add(fName);
      flag.add(fieldFlag(field));
      if (reader.hasNorms(fName)) {
        norm.add(String.valueOf(Similarity.decodeNorm(reader.norms(fName)[docNum])));
      } else {
        // field was indexed without norms
        norm.add("---");
      }
      value.add(field.stringValue());
    }
  } catch (IOException e) {
    logger.error("I/O error while reloading document [" + docNum + "]", e);
    return false;
  } finally {
    // Always release index resources (previously leaked on every path).
    if (reader != null) {
      try {
        reader.close();
      } catch (IOException ignored) {
        // best-effort close
      }
    }
    if (directory != null) {
      try {
        directory.close();
      } catch (IOException ignored) {
        // best-effort close
      }
    }
  }

  result.put("collection", collectionName);
  result.put("docNum", docNum);
  result.put("fieldName", fieldName);
  result.put("flag", flag);
  result.put("norm", norm);
  result.put("value", value);

  return true;
}
/**
 * Materializes a stored document into a {@code SearchItem}, reading the type, path, date and
 * (optional) version fields. A missing version field maps to -1.
 */
public SearchItem toResult(int documentId) throws IOException {
  Document doc = searcher.doc(documentId);
  Fieldable pathField = doc.getFieldable(FieldNames.PATH);
  NumericField dateField = (NumericField) doc.getFieldable(FieldNames.DATE);
  NumericField versionField = (NumericField) doc.getFieldable(FieldNames.VERSION);
  int type = Integer.parseInt(doc.getFieldable(FieldNames.TYPE).stringValue());
  // version is optional; everything else is assumed present — TODO confirm against the writer
  int version = (versionField == null) ? -1 : versionField.getNumericValue().intValue();
  Date date = new Date(dateField.getNumericValue().longValue());
  return new SearchItem(type, pathField.stringValue(), version, date);
}
/**
 * Captures all stored field values of the document, grouped by field name, and associates the
 * resulting map with the discovered record.
 */
@Override
protected void onRecord(DiscoveredRecord record, Document document) {
  Map<String, List<String>> valuesByName = new HashMap<String, List<String>>();
  for (Fieldable field : document.getFields()) {
    String fieldName = field.name();
    List<String> values = valuesByName.get(fieldName);
    if (values == null) {
      // first value seen for this field name
      values = new ArrayList<String>();
      valuesByName.put(fieldName, values);
    }
    values.add(field.stringValue());
  }
  mapping.put(record, valuesByName);
}
// escidoc:2120373 released item with 1 component; escidoc:2120374 text/plain (public) // has reference @Test public void testReleasedItem_2120373() throws Exception { indexer.indexItemsStart(new File(TEST_RESOURCES_OBJECTS + "escidoc_2120373")); indexer.finalizeIndex(); assertTrue( "Expected 1 found " + indexer.getIndexingReport().getFilesIndexingDone(), indexer.getIndexingReport().getFilesIndexingDone() == 1); assertTrue(indexer.getIndexingReport().getFilesErrorOccured() == 0); assertTrue(indexer.getIndexingReport().getFilesSkippedBecauseOfTime() == 0); assertTrue(indexer.getIndexingReport().getFilesSkippedBecauseOfStatusOrType() == 0); validator = new Validator(indexer); Map<String, Set<Fieldable>> fieldMap = validator.getFieldsOfDocument(); // one component has audience visibility, the other is of mime-type text/html (non supported for // text extraction) Set<Fieldable> fields = fieldMap.get(getFieldNameFor("stored_filename")); assertTrue(fields != null); assertTrue(fieldMap.get(getFieldNameFor("stored_filename")) != null); assertTrue(fieldMap.get(getFieldNameFor("stored_fulltext")) != null); Iterator<Fieldable> it = fieldMap.get(getFieldNameFor("stored_fulltext")).iterator(); boolean found = false; while (it.hasNext()) { Fieldable f = (Fieldable) it.next(); if (f.stringValue().contains("Book")) { found = true; break; } } assertTrue(found); validator.compareToReferenceIndex(); assertTrue( Arrays.toString(indexer.getIndexingReport().getErrorList().toArray()), indexer.getIndexingReport().getErrorList().size() == 0); }
// escidoc:2110541 item with 1 components (escidoc:2111415 internal, public visibility) // has reference @Test public void testItemWithVisibleComponent() throws Exception { indexer.indexItemsStart(new File(TEST_RESOURCES_OBJECTS + "escidoc_2110541")); indexer.finalizeIndex(); assertTrue( "Expected 1 Found " + indexer.getIndexingReport().getFilesIndexingDone(), indexer.getIndexingReport().getFilesIndexingDone() == 1); assertTrue(indexer.getIndexingReport().getFilesErrorOccured() == 0); assertTrue(indexer.getIndexingReport().getFilesIndexingDone() == 1); assertTrue(indexer.getIndexingReport().getFilesSkippedBecauseOfTime() == 0); assertTrue(indexer.getIndexingReport().getErrorList().size() == 0); validator = new Validator(indexer); validator.compareToReferenceIndex(); Map<String, Set<Fieldable>> fieldMap = validator.getFieldsOfDocument(); Set<Fieldable> fields = null; switch (indexer.currentIndexMode) { case LATEST_RELEASE: fields = fieldMap.get("xml_representation"); break; case LATEST_VERSION: fields = fieldMap.get("aa_xml_representation"); break; } assertTrue(fields != null); assertTrue(fields.size() == 1); Fieldable xml_representation = fields.iterator().next(); // check if checksum element exists assertTrue(xml_representation.stringValue().contains("checksum")); }
/**
 * Looks up at most three taxa whose {@code fieldName1} term exactly matches
 * {@code fieldValue}, copying the stored id/path/name fields of each hit into a new
 * {@link TaxonImpl}.
 *
 * @param fieldName1 index field to query
 * @param fieldValue exact term value to match; blank values short-circuit to an empty result
 * @return array of matched taxa, empty when nothing matches or no searcher is available;
 *     never contains {@code null} entries
 * @throws IOException on index access failure
 */
protected Taxon[] findTaxon(String fieldName1, String fieldValue) throws IOException {
  Taxon[] terms = new TaxonImpl[0];
  if (StringUtils.isNotBlank(fieldValue) && indexSearcher != null) {
    PhraseQuery query = new PhraseQuery();
    query.add(new Term(fieldName1, fieldValue));
    int maxHits = 3;
    TopDocs docs = indexSearcher.search(query, maxHits);
    if (docs.totalHits > 0) {
      // Bug fix: the array was previously sized by totalHits while the loop was capped at
      // maxHits, leaving trailing null entries whenever the index held more than 3 matches.
      int numHits = Math.min(docs.totalHits, maxHits);
      terms = new TaxonImpl[numHits];
      for (int i = 0; i < numHits; i++) {
        ScoreDoc scoreDoc = docs.scoreDocs[i];
        Document foundDoc = indexSearcher.doc(scoreDoc.doc);
        Taxon term = new TaxonImpl();
        // Each stored field is optional; copy it only when present.
        Fieldable idField = foundDoc.getFieldable(FIELD_ID);
        if (idField != null) {
          term.setExternalId(idField.stringValue());
        }
        Fieldable rankPathField = foundDoc.getFieldable(FIELD_RANK_PATH);
        if (rankPathField != null) {
          term.setPath(rankPathField.stringValue());
        }
        Fieldable rankPathIdsField = foundDoc.getFieldable(FIELD_RANK_PATH_IDS);
        if (rankPathIdsField != null) {
          term.setPathIds(rankPathIdsField.stringValue());
        }
        Fieldable rankPathNamesField = foundDoc.getFieldable(FIELD_RANK_PATH_NAMES);
        if (rankPathNamesField != null) {
          term.setPathNames(rankPathNamesField.stringValue());
        }
        Fieldable commonNamesFields = foundDoc.getFieldable(FIELD_COMMON_NAMES);
        if (commonNamesFields != null) {
          term.setCommonNames(commonNamesFields.stringValue());
        }
        Fieldable fieldName = foundDoc.getFieldable(FIELD_RECOMMENDED_NAME);
        if (fieldName != null) {
          term.setName(fieldName.stringValue());
        }
        terms[i] = term;
      }
    }
  }
  return terms;
}
/**
 * Returns the field's stored string value unchanged.
 */
@Override
public String value(Fieldable field) {
  String stored = field.stringValue();
  return stored;
}
/**
 * Reconstructs a {@code Uid} from the field's stored string form.
 */
@Override
public Uid value(Fieldable field) {
  String stored = field.stringValue();
  return Uid.createUid(stored);
}
/**
 * Parses the current object from the XContent stream into the context's document,
 * dispatching each child token to the matching serialize* handler.
 *
 * <p>When this mapping is nested, parsing happens into a fresh nested {@code Document}
 * (pre-seeded with uid and nested-type marker fields) which is swapped in for the duration
 * of the parse and swapped back afterwards; its fields are then optionally copied into the
 * parent and/or root documents.
 *
 * @param context parse context carrying the parser, documents and path state
 * @throws IOException on parser failure
 */
public void parse(ParseContext context) throws IOException {
  if (!enabled) {
    // mapping disabled: consume and discard the whole sub-object
    context.parser().skipChildren();
    return;
  }
  XContentParser parser = context.parser();
  String currentFieldName = parser.currentName();
  XContentParser.Token token = parser.currentToken();
  if (token == XContentParser.Token.VALUE_NULL) {
    // the object is null ("obj1" : null), simply bail
    return;
  }
  Document restoreDoc = null;
  if (nested.isNested()) {
    Document nestedDoc = new Document();
    // pre add the uid field if possible (id was already provided)
    Fieldable uidField = context.doc().getFieldable(UidFieldMapper.NAME);
    if (uidField != null) {
      // we don't need to add it as a full uid field in nested docs, since we don't need
      // versioning
      // we also rely on this for UidField#loadVersion
      // this is a deeply nested field
      if (uidField.stringValue() != null) {
        nestedDoc.add(
            new Field(
                UidFieldMapper.NAME,
                uidField.stringValue(),
                Field.Store.NO,
                Field.Index.NOT_ANALYZED));
      } else {
        nestedDoc.add(
            new Field(
                UidFieldMapper.NAME,
                ((UidField) uidField).uid(),
                Field.Store.NO,
                Field.Index.NOT_ANALYZED));
      }
    }
    // the type of the nested doc starts with __, so we can identify that its a nested one in
    // filters
    // note, we don't prefix it with the type of the doc since it allows us to execute a nested
    // query
    // across types (for example, with similar nested objects)
    nestedDoc.add(
        new Field(
            TypeFieldMapper.NAME, nestedTypePath, Field.Store.NO, Field.Index.NOT_ANALYZED));
    restoreDoc = context.switchDoc(nestedDoc);
    context.addDoc(nestedDoc);
  }
  ContentPath.Type origPathType = context.path().pathType();
  context.path().pathType(pathType);
  // if we are at the end of the previous object, advance
  if (token == XContentParser.Token.END_OBJECT) {
    token = parser.nextToken();
  }
  if (token == XContentParser.Token.START_OBJECT) {
    // if we are just starting an OBJECT, advance, this is the object we are parsing, we need
    // the name first
    token = parser.nextToken();
  }
  // main dispatch loop: one handler per token kind until the object closes
  while (token != XContentParser.Token.END_OBJECT) {
    if (token == XContentParser.Token.START_OBJECT) {
      serializeObject(context, currentFieldName);
    } else if (token == XContentParser.Token.START_ARRAY) {
      serializeArray(context, currentFieldName);
    } else if (token == XContentParser.Token.FIELD_NAME) {
      currentFieldName = parser.currentName();
    } else if (token == XContentParser.Token.VALUE_NULL) {
      serializeNullValue(context, currentFieldName);
    } else if (token == null) {
      // stream ended inside the object: the mapping expected an object but got a scalar/EOF
      throw new MapperParsingException(
          "object mapping for ["
              + name
              + "] tried to parse as object, but got EOF, has a concrete value been provided to it?");
    } else if (token.isValue()) {
      serializeValue(context, currentFieldName, token);
    }
    token = parser.nextToken();
  }
  // restore the enable path flag
  context.path().pathType(origPathType);
  if (nested.isNested()) {
    // swap the parent document back in; nestedDoc now holds everything parsed above
    Document nestedDoc = context.switchDoc(restoreDoc);
    if (nested.isIncludeInParent()) {
      // copy all nested fields (except uid/type markers) into the parent document
      for (Fieldable field : nestedDoc.getFields()) {
        if (field.name().equals(UidFieldMapper.NAME)
            || field.name().equals(TypeFieldMapper.NAME)) {
          continue;
        } else {
          context.doc().add(field);
        }
      }
    }
    if (nested.isIncludeInRoot()) {
      // don't add it twice, if its included in parent, and we are handling the master doc...
      if (!(nested.isIncludeInParent() && context.doc() == context.rootDoc())) {
        for (Fieldable field : nestedDoc.getFields()) {
          if (field.name().equals(UidFieldMapper.NAME)
              || field.name().equals(TypeFieldMapper.NAME)) {
            continue;
          } else {
            context.rootDoc().add(field);
          }
        }
      }
    }
  }
}
/**
 * Executes a get request against a single shard: resolves the document's current docId and
 * version via the uid, loads the requested stored fields, extracts the source, and evaluates
 * any script-backed ("_source." / "doc[") field requests.
 *
 * @param request the get request (index, type, id, optional field list, refresh flag)
 * @param shardId shard to execute against
 * @return the assembled GetResponse; {@code exists} is false when the uid is not found
 * @throws ElasticSearchException wrapping any IOException from index access
 */
@Override
protected GetResponse shardOperation(GetRequest request, int shardId)
    throws ElasticSearchException {
  IndexService indexService = indicesService.indexServiceSafe(request.index());
  BloomCache bloomCache = indexService.cache().bloomCache();
  IndexShard indexShard = indexService.shardSafe(shardId);
  DocumentMapper docMapper = indexService.mapperService().documentMapper(request.type());
  if (docMapper == null) {
    throw new TypeMissingException(new Index(request.index()), request.type());
  }
  if (request.refresh()) {
    // make the latest writes visible before reading
    indexShard.refresh(new Engine.Refresh(false));
  }
  Engine.Searcher searcher = indexShard.searcher();
  boolean exists = false;
  byte[] source = null;
  Map<String, GetField> fields = null;
  long version = -1;
  try {
    UidField.DocIdAndVersion docIdAndVersion =
        loadCurrentVersionFromIndex(
            bloomCache, searcher, docMapper.uidMapper().term(request.type(), request.id()));
    if (docIdAndVersion != null && docIdAndVersion.docId != Lucene.NO_DOC) {
      if (docIdAndVersion.version > 0) {
        version = docIdAndVersion.version;
      }
      exists = true;
      FieldSelector fieldSelector = buildFieldSelectors(docMapper, request.fields());
      if (fieldSelector != null) {
        Document doc = docIdAndVersion.reader.document(docIdAndVersion.docId, fieldSelector);
        source = extractSource(doc, docMapper);
        // translate each stored Lucene field into a GetField, preferring the mapper's
        // full name and search-time value representation when a mapper is known
        for (Object oField : doc.getFields()) {
          Fieldable field = (Fieldable) oField;
          String name = field.name();
          Object value = null;
          FieldMappers fieldMappers = docMapper.mappers().indexName(field.name());
          if (fieldMappers != null) {
            FieldMapper mapper = fieldMappers.mapper();
            if (mapper != null) {
              name = mapper.names().fullName();
              value = mapper.valueForSearch(field);
            }
          }
          if (value == null) {
            // no mapper (or mapper produced null): fall back to the raw stored value
            if (field.isBinary()) {
              value = field.getBinaryValue();
            } else {
              value = field.stringValue();
            }
          }
          if (fields == null) {
            fields = newHashMapWithExpectedSize(2);
          }
          GetField getField = fields.get(name);
          if (getField == null) {
            getField = new GetField(name, new ArrayList<Object>(2));
            fields.put(name, getField);
          }
          getField.values().add(value);
        }
      }
      // now, go and do the script thingy if needed
      if (request.fields() != null && request.fields().length > 0) {
        SearchLookup searchLookup = null;
        for (String field : request.fields()) {
          String script = null;
          if (field.contains("_source.") || field.contains("doc[")) {
            // the field is itself a script expression
            script = field;
          } else {
            // non-stored mapped fields are fetched by scripting into _source
            FieldMappers x = docMapper.mappers().smartName(field);
            if (x != null && !x.mapper().stored()) {
              script = "_source." + x.mapper().names().fullName();
            }
          }
          if (script != null) {
            if (searchLookup == null) {
              // lazily created: only needed when at least one script field exists
              searchLookup =
                  new SearchLookup(
                      indexService.mapperService(), indexService.cache().fieldData());
            }
            SearchScript searchScript = scriptService.search(searchLookup, "mvel", script, null);
            searchScript.setNextReader(docIdAndVersion.reader);
            searchScript.setNextDocId(docIdAndVersion.docId);
            try {
              Object value = searchScript.run();
              if (fields == null) {
                fields = newHashMapWithExpectedSize(2);
              }
              GetField getField = fields.get(field);
              if (getField == null) {
                getField = new GetField(field, new ArrayList<Object>(2));
                fields.put(field, getField);
              }
              getField.values().add(value);
            } catch (RuntimeException e) {
              // script fields are best effort: a failing script does not fail the get
              if (logger.isTraceEnabled()) {
                logger.trace("failed to execute get request script field [{}]", e, script);
              }
              // ignore
            }
          }
        }
      }
    }
  } catch (IOException e) {
    throw new ElasticSearchException(
        "Failed to get type [" + request.type() + "] and id [" + request.id() + "]", e);
  } finally {
    // always release the searcher acquired above
    searcher.release();
  }
  return new GetResponse(
      request.index(), request.type(), request.id(), version, exists, source, fields);
}
/**
 * Converts the field's stored (indexed-form) value into its human-readable external form.
 */
@Override
public String toExternal(Fieldable f) {
  String indexedForm = f.stringValue();
  return indexedToReadable(indexedForm);
}
/**
 * Writes the field as a boolean: true when the stored value's first character is 'T',
 * false for anything else.
 */
@Override
public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException {
  boolean boolValue = f.stringValue().charAt(0) == 'T';
  writer.writeBool(name, boolValue);
}
/**
 * Builds the diagnostic marker string emitted when a stored value does not match the
 * schema's expectation for its field, embedding the raw stored value.
 */
static String badFieldString(Fieldable f) {
  return "ERROR:SCHEMA-INDEX-MISMATCH,stringValue=" + f.stringValue();
}