@Override public void modifyIndex(final IndexWriter writer, final IndexSearcher searcher) throws ModifyKnowledgeBaseException { for (final Map.Entry<String, HashMap<String, String>> entry : this.attributes.entrySet()) { final String key = entry.getKey(); final HashMap<String, String> hash = entry.getValue(); final QueryParser qp = new QueryParser(this.docPrimaryKey, new DoserIDAnalyzer()); try { final TopDocs top = searcher.search(qp.parse(QueryParserBase.escape(key)), 1); final ScoreDoc[] scores = top.scoreDocs; if (scores.length > 0) { final Document doc = new Document(); final Document currentDoc = searcher.getIndexReader().document(scores[0].doc); // BugFix create new Document und copy Fields. final List<IndexableField> fields = currentDoc.getFields(); for (final IndexableField field : fields) { if (field.stringValue() != null) { if (field.name().equalsIgnoreCase(docPrimaryKey)) { doc.add(new StringField(field.name(), field.stringValue(), Field.Store.YES)); } else { doc.add(new TextField(field.name(), field.stringValue(), Field.Store.YES)); } } } final List<Document> docListToAdd = new LinkedList<Document>(); docListToAdd.add(doc); for (final Map.Entry<String, String> subentry : hash.entrySet()) { final IndexableField field = doc.getField(subentry.getKey()); if (field == null) { throw new ModifyKnowledgeBaseException("UpdateField no found", null); } if (this.action.equals(KBModifications.OVERRIDEFIELD)) { doc.removeFields(subentry.getKey()); String[] newentries = generateSeperatedFieldStrings(subentry.getValue()); for (int i = 0; i < newentries.length; i++) { doc.add(new TextField(subentry.getKey(), newentries[i], Field.Store.YES)); } } else if (this.action.equals(KBModifications.UPDATERELATEDLABELS)) { doc.removeFields(subentry.getKey()); doc.add(updateOccurrences(subentry.getValue(), field, "surroundinglabels")); } else if (this.action.equals(KBModifications.UPDATEOCCURRENCES)) { doc.removeFields(subentry.getKey()); IndexableField f = 
updateOccurrences(subentry.getValue(), field, "occurrences"); doc.add(f); } } writer.updateDocuments(new Term(this.docPrimaryKey, key), docListToAdd); } else { throw new ModifyKnowledgeBaseException("Document not found", null); } } catch (final IOException e) { throw new ModifyKnowledgeBaseException("IOException in IndexSearcher", e); } catch (ParseException e) { throw new ModifyKnowledgeBaseException("Queryparser Exception", e); } } }
public static void recoverSrcData( SenseiResult res, SenseiHit[] hits, boolean isFetchStoredFields) { if (hits != null) { for (SenseiHit hit : hits) { try { byte[] dataBytes = hit.getStoredValue(); if (dataBytes == null || dataBytes.length == 0) { Document doc = hit.getStoredFields(); if (doc != null) { dataBytes = doc.getBinaryValue(AbstractZoieIndexable.DOCUMENT_STORE_FIELD); if (dataBytes == null || dataBytes.length == 0) { dataBytes = doc.getBinaryValue(SenseiSchema.SRC_DATA_COMPRESSED_FIELD_NAME); if (dataBytes == null || dataBytes.length == 0) { dataBytes = doc.getBinaryValue(SenseiSchema.SRC_DATA_FIELD_NAME); if (dataBytes != null && dataBytes.length > 0) { hit.setSrcData(new String(dataBytes, "UTF-8")); dataBytes = null; // set to null to avoid gunzip. } } doc.removeFields(SenseiSchema.SRC_DATA_COMPRESSED_FIELD_NAME); doc.removeFields(SenseiSchema.SRC_DATA_FIELD_NAME); } } } if (dataBytes != null && dataBytes.length > 0) { byte[] data; try { data = DefaultJsonSchemaInterpreter.decompress(dataBytes); } catch (Exception ex) { data = dataBytes; } hit.setSrcData(new String(data, "UTF-8")); } } catch (Exception e) { logger.error(e.getMessage(), e); res.getErrors().add(new SenseiError(e.getMessage(), ErrorType.BrokerGatherError)); } recoverSrcData(res, hit.getSenseiGroupHits(), isFetchStoredFields); // Remove stored fields since the user is not requesting: if (!isFetchStoredFields) hit.setStoredFields(null); } } }
/**
 * Removes every field named {@code fieldName} from the document with the given id and writes the
 * document back, replacing whatever is indexed under the document's {@code URI} term.
 *
 * <p>NOTE(review): the document written back is the one returned by {@code reader.document(docId)}.
 * In Lucene that normally contains stored fields only, so confirm that no indexed-but-unstored
 * fields are lost by this round trip.
 *
 * @param fieldName name of the field(s) to remove from the document
 * @param docId id of the document as known to {@code reader}
 * @param analyzer analyzer passed to the writer when the document is re-indexed
 * @throws IOException if loading or updating the document fails
 * @throws ConfigurationException declared by the signature; nothing visible in this method throws
 *     it directly
 * @throws IllegalStateException if, after the removal, the document has no {@code URI} field with
 *     a string value (for example when {@code fieldName} is itself {@code "URI"}); nothing is
 *     written in that case
 */
public void deleteFieldFromIndex(String fieldName, int docId, Analyzer analyzer)
    throws IOException, ConfigurationException {
  Document doc = reader.document(docId);
  doc.removeFields(fieldName);

  // The URI identifies the document to replace; without it the update cannot be addressed.
  // Fail with a clear message instead of an anonymous NullPointerException.
  Field uri = doc.getField("URI");
  if (uri == null || uri.stringValue() == null) {
    throw new IllegalStateException(
        "Document " + docId + " has no URI value after removing field '" + fieldName + "'");
  }

  Term term = new Term("URI", uri.stringValue());
  writer.updateDocument(term, doc, analyzer);
}
protected void shiftContextInTime(IndexingContext ctx, int shiftDays) throws IOException { if (shiftDays != 0) { IndexWriter iw = ctx.getIndexWriter(); for (int docNum = 0; docNum < ctx.getIndexReader().maxDoc(); docNum++) { if (!ctx.getIndexReader().isDeleted(docNum)) { Document doc = ctx.getIndexReader().document(docNum); String lastModified = doc.get(ArtifactInfo.LAST_MODIFIED); if (lastModified != null) { long lm = Long.parseLong(lastModified); lm = lm + (shiftDays * A_DAY_MILLIS); doc.removeFields(ArtifactInfo.LAST_MODIFIED); doc.add( new Field( ArtifactInfo.LAST_MODIFIED, Long.toString(lm), Field.Store.YES, Field.Index.NO)); iw.updateDocument(new Term(ArtifactInfo.UINFO, doc.get(ArtifactInfo.UINFO)), doc); } } } iw.optimize(); iw.close(); // shift timestamp too if (ctx.getTimestamp() != null) { ctx.updateTimestamp( true, new Date(ctx.getTimestamp().getTime() + (shiftDays * A_DAY_MILLIS))); } else { ctx.updateTimestamp( true, new Date(System.currentTimeMillis() + (shiftDays * A_DAY_MILLIS))); } } }
@Override DocWriter processDocument() throws IOException { // this is where we process the geo-search components of the document Document doc = docState.doc; int docID = docState.docID; List<Fieldable> fields = doc.getFields(); List<GeoCoordinateField> geoFields = new Vector<GeoCoordinateField>(); for (Fieldable field : fields) { if (field instanceof GeoCoordinateField) { geoFields.add((GeoCoordinateField) field); } } for (GeoCoordinateField geoField : geoFields) { // process field into GeoIndex here geoIndexer.index(docID, geoField); doc.removeFields(geoField.name()); } return defaultDocConsumerPerThread.processDocument(); }
@Override public Document handleDocument(IndexWriter indexWriter, IndexState state, File file) throws Exception { Document doc = super.handleDocument(indexWriter, state, file); /* create/update document */ if (doc != null && file.length() > 0) { /* ultra fast header analysis, with fileinputstream - too many filedescripor * exeption on samba network */ try { ImageInfo imageInfo = Sanselan.getImageInfo(file); /* default image meta informations */ int w = imageInfo.getWidth(); int h = imageInfo.getHeight(); /* image width */ get(ImageWidthField.NAME).add(doc, w); /* image height */ get(ImageHeightField.NAME).add(doc, h); /* bits per pixel */ get(BitsPerPixelField.NAME).add(doc, imageInfo.getBitsPerPixel()); /* complete size */ get(PixelSizeField.NAME).add(doc, w * h); /* image aspect ratio */ get(AspectRatioField.NAME).add(doc, w / (float) h); /* try to extract metadata from jpeg files */ String mime = imageInfo.getMimeType(); /* override because they are different content types than extensions */ doc.removeFields(MimeTypeField.NAME); get(MimeTypeField.NAME).add(doc, mime); /* special jpeg handling */ if (mime != null && mime.equals("image/jpeg")) { IImageMetadata metadata = Sanselan.getMetadata(file); if (metadata instanceof JpegImageMetadata) { JpegImageMetadata jpegMetadata = (JpegImageMetadata) metadata; /* create date of the image */ TiffField field = jpegMetadata.findEXIFValue(ExifTagConstants.EXIF_TAG_CREATE_DATE); if (field != null) { Date date = DateUtil.parseEXIFFormat(field.getStringValue()); /* else unknown format */ if (date != null) { /* override file last modified date */ doc.removeFields(LastModifiedField.NAME); get(LastModifiedField.NAME).add(doc, date.getTime()); } } /* user comment tag */ field = jpegMetadata.findEXIFValue(ExifTagConstants.EXIF_TAG_USER_COMMENT); if (field != null) { get(CommentField.NAME).add(doc, field.getStringValue().trim()); } /* make tag */ field = jpegMetadata.findEXIFValue(ExifTagConstants.EXIF_TAG_MAKE); if (field != null) 
{ get(ExifMakeField.NAME).add(doc, field.getStringValue().trim()); } /* model tag */ field = jpegMetadata.findEXIFValue(ExifTagConstants.EXIF_TAG_MODEL); if (field != null) { get(ExifModelField.NAME).add(doc, field.getStringValue().trim()); } /* date time tag */ field = jpegMetadata.findEXIFValue(ExifTagConstants.EXIF_TAG_CREATE_DATE); if (field != null) { Date date = DateUtil.parseEXIFFormat(field.getStringValue()); /* else unknown format */ if (date != null) { get(ExifDateField.NAME).add(doc, date.getTime()); } } /* try to find gps informations */ TiffImageMetadata exifMetadata = jpegMetadata.getExif(); if (exifMetadata != null) { try { TiffImageMetadata.GPSInfo gpsInfo = exifMetadata.getGPS(); if (null != gpsInfo) { get(LatField.NAME).add(doc, gpsInfo.getLatitudeAsDegreesNorth()); get(LonField.NAME).add(doc, gpsInfo.getLongitudeAsDegreesEast()); // doc.add(new // NumericField(FIELD_EXIF_GPS_IMG_DIRECTION, Field.Store.YES, // true).setIntValue(gps.getRational( // // GpsDirectory.TAG_GPS_IMG_DIRECTION).intValue())); // // System.out.println(exifMetadata.findField(GPSTagConstants.GPS_TAG_GPS_IMG_DIRECTION)); } } catch (ImageReadException e) { } } } } /* run some classification */ if (classifyImage) { int[] v = ImageUtil.analyzeRGB(file); for (int i = 0; i < v.length; i++) { doc.add(new IntPoint(COLORMEAN + i, v[i])); } } } catch (Throwable e) { /* only log */ log.log(Level.INFO, "can not extract image informations!", e); } } return doc; }