/**
 * Applies the configured modification to every document listed in {@code attributes}.
 *
 * <p>For each primary-key entry the current document is looked up through {@code searcher},
 * copied field by field into a fresh {@link Document}, changed according to {@code action} and
 * handed to {@code writer.updateDocuments} under a term built from the primary key.
 *
 * @param writer index writer that receives the rebuilt documents
 * @param searcher searcher used to locate the current version of each document
 * @throws ModifyKnowledgeBaseException if no document matches a key, a field to update does not
 *     exist in the document, the key cannot be parsed as a query, or index access fails with an
 *     {@link IOException}
 */
@Override
 public void modifyIndex(final IndexWriter writer, final IndexSearcher searcher)
     throws ModifyKnowledgeBaseException {
   for (final Map.Entry<String, HashMap<String, String>> target : this.attributes.entrySet()) {
     final String primaryKey = target.getKey();
     final HashMap<String, String> changes = target.getValue();
     final QueryParser parser = new QueryParser(this.docPrimaryKey, new DoserIDAnalyzer());
     try {
       // Only the best hit is of interest; the key is escaped so it is taken literally.
       final ScoreDoc[] hits =
           searcher.search(parser.parse(QueryParserBase.escape(primaryKey)), 1).scoreDocs;
       if (hits.length == 0) {
         throw new ModifyKnowledgeBaseException("Document not found", null);
       }

       // The stored document is not modified in place: its string-valued fields are copied
       // into a new Document, which is then edited.
       final Document stored = searcher.getIndexReader().document(hits[0].doc);
       final Document rebuilt = new Document();
       for (final IndexableField source : stored.getFields()) {
         final String value = source.stringValue();
         if (value == null) {
           continue;
         }
         final boolean isKeyField = source.name().equalsIgnoreCase(docPrimaryKey);
         rebuilt.add(
             isKeyField
                 ? new StringField(source.name(), value, Field.Store.YES)
                 : new TextField(source.name(), value, Field.Store.YES));
       }

       final List<Document> replacement = new LinkedList<Document>();
       replacement.add(rebuilt);

       for (final Map.Entry<String, String> change : changes.entrySet()) {
         final String fieldName = change.getKey();
         final IndexableField existing = rebuilt.getField(fieldName);
         if (existing == null) {
           throw new ModifyKnowledgeBaseException("UpdateField no found", null);
         }
         if (this.action.equals(KBModifications.OVERRIDEFIELD)) {
           // Replace the field by one TextField per separated value.
           rebuilt.removeFields(fieldName);
           for (final String part : generateSeperatedFieldStrings(change.getValue())) {
             rebuilt.add(new TextField(fieldName, part, Field.Store.YES));
           }
         } else if (this.action.equals(KBModifications.UPDATERELATEDLABELS)) {
           rebuilt.removeFields(fieldName);
           rebuilt.add(updateOccurrences(change.getValue(), existing, "surroundinglabels"));
         } else if (this.action.equals(KBModifications.UPDATEOCCURRENCES)) {
           rebuilt.removeFields(fieldName);
           rebuilt.add(updateOccurrences(change.getValue(), existing, "occurrences"));
         }
       }
       writer.updateDocuments(new Term(this.docPrimaryKey, primaryKey), replacement);
     } catch (final IOException e) {
       throw new ModifyKnowledgeBaseException("IOException in IndexSearcher", e);
     } catch (ParseException e) {
       throw new ModifyKnowledgeBaseException("Queryparser Exception", e);
     }
   }
 }
// Example #2
// 0
  /**
   * Fills in the source data of each hit (and, recursively, of its group hits) from the hit's
   * stored value or stored fields.
   *
   * <p>Lookup order: the hit's stored value, then the {@code DOCUMENT_STORE_FIELD}, the
   * compressed source-data field and finally the plain source-data field of the stored
   * document. Bytes from every source except the plain field are passed through
   * {@code DefaultJsonSchemaInterpreter.decompress}; if that fails the raw bytes are used
   * unchanged. Failures for a single hit are logged and recorded in {@code res} as a
   * {@code BrokerGatherError}; processing continues with the next hit.
   *
   * @param res result whose error list receives per-hit failures
   * @param hits hits to process; {@code null} is accepted and ignored
   * @param isFetchStoredFields when {@code false}, the stored fields of every hit are cleared
   *     after processing
   */
  public static void recoverSrcData(
      SenseiResult res, SenseiHit[] hits, boolean isFetchStoredFields) {
    if (hits == null) {
      return;
    }

    for (SenseiHit current : hits) {
      try {
        byte[] raw = current.getStoredValue();
        if (raw == null || raw.length == 0) {
          Document stored = current.getStoredFields();
          if (stored != null) {
            raw = stored.getBinaryValue(AbstractZoieIndexable.DOCUMENT_STORE_FIELD);

            if (raw == null || raw.length == 0) {
              raw = stored.getBinaryValue(SenseiSchema.SRC_DATA_COMPRESSED_FIELD_NAME);

              if (raw == null || raw.length == 0) {
                raw = stored.getBinaryValue(SenseiSchema.SRC_DATA_FIELD_NAME);
                if (raw != null && raw.length > 0) {
                  // Plain source data is used as is; clearing the buffer skips the
                  // decompression step below.
                  current.setSrcData(new String(raw, "UTF-8"));
                  raw = null;
                }
              }
              // The source-data fields are dropped from the stored document once consulted.
              stored.removeFields(SenseiSchema.SRC_DATA_COMPRESSED_FIELD_NAME);
              stored.removeFields(SenseiSchema.SRC_DATA_FIELD_NAME);
            }
          }
        }

        if (raw != null && raw.length > 0) {
          byte[] decoded;
          try {
            decoded = DefaultJsonSchemaInterpreter.decompress(raw);
          } catch (Exception ex) {
            // Decompression failed: fall back to the bytes exactly as they were stored.
            decoded = raw;
          }
          current.setSrcData(new String(decoded, "UTF-8"));
        }
      } catch (Exception e) {
        logger.error(e.getMessage(), e);
        res.getErrors().add(new SenseiError(e.getMessage(), ErrorType.BrokerGatherError));
      }

      // Nested group hits get the same treatment.
      recoverSrcData(res, current.getSenseiGroupHits(), isFetchStoredFields);

      // Remove stored fields since the user is not requesting them.
      if (!isFetchStoredFields) {
        current.setStoredFields(null);
      }
    }
  }
 /**
  * Removes every field named {@code fieldName} from the stored document {@code docId} and
  * writes the result back through the index writer, keyed by the document's {@code "URI"} term.
  *
  * <p>The {@code "URI"} field is read after the removal, so a call that removes {@code "URI"}
  * itself is rejected instead of being written to the index.
  *
  * @param fieldName name of the field(s) to drop from the document
  * @param docId id of the document as known to the reader
  * @param analyzer analyzer passed to {@code writer.updateDocument}
  * @throws IOException if reading the document or updating the index fails
  * @throws ConfigurationException declared by the original signature; not thrown directly here
  * @throws IllegalStateException if the document has no usable {@code "URI"} field after the
  *     removal, because the update term cannot be built without it
  */
 public void deleteFieldFromIndex(String fieldName, int docId, Analyzer analyzer)
     throws IOException, ConfigurationException {
   Document doc = reader.document(docId);
   doc.removeFields(fieldName);
   Field uri = doc.getField("URI");
   // Fail fast with context instead of a bare NullPointerException when the key is missing.
   if (uri == null || uri.stringValue() == null) {
     throw new IllegalStateException(
         "Document " + docId + " has no \"URI\" field after removing field \"" + fieldName
             + "\"; cannot build the update term");
   }
   Term term = new Term("URI", uri.stringValue());
   writer.updateDocument(term, doc, analyzer);
 }
// Example #4
// 0
  /**
   * Shifts the {@code LAST_MODIFIED} value of every non-deleted document in the context, and the
   * context timestamp itself, by {@code shiftDays} days. Does nothing when {@code shiftDays} is
   * zero.
   *
   * <p>Documents without a {@code LAST_MODIFIED} value are left untouched. After all documents
   * are rewritten the index writer is optimized and closed. If the context has no timestamp, the
   * new timestamp is based on the current system time.
   *
   * @param ctx indexing context whose documents and timestamp are shifted
   * @param shiftDays number of days to add; may be negative
   * @throws IOException if reading or updating the index fails
   * @throws NumberFormatException if a stored {@code LAST_MODIFIED} value is not a valid long
   */
  protected void shiftContextInTime(IndexingContext ctx, int shiftDays) throws IOException {
    if (shiftDays == 0) {
      return;
    }

    // Widen to long before multiplying so a large shift cannot wrap around in int arithmetic.
    // This only matters if A_DAY_MILLIS is declared as int; it is harmless otherwise.
    final long shiftMillis = (long) shiftDays * A_DAY_MILLIS;

    IndexWriter iw = ctx.getIndexWriter();

    // The reader is fetched from the context on every access, as before: whether the context
    // hands back the same reader instance after an update is not visible from here.
    for (int docNum = 0; docNum < ctx.getIndexReader().maxDoc(); docNum++) {
      if (ctx.getIndexReader().isDeleted(docNum)) {
        continue;
      }

      Document doc = ctx.getIndexReader().document(docNum);

      String lastModified = doc.get(ArtifactInfo.LAST_MODIFIED);
      if (lastModified == null) {
        continue;
      }

      long lm = Long.parseLong(lastModified) + shiftMillis;

      // Replace the stored value; the field is stored but not indexed.
      doc.removeFields(ArtifactInfo.LAST_MODIFIED);
      doc.add(
          new Field(
              ArtifactInfo.LAST_MODIFIED,
              Long.toString(lm),
              Field.Store.YES,
              Field.Index.NO));

      iw.updateDocument(new Term(ArtifactInfo.UINFO, doc.get(ArtifactInfo.UINFO)), doc);
    }

    iw.optimize();

    iw.close();

    // shift timestamp too
    final Date timestamp = ctx.getTimestamp();
    final long base = timestamp != null ? timestamp.getTime() : System.currentTimeMillis();
    ctx.updateTimestamp(true, new Date(base + shiftMillis));
  }
  /**
   * Hands every {@code GeoCoordinateField} of the current document to the geo indexer, removes
   * those fields from the document and then delegates to the default per-thread consumer.
   *
   * @return whatever the default consumer's {@code processDocument()} returns
   * @throws IOException as declared by the original signature
   */
  @Override
  DocWriter processDocument() throws IOException {
    final Document currentDoc = docState.doc;
    final int currentDocId = docState.docID;

    // First pass: only collect the geo fields. Removal is deferred to a second pass,
    // presumably so the document's field list is not modified while it is being iterated.
    final List<GeoCoordinateField> collected = new Vector<GeoCoordinateField>();
    for (Fieldable candidate : currentDoc.getFields()) {
      if (!(candidate instanceof GeoCoordinateField)) {
        continue;
      }
      collected.add((GeoCoordinateField) candidate);
    }

    // Second pass: index each geo field, then strip all fields of that name from the document.
    for (GeoCoordinateField geo : collected) {
      geoIndexer.index(currentDocId, geo);
      currentDoc.removeFields(geo.name());
    }

    return defaultDocConsumerPerThread.processDocument();
  }
 /**
  * Extends the document produced by the superclass with image metadata read from {@code file}.
  *
  * <p>For a non-null document and a non-empty file this adds width, height, bits per pixel,
  * pixel count and aspect ratio, replaces the MIME type with the one reported by Sanselan and,
  * for JPEG files, adds selected EXIF values (create date, user comment, make, model, GPS
  * position). When {@code classifyImage} is set, the values returned by
  * {@code ImageUtil.analyzeRGB} are added as {@code IntPoint} fields. Any {@link Throwable}
  * raised during this analysis is logged at INFO level and otherwise ignored, so the document
  * is still returned with whatever was added up to that point.
  *
  * @param indexWriter passed through to the superclass
  * @param state passed through to the superclass
  * @param file the file being indexed
  * @return the document from the superclass, possibly enriched; may be {@code null} if the
  *     superclass returned {@code null}
  * @throws Exception if the superclass call fails
  */
 @Override
 public Document handleDocument(IndexWriter indexWriter, IndexState state, File file)
     throws Exception {
   Document doc = super.handleDocument(indexWriter, state, file);
   /* create/update document */
   if (doc != null && file.length() > 0) {
     /* Ultra-fast header analysis. Original author's note: using a FileInputStream caused
      * "too many file descriptors" exceptions on Samba network shares. */
     try {
       ImageInfo imageInfo = Sanselan.getImageInfo(file);
       /* default image meta information */
       int w = imageInfo.getWidth();
       int h = imageInfo.getHeight();
       /* image width */
       get(ImageWidthField.NAME).add(doc, w);
       /* image height */
       get(ImageHeightField.NAME).add(doc, h);
       /* bits per pixel */
       get(BitsPerPixelField.NAME).add(doc, imageInfo.getBitsPerPixel());
       /* total number of pixels (int multiplication) */
       get(PixelSizeField.NAME).add(doc, w * h);
       /* image aspect ratio, computed in float arithmetic */
       get(AspectRatioField.NAME).add(doc, w / (float) h);
       /* MIME type as detected from the image content */
       String mime = imageInfo.getMimeType();
       /* Override the existing MIME type field: the content type detected from the image
        * can differ from the one derived from the file extension. */
       doc.removeFields(MimeTypeField.NAME);
       get(MimeTypeField.NAME).add(doc, mime);
       /* special JPEG handling: try to extract EXIF metadata */
       if (mime != null && mime.equals("image/jpeg")) {
         IImageMetadata metadata = Sanselan.getMetadata(file);
         if (metadata instanceof JpegImageMetadata) {
           JpegImageMetadata jpegMetadata = (JpegImageMetadata) metadata;
           /* create date of the image */
           TiffField field = jpegMetadata.findEXIFValue(ExifTagConstants.EXIF_TAG_CREATE_DATE);
           if (field != null) {
             Date date = DateUtil.parseEXIFFormat(field.getStringValue());
             /* a null date means the format was not recognised; keep the existing value */
             if (date != null) {
               /* override file last modified date */
               doc.removeFields(LastModifiedField.NAME);
               get(LastModifiedField.NAME).add(doc, date.getTime());
             }
           }
           /* user comment tag */
           field = jpegMetadata.findEXIFValue(ExifTagConstants.EXIF_TAG_USER_COMMENT);
           if (field != null) {
             get(CommentField.NAME).add(doc, field.getStringValue().trim());
           }
           /* make tag */
           field = jpegMetadata.findEXIFValue(ExifTagConstants.EXIF_TAG_MAKE);
           if (field != null) {
             get(ExifMakeField.NAME).add(doc, field.getStringValue().trim());
           }
           /* model tag */
           field = jpegMetadata.findEXIFValue(ExifTagConstants.EXIF_TAG_MODEL);
           if (field != null) {
             get(ExifModelField.NAME).add(doc, field.getStringValue().trim());
           }
           /* date time tag: the same EXIF create date as above, read again and this time
            * added as the dedicated EXIF date field */
           field = jpegMetadata.findEXIFValue(ExifTagConstants.EXIF_TAG_CREATE_DATE);
           if (field != null) {
             Date date = DateUtil.parseEXIFFormat(field.getStringValue());
             /* a null date means the format was not recognised; add nothing */
             if (date != null) {
               get(ExifDateField.NAME).add(doc, date.getTime());
             }
           }
           /* try to find GPS information */
           TiffImageMetadata exifMetadata = jpegMetadata.getExif();
           if (exifMetadata != null) {
             try {
               TiffImageMetadata.GPSInfo gpsInfo = exifMetadata.getGPS();
               if (null != gpsInfo) {
                 get(LatField.NAME).add(doc, gpsInfo.getLatitudeAsDegreesNorth());
                 get(LonField.NAME).add(doc, gpsInfo.getLongitudeAsDegreesEast());
                 /* NOTE: the GPS image direction (GPS_TAG_GPS_IMG_DIRECTION) is not indexed;
                  * an earlier attempt to add it as a numeric field was disabled. */
               }
             } catch (ImageReadException e) {
               /* GPS data could not be read: deliberately ignored, the document simply
                * gets no latitude/longitude fields */
             }
           }
         }
       }
       /* run some classification: one IntPoint field per value returned by analyzeRGB,
        * named COLORMEAN followed by the value's index */
       if (classifyImage) {
         int[] v = ImageUtil.analyzeRGB(file);
         for (int i = 0; i < v.length; i++) {
           doc.add(new IntPoint(COLORMEAN + i, v[i]));
         }
       }
     } catch (Throwable e) {
       /* only log: image analysis is best effort and must not fail the indexing of the file */
       log.log(Level.INFO, "can not extract image informations!", e);
     }
   }
   return doc;
 }