/**
 * Checks whether the document content type reported by {@code context} matches any of the MIME
 * types held in {@code contentTypes}.
 *
 * <p>The document content type is parsed exactly once, before the loop. If it cannot be parsed, a
 * single warning is logged and the method reports no match, rather than re-parsing (and
 * re-logging the same failure) for every configured MIME type.
 *
 * @param context supplies the document content type via {@code getDocumentContentType()}
 * @return {@code true} if at least one entry of {@code contentTypes} matches the document content
 *     type; {@code false} if {@code contentTypes} is empty, the document content type is empty
 *     (as decided by {@code CommonUtils.isEmpty}) or unparsable, or no entry matches
 */
private boolean matchesContentType(IResultSetContext context) {
  String documentType = context.getDocumentContentType();
  if (contentTypes.isEmpty() || CommonUtils.isEmpty(documentType)) {
    return false;
  }

  // Parse up front: the string is loop-invariant, and a malformed value can never match anything.
  final MimeType documentMimeType;
  try {
    documentMimeType = new MimeType(documentType);
  } catch (MimeTypeParseException e) {
    log.warn("Bad document content type: " + documentType, e);
    return false;
  }

  for (MimeType mimeType : contentTypes) {
    if (mimeType.match(documentMimeType)) {
      return true;
    }
  }
  return false;
}
// Example no. 2
// 0
  /**
   * Insert or delete RDF statements in a model at a URL.
   *
   * <p>Every statement is globalized through {@code resolverSession} and then handled according to
   * the kind of its nodes:
   *
   * <ul>
   *   <li>subject is a {@code BlankNode}: the statement is skipped;
   *   <li>object is a {@code URIReference}: on insert, the content behind the URI is fetched,
   *       decoded with the charset named by its content type (Latin-1 when none is given), passed
   *       through {@code HtmlToTextConverter} when the type is {@code text/html}, and added to the
   *       full text string index; on delete, the triple is removed from the index;
   *   <li>object is a {@code Literal}: its lexical form is added to or removed from the index;
   *   <li>any other object node: the statement is logged and skipped.
   * </ul>
   *
   * <p>A resource that reports no content type, or one that does not parse as a MIME type, is
   * indexed as {@code content/unknown}.
   *
   * @param model identifier passed to {@code getFullTextStringIndex} to obtain the index to modify
   * @param statements the statements to apply; iteration is restarted with {@code beforeFirst()}
   * @param occurs {@code true} to insert the statements, {@code false} to delete them
   * @throws ResolverException if the statements cannot be read or globalized, if the index cannot
   *     be opened or modified, or if the content of a resource cannot be obtained or parsed
   */
  public void modifyModel(long model, Statements statements, boolean occurs)
      throws ResolverException {
    if (logger.isDebugEnabled()) {
      logger.debug("Modify URL model " + model);
    }

    try {
      FullTextStringIndex stringIndex = getFullTextStringIndex(model);

      statements.beforeFirst();
      while (statements.next()) {
        Node subjectNode = resolverSession.globalize(statements.getSubject());

        // Do not insert the triple if it contains a blank node in subject.
        if (subjectNode instanceof BlankNode) {
          if (logger.isInfoEnabled()) {
            logger.info(statements.getSubject() + " is blank node; ignoring Lucene insert.");
          }

          continue;
        }

        Node predicateNode = resolverSession.globalize(statements.getPredicate());
        Node objectNode = resolverSession.globalize(statements.getObject());

        // Get the subject's string value.
        String subject = ((URIReference) subjectNode).getURI().toString();

        // Predicates can only ever be URIReferences.
        String predicate = ((URIReference) predicateNode).getURI().toString();

        if (objectNode instanceof URIReference) {
          URIReference objectURI = (URIReference) objectNode;
          String resource = objectURI.getURI().toString();

          try {
            // Assert or deny the statement in the Lucene model
            if (occurs) {
              InputStream input = null;
              Reader reader = null;
              try {
                // Connect to the resource's content
                URLConnection connection = objectURI.getURI().toURL().openConnection();
                String contentType = connection.getContentType();

                if (logger.isDebugEnabled()) {
                  logger.debug("Content type of resource is " + contentType);
                }

                // getContentType() returns null when the type is not known; handing null to the
                // MimeType constructor would fail with a NullPointerException, so only parse a
                // value that is actually present.
                MimeType contentMimeType = null;
                if (contentType != null) {
                  try {
                    contentMimeType = new MimeType(contentType);
                  } catch (MimeTypeParseException e) {
                    logger.warn("\"" + contentType + "\" didn't parse as MIME type", e);
                  }
                }

                // Missing or unparsable content type: fall back to a placeholder type.
                if (contentMimeType == null) {
                  try {
                    contentMimeType = new MimeType("content", "unknown");
                  } catch (MimeTypeParseException em) {
                    throw new ResolverException("Failed to create mime-type", em);
                  }
                }

                // If no character encoding is specified, guess at Latin-1
                String charSet = contentMimeType.getParameter("charset");
                if (charSet == null) {
                  charSet = "ISO8859-1";
                }

                // Get the content, performing appropriate character encoding
                input = connection.getInputStream();
                reader = new InputStreamReader(input, charSet);

                // Add a filter if the content type is text/html, to strip out
                // HTML keywords that will clutter the index
                try {
                  if (contentMimeType.match(new MimeType("text", "html"))) {
                    reader = HtmlToTextConverter.convert(reader);
                  }
                } catch (MimeTypeParseException em) {
                  throw new ResolverException("Failed to create mime-type", em);
                }

                if (logger.isDebugEnabled()) {
                  logger.debug("Inserting " + subject + " " + predicate + " " + resource);
                }

                if (!stringIndex.add(subject, predicate, resource, reader)) {
                  logger.warn(
                      "Unable to add {"
                          + subject
                          + ", "
                          + predicate
                          + ", "
                          + resource
                          + "} to full text string index");
                }
              } catch (MalformedURLException e) {
                // A URI that is not a URL has no content to fetch; skip it rather than fail.
                logger.info(resource + " is not a URL; ignoring Lucene insert");
              } catch (IOException e) {
                throw new ResolverException("Can't obtain content of " + resource, e);
              } catch (org.mulgara.util.conversion.html.ParseException e) {
                throw new ResolverException("Couldn't parse content of " + resource, e);
              } finally {
                // Close the outermost wrapper that was created; fall back to the raw stream
                // only if no reader was ever built around it.
                try {
                  if (reader != null) {
                    reader.close();
                  } else if (input != null) {
                    input.close();
                  }
                } catch (IOException e) {
                  logger.warn("Ignoring error closing resource content", e);
                }
              }
            } else { // (!occurs)
              if (logger.isDebugEnabled()) {
                logger.debug("Deleting " + subject + " " + predicate + " " + resource);
              }

              if (!stringIndex.remove(subject, predicate, resource)) {
                logger.warn(
                    "Unable to remove {"
                        + subject
                        + ", "
                        + predicate
                        + ", "
                        + resource
                        + "} from full text string index");
              }
            }
          } catch (FullTextStringIndexException e) {
            throw new ResolverException("Unable to modify full text index\n" + new StackTrace(e));
          }

        } else if (objectNode instanceof Literal) {
          Literal objectLiteral = (Literal) objectNode;
          String literal = objectLiteral.getLexicalForm();

          // Insert the statement into the text index
          try {
            if (occurs) {
              if (logger.isDebugEnabled()) {
                logger.debug("Inserting " + subject + " " + predicate + " " + literal);
              }

              if (!stringIndex.add(subject, predicate, literal)) {
                logger.warn(
                    "Unable to add {"
                        + subject
                        + ", "
                        + predicate
                        + ", "
                        + literal
                        + "} to full text string index");
              }
            } else {
              if (logger.isDebugEnabled()) {
                logger.debug("Deleting " + subject + " " + predicate + " " + literal);
              }

              if (!stringIndex.remove(subject, predicate, literal)) {
                logger.warn(
                    "Unable to remove {"
                        + subject
                        + ", "
                        + predicate
                        + ", "
                        + literal
                        + "} from full text string index");
              }
            }
          } catch (FullTextStringIndexException e) {
            // The space before the quoted literal keeps the verb and the literal apart.
            throw new ResolverException(
                "Unable to "
                    + (occurs ? "add" : "delete")
                    + " '"
                    + literal
                    + "' to full text string index\n"
                    + new StackTrace(e));
          }
        } else {
          // Neither a URI reference nor a literal: nothing that can be indexed.
          if (logger.isInfoEnabled()) {
            logger.info(objectNode + " is blank node; ignoring Lucene insert.");
          }
        }
      }
    } catch (TuplesException et) {
      throw new ResolverException("Error fetching statements", et);
    } catch (GlobalizeException eg) {
      // NOTE(review): the message says "localizing" although the calls above are globalize();
      // left unchanged here -- confirm before rewording.
      throw new ResolverException("Error localizing statements", eg);
    } catch (IOException ioe) {
      throw new ResolverException("Failed to open string index", ioe);
    } catch (FullTextStringIndexException ef) {
      throw new ResolverException("Error in string index\n" + new StackTrace(ef));
    }
  }