/**
 * Wraps the given data source and records the {@code charset} parameter declared in its
 * content type, if any.
 *
 * @param source the data source to wrap; its content type is read once, here
 * @throws MimeTypeParseException if the content type reported by {@code source} cannot be
 *     parsed as a MIME type
 */
public DataSourceSource(DataSource source) throws MimeTypeParseException {
    this.source = source;

    // No content type means no charset information is available.
    final String contentType = source.getContentType();
    this.charset =
        (contentType != null) ? new MimeType(contentType).getParameter("charset") : null;
  }
  /**
   * Load one MIME type mapping of the form {@code extension=mimetype}.
   *
   * <p>The line is lower-cased, split at the first {@code '='}, and then:
   *
   * <ul>
   *   <li>if the right-hand side parses as a MIME type carrying a non-empty {@link
   *       ExtendedMimeTypeResolver#X_TYPE_MODE} parameter, that mode is recorded in {@code
   *       modes} under the base type;
   *   <li>the right-hand side is appended to the list kept in {@code mimeTypes} for the
   *       extension;
   *   <li>the extension is appended to the list kept in {@code extentions} for the full
   *       right-hand side string (parameters included).
   * </ul>
   *
   * <p>A right-hand side that does not parse as a MIME type is logged but is still registered
   * in both lookup maps. A line without any {@code '='} is logged and skipped.
   *
   * @param aLine the mapping line to process
   */
  protected void processLine(String aLine) {
    // Locale.ROOT keeps the case folding independent of the JVM default locale
    // (a Turkish default locale would otherwise turn 'I' into a dotless 'ı').
    aLine = aLine.toLowerCase(java.util.Locale.ROOT);
    int p = aLine.indexOf('=');
    if (p < 0) {
      // Without a separator substring(0, p) would throw; skip the line instead.
      LOG.warn("Ignoring malformed mimetype line (no '=' separator): " + aLine);
      return;
    }

    String ext = aLine.substring(0, p);
    String mimetype = aLine.substring(p + 1);

    try {
      MimeType mimeType = new MimeType(mimetype);
      String mode = mimeType.getParameter(ExtendedMimeTypeResolver.X_TYPE_MODE);
      if (mode != null && mode.length() > 0) {
        String baseType = mimeType.getBaseType();
        Set<String> modeList = modes.get(baseType);
        if (modeList == null) {
          // insertion-ordered, so the first registered mode stays first
          modeList = new LinkedHashSet<String>();
          modes.put(baseType, modeList);
        }
        modeList.add(mode);
      }
    } catch (MimeTypeParseException e) {
      // Only the mode registration is skipped; the mapping itself is still recorded below.
      LOG.warn("Error parsing mimetype " + mimetype + ": " + e.getMessage());
    }

    // add mimetype
    List<String> values = mimeTypes.get(ext);
    if (values == null) {
      values = new ArrayList<String>();
      mimeTypes.put(ext, values);
    }
    values.add(mimetype);

    // add extension
    values = extentions.get(mimetype);
    if (values == null) {
      values = new ArrayList<String>();
      extentions.put(mimetype, values);
    }
    values.add(ext);
  }
 /**
  * Look up the optional representation (UI) mode, i.e. the x-type-mode parameter, for a MIME
  * type and/or a file name.
  *
  * <p>Resolution order:
  *
  * <ol>
  *   <li>If {@code type} is <code>null</code> and {@code name} is not, the type is first derived
  *       from the file name.
  *   <li>A non-empty mode parameter carried by the type string itself wins.
  *   <li>Otherwise, if exactly one mode is registered for the base type, that mode is returned.
  *   <li>Otherwise, if a file name is available, the lookup is repeated once with the type
  *       resolved from that name.
  *   <li>Otherwise the first registered mode of the base type is returned, if there is one.
  * </ol>
  *
  * <p>A type string that cannot be parsed is logged and yields <code>null</code>.
  *
  * @param type {@link String} a MIME type string or <code>null</code>
  * @param name {@link String} a file name or <code>null</code>
  * @return {@link String} with UI mode for given MIME type or <code>null</code> if mode cannot be
  *     determined
  */
 public String getMimeTypeMode(String type, String name) {
   String candidate = (type != null || name == null) ? type : getMimeType(name);
   // Consumed (set to null) once the name-based retry has been used.
   String retryName = name;

   try {
     while (candidate != null) {
       MimeType parsed = new MimeType(candidate);

       String explicitMode = parsed.getParameter(ExtendedMimeTypeResolver.X_TYPE_MODE);
       if (explicitMode != null && explicitMode.length() != 0) {
         return explicitMode;
       }

       // No mode on the type itself: consult the modes registered for its base type.
       Set<String> registered = modes.get(parsed.getBaseType());
       if (registered != null && registered.size() == 1) {
         // one-to-one relation, nothing to disambiguate
         return registered.iterator().next();
       }

       if (retryName == null) {
         // Last resort: several modes are registered, so pick the first one.
         if (registered != null && registered.size() > 0) {
           return registered.iterator().next();
         }
         return null;
       }

       // Ambiguous or unknown: retry exactly once with the type resolved from the file name.
       candidate = getMimeType(retryName);
       retryName = null;
     }
   } catch (MimeTypeParseException e) {
     LOG.warn("Error parsing mimetype " + candidate + ": " + e.getMessage());
   }
   return null;
 }
Beispiel #4
0
  /**
   * Insert or delete RDF statements in a model at a URL.
   *
   * <p>Every statement is globalized and then handled according to its object node:
   *
   * <ul>
   *   <li>Statements whose subject is a {@link BlankNode} are skipped.
   *   <li>{@link URIReference} object: on insert, the URI is opened as a URL, its content is
   *       read with the charset named by the content type (ISO8859-1 if none is given), HTML
   *       markup is stripped for {@code text/html}, and the text is added to the full text
   *       index. A URI that is not a valid URL is logged and ignored. On delete, the
   *       subject/predicate/URI entry is removed from the index.
   *   <li>{@link Literal} object: the lexical form is added to or removed from the index.
   *   <li>Any other object node is logged and ignored.
   * </ul>
   *
   * <p>An index add/remove that reports failure by returning {@code false} is only logged.
   *
   * @param model the model whose full text index is modified
   * @param statements the statements to insert or delete; iterated from the beginning
   * @param occurs {@code true} to insert the statements, {@code false} to delete them
   * @throws ResolverException if the statements cannot be read or globalized, the index cannot
   *     be opened or modified, or a referenced resource cannot be fetched or parsed
   */
  public void modifyModel(long model, Statements statements, boolean occurs)
      throws ResolverException {
    if (logger.isDebugEnabled()) {
      logger.debug("Modify URL model " + model);
    }

    try {
      FullTextStringIndex stringIndex = getFullTextStringIndex(model);

      statements.beforeFirst();
      while (statements.next()) {
        Node subjectNode = resolverSession.globalize(statements.getSubject());

        // Do not insert the triple if it contains a blank node in subject.
        if (subjectNode instanceof BlankNode) {
          if (logger.isInfoEnabled()) {
            logger.info(statements.getSubject() + " is blank node; ignoring Lucene insert.");
          }

          continue;
        }

        Node predicateNode = resolverSession.globalize(statements.getPredicate());
        Node objectNode = resolverSession.globalize(statements.getObject());

        // Get the subject's string value.
        String subject = ((URIReference) subjectNode).getURI().toString();

        // Predicates can only ever be URIReferences.
        String predicate = ((URIReference) predicateNode).getURI().toString();

        if (objectNode instanceof URIReference) {
          URIReference objectURI = (URIReference) objectNode;
          String resource = objectURI.getURI().toString();

          try {
            // Assert or deny the statement in the Lucene model
            if (occurs) {
              InputStream input = null;
              Reader reader = null;
              try {
                // Connect to the resource's content
                URLConnection connection = objectURI.getURI().toURL().openConnection();
                String contentType = connection.getContentType();

                if (logger.isDebugEnabled()) {
                  logger.debug("Content type of resource is " + contentType);
                }

                // getContentType() may return null; the helper copes with that.
                MimeType contentMimeType = contentMimeTypeOf(contentType);

                // If no character encoding is specified, guess at Latin-1
                String charSet = contentMimeType.getParameter("charset");
                if (charSet == null) {
                  charSet = "ISO8859-1";
                }

                // Get the content, performing appropriate character encoding
                input = connection.getInputStream();
                reader = new InputStreamReader(input, charSet);

                // Add a filter if the content type is text/html, to strip out
                // HTML keywords that will clutter the index
                try {
                  if (contentMimeType.match(new MimeType("text", "html"))) {
                    reader = HtmlToTextConverter.convert(reader);
                  }
                } catch (MimeTypeParseException em) {
                  throw new ResolverException("Failed to create mime-type", em);
                }

                if (logger.isDebugEnabled()) {
                  logger.debug("Inserting " + subject + " " + predicate + " " + resource);
                }

                if (!stringIndex.add(subject, predicate, resource, reader)) {
                  logger.warn(
                      "Unable to add {"
                          + subject
                          + ", "
                          + predicate
                          + ", "
                          + resource
                          + "} to full text string index");
                }
              } catch (MalformedURLException e) {
                logger.info(resource + " is not a URL; ignoring Lucene insert");
              } catch (IOException e) {
                throw new ResolverException("Can't obtain content of " + resource, e);
              } catch (org.mulgara.util.conversion.html.ParseException e) {
                throw new ResolverException("Couldn't parse content of " + resource, e);
              } finally {
                // Closing the reader also closes the stream it wraps; the bare stream is
                // closed only when no reader was created.
                try {
                  if (reader != null) reader.close();
                  else if (input != null) input.close();
                } catch (IOException e) {
                  logger.warn("Ignoring error closing resource content", e);
                }
              }
            } else { // (!occurs)
              if (logger.isDebugEnabled()) {
                logger.debug("Deleting " + subject + " " + predicate + " " + resource);
              }

              if (!stringIndex.remove(subject, predicate, resource)) {
                logger.warn(
                    "Unable to remove {"
                        + subject
                        + ", "
                        + predicate
                        + ", "
                        + resource
                        + "} from full text string index");
              }
            }
          } catch (FullTextStringIndexException e) {
            throw new ResolverException("Unable to modify full text index\n" + new StackTrace(e));
          }

        } else if (objectNode instanceof Literal) {
          Literal objectLiteral = (Literal) objectNode;
          String literal = objectLiteral.getLexicalForm();

          // Insert the statement into the text index
          try {
            if (occurs) {
              if (logger.isDebugEnabled()) {
                logger.debug("Inserting " + subject + " " + predicate + " " + literal);
              }

              if (!stringIndex.add(subject, predicate, literal)) {
                logger.warn(
                    "Unable to add {"
                        + subject
                        + ", "
                        + predicate
                        + ", "
                        + literal
                        + "} to full text string index");
              }
            } else {
              if (logger.isDebugEnabled()) {
                logger.debug("Deleting " + subject + " " + predicate + " " + literal);
              }

              if (!stringIndex.remove(subject, predicate, literal)) {
                logger.warn(
                    "Unable to remove {"
                        + subject
                        + ", "
                        + predicate
                        + ", "
                        + literal
                        + "} from full text string index");
              }
            }
          } catch (FullTextStringIndexException e) {
            throw new ResolverException(
                "Unable to "
                    + (occurs ? "add '" + literal + "' to" : "delete '" + literal + "' from")
                    + " full text string index\n"
                    + new StackTrace(e));
          }
        } else {
          if (logger.isInfoEnabled()) {
            logger.info(objectNode + " is blank node; ignoring Lucene insert.");
          }
        }
      }
    } catch (TuplesException et) {
      throw new ResolverException("Error fetching statements", et);
    } catch (GlobalizeException eg) {
      throw new ResolverException("Error globalizing statements", eg);
    } catch (IOException ioe) {
      throw new ResolverException("Failed to open string index", ioe);
    } catch (FullTextStringIndexException ef) {
      throw new ResolverException("Error in string index\n" + new StackTrace(ef));
    }
  }

  /**
   * Turn a reported content type into a {@link MimeType}, substituting {@code content/unknown}
   * when the value is missing or does not parse.
   *
   * @param contentType the content type string reported for a resource, possibly {@code null}
   * @return the parsed MIME type, or {@code content/unknown} as a fallback; never {@code null}
   * @throws ResolverException if even the fallback MIME type cannot be constructed
   */
  private MimeType contentMimeTypeOf(String contentType) throws ResolverException {
    if (contentType != null) {
      try {
        return new MimeType(contentType);
      } catch (MimeTypeParseException e) {
        logger.warn("\"" + contentType + "\" didn't parse as MIME type", e);
      }
    }

    try {
      return new MimeType("content", "unknown");
    } catch (MimeTypeParseException em) {
      throw new ResolverException("Failed to create mime-type", em);
    }
  }