/**
 * Tells whether the document content type reported by the given context
 * matches at least one of the configured content types.
 *
 * @param context source of the document content type
 * @return {@code true} if any configured MIME type matches the document
 *         content type; {@code false} if nothing matches, if no content types
 *         are configured, or if the document content type is empty
 */
private boolean matchesContentType(IResultSetContext context) {
    final String docContentType = context.getDocumentContentType();

    // Only scan when there is something to compare on both sides.
    if (!contentTypes.isEmpty() && !CommonUtils.isEmpty(docContentType)) {
        for (MimeType candidate : contentTypes) {
            boolean matched = false;
            try {
                matched = candidate.match(docContentType);
            } catch (MimeTypeParseException e) {
                // An unparsable document type is reported and the scan goes on.
                log.warn("Bad document content type: " + docContentType, e);
            }
            if (matched) {
                return true;
            }
        }
    }
    return false;
}
/** Insert or delete RDF statements in a model at a URL. */ public void modifyModel(long model, Statements statements, boolean occurs) throws ResolverException { if (logger.isDebugEnabled()) { logger.debug("Modify URL model " + model); } try { FullTextStringIndex stringIndex = getFullTextStringIndex(model); statements.beforeFirst(); while (statements.next()) { Node subjectNode = resolverSession.globalize(statements.getSubject()); // Do not insert the triple if it contains a blank node in subject. if (subjectNode instanceof BlankNode) { if (logger.isInfoEnabled()) { logger.info(statements.getSubject() + " is blank node; ignoring Lucene insert."); } continue; } Node predicateNode = resolverSession.globalize(statements.getPredicate()); Node objectNode = resolverSession.globalize(statements.getObject()); // Get the subject's string value. String subject = ((URIReference) subjectNode).getURI().toString(); // Predicates can only ever be URIReferences. String predicate = ((URIReference) predicateNode).getURI().toString(); if (objectNode instanceof URIReference) { URIReference objectURI = (URIReference) objectNode; String resource = objectURI.getURI().toString(); try { // Assert or deny the statement in the Lucene model if (occurs) { InputStream input = null; Reader reader = null; try { // Connect to the resource's content URLConnection connection = objectURI.getURI().toURL().openConnection(); String contentType = connection.getContentType(); if (logger.isDebugEnabled()) { logger.debug("Content type of resource is " + contentType); } MimeType contentMimeType; try { contentMimeType = new MimeType(contentType); } catch (MimeTypeParseException e) { logger.warn("\"" + contentType + "\" didn't parse as MIME type", e); try { contentMimeType = new MimeType("content", "unknown"); } catch (MimeTypeParseException em) { throw new ResolverException("Failed to create mime-type", em); } } assert contentMimeType != null; // If no character encoding is specified, guess at Latin-1 String 
charSet = contentMimeType.getParameter("charset"); if (charSet == null) { charSet = "ISO8859-1"; } assert charSet != null; // Get the content, performing appropriate character encoding input = connection.getInputStream(); reader = new InputStreamReader(input, charSet); // Add a filter if the content type is text/html, to strip out // HTML keywords that will clutter the index try { if (contentMimeType.match(new MimeType("text", "html"))) { reader = HtmlToTextConverter.convert(reader); } } catch (MimeTypeParseException em) { throw new ResolverException("Failed to create mime-type", em); } if (logger.isDebugEnabled()) { logger.debug("Inserting " + subject + " " + predicate + " " + resource); } if (!stringIndex.add(subject, predicate, resource, reader)) { logger.warn( "Unable to add {" + subject + ", " + predicate + ", " + resource + "} to full text string index"); } } catch (MalformedURLException e) { logger.info(resource + " is not a URL; ignoring Lucene insert"); } catch (IOException e) { throw new ResolverException("Can't obtain content of " + resource, e); } catch (org.mulgara.util.conversion.html.ParseException e) { throw new ResolverException("Couldn't parse content of " + resource, e); } finally { try { if (reader != null) reader.close(); else if (input != null) input.close(); } catch (IOException e) { logger.warn("Ignoring error closing resource content", e); } } } else { // (!occurs) if (logger.isDebugEnabled()) { logger.debug("Deleting " + subject + " " + predicate + " " + resource); } if (!stringIndex.remove(subject, predicate, resource)) { logger.warn( "Unable to remove {" + subject + ", " + predicate + ", " + resource + "} from full text string index"); } } } catch (FullTextStringIndexException e) { throw new ResolverException("Unable to modify full text index\n" + new StackTrace(e)); } } else if (objectNode instanceof Literal) { Literal objectLiteral = (Literal) objectNode; String literal = objectLiteral.getLexicalForm(); // Insert the statement into the 
text index try { if (occurs) { if (logger.isDebugEnabled()) { logger.debug("Inserting " + subject + " " + predicate + " " + literal); } if (!stringIndex.add(subject, predicate, literal)) { logger.warn( "Unable to add {" + subject + ", " + predicate + ", " + literal + "} to full text string index"); } } else { if (logger.isDebugEnabled()) { logger.debug("Deleting " + subject + " " + predicate + " " + literal); } if (!stringIndex.remove(subject, predicate, literal)) { logger.warn( "Unable to remove {" + subject + ", " + predicate + ", " + literal + "} from full text string index"); } } } catch (FullTextStringIndexException e) { throw new ResolverException( "Unable to " + (occurs ? "add" : "delete") + "'" + literal + "' to full text string index\n" + new StackTrace(e)); } } else { if (logger.isInfoEnabled()) { logger.info(objectNode + " is blank node; ignoring Lucene insert."); } } } } catch (TuplesException et) { throw new ResolverException("Error fetching statements", et); } catch (GlobalizeException eg) { throw new ResolverException("Error localizing statements", eg); } catch (IOException ioe) { throw new ResolverException("Failed to open string index", ioe); } catch (FullTextStringIndexException ef) { throw new ResolverException("Error in string index\n" + new StackTrace(ef)); } }