/**
 * Parses a configured media type string and returns its base type.
 *
 * <p>The input is trimmed and lower-cased (locale-independently) before being handed to {@code
 * MediaType.parse}.
 *
 * @param mediaTypeStr the media type as written in the configuration, e.g. {@code "text/html"}
 * @return the result of {@code getBaseType()} on the parsed media type
 * @throws IllegalArgumentException if the string cannot be parsed as a media type
 */
private MediaType parseMediaType(String mediaTypeStr) {
  String normalized = mediaTypeStr.trim().toLowerCase(Locale.ROOT);
  MediaType mediaType = MediaType.parse(normalized);
  if (mediaType == null) {
    // parse() signals a malformed media type with null; fail with a message that names the
    // bad value instead of a bare NullPointerException on the next line.
    throw new IllegalArgumentException("Invalid media type: '" + mediaTypeStr + "'");
  }
  return mediaType.getBaseType();
}
public SolrCell( CommandBuilder builder, Config config, Command parent, Command child, MorphlineContext context) { super(builder, config, parent, child, context); Config solrLocatorConfig = getConfigs().getConfig(config, "solrLocator"); SolrLocator locator = new SolrLocator(solrLocatorConfig, context); LOG.debug("solrLocator: {}", locator); this.schema = locator.getIndexSchema(); Preconditions.checkNotNull(schema); LOG.trace( "Solr schema: \n{}", Joiner.on("\n").join(new TreeMap(schema.getFields()).values())); ListMultimap<String, String> cellParams = ArrayListMultimap.create(); String uprefix = getConfigs().getString(config, ExtractingParams.UNKNOWN_FIELD_PREFIX, null); if (uprefix != null) { cellParams.put(ExtractingParams.UNKNOWN_FIELD_PREFIX, uprefix); } for (String capture : getConfigs() .getStringList( config, ExtractingParams.CAPTURE_ELEMENTS, Collections.<String>emptyList())) { cellParams.put(ExtractingParams.CAPTURE_ELEMENTS, capture); } Config fmapConfig = getConfigs().getConfig(config, "fmap", null); if (fmapConfig != null) { for (Map.Entry<String, Object> entry : new Configs().getEntrySet(fmapConfig)) { cellParams.put(ExtractingParams.MAP_PREFIX + entry.getKey(), entry.getValue().toString()); } } String captureAttributes = getConfigs().getString(config, ExtractingParams.CAPTURE_ATTRIBUTES, null); if (captureAttributes != null) { cellParams.put(ExtractingParams.CAPTURE_ATTRIBUTES, captureAttributes); } String lowerNames = getConfigs().getString(config, ExtractingParams.LOWERNAMES, null); if (lowerNames != null) { cellParams.put(ExtractingParams.LOWERNAMES, lowerNames); } String defaultField = getConfigs().getString(config, ExtractingParams.DEFAULT_FIELD, null); if (defaultField != null) { cellParams.put(ExtractingParams.DEFAULT_FIELD, defaultField); } xpathExpr = getConfigs().getString(config, ExtractingParams.XPATH_EXPRESSION, null); if (xpathExpr != null) { cellParams.put(ExtractingParams.XPATH_EXPRESSION, xpathExpr); } this.dateFormats = getConfigs() 
.getStringList(config, "dateFormats", new ArrayList<>(DateUtil.DEFAULT_DATE_FORMATS)); String handlerStr = getConfigs() .getString( config, "solrContentHandlerFactory", TrimSolrContentHandlerFactory.class.getName()); Class<? extends SolrContentHandlerFactory> factoryClass; try { factoryClass = (Class<? extends SolrContentHandlerFactory>) Class.forName(handlerStr); } catch (ClassNotFoundException cnfe) { throw new MorphlineCompilationException( "Could not find class " + handlerStr + " to use for " + "solrContentHandlerFactory", config, cnfe); } this.solrContentHandlerFactory = getSolrContentHandlerFactory(factoryClass, dateFormats, config); this.locale = getLocale(getConfigs().getString(config, "locale", "")); this.mediaTypeToParserMap = new HashMap<>(); // MimeTypes mimeTypes = MimeTypes.getDefaultMimeTypes(); // FIXME // getMediaTypeRegistry.normalize() List<? extends Config> parserConfigs = getConfigs().getConfigList(config, "parsers"); for (Config parserConfig : parserConfigs) { String parserClassName = getConfigs().getString(parserConfig, "parser"); Object obj; try { obj = Class.forName(parserClassName).newInstance(); } catch (Throwable e) { throw new MorphlineCompilationException( "Cannot instantiate Tika parser: " + parserClassName, config, e); } if (!(obj instanceof Parser)) { throw new MorphlineCompilationException( "Tika parser " + obj.getClass().getName() + " must be an instance of class " + Parser.class.getName(), config); } Parser parser = (Parser) obj; this.parsers.add(parser); List<String> mediaTypes = getConfigs() .getStringList(parserConfig, SUPPORTED_MIME_TYPES, Collections.<String>emptyList()); for (String mediaTypeStr : mediaTypes) { MediaType mediaType = parseMediaType(mediaTypeStr); addSupportedMimeType(mediaTypeStr); this.mediaTypeToParserMap.put(mediaType, parser); } if (!parserConfig.hasPath(SUPPORTED_MIME_TYPES)) { for (MediaType mediaType : parser.getSupportedTypes(new ParseContext())) { mediaType = mediaType.getBaseType(); 
addSupportedMimeType(mediaType.toString()); this.mediaTypeToParserMap.put(mediaType, parser); } List<String> extras = getConfigs() .getStringList( parserConfig, ADDITIONAL_SUPPORTED_MIME_TYPES, Collections.<String>emptyList()); for (String mediaTypeStr : extras) { MediaType mediaType = parseMediaType(mediaTypeStr); addSupportedMimeType(mediaTypeStr); this.mediaTypeToParserMap.put(mediaType, parser); } } } // LOG.info("mediaTypeToParserMap="+mediaTypeToParserMap); Map<String, String[]> tmp = new HashMap(); for (Map.Entry<String, Collection<String>> entry : cellParams.asMap().entrySet()) { tmp.put(entry.getKey(), entry.getValue().toArray(new String[entry.getValue().size()])); } this.solrParams = new MultiMapSolrParams(tmp); validateArguments(); }