/**
 * Instantiates the given {@link Analyzer} class, preferring a single-argument
 * {@code (Version)} constructor and falling back to the no-argument constructor
 * when the version-taking one is unavailable.
 *
 * @param analyzerClass the analyzer implementation to instantiate
 * @param version the Lucene version passed to the constructor, when supported
 * @return a new analyzer instance
 */
public static Analyzer createAnalyzer(Class<? extends Analyzer> analyzerClass, Version version) {
  Class<?>[] constructorArgTypes = {Version.class};
  Object[] constructorArgs = {version};
  try {
    return ClassUtils.instantiateAs(analyzerClass, Analyzer.class, constructorArgTypes, constructorArgs);
  } catch (IllegalStateException e) {
    // No (Version) constructor was usable; fall back to the default constructor.
    return ClassUtils.instantiateAs(analyzerClass, Analyzer.class);
  }
}
/**
 * Parses the command line, optionally clears the output directory, and
 * launches the job with the configured distance measure and T1/T2 thresholds.
 *
 * @param args command-line arguments
 * @return 0 on success, -1 when argument parsing fails
 * @throws Exception if the underlying job fails
 */
@Override
public int run(String[] args) throws Exception {
  // Register the options this driver understands.
  addInputOption();
  addOutputOption();
  addOption(DefaultOptionCreator.distanceMeasureOption().create());
  addOption(DefaultOptionCreator.t1Option().create());
  addOption(DefaultOptionCreator.t2Option().create());
  addOption(DefaultOptionCreator.overwriteOption().create());

  Map<String, List<String>> parsedArgs = parseArguments(args);
  if (parsedArgs == null) {
    // Help was printed or the arguments were invalid.
    return -1;
  }

  Path inputPath = getInputPath();
  Path outputPath = getOutputPath();
  if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
    // Remove stale output so the job can write fresh results.
    HadoopUtil.delete(new Configuration(), outputPath);
  }

  String measureClassName = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION);
  double t1Threshold = Double.parseDouble(getOption(DefaultOptionCreator.T1_OPTION));
  double t2Threshold = Double.parseDouble(getOption(DefaultOptionCreator.T2_OPTION));
  DistanceMeasure distanceMeasure = ClassUtils.instantiateAs(measureClassName, DistanceMeasure.class);

  run(inputPath, outputPath, distanceMeasure, t1Threshold, t2Threshold);
  return 0;
}
/**
 * Loads the distance measure and representative points from the job
 * configuration before any records are processed.
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);
  Configuration configuration = context.getConfiguration();
  String measureClassName = configuration.get(RepresentativePointsDriver.DISTANCE_MEASURE_KEY);
  measure = ClassUtils.instantiateAs(measureClassName, DistanceMeasure.class);
  representativePoints = getRepresentativePoints(configuration);
}
/**
 * Initializes per-task state from the job configuration: the similarity
 * measure, the pruning threshold, and empty sparse accumulator vectors.
 */
@Override
protected void setup(Context ctx) throws IOException, InterruptedException {
  Configuration conf = ctx.getConfiguration();
  similarity = ClassUtils.instantiateAs(conf.get(SIMILARITY_CLASSNAME), VectorSimilarityMeasure.class);
  threshold = Double.parseDouble(conf.get(THRESHOLD));
  // Accumulators sized to the maximum possible index; sparse, so cheap to allocate.
  norms = new RandomAccessSparseVector(Integer.MAX_VALUE);
  nonZeroEntries = new RandomAccessSparseVector(Integer.MAX_VALUE);
  maxValues = new RandomAccessSparseVector(Integer.MAX_VALUE);
}
/**
 * Initializes per-task state from the job configuration: the similarity
 * measure, column count, self-similarity flag, precomputed norms, and the
 * pruning threshold.
 */
@Override
protected void setup(Context ctx) throws IOException, InterruptedException {
  Configuration conf = ctx.getConfiguration();
  similarity = ClassUtils.instantiateAs(conf.get(SIMILARITY_CLASSNAME), VectorSimilarityMeasure.class);
  numberOfColumns = conf.getInt(NUMBER_OF_COLUMNS, -1);
  Preconditions.checkArgument(numberOfColumns > 0, "Incorrect number of columns!");
  excludeSelfSimilarity = conf.getBoolean(EXCLUDE_SELF_SIMILARITY, false);
  norms = Vectors.read(new Path(conf.get(NORMS_PATH)), conf);
  // NOTE(review): the field name "treshold" is misspelled, but it is declared
  // elsewhere in this class, so it must keep that spelling here.
  treshold = Double.parseDouble(conf.get(THRESHOLD));
}
/**
 * Initializes per-task state from the job configuration: the similarity
 * measure, the precomputed non-zero-entry counts and per-column maxima
 * (read from their configured paths), and the pruning threshold.
 */
@Override
protected void setup(Context ctx) throws IOException, InterruptedException {
  Configuration conf = ctx.getConfiguration();
  similarity = ClassUtils.instantiateAs(conf.get(SIMILARITY_CLASSNAME), VectorSimilarityMeasure.class);
  numNonZeroEntries = Vectors.readAsIntMap(new Path(conf.get(NUM_NON_ZERO_ENTRIES_PATH)), conf);
  maxValues = Vectors.read(new Path(conf.get(MAXVALUES_PATH)), conf);
  threshold = Double.parseDouble(conf.get(THRESHOLD));
}
/** * Takes in two arguments: * * <ol> * <li>The input {@link org.apache.hadoop.fs.Path} where the input documents live * <li>The output {@link org.apache.hadoop.fs.Path} where to write the classifier as a {@link * org.apache.hadoop.io.SequenceFile} * </ol> */ public static void main(String[] args) throws IOException, InterruptedException { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); Option dirInputPathOpt = DefaultOptionCreator.inputOption().create(); Option dirOutputPathOpt = DefaultOptionCreator.outputOption().create(); Option categoriesOpt = obuilder .withLongName("categories") .withRequired(true) .withArgument(abuilder.withName("categories").withMinimum(1).withMaximum(1).create()) .withDescription( "Location of the categories file. One entry per line. " + "Will be used to make a string match in Wikipedia Category field") .withShortName("c") .create(); Option exactMatchOpt = obuilder .withLongName("exactMatch") .withDescription( "If set, then the category name must exactly match the " + "entry in the categories file. 
Default is false") .withShortName("e") .create(); Option analyzerOpt = obuilder .withLongName("analyzer") .withRequired(false) .withArgument(abuilder.withName("analyzer").withMinimum(1).withMaximum(1).create()) .withDescription("The analyzer to use, must have a no argument constructor") .withShortName("a") .create(); Option helpOpt = DefaultOptionCreator.helpOption(); Group group = gbuilder .withName("Options") .withOption(categoriesOpt) .withOption(dirInputPathOpt) .withOption(dirOutputPathOpt) .withOption(exactMatchOpt) .withOption(analyzerOpt) .withOption(helpOpt) .create(); Parser parser = new Parser(); parser.setGroup(group); try { CommandLine cmdLine = parser.parse(args); if (cmdLine.hasOption(helpOpt)) { CommandLineUtil.printHelp(group); return; } String inputPath = (String) cmdLine.getValue(dirInputPathOpt); String outputPath = (String) cmdLine.getValue(dirOutputPathOpt); String catFile = (String) cmdLine.getValue(categoriesOpt); Class<? extends Analyzer> analyzerClass = WikipediaAnalyzer.class; if (cmdLine.hasOption(analyzerOpt)) { String className = cmdLine.getValue(analyzerOpt).toString(); analyzerClass = Class.forName(className).asSubclass(Analyzer.class); // try instantiating it, b/c there isn't any point in setting it if // you can't instantiate it ClassUtils.instantiateAs(analyzerClass, Analyzer.class); } runJob(inputPath, outputPath, catFile, cmdLine.hasOption(exactMatchOpt), analyzerClass); } catch (OptionException e) { log.error("Exception", e); CommandLineUtil.printHelp(group); } catch (ClassNotFoundException e) { log.error("Exception", e); CommandLineUtil.printHelp(group); } }