public void plus(WeightedIntDocVector otherVector) { // sLogger.debug ("plus (otherVector: " + otherVector + ")"); // sLogger.debug ("weightedTerms == null: " + (weightedTerms == null)); // sLogger.debug ("otherVector.mWeightedTerms == null: " + (otherVector.mWeightedTerms == // null)); weightedTerms.plus(otherVector.weightedTerms); docLength += otherVector.docLength; }
/**
 * Prints, for every topic stored in a beta sequence file, its highest-weighted terms, resolving
 * term ids to strings through a term index sequence file.
 *
 * <p>Recognised command-line options: the help flag, the input beta file (required), the term
 * index file (required) and the number of top terms to display (optional, defaults to
 * {@code TOP_DISPLAY}). Requesting help prints the usage and terminates the JVM with status 0;
 * an option error is reported on standard error and terminates the JVM with status 1.
 *
 * @param args command-line arguments
 * @return 0 once every topic in the beta file has been printed
 * @throws Exception if a path fails the precondition checks or a sequence file cannot be read
 */
@SuppressWarnings("unchecked")
public int run(String[] args) throws Exception {
  Options options = new Options();
  options.addOption(Settings.HELP_OPTION, false, "print the help message");
  options.addOption(
      OptionBuilder.withArgName(Settings.PATH_INDICATOR)
          .hasArg()
          .withDescription("input beta file")
          .create(Settings.INPUT_OPTION));
  options.addOption(
      OptionBuilder.withArgName(Settings.PATH_INDICATOR)
          .hasArg()
          .withDescription("term index file")
          .create(ParseCorpus.INDEX));
  options.addOption(
      OptionBuilder.withArgName(Settings.INTEGER_INDICATOR)
          .hasArg()
          .withDescription("display top terms only (default - 10)")
          .create(TOP_DISPLAY_OPTION));

  String betaString = null;
  String indexString = null;
  int topDisplay = TOP_DISPLAY;

  CommandLineParser parser = new GnuParser();
  HelpFormatter formatter = new HelpFormatter();
  try {
    CommandLine line = parser.parse(options, args);

    if (line.hasOption(Settings.HELP_OPTION)) {
      // The usage line names this tool (the job below is configured with DisplayTopic).
      formatter.printHelp(DisplayTopic.class.getName(), options);
      System.exit(0);
    }

    if (line.hasOption(Settings.INPUT_OPTION)) {
      betaString = line.getOptionValue(Settings.INPUT_OPTION);
    } else {
      throw new ParseException(
          "Parsing failed due to " + Settings.INPUT_OPTION + " not initialized...");
    }

    if (line.hasOption(ParseCorpus.INDEX)) {
      indexString = line.getOptionValue(ParseCorpus.INDEX);
    } else {
      throw new ParseException(
          "Parsing failed due to " + ParseCorpus.INDEX + " not initialized...");
    }

    if (line.hasOption(TOP_DISPLAY_OPTION)) {
      topDisplay = Integer.parseInt(line.getOptionValue(TOP_DISPLAY_OPTION));
    }
  } catch (ParseException pe) {
    System.err.println(pe.getMessage());
    formatter.printHelp(DisplayTopic.class.getName(), options);
    // Invalid arguments are a failure: exit non-zero so calling scripts can detect it.
    System.exit(1);
  } catch (NumberFormatException nfe) {
    System.err.println(nfe.getMessage());
    System.exit(1);
  }

  JobConf conf = new JobConf(DisplayTopic.class);
  FileSystem fs = FileSystem.get(conf);

  Path indexPath = new Path(indexString);
  Preconditions.checkArgument(
      fs.exists(indexPath) && fs.isFile(indexPath), "Invalid index path...");

  Path betaPath = new Path(betaString);
  Preconditions.checkArgument(fs.exists(betaPath) && fs.isFile(betaPath), "Invalid beta path...");

  // Load the term id -> term string mapping. The index reader gets its own try/finally so it is
  // closed before the beta reader is opened instead of being dropped unclosed.
  Map<Integer, String> termIndex = new HashMap<Integer, String>();
  SequenceFile.Reader indexReader = null;
  try {
    indexReader = new SequenceFile.Reader(fs, indexPath, conf);
    IntWritable termId = new IntWritable();
    Text term = new Text();
    while (indexReader.next(termId, term)) {
      termIndex.put(termId.get(), term.toString());
    }
  } finally {
    IOUtils.closeStream(indexReader);
  }

  SequenceFile.Reader betaReader = null;
  try {
    betaReader = new SequenceFile.Reader(fs, betaPath, conf);
    PairOfIntFloat pairOfIntFloat = new PairOfIntFloat();
    HMapIFW hmap = new HMapIFW();
    // Keyed by the negated weight so that ascending key order is descending weight order.
    // NOTE(review): terms whose weights are exactly equal share a key, so only the last one
    // seen is kept -- confirm whether ties need to be preserved.
    TreeMap<Float, Integer> topTerms = new TreeMap<Float, Integer>();

    while (betaReader.next(pairOfIntFloat, hmap)) {
      topTerms.clear();

      System.out.println("==============================");
      System.out.println(
          "Top ranked " + topDisplay + " terms for Topic " + pairOfIntFloat.getLeftElement());
      System.out.println("==============================");

      for (int termId : hmap.keySet()) {
        topTerms.put(-hmap.get(termId), termId);
        // Evict the entry with the largest key (the smallest weight) once over capacity.
        if (topTerms.size() > topDisplay) {
          topTerms.remove(topTerms.lastKey());
        }
      }

      for (Map.Entry<Float, Integer> entry : topTerms.entrySet()) {
        float negatedWeight = entry.getKey();
        Integer termId = entry.getValue();
        if (termIndex.containsKey(termId)) {
          System.out.println(termIndex.get(termId) + "\t\t" + -negatedWeight);
        } else {
          System.out.println("How embarrassing! Term index not found...");
        }
      }
    }
  } finally {
    IOUtils.closeStream(betaReader);
  }

  return 0;
}
/**
 * Reports whether this vector holds an entry for the given term id.
 *
 * @param termid the term id to look up
 * @return {@code true} if the weighted-term map contains {@code termid} as a key
 */
public boolean containsTerm(int termid) {
  final boolean present = weightedTerms.containsKey(termid);
  return present;
}
/**
 * Returns the string form of the underlying weighted-term map.
 *
 * @return whatever {@code weightedTerms.toString()} produces
 */
@Override
public String toString() {
  final String rendered = weightedTerms.toString();
  return rendered;
}
/**
 * Divides every term weight in this vector by the given value, in place.
 *
 * @param l the divisor applied to each stored weight
 */
public void normalizeWith(float l) {
  for (int termId : weightedTerms.keySet()) {
    // Only values of existing keys are rewritten; no keys are added or removed.
    float scaled = weightedTerms.get(termId) / l;
    weightedTerms.put(termId, scaled);
  }
}
public float dot(WeightedIntDocVector otherVector) { // sLogger.debug ("dot (otherVector: " + otherVector + ")"); float result = weightedTerms.dot(otherVector.weightedTerms); // sLogger.debug ("in KMeansClusterDocs mapper dotProduct () returning: " + result); return result; }
/**
 * Restores this vector from a binary stream: first the document length as a variable-length
 * int, then the weighted terms into a freshly created map.
 *
 * @param in the stream to read from
 * @throws IOException if reading from {@code in} fails
 */
public void readFields(DataInput in) throws IOException {
  docLength = WritableUtils.readVInt(in);

  // Install the new, empty map before populating it, replacing any previous contents.
  final HMapIFW freshTerms = new HMapIFW();
  weightedTerms = freshTerms;
  freshTerms.readFields(in);
}
/**
 * Serializes this vector to a binary stream: the document length as a variable-length int,
 * followed by the weighted-term map.
 *
 * @param out the stream to write to
 * @throws IOException if writing to {@code out} fails
 */
public void write(DataOutput out) throws IOException {
  final int length = docLength;
  WritableUtils.writeVInt(out, length);
  weightedTerms.write(out);
}
/**
 * Looks up the weight stored for a term id.
 *
 * @param termid the term id to look up
 * @return the value the weighted-term map returns for {@code termid}
 */
public float getWeight(int termid) {
  final float weight = weightedTerms.get(termid);
  return weight;
}