Esempio n. 1
0
  /**
   * Prints the correlation between the word frequencies of two documents.
   *
   * <p>Expects exactly three arguments: a flag selecting the backing data structure ({@code -b}
   * for {@code BinarySearchTree}, {@code -a} for {@code AVLTree}, {@code -m} for
   * {@code MoveToFrontList}, {@code -h} for {@code Hashtable}) followed by the names of the two
   * files to compare. If the argument count is wrong or the flag is not recognized, a usage
   * message is printed and the program exits with status 1.
   *
   * @param args the structure flag followed by the two file names
   */
  public static void main(String[] args) {
    if (args.length != 3) {
      System.out.println("Usage: [ -b | -a | -m | -h ] <filename1> <filename2>");
      // Stop here: continuing would index past the end of args and crash with
      // an ArrayIndexOutOfBoundsException instead of a clean usage error.
      System.exit(1);
      return;
    }

    String flag = args[0];
    final DataCounter<String> counter1;
    final DataCounter<String> counter2;
    if (flag.equals("-b")) {
      counter1 = new BinarySearchTree<String>(new StringComparator());
      counter2 = new BinarySearchTree<String>(new StringComparator());
    } else if (flag.equals("-a")) {
      counter1 = new AVLTree<String>(new StringComparator());
      counter2 = new AVLTree<String>(new StringComparator());
    } else if (flag.equals("-m")) {
      counter1 = new MoveToFrontList<String>(new StringComparator());
      counter2 = new MoveToFrontList<String>(new StringComparator());
    } else if (flag.equals("-h")) {
      counter1 = new Hashtable<String>(new StringComparator(), new StringHasher());
      counter2 = new Hashtable<String>(new StringComparator(), new StringHasher());
    } else {
      System.out.println("Incorrect argument. Usage: [ -b | -a | -m | -h ] ");
      System.exit(1);
      // Not reached at runtime; lets the compiler see that both counters are
      // assigned on every path that continues past this chain.
      return;
    }

    int sum1 = parseAndCount(args[1], counter1);
    int sum2 = parseAndCount(args[2], counter2);

    // Iterate over whichever counter reports the smaller size and look each of
    // its entries up in the other one.
    if (counter1.getSize() > counter2.getSize()) {
      System.out.println(compare(counter2.getIterator(), counter1, sum2, sum1));
    } else {
      System.out.println(compare(counter1.getIterator(), counter2, sum1, sum2));
    }
  }
Esempio n. 2
0
 /**
  * Reads every word of a file, recording each one in the given counter and tallying how many
  * words were read in total.
  *
  * <p>If an {@link IOException} occurs while reading, an error message is written to standard
  * error and the program exits with status 1.
  *
  * @param file name of the file to read
  * @param counter data structure whose count is incremented once per word read
  * @return the total number of words read from the file
  */
 private static int parseAndCount(String file, DataCounter<String> counter) {
   int total = 0;
   try {
     FileWordReader words = new FileWordReader(file);
     // nextWord() returning null marks the end of the input.
     for (String w = words.nextWord(); w != null; w = words.nextWord()) {
       counter.incCount(w);
       total++;
     }
   } catch (IOException ex) {
     System.err.println("Error processing " + file + " " + ex);
     System.exit(1);
   }
   return total;
 }
Esempio n. 3
0
 /**
  * Accumulates the correlation between two word-frequency tables as a running sum of squared
  * frequency differences.
  *
  * <p>Each entry produced by the iterator is normalized by {@code iterSum}; its count in
  * {@code counter} is normalized by {@code counterSum}. An entry contributes the squared
  * difference of the two normalized frequencies only when both lie strictly between 0.0001 and
  * 0.01; all other entries are skipped.
  *
  * @param iterator iterator over the entries of one table (the file with fewer distinct words)
  * @param counter the other table (the file with more distinct words), queried by word
  * @param iterSum total number of words in the file being iterated
  * @param counterSum total number of words in the other file
  * @return the accumulated sum of squared frequency differences
  */
 private static double compare(
     SimpleIterator<DataCount<String>> iterator,
     DataCounter<String> counter,
     int iterSum,
     int counterSum) {
   double total = 0;
   while (iterator.hasNext()) {
     DataCount<String> entry = iterator.next();

     double iterFreq = ((double) entry.count) / iterSum;
     boolean iterUseful = iterFreq > .0001 && iterFreq < .01;
     if (!iterUseful) {
       continue;
     }

     // Only look the word up in the other table once it has passed the first filter.
     double otherFreq = ((double) counter.getCount(entry.data)) / counterSum;
     boolean otherUseful = otherFreq > .0001 && otherFreq < .01;
     if (!otherUseful) {
       continue;
     }

     double diff = iterFreq - otherFreq;
     total += Math.pow(diff, 2);
   }
   return total;
 }