Exemple #1
0
 /**
  * Expands list of files to process into full list of all files that can be found by recursively
  * descending directories.
  */
 void expand(File dir, String[] files, boolean isUpdate) {
   if (files == null) {
     return;
   }
   for (int i = 0; i < files.length; i++) {
     File f;
     if (dir == null) {
       f = new File(files[i]);
     } else {
       f = new File(dir, files[i]);
     }
     if (f.isFile()) {
       if (entries.add(f)) {
         if (isUpdate) entryMap.put(entryName(f.getPath()), f);
       }
     } else if (f.isDirectory()) {
       if (entries.add(f)) {
         if (isUpdate) {
           String dirPath = f.getPath();
           dirPath = (dirPath.endsWith(File.separator)) ? dirPath : (dirPath + File.separator);
           entryMap.put(entryName(dirPath), f);
         }
         expand(f, f.list(), isUpdate);
       }
     } else {
       error(formatMsg("error.nosuch.fileordir", String.valueOf(f)));
       ok = false;
     }
   }
 }
Exemple #2
0
  /*
  	- This method is used has a helper method to run the algo for the archive dataset
  	- Note the archive set has multiple directories ( one for each url )
  	- So Read all of the directories in first and for each directory run the code
  */
  private static void runArchiveSet() throws Exception {

    System.out.println("Running TDDD archive");
    directory = "../../thesis-datasets/datasets2/";
    File file = new File(directory);
    String[] directory_list =
        file.list(
            new FilenameFilter() {
              @Override
              public boolean accept(File current, String name) {
                return new File(current, name).isDirectory(); // make sure its a directory
              }
            });

    int totalRuns = 0; // used to avg the runs in the end
    int total_iter_count =
        0; // this is used check how many times we will iterate through the data so we can make an
           // array of that size
    for (int i = startBoundary; i <= endBoundary; i += increment) total_iter_count++;

    // System.out.println(Arrays.toString(directory_list));
    int sets = 0;
    // make the arrays to hold the respecitve info for the different verions\
    // run it simulateounsly to speed the from the program!
    double[] block_size_list_last_year = new double[total_iter_count];
    double[] ratio_size_list_last_year = new double[total_iter_count];

    double[] block_size_list_six_month = new double[total_iter_count];
    double[] ratio_size_list__six_month = new double[total_iter_count];

    double[] block_size_list_two_year = new double[total_iter_count];
    double[] ratio_size_list_two_year = new double[total_iter_count];

    int current = 0;
    int six_month = 2;
    int last_year = 1;
    int two_year = 3;
    // loop through and run the cdc for each directory
    for (String dir : directory_list) {

      ReadFile.readFile(directory + dir, fileList); // read all the files in this directory
      preliminaryStep(directory + dir + "/"); // call the preliminaryStep on all the files

      totalRuns++;

      totalSize =
          fileArray.get(current)
              .length; // get the length of the file we will be running it against!

      // run it against six month
      startCDC(
          block_size_list_six_month,
          ratio_size_list__six_month,
          fileArray.get(current),
          fileArray.get(six_month),
          hashed_File_List.get(current),
          hashed_File_List.get(six_month));

      // run it against last year
      startCDC(
          block_size_list_last_year,
          ratio_size_list_last_year,
          fileArray.get(current),
          fileArray.get(last_year),
          hashed_File_List.get(current),
          hashed_File_List.get(last_year));

      // run it against 2
      startCDC(
          block_size_list_two_year,
          ratio_size_list_two_year,
          fileArray.get(current),
          fileArray.get(two_year),
          hashed_File_List.get(current),
          hashed_File_List.get(two_year));

      // // clear the fileList and hashed_file_list array
      fileArray.clear();
      hashed_File_List.clear();
      fileList.clear();

      // if (Double.isNaN(ratio_size_list[0])){
      // 	System.out.println(sets+" "+Arrays.toString(ratio_size_list));
      // 	test = true;
      // 	break;
      // }
      if (sets % 200 == 0) System.out.println(sets);
      ++sets;
    } // end of directory list for loop

    // now output the avged value for all the runs
    // System.out.println(Arrays.toString(ratio_size_list));
    System.out.println("Printing six_month");
    int index = 0;
    for (int i = startBoundary; i <= endBoundary; i += increment) {
      // avg out the outputs
      double blockSize = block_size_list_six_month[index] / (double) totalRuns;
      double ratio = ratio_size_list__six_month[index] / (double) totalRuns;
      System.out.println(i + " " + i / 2 + 1 + " " + i / 4 + 1 + " " + blockSize + " " + ratio);
      index++;
    }
    System.out.println("Printing last year");
    index = 0;
    for (int i = startBoundary; i <= endBoundary; i += increment) {
      double blockSize = block_size_list_last_year[index] / (double) totalRuns;
      double ratio = ratio_size_list_last_year[index] / (double) totalRuns;
      System.out.println(i + " " + blockSize + " " + ratio);
      index++;
    }

    System.out.println("Printing two year");
    index = 0;
    for (int i = startBoundary; i <= endBoundary; i += increment) {
      double blockSize = block_size_list_two_year[index] / (double) totalRuns;
      double ratio = ratio_size_list_two_year[index] / (double) totalRuns;
      System.out.println(i + " " + blockSize + " " + ratio);
      index++;
    }
  }