/**
   * Indexes each first-level child directory of {@code parent} into its own index.
   *
   * <p>For every name in {@code dirs} an index is opened at {@code index/<name>} (the {@code true}
   * flags passed to {@code FSDirectory.getDirectory} and {@code IndexWriter} request creation of a
   * fresh index), the directory {@code parent/<name>} is indexed through {@code index(...)}, the
   * index is optimized and closed, and its document count is added to the running total.
   *
   * <p>{@code Pa} is mutated: its directory and writer are replaced on every iteration.
   *
   * @param parent path of the directory that contains the directories to index
   * @param dirs names of the child directories of {@code parent} to index
   * @param index path of the directory under which one index per child directory is stored
   * @param Pa holder of the analyzer, and of the directory, writer and reader in use
   * @return sum of {@code numDocs()} over all indexes written by this call
   * @throws FileHandlerException propagated from {@code index(...)}
   * @throws IOException propagated from the index directory, writer or reader
   */
  private long indexDirectories(String parent, String[] dirs, String index, SetupParameters Pa)
      throws FileHandlerException, IOException {
    long sumDocs = 0;

    for (int i = 0; i < dirs.length; i++) {
      System.out.println("\t-----FOLDER----- :" + dirs[i].toUpperCase());

      // Do not double the separator when the index root already ends with one.
      String dir_index = index + "/" + dirs[i];
      if ((index.endsWith("\\")) || (index.endsWith("/"))) {
        dir_index = index + dirs[i];
      }
      Directory di = FSDirectory.getDirectory(new File(dir_index), true);
      Pa.setDir(di);
      Pa.setWriter(new IndexWriter(Pa.getDir(), Pa.getAnalyzer(), true));

      try {
        // Let File join parent and child with the platform separator; a hard-coded "\\" only
        // resolves to the intended folder on Windows.
        this.index(dirs[i].toLowerCase(), Pa.getWriter(), new File(parent, dirs[i]));
        Pa.getWriter().optimize();
      } finally {
        // Release the writer even when indexing or optimizing fails.
        Pa.getWriter().close();
      }

      // Re-open the finished index only to count the documents it holds.
      IndexReader reader = Pa.getReader().open(Pa.getDir());
      try {
        sumDocs += reader.numDocs();
      } finally {
        reader.close();
      }
    }
    return sumDocs;
  }
  /**
   * Command-line entry point of the indexer.
   *
   * <p>Arguments: {@code args[0]} is the source directory, {@code args[1]} the index directory and
   * {@code args[2]} the mode: {@code "1"} indexes {@code args[0]} itself, {@code "0"} indexes each
   * directory listed by {@code Directories.getListDirectories(args[0])} into its own index.
   * Configuration is loaded from {@code conf/sitehandler.properties} under the current working
   * directory ({@code user.dir}).
   *
   * <p>With fewer than three arguments the usage text is printed and the JVM exits. Any other
   * invocation that does not match one of the two modes prints the usage text and returns.
   *
   * @param args command-line arguments as described above
   * @throws Exception if loading the properties or indexing fails
   */
  public static void main(String[] args) throws Exception {
    if (args.length < 3) {
      usage();
      System.exit(0);
    }

    // Only a failure to open the properties file is reported as "not found"; a
    // FileNotFoundException raised later, while indexing, must not be mistaken for it.
    FileInputStream propStream;
    try {
      propStream =
          new FileInputStream(System.getProperty("user.dir") + "/conf/sitehandler.properties");
    } catch (FileNotFoundException ex) {
      System.out.println("file 'sitehandler.properties' in config directory not found");
      System.out.println("please check again");
      return;
    }

    Properties prop = new Properties();
    try {
      prop.load(propStream);
    } finally {
      // Properties.load does not close the stream it reads from.
      propStream.close();
    }

    // set parameters
    SetupParameters Pa = new SetupParameters();
    Pa.setAnalyzer(GetAnalyzer.getAnalysis());
    Indexer indexer = new Indexer(prop);

    long start = System.currentTimeMillis();
    long numDocs;
    if (args.length == 3 && args[2].equals("1")) {
      // index a single directory
      numDocs = indexer.indexDirectory(args[0], args[1], Pa);
    } else if (args.length == 3 && args[2].equals("0")) {
      // index every directory found in args[0], one index per directory
      Directories d = new Directories();
      String[] dirs = d.getListDirectories(new File(args[0]));
      numDocs = indexer.indexDirectories(args[0], dirs, args[1], Pa);
    } else {
      // Unknown mode or surplus arguments: say so instead of reporting "0 documents indexed".
      usage();
      return;
    }

    long end = System.currentTimeMillis();
    System.out.println("Documents indexed: " + numDocs);
    System.out.println("Total time: " + (end - start) + " ms");
  }
  /**
   * Indexes all files of one directory into the index stored at {@code index}.
   *
   * <p>The last path segment of {@code directory}, lower-cased, is passed to {@code index(...)} as
   * the site name. Both {@code '\'} and {@code '/'} are accepted as separators and trailing
   * separators are ignored, so {@code "C:\sites\Foo\"} and {@code "/sites/Foo"} both yield
   * {@code "foo"}.
   *
   * <p>{@code Pa} is mutated: its directory and writer are replaced by the ones opened here.
   *
   * @param directory path of the directory whose content is indexed
   * @param index path of the directory in which the index is stored
   * @param Pa holder of the analyzer, and of the directory, writer and reader in use
   * @return {@code numDocs()} of the index after it has been written
   * @throws FileHandlerException propagated from {@code index(...)}
   * @throws IOException propagated from the index directory, writer or reader
   */
  private long indexDirectory(String directory, String index, SetupParameters Pa)
      throws FileHandlerException, IOException {
    // Derive the site name: drop trailing separators, then cut after the last separator of
    // either kind (whichever occurs later in the path).
    int end = directory.length();
    while (end > 0 && (directory.charAt(end - 1) == '\\' || directory.charAt(end - 1) == '/')) {
      end--;
    }
    int begin =
        Math.max(directory.lastIndexOf('\\', end - 1), directory.lastIndexOf('/', end - 1));
    String dir_site = directory.substring(begin + 1, end).toLowerCase();

    Directory di = FSDirectory.getDirectory(new File(index), true);
    Pa.setDir(di);
    Pa.setWriter(new IndexWriter(Pa.getDir(), Pa.getAnalyzer(), true));

    try {
      index(dir_site, Pa.getWriter(), new File(directory));
      Pa.getWriter().optimize();
    } finally {
      // Release the writer even when indexing or optimizing fails.
      Pa.getWriter().close();
    }

    // Re-open the finished index only to count the documents it holds.
    long sumDocs = 0;
    IndexReader reader = Pa.getReader().open(Pa.getDir());
    try {
      sumDocs += reader.numDocs();
    } finally {
      reader.close();
    }
    return sumDocs;
  }
Beispiel #4
0
  /**
   * Main function: loads the parameters and datasets, builds a {@code sonn} network from them,
   * saves it, prints its results on the training and test data and writes the output files.
   *
   * @param args Arguments to the main method; {@code args[0]} is the parameters file
   * @throws FileNotFoundException propagated from the parameter, dataset or output calls
   * @throws IOException propagated from the parameter, dataset or output calls
   */
  public static void main(String[] args) throws FileNotFoundException, IOException {

    if (args.length <= 0) {
      System.err.println("No parameters file");
      System.exit(1);
    }

    SetupParameters global = new SetupParameters();
    global.LoadParameters(args[0]);

    // The training set is always loaded; test and validation sets only when enabled.
    OpenDataset train = new OpenDataset();
    OpenDataset test = null;
    OpenDataset validation = null;

    train.processClassifierDataset(global.train_file, true);
    global.n_train_patterns = train.getndatos();

    global.n_test_patterns = 0;
    if (global.test_data) {
      test = new OpenDataset();
      test.processClassifierDataset(global.test_file, false);
      global.n_test_patterns = test.getndatos();
    }

    global.n_val_patterns = 0;
    if (global.val_data) {
      validation = new OpenDataset();
      validation.processClassifierDataset(global.val_file, false);
      global.n_val_patterns = validation.getndatos();
    }

    // Assign data and parameters to internal variables
    // Number of inputs: an attribute of type 0 contributes one input per entry of its range
    // list, any other attribute contributes a single input.
    // NOTE(review): type 0 presumably denotes a nominal attribute - confirm in OpenDataset.
    global.Ninputs = 0;
    for (int i = 0; i < train.getnentradas(); i++) {
      if (train.getTiposAt(i) == 0) {
        global.Ninputs += train.getRangosVar(i).size();
      } else {
        global.Ninputs++;
      }
    }

    // Number of outputs: same rule, applied to the attribute that follows the inputs.
    if (train.getTiposAt(train.getnentradas()) != 0) {
      global.Noutputs = train.getnsalidas();
    } else {
      global.Noutputs = train.getRangosVar(train.getnentradas()).size();
    }

    // NOTE(review): the last argument is 0 although global.n_val_patterns has been computed and
    // data.validation is filled below when val_data is set - confirm this is intended.
    Data data =
        new Data(
            global.Ninputs + global.Noutputs, global.n_train_patterns, global.n_test_patterns, 0);

    Genesis.DatasetToArray(data.train, train);
    if (global.test_data) {
      Genesis.DatasetToArray(data.test, test);
    }
    if (global.val_data) {
      Genesis.DatasetToArray(data.validation, validation);
    }

    if (global.tipify_inputs) {
      /* Tipify input data: z = (x - mean) / std. dev., column by column. */
      for (int i = 0; i < global.Ninputs; i++) {
        /* Mean and standard deviation are taken from the training patterns only. */
        double mean = 0.;
        double sq_sum = 0.;

        for (int j = 0; j < global.n_train_patterns; j++) {
          mean += data.train[j][i];
          sq_sum += data.train[j][i] * data.train[j][i];
        }

        mean /= global.n_train_patterns;
        double sigma = Math.sqrt(sq_sum / global.n_train_patterns - mean * mean);

        /* If std. dev. is (almost) 0 the column is left untouched. */
        if (sigma > 0.000001) {
          for (int j = 0; j < global.n_train_patterns; j++) {
            data.train[j][i] = (data.train[j][i] - mean) / sigma;
          }

          /* Test patterns are scaled with the training statistics. */
          /* NOTE(review): validation patterns are not scaled here - confirm. */
          for (int j = 0; j < global.n_test_patterns; j++) {
            data.test[j][i] = (data.test[j][i] - mean) / sigma;
          }
        }
      }
    }

    sonn SelfOrganizingNetwork = new sonn(global, data);

    SelfOrganizingNetwork.SaveNetwork("SONN_Network", global.seed, false);

    // Evaluate on the training and test patterns with the routine matching the problem type.
    if (global.problem.compareToIgnoreCase("Classification") == 0) {
      double result =
          SelfOrganizingNetwork.TestSONNInClassification(
              global, data.train, global.n_train_patterns);
      System.out.print("Train accuracy: " + result + "\t");
      result =
          SelfOrganizingNetwork.TestSONNInClassification(global, data.test, global.n_test_patterns);
      System.out.println("Test accuracy: " + result);
    } else {
      double result =
          SelfOrganizingNetwork.TestSONNInRegression(global, data.train, global.n_train_patterns);
      System.out.print("Train accuracy: " + result + "\t");
      result =
          SelfOrganizingNetwork.TestSONNInRegression(global, data.test, global.n_test_patterns);
      System.out.println("Test accuracy: " + result);
    }

    SelfOrganizingNetwork.SaveOutputFile(
        global.train_output, data.train, global.n_train_patterns, global);
    SelfOrganizingNetwork.SaveOutputFile(
        global.test_output, data.test, global.n_test_patterns, global);
  }
Beispiel #5
0
  /**
   * Constructor that takes only the setup parameters (NOT USED).
   *
   * <p>Reads the training file and, when enabled in {@code global}, the test and validation
   * files. Each file starts with three lines holding the number of patterns, the number of inputs
   * and the number of outputs, followed by one pattern per line whose values are separated by
   * whitespace.
   *
   * <p>{@code global} is updated from the files: {@code Ninputs} and {@code Noutputs} take the
   * values of the file read last, the matching {@code n_*_patterns} field is set per file, and
   * reading the validation file also stores {@code Noutputs} in
   * {@code Nhidden[Nhidden_layers]}. A file that cannot be opened is reported on standard error
   * and terminates the JVM with status -1.
   *
   * @param global Global Definition parameters
   * @throws FileNotFoundException declared for compatibility; a missing file ends the JVM instead
   * @throws IOException if a file cannot be read, ends prematurely or has a pattern line with the
   *     wrong number of values
   */
  public Data(SetupParameters global) throws FileNotFoundException, IOException {
    try {
      // Training data
      train = readPatterns(new FileInputStream(global.train_file), global);
      global.n_train_patterns = train.length;
    } catch (FileNotFoundException e) {
      System.err.println("Training file does not exist");
      System.exit(-1);
    }

    if (global.test_data) {
      try {
        // Test data
        test = readPatterns(new FileInputStream(global.test_file), global);
        global.n_test_patterns = test.length;
      } catch (FileNotFoundException e) {
        System.err.println("Testing file does not exist");
        System.exit(-1);
      }
    }

    if (global.val_data) {
      try {
        // Validation data
        validation = readPatterns(new FileInputStream(global.val_file), global);
        global.n_val_patterns = validation.length;
        global.Nhidden[global.Nhidden_layers] = global.Noutputs;
      } catch (FileNotFoundException e) {
        System.err.println("Validation file does not exist");
        System.exit(-1);
      }
    }
  }

  /**
   * Reads one pattern file, stores its input/output counts in {@code global} and always closes
   * {@code file}, also when reading or parsing fails.
   */
  private static double[][] readPatterns(FileInputStream file, SetupParameters global)
      throws IOException {
    try {
      BufferedReader reader = new BufferedReader(new InputStreamReader(file));

      int patterns = Integer.parseInt(nextLine(reader, "the number of patterns").trim());
      global.Ninputs = Integer.parseInt(nextLine(reader, "the number of inputs").trim());
      global.Noutputs = Integer.parseInt(nextLine(reader, "the number of outputs").trim());

      int columns = global.Ninputs + global.Noutputs;
      double[][] data = new double[patterns][columns];

      for (int i = 0; i < patterns; i++) {
        String[] values = nextLine(reader, "pattern " + i).trim().split("\\s+");
        if (values.length != columns) {
          throw new IOException(
              "Pattern " + i + " has " + values.length + " values, expected " + columns);
        }
        for (int j = 0; j < columns; j++) {
          data[i][j] = Double.parseDouble(values[j]);
        }
      }
      return data;
    } finally {
      file.close();
    }
  }

  /** Returns the next line of {@code reader}, failing with an IOException at end of file. */
  private static String nextLine(BufferedReader reader, String what) throws IOException {
    String line = reader.readLine();
    if (line == null) {
      throw new IOException("Unexpected end of file while reading " + what);
    }
    return line;
  }