/**
   * Builds the source code corpus for the configured dataset.
   *
   * <p>The dataset directory, the file type and the segmentation length are all read from
   * {@code Config.getInstance()}; this method takes no arguments. Every file reported by
   * {@code detectAllFiles} is parsed into a {@code SourceCode} entry carrying its full class
   * name, its stemmed content, and its class and method names. After all files are added, the
   * number of files is stored back into the configuration and the corpus is segmented.
   *
   * @return the populated and segmented corpus; if the configured dataset path is not a
   *     directory, a message is printed and the corpus is returned without any source code
   *     added and without being segmented
   * @throws Exception declared for failures propagated from the helper calls made here (which of
   *     them actually throw is not visible from this method)
   */
  public static SourceCodeCorpus extractCodeData() throws Exception {
    String srcDirPath = Config.getInstance().getDatasetDir();
    String fileType = Config.getInstance().getFileType();
    int segmentationLength = Config.getInstance().getSegmentationLength();
    SourceCodeCorpus corpus = new SourceCodeCorpus(segmentationLength, fileType);

    File srcDir = new File(srcDirPath);
    if (!srcDir.isDirectory()) {
      System.out.println("The input directory path is invalid");
      return corpus;
    }

    ArrayList<String> fileList = new ArrayList<String>();
    detectAllFiles(srcDirPath, fileType, fileList);
    for (String oneFilePath : fileList) {
      SourceCode oneCodeFile = new SourceCode();
      FileParser parser = new FileParser(oneFilePath);

      // Full class name = package name + "." + file name, or just the file name when the file
      // declares no package.
      String fileName = new File(oneFilePath).getName();
      String packageName = parser.getPackageName();
      String fullClassName =
          packageName.trim().isEmpty() ? fileName : packageName + "." + fileName;
      oneCodeFile.setFullClassName(fullClassName);

      // Build the content with a StringBuilder: repeated String concatenation in a loop copies
      // the whole accumulated text on every term.
      StringBuilder fileContent = new StringBuilder();
      for (String term : parser.getContent()) {
        String stemmedTerm = Stem.stem(term.toLowerCase());
        // NOTE(review): the keyword/stop-word filter is applied to the raw term, while the
        // lower-cased, stemmed form is what gets stored -- confirm this is intended.
        if (!(Stopword.isKeyword(term) || Stopword.isEnglishStopword(term))) {
          fileContent.append(stemmedTerm).append(' ');
        }
      }

      // Append the stemmed class and method names to the end of the content (not filtered).
      for (String term : parser.getClassNameAndMethodName()) {
        fileContent.append(Stem.stem(term.toLowerCase())).append(' ');
      }
      oneCodeFile.setContent(fileContent.toString());

      // Register class names. "".split(" ") yields a single empty string, and leading or
      // doubled spaces yield empty tokens too, so empty tokens are skipped rather than being
      // recorded as names.
      for (String oneClassName : parser.getAllClassName().split(" ")) {
        if (!oneClassName.isEmpty()) {
          oneCodeFile.addClassName(oneClassName);
        }
      }

      // Register method names, skipping empty tokens for the same reason.
      for (String oneMethodName : parser.getAllMethodName().split(" ")) {
        if (!oneMethodName.isEmpty()) {
          oneCodeFile.addMethodName(oneMethodName);
        }
      }

      corpus.addSourceCode(oneCodeFile);
    }

    // Record the file count before segmentation is applied to the corpus.
    Config.getInstance().setFileCount(corpus.getSourceCodeList().size());

    // Segment each source code file.
    corpus.segment();

    return corpus;
  }