/** * Extract the code data from the project * * @param srcDirPath * @param fileType * @return * @throws Exception */ public static SourceCodeCorpus extractCodeData() throws Exception { String srcDirPath = Config.getInstance().getDatasetDir(); String fileType = Config.getInstance().getFileType(); int segmentationLength = Config.getInstance().getSegmentationLength(); SourceCodeCorpus corpus = new SourceCodeCorpus(segmentationLength, fileType); File srcDir = new File(srcDirPath); if (!srcDir.isDirectory()) { System.out.println("The input directory path is invalid"); return corpus; } ArrayList<String> fileList = new ArrayList<String>(); detectAllFiles(srcDirPath, fileType, fileList); for (String oneFilePath : fileList) { SourceCode oneCodeFile = new SourceCode(); FileParser parser = new FileParser(oneFilePath); // set the full class name(package+fileName) String packageName = parser.getPackageName(); String fullClassName = new String(); if (packageName.trim().equals("")) { // no package, the name only fullClassName = new File(oneFilePath).getName(); } else { // full class name = package name + file name fullClassName = packageName + "." + new File(oneFilePath).getName(); } oneCodeFile.setFullClassName(fullClassName); // set the file content String[] terms = parser.getContent(); String fileContent = new String(); for (String term : terms) { String stemmedTerm = Stem.stem(term.toLowerCase()); // term = term.toLowerCase(); if (!(Stopword.isKeyword(term) || Stopword.isEnglishStopword(term))) { fileContent += stemmedTerm + " "; } } // append the class and method names in a file to the end of a file String[] classAndMethodNameString = parser.getClassNameAndMethodName(); for (String term : classAndMethodNameString) { fileContent += Stem.stem(term.toLowerCase()) + " "; } oneCodeFile.setContent(fileContent); // set the class names in the file String classNamesString = parser.getAllClassName(); String[] classNameArray = classNamesString.split(" "); for (String oneClassName : classNameArray) { oneCodeFile.addClassName(oneClassName); } // set the method names in the file String methodNamesString = parser.getAllMethodName(); String[] methodNameArray = methodNamesString.split(" "); for (String oneMethodName : methodNameArray) { oneCodeFile.addMethodName(oneMethodName); } // add the source code file information to the corpus corpus.addSourceCode(oneCodeFile); } // set the original code file count Config.getInstance().setFileCount(corpus.getSourceCodeList().size()); // segment each source code file corpus.segment(); return corpus; }