/** * Import the source code corpus from the given directory * * @param srcDirPath * @return * @throws Exception */ public static SourceCodeCorpus importCodeData() throws Exception { String srcDirPath = Config.getInstance().getCodeCorpusDir(); SourceCodeCorpus corpus = new SourceCodeCorpus(); File srcDir = new File(srcDirPath); if (!srcDir.isDirectory()) { System.out.println("The input directory path is invalid!"); return corpus; } // read the basic information from the "/basicInfo" String basicInfoFilePath = Paths.get(srcDirPath, "basicInfo").toString(); if (!new File(basicInfoFilePath).isFile()) { System.out.println("The file recording the basic information is missing!"); } else { BufferedReader basicInfoReader = new BufferedReader(new FileReader(basicInfoFilePath)); // read the first line: fileType=\tfileType String[] strs = basicInfoReader.readLine().split("\t"); String fileType = strs[1].trim(); // read the second line: segmentationLength=\tsegmentationLength strs = basicInfoReader.readLine().split("\t"); int segmentationLength = Integer.parseInt(strs[1].trim()); corpus.setFileType(fileType); corpus.setSegmentationLength(segmentationLength); basicInfoReader.close(); } // read the code content information from the "/codeContentCorpus" String codeContentDirPath = Paths.get(srcDirPath, "codeContentCorpus").toString(); File codeContentDir = new File(codeContentDirPath); if (!codeContentDir.isDirectory()) { System.out.println("The directory for original code contents is missing!"); } else { for (File oneFile : codeContentDir.listFiles()) { if (oneFile.isFile()) { String fullClassName = oneFile.getName(); BufferedReader reader = new BufferedReader(new FileReader(oneFile)); String codeContent = reader.readLine(); // If exists the full class name, set the code content; // else create a new SourceCode object and set the code consent boolean isExist = false; for (SourceCode sourceCode : corpus.getSourceCodeList()) { if (sourceCode.getFullClassName().equals(fullClassName.trim())) { sourceCode.setContent(codeContent); isExist = true; break; } } if (!isExist) { SourceCode newSourceCode = new SourceCode(); newSourceCode.setFullClassName(fullClassName); newSourceCode.setContent(codeContent); corpus.addSourceCode(newSourceCode); } reader.close(); } } } // read the code segment information from code segment corpus String codeSegmentDirPath = Paths.get(srcDirPath, "codeSegmentCorpus").toString(); File codeSegmentDir = new File(codeSegmentDirPath); if (!codeSegmentDir.isDirectory()) { System.out.println("The directory for code segments is missing!"); } else { for (File oneFile : codeContentDir.listFiles()) { if (oneFile.isFile()) { // fullClassName+@+"segment index".java String oneSegmentName = oneFile.getName(); BufferedReader reader = new BufferedReader(new FileReader(oneFile)); String codeSegment = reader.readLine(); String[] strs = oneSegmentName.split("@"); String fullClassName = strs[0].trim(); // If exists the full class name, add the segment to the list; // else create a new SourceCode object and add the segment to the list boolean isExist = false; for (SourceCode sourceCode : corpus.getSourceCodeList()) { if (sourceCode.getFullClassName().equals(fullClassName.trim())) { if (!sourceCode.getCodeSegmentList().contains(codeSegment)) { sourceCode.addCodeSegment(codeSegment); } isExist = true; break; } } if (!isExist) { SourceCode newSourceCode = new SourceCode(); newSourceCode.setFullClassName(fullClassName); newSourceCode.addCodeSegment(codeSegment); corpus.addSourceCode(newSourceCode); } reader.close(); } } } // read the class names information from the "/classNameCorpus" String classNameDirPath = Paths.get(srcDirPath, "classNameCorpus").toString(); File classNameDir = new File(classNameDirPath); if (!classNameDir.isDirectory()) { System.out.println("The directory for class names is missing!"); } else { for (File oneFile : classNameDir.listFiles()) { if (oneFile.isFile()) { String fullClassName = oneFile.getName(); BufferedReader reader = new BufferedReader(new FileReader(oneFile)); String classNamesString = reader.readLine(); // If exists the full class name, add the class names to the list; // else create a new SourceCode object and add all the class names to the list boolean isExist = false; for (SourceCode sourceCode : corpus.getSourceCodeList()) { if (sourceCode.getFullClassName().equals(fullClassName.trim())) { if (classNamesString == null) { isExist = true; break; } for (String oneClassName : classNamesString.split(" ")) { if (!sourceCode.getClassNameList().contains(oneClassName.trim())) { sourceCode.addClassName(oneClassName.trim()); } } isExist = true; break; } } if (!isExist) { SourceCode newSourceCode = new SourceCode(); newSourceCode.setFullClassName(fullClassName); if (classNamesString != null) { for (String oneClassName : classNamesString.split(" ")) { newSourceCode.addClassName(oneClassName.trim()); } } corpus.addSourceCode(newSourceCode); } reader.close(); } } } // read the method names information from the "/methodNameCorpus" String methodNameDirPath = Paths.get(srcDirPath, "methodNameCorpus").toString(); File methodNameDir = new File(methodNameDirPath); if (!methodNameDir.isDirectory()) { System.out.println("The directory for method names is missing!"); } else { for (File oneFile : methodNameDir.listFiles()) { if (oneFile.isFile()) { String fullClassName = oneFile.getName(); BufferedReader reader = new BufferedReader(new FileReader(oneFile)); String methodNamesString = reader.readLine(); // If exists the full class name, add the class names to the list; // else create a new SourceCode object and add all the method names to the list boolean isExist = false; for (SourceCode sourceCode : corpus.getSourceCodeList()) { if (sourceCode.getFullClassName().equals(fullClassName.trim())) { if (methodNamesString == null) { isExist = true; break; } for (String oneMethodName : methodNamesString.split(" ")) { if (!sourceCode.getMethodNameList().contains(oneMethodName.trim())) sourceCode.addMethodName(oneMethodName.trim()); } isExist = true; break; } } if (!isExist) { SourceCode newSourceCode = new SourceCode(); newSourceCode.setFullClassName(fullClassName); if (methodNamesString != null) { for (String oneMethodName : methodNamesString.split(" ")) { newSourceCode.addMethodName(oneMethodName.trim()); } } corpus.addSourceCode(newSourceCode); } reader.close(); } } } return corpus; }