public static void main(String[] args) throws IOException { String input = FileUtility.textFileToString("input/20.txt"); int minimum = Integer.parseInt(input); // Part one FileUtility.stringToTextFile(Integer.toString(firstHouse(minimum)), "output/20A.txt"); // Part two FileUtility.stringToTextFile(Integer.toString(firstHouseLimited(minimum)), "output/20B.txt"); }
/**
 * Looks up the content registered under {@code name}.
 *
 * <p>The lookup key is the lower-cased name. When the entry already carries its bytes they are
 * returned directly; otherwise the bytes are obtained from {@code FileUtility.readFile} using
 * the entry's path.
 *
 * @param name the file name to look up
 * @return the file content, or {@code null} when no entry exists for the name
 */
public byte[] getFile(String name) {
  String key = name.toLowerCase();
  FileData entry = files.get(key);

  if (entry == null) {
    return null;
  }

  return entry.data != null ? entry.data : FileUtility.readFile(entry.path, debug);
}
/** * Creates ZIP file with the files inside directory <code>contentsDir</code> . * * @param newZipFile the ZIP file to create * @param contentsDir the directory containing the files to compress. * @return the created ZIP file. * @throws IOException if something goes wrong with creation of the ZIP file or the reading of the * files to compress. */ public static File createZIPFile(File newZipFile, File contentsDir) throws IOException { List<File> contentAbsoluteFiles = FileUtility.listFilesRecursively(contentsDir); JarOutputStream jarOutputStream = new JarOutputStream(new BufferedOutputStream(new FileOutputStream(newZipFile))); // ZipOutputStream zipOutputStream = new ZipOutputStream( // new BufferedOutputStream(new FileOutputStream(newZipFile))); // Create a buffer for reading the files byte[] buffer = new byte[BUFFER_SIZE]; Iterator<File> iterator = contentAbsoluteFiles.iterator(); while (iterator.hasNext()) { File absoluteFile = iterator.next(); String relativeFile = getFilePathRelativeTo(absoluteFile, contentsDir); BufferedInputStream in = new BufferedInputStream(new FileInputStream(absoluteFile)); // Add ZIP entry to output stream. // zipOutputStream.putNextEntry(new // ZipEntry(relativeFile.toString())); jarOutputStream.putNextEntry(new JarEntry(relativeFile)); logger.trace("Adding " + relativeFile); int length; while ((length = in.read(buffer)) > 0) { // zipOutputStream.write(buffer, 0, length); jarOutputStream.write(buffer, 0, length); } // Complete the entry // zipOutputStream.closeEntry(); jarOutputStream.closeEntry(); in.close(); } // Complete the ZIP file // zipOutputStream.close(); jarOutputStream.close(); return newZipFile; }
/**
 * Verifies that renaming a {@code FileUtility} succeeds and that the destination exists
 * afterwards. The source file is created first when it is not already present. Any exception
 * raised along the way is reported as a test failure.
 */
public void testFileRename() {
  try {
    FileUtility original = new FileUtility("C:\\Data\\Images\\renameToTestFile");
    if (!original.exists()) {
      original.createNewFile();
    }

    String target = "C:\\Data\\Videos\\notpresent\\renamed";
    boolean renamed = original.renameTo(target);
    assertTrue("Rename failed", renamed);

    boolean targetExists = new FileUtility(target).exists();
    assertTrue("New File not created", targetExists);
  } catch (Exception e) {
    assertTrue("Test Failed. Unexpected Exception: " + e, false);
  }
}
/**
 * Checks that {@code FileUtility.getFileCount} reports 6 for the {@code /dir1} resource
 * directory resolved relative to this test class.
 */
@Test
public void testGetFileCount() {
  String dirPath = this.getClass().getResource("/dir1").getFile();
  File dir = new File(dirPath);

  int fileCount = FileUtility.getFileCount(dir);

  assertThat(fileCount, CoreMatchers.is(6));
}
/** * @param isSimplifyEntity * @param parsedFileName * @param aimedDataFileName * @param outputFile * @param medtType * @param isRemoveOverlappingEntities * @throws Exception */ public void generateTPWFvectorOutput( ArrayList<Sentence> listSentence, String outputFile, int medtType, String entPairFileName, ClauseAnalyser.eDataFilterOption relToBeConsidered, String inClauseBoundFileName) throws Exception { boolean useWalkFeatures = true, useRegExPatterns = false, useDepPatterns = true, useTriggers = true, useNegativeCues = true, discardDepRelUsingProbabilityInReducedGraph = false, triggersFromWholeRGinsteadOfLCP = true; if (TextUtility.isEmptyString(TKOutputGenerator.triggerFileName)) useTriggers = false; String str = ""; if (discardDepRelUsingProbabilityInReducedGraph) str += "discardDepRelUsingProbabilityInReducedGraph "; if (useWalkFeatures) str += "WalkFeatures "; if (useRegExPatterns) str += "RegExPatterns "; if (useDepPatterns) str += "DepPatterns "; if (useTriggers) str += "Triggers "; if (triggersFromWholeRGinsteadOfLCP) str += "TriggersFromWholeRGinsteadOfLCP "; if (useNegativeCues) str += "NegativeCues "; System.out.println(str); PatternsDepRelFromGraph clsWVG = new PatternsDepRelFromGraph(); if (PatternsDepRelFromGraph.listOfAllPatterns.size() == 0) { clsWVG.collectAllDepRelPatternsFromTrainData( listSentence, discardDepRelUsingProbabilityInReducedGraph); } int[][] arrClauseBoundOfSen = new TKOutputPST().getClauseBoundOfAllSen(inClauseBoundFileName); FileUtility.writeInFile(outputFile, "", false); // read trigger word list Triggers.readTriggersAndNegativeWord(); for (int s = 0; s < listSentence.size(); s++) { Sentence objCurSen = listSentence.get(s); int senIndex = TKOutputPST.listAllSenIDs.indexOf(objCurSen.senID); // only those sentences are taken into account which has more than one entity annotations if (objCurSen.listOfEntities.size() > 1) { generateVectorForSen( objCurSen, medtType, entPairFileName, discardDepRelUsingProbabilityInReducedGraph, 
useWalkFeatures, useRegExPatterns, useDepPatterns, useTriggers, triggersFromWholeRGinsteadOfLCP, useNegativeCues, relToBeConsidered, senIndex > 0 ? arrClauseBoundOfSen[senIndex] : null); } } FileUtility.writeInFile(outputFile, GenericFeatVect.getInstanceVectors(), false); }
/**
 * Builds the feature index/count lists for one candidate entity pair of a sentence and
 * registers them in {@code GenericFeatVect}.
 *
 * <p>A minimal subtree connecting the head words of the two entities is searched on a clone of
 * the sentence's dependency tree. When none is found, the pair and the sentence text are
 * written to {@code GenericFeatVect.vectOutFile} and the method returns {@code false} without
 * registering an instance. Otherwise the enabled feature families are added in a fixed order
 * (walk features, regular-expression patterns, dependency patterns, trigger words, negative
 * cues), followed by the features from {@code ExtAceFeatVect.getZhouEtAl2005FeatVal} and
 * {@code extractNonTargetEntityFeatures}, which are always added. The lists are then sorted via
 * {@code GenericFeatVect.sortFeatValByIndx} and appended to
 * {@code GenericFeatVect.listOfAllInstancesWithFeat} and
 * {@code GenericFeatVect.listOfAllInstancesWithFeatCount}.
 *
 * @param objRel the relation handed to {@code extractDepRelsInReducedGraph}
 * @param objCurSen the sentence containing both entities
 * @param medtType passed to {@code findMinimalSubTreeWithEntities}
 * @param discardDepRelUsingProbabilityInReducedGraph passed to
 *     {@code extractDepRelsInReducedGraph}
 * @param useWalkFeatures whether n-gram and input-graph features are created
 * @param useRegExPatterns whether {@code matchPPIpatternOnSentence} is applied (both entity
 *     orders)
 * @param useDepPatterns whether {@code extractDepPatternFeatures} is applied
 * @param useTriggers whether trigger-word features are added
 * @param triggersFromWholeRGinsteadOfLCP when triggers are used: take words from element 0 of
 *     the reduced-graph result (all nodes) instead of element 1 (least common parents)
 * @param useNegativeCues whether negative-word features are added
 * @param e1 first entity of the pair
 * @param e2 second entity of the pair
 * @return {@code true} when an instance was registered, {@code false} when no minimal subtree
 *     exists for the pair
 * @throws IOException declared for the file-writing helper calls
 */
public boolean setInpVectFromDepGraphOfPairsAndTrigger(
    Relation objRel,
    Sentence objCurSen,
    int medtType,
    boolean discardDepRelUsingProbabilityInReducedGraph,
    boolean useWalkFeatures,
    boolean useRegExPatterns,
    boolean useDepPatterns,
    boolean useTriggers,
    boolean triggersFromWholeRGinsteadOfLCP,
    boolean useNegativeCues,
    Entity e1,
    Entity e2)
    throws IOException {

  // Parallel lists: feature indices and their counts for the instance being built.
  ArrayList<Integer> listFeatIndsOfCurInp = new ArrayList<Integer>(),
      listFeatCountOfCurInp = new ArrayList<Integer>();

  DepTreeNode
      headOfEnt1 =
          objCurSen.depTree.getHeadWordFromWordBoundaries(
              e1.getAllWordIndexes(), true, objCurSen),
      headOfEnt2 =
          objCurSen.depTree.getHeadWordFromWordBoundaries(
              e2.getAllWordIndexes(), true, objCurSen);

  DepTreeNode dn = null;

  // System.out.println(e1.id + " " + e2.id);

  // All nodes in the shortest path connecting the target pairs must be retained
  // All nodes satisfying the 3 rules of MEDT kernel must be retained
  // The search runs on a clone of the tree, not on objCurSen.depTree itself.
  dn =
      objCurSen
          .depTree
          .clone()
          .findMinimalSubTreeWithEntities(false, headOfEnt1, medtType, headOfEnt2);

  /*
   * If there is no minimal subtree/path between target entities then we do not consider to generate
   * instance for training/testing.
   */
  if (dn == null) {
    // Record the skipped pair and its sentence in the vector output file.
    FileUtility.writeInFile(
        GenericFeatVect.vectOutFile,
        e1.id + " " + e1.name + "\t" + e2.id + " " + e2.name + "\n\n",
        true);
    FileUtility.writeInFile(GenericFeatVect.vectOutFile, objCurSen.text + "\n\n", true);
    return false;
  }

  GenericFeatVect.listOfAllInstances.add(
      e1.id + " " + e1.name + "\t" + e2.id + " " + e2.name + "\n\n" + objCurSen.text + "\n\n");

  // Construct feature set using e-walks and v-walks
  // (dn is always non-null here because of the early return above.)
  if (dn != null && useWalkFeatures) {

    createNgramFeatures(objCurSen.depTree, 1, e1, listFeatIndsOfCurInp, listFeatCountOfCurInp);
    createNgramFeatures(objCurSen.depTree, 1, e2, listFeatIndsOfCurInp, listFeatCountOfCurInp);

    // System.out.println(objCurSen.senID + " " + dn.wordIndex);
    // NOTE(review): this is invoked on objCurSen.depTree itself, not on the clone used for
    // the subtree search - presumably it modifies the sentence's tree; confirm.
    objCurSen.depTree.replaceEntitiesWithDummies(
        dn, e1.boundaries, e2.boundaries, new ArrayList<Integer>());

    createFeaturesFromInputGraph(
        objCurSen.depTree,
        dn,
        "",
        listFeatIndsOfCurInp,
        listFeatCountOfCurInp,
        new ArrayList<Integer>(),
        e1.boundaries,
        e2.boundaries);
  }

  if (useRegExPatterns) {
    // Patterns are matched once per entity order.
    matchPPIpatternOnSentence(
        objCurSen.text, e1.name, e2.name, listFeatIndsOfCurInp, listFeatCountOfCurInp);
    matchPPIpatternOnSentence(
        objCurSen.text, e2.name, e1.name, listFeatIndsOfCurInp, listFeatCountOfCurInp);
  }

  // The reduced graph is extracted unconditionally: its dependency relations feed the
  // dependency-pattern features and its node lists feed the trigger / negative-cue features.
  ArrayList<String> listOfDepRelsInReducedGraph = new ArrayList<String>();
  ArrayList<ArrayList<Integer>> listNodesAndLCP =
      new PatternsDepRelFromGraph()
          .extractDepRelsInReducedGraph(
              objRel,
              objCurSen,
              listOfDepRelsInReducedGraph,
              discardDepRelUsingProbabilityInReducedGraph,
              false);

  if (useDepPatterns)
    extractDepPatternFeatures(
        listOfDepRelsInReducedGraph, listFeatIndsOfCurInp, listFeatCountOfCurInp);

  // NOTE: listNodesAndLCP has two elements - (0) all the nodes in Reduced graph, and (1) the
  // least common parents (LCPs) in Reduced graph.
  // A null result is replaced by an empty list so the size() checks below skip both sections.
  if (listNodesAndLCP == null) listNodesAndLCP = new ArrayList<ArrayList<Integer>>();

  if (useTriggers && listNodesAndLCP.size() > 0) {
    if (triggersFromWholeRGinsteadOfLCP)
      for (int i = 0; i < listNodesAndLCP.get(0).size(); i++)
        addTriggerWordFeatures(
            objCurSen.depGraph.allNodesByWordIndex[listNodesAndLCP.get(0).get(i)].word,
            listFeatIndsOfCurInp,
            listFeatCountOfCurInp);
    else
      for (int i = 0; i < listNodesAndLCP.get(1).size(); i++)
        addTriggerWordFeatures(
            objCurSen.depGraph.allNodesByWordIndex[listNodesAndLCP.get(1).get(i)].word,
            listFeatIndsOfCurInp,
            listFeatCountOfCurInp);
  }

  // *
  if (useNegativeCues && listNodesAndLCP.size() > 0) {
    for (int i = 0; i < listNodesAndLCP.get(0).size(); i++) {
      // System.out.println(objCurSen.senID + " " + dn.wordIndex);
      // addNegativeWordFeatures( graph.allNodesByWordIndex[listNodesAndLCP.get(0).get(i)].word,
      // listFeatIndsOfCurInp, listFeatCountOfCurInp);

      if (Triggers.listOfNegativeWords.contains(
          objCurSen.depGraph.allNodesByWordIndex[listNodesAndLCP.get(0).get(i)].lemma)) {

        // add a feature indicating that there is a negative word in the reduced graph
        String[] feature =
            new String[] {
              "HasNegWord@$"
                  + objCurSen.depGraph.allNodesByWordIndex[listNodesAndLCP.get(0).get(i)].lemma,
            };

        GenericFeatVect.addNewFeatureInList(
            feature, 1, listFeatIndsOfCurInp, listFeatCountOfCurInp, 1);

        // Second feature: the lemma at the array slot right after the negative word, or the
        // slot after that when the immediate one has lemma "be" or POS "IN".
        // NOTE(review): the +1 / +2 indexes are not bounds-checked; a negative word in the
        // last slot(s) of allNodesByWordIndex would throw
        // ArrayIndexOutOfBoundsException - confirm how the array is sized.
        if (objCurSen.depGraph.allNodesByWordIndex[listNodesAndLCP.get(0).get(i) + 1].lemma
                .equalsIgnoreCase("be")
            || objCurSen.depGraph.allNodesByWordIndex[listNodesAndLCP.get(0).get(i) + 1].pos
                .equalsIgnoreCase("IN")) {
          feature =
              new String[] {
                "WordNextToNegCue@$"
                    + objCurSen
                        .depGraph
                        .allNodesByWordIndex[listNodesAndLCP.get(0).get(i) + 2]
                        .lemma,
              };
        } else {
          feature =
              new String[] {
                "WordNextToNegCue@$"
                    + objCurSen
                        .depGraph
                        .allNodesByWordIndex[listNodesAndLCP.get(0).get(i) + 1]
                        .lemma,
              };
        }

        GenericFeatVect.addNewFeatureInList(
            feature, 1, listFeatIndsOfCurInp, listFeatCountOfCurInp, 1);

        // *
        // extract negation scope features
        // (only when the lemma is exactly "no" or "not")
        if (objCurSen.depGraph.allNodesByWordIndex[listNodesAndLCP.get(0).get(i)].lemma.matches(
            "(no|not)"))
          extractNegationScopeFeatures(
              objCurSen,
              listFeatIndsOfCurInp,
              listFeatCountOfCurInp,
              e1,
              e2,
              listNodesAndLCP.get(0).get(i),
              listNodesAndLCP.get(0));
        // */
      }
    }
  }
  // */

  // add Zhou et al. 2005 features
  new ExtAceFeatVect()
      .getZhouEtAl2005FeatVal(objCurSen, e1, e2, listFeatIndsOfCurInp, listFeatCountOfCurInp);

  extractNonTargetEntityFeatures(objCurSen, listFeatIndsOfCurInp, listFeatCountOfCurInp, e1, e2);

  // Sort, then register the finished instance in the shared accumulators.
  GenericFeatVect.sortFeatValByIndx(listFeatIndsOfCurInp, listFeatCountOfCurInp);

  GenericFeatVect.listOfAllInstancesWithFeat.add(listFeatIndsOfCurInp);
  GenericFeatVect.listOfAllInstancesWithFeatCount.add(listFeatCountOfCurInp);

  return true;
}
/** * @param tokenWithPos * @param senID * @param sentence * @param listEnt * @param listRel * @param listDependencies * @param medtType * @param entPairFileName * @return * @throws IOException */ private void generateVectorForSen( Sentence objCurSen, int medtType, String entPairFileName, boolean discardDepRelUsingProbabilityInReducedGraph, boolean useWalkFeatures, boolean useRegExPatterns, boolean useDepPatterns, boolean useTriggers, boolean triggersFromWholeRGinsteadOfLCP, boolean useNegativeCues, ClauseAnalyser.eDataFilterOption relToBeConsidered, int[] arrClauseBoundOfSen) throws IOException { // for each pair of entities, find minimal subtrees and output it with 1 or 0 // 1 represents there exists a relation between those entities for (int r = 0; r < objCurSen.listRels.size(); r++) { Entity e1 = objCurSen.getEntityById(objCurSen.listRels.get(r).arg1); Entity e2 = objCurSen.getEntityById(objCurSen.listRels.get(r).arg2); // checking relation type if (TKOutputPST.skipInstance(arrClauseBoundOfSen, relToBeConsidered, e1, e2, objCurSen, r)) continue; if (!objCurSen.listRels.get(r).isPositive) TKOutputPST.totalRelNeg++; else TKOutputPST.totalRelPos++; boolean isSet = setInpVectFromDepGraphOfPairsAndTrigger( objCurSen.listRels.get(r), objCurSen, medtType, discardDepRelUsingProbabilityInReducedGraph, useWalkFeatures, useRegExPatterns, useDepPatterns, useTriggers, triggersFromWholeRGinsteadOfLCP, useNegativeCues, e1, e2); if (isSet) GenericFeatVect.listOfAllInstancePolarity.add( objCurSen.listRels.get(r).isPositive ? 1 : -1); // */ if (!TextUtility.isEmptyString(entPairFileName)) { if (isSet) FileUtility.writeInFile(entPairFileName, e1.id + "\t" + e2.id + "\n", true); /* //if ( !str.isEmpty() ) if ( !isSet ) FileUtility.writeInFile(entPairFileName, objCurSen.listRels.get(r).printString() + "\tFOUND\n", true); else { FileUtility.writeInFile(entPairFileName, objCurSen.listRels.get(r).printString() + "\tNOT_FOUND\n", true); } */ } } }
/**
 * Remove the words that are in the same NP of an entity. Note: This particular pre-processing
 * didn't improve results.
 *
 * <p>The input file is read completely, emptied, and then rewritten record by record. Records
 * are consumed in pairs: the list at index {@code s} holds the sentence id followed by one
 * line per token, and the first line of the list at {@code s + 1} is copied through unchanged.
 * A token line immediately preceding an entity token is dropped when it is not itself tagged
 * {@code B-e}/{@code I-e} and shares the entity token's innermost phrase label, provided that
 * label contains {@code "NP-"}.
 *
 * @param bioRelExInpFile the input file; it is overwritten in place with the filtered content
 * @param psgParsedFileName file from which the phrase-structure parses of all sentences are
 *     read
 * @throws Exception if reading, parsing or writing fails
 */
public void removeOtherWordsInLeastNPofEnt(String bioRelExInpFile, String psgParsedFileName)
    throws Exception {

  ArrayList<ArrayList<String>> allSen = FileUtility.readAllMultiLineInputs(bioRelExInpFile);
  ArrayList<CFGParseOfSen> listCFGParseOfAllSen =
      CFGParseOfSen.readCFGParseForAllSen(psgParsedFileName);

  // Empty the input file; the filtered records are appended to it below.
  FileUtility.writeInFile(bioRelExInpFile, "", false);
  String psg = "", prevId = "";

  // Records come in pairs, hence the step of 2.
  for (int s = 0; s < allSen.size(); s = s + 2) {

    StringBuilder sb = new StringBuilder();

    // The parse is looked up again only when the sentence id changes.
    if (!prevId.equals(allSen.get(s).get(0)))
      psg = CFGParseOfSen.getBySenId(listCFGParseOfAllSen, allSen.get(s).get(0)).psgParse;
    prevId = allSen.get(s).get(0);

    // find the least phrasal category for each token
    // "))" is rewritten to ") ()" so that after splitting on "(" every closing bracket of a
    // phrase shows up as its own ")" element.
    String[] tmp = psg.replaceAll("\\)\\)", ") ()").replaceAll("\\s+", " ").trim().split("\\(");

    // Entries of length 1 are either an indexed phrase label ("<label>-<n>") or a ")" marker;
    // entries of length 2 are tokens whose second element is overwritten with the label of the
    // enclosing phrase.
    ArrayList<String[]> listTokPhraseCat = new ArrayList<String[]>();
    int phraseCatIndex = 0, rBrack = 0;

    // 1st element in tmp is empty
    for (int i = 1; i < tmp.length; i++) {
      String[] str = tmp[i].trim().split("\\s+");
      rBrack = 0;

      if (str.length > 1) {
        // Token entry: scan backwards for the nearest phrase label that is still open.
        // rBrack counts ")" markers seen so far; each one cancels the next label met.
        for (int k = listTokPhraseCat.size() - 1; k >= 0; k--) {
          if (listTokPhraseCat.get(k).length == 1
              && !listTokPhraseCat.get(k)[0].equals(")")
              && rBrack == 0) {
            // str[0] = str[1];
            str[1] = listTokPhraseCat.get(k)[0];
            listTokPhraseCat.add(str);
            break;
          } else if (listTokPhraseCat.get(k).length == 1
              && listTokPhraseCat.get(k)[0].equals(")")) rBrack++;
          else if (listTokPhraseCat.get(k).length == 1
              && !listTokPhraseCat.get(k)[0].equals(")")
              && rBrack != 0) rBrack--;
        }
      } else if (str.length == 1 && !str[0].equals(")")) {
        // Phrase label: suffix a running index so that distinct phrases with the same
        // category get distinct labels.
        str[0] = str[0] + "-" + phraseCatIndex;
        listTokPhraseCat.add(str);
        phraseCatIndex++;
      } else if (str.length == 1 && str[0].equals(")")) listTokPhraseCat.add(str);
    }

    // remove all elements other than words
    for (int i = listTokPhraseCat.size() - 1; i >= 0; i--) {
      if (listTokPhraseCat.get(i).length == 1) {
        listTokPhraseCat.remove(i);
      }
    }

    // remove the words which are in the least NP of a protein
    // the 1st item in the list is sentence id
    // search from the 2nd token = 3rd item in the list
    // Item i of allSen.get(s) corresponds to listTokPhraseCat.get(i - 1) because of the id
    // line. NOTE(review): assumes the token lines and the parse leaves are aligned
    // one-to-one, and that each token line has at least 4 whitespace-separated columns with
    // the entity tag in column index 3 - confirm against the file format.
    for (int i = 2; i < allSen.get(s).size(); i++) {
      tmp = allSen.get(s).get(i).split("\\s+");

      if (!tmp[3].equals("O") && listTokPhraseCat.get(i - 1)[1].contains("NP-")) {
        // checking previous token
        if (i > 1
            && !(allSen.get(s).get(i - 1).contains("\tB-e")
                || allSen.get(s).get(i - 1).contains("\tI-e"))
            && listTokPhraseCat.get(i - 1)[1].equals(listTokPhraseCat.get(i - 1 - 1)[1])
        // && !listTokPhraseCat.get(i-1-1)[0].equals("DT")
        ) {
          // Drop the previous token from both parallel lists and step back so the current
          // entity token is examined again against its new predecessor.
          listTokPhraseCat.remove(i - 1 - 1);
          allSen.get(s).remove(i - 1);
          i--;
        }
        /*/ checking next token
        else if ( i < allSen.get(s).size() && listTokPhraseCat.get(i-1)[1].equals(listTokPhraseCat.get(i)[1]) ) {
        	listTokPhraseCat.remove(i);
        	allSen.remove(s).get(i);
        	i--;
        }*/
      }
    }

    // re-writing the input
    for (int i = 0; i < allSen.get(s).size(); i++) {
      sb.append(allSen.get(s).get(i) + "\n");
    }

    sb.append("\n");
    // Only the first line of the companion record is copied through.
    sb.append(allSen.get(s + 1).get(0) + "\n");
    sb.append("\n");

    FileUtility.writeInFile(bioRelExInpFile, sb.toString(), true);
  }
}
public static void main(String args[]) { System.out.println("Solution to HomeWork2 -- Machine learning"); System.out.println("Implementation of ID3 Algorithm : "); System.out.println("=========================================="); // Parse arguments int L = Integer.parseInt(args[0]); int K = Integer.parseInt(args[1]); String trainingset = args[2]; String validationset = args[3]; String testset = args[4]; boolean toPrint = args[5].equalsIgnoreCase("yes"); // ------------------------------------------------------------READING CSV FILE // ------------------------------------------------------------------------------// FileUtility fu1 = new FileUtility(); parsedTrainingAttributes = fu1.parseCSVFile(trainingset); parsedValidationAttributes = fu1.parseCSVFile(validationset); parsedTestAttributes = fu1.parseCSVFile(testset); System.out.println("Read Successfull.........."); System.out.println(); // ------------------------------------------------------------PRINTING CSV FILE // ------------------------------------------------------------------------------// // Test print fu1.printSets(parsedTrainingAttributes); // ------------------------------------ Build tree using information gain and print its accuracy // over test set ------------------------------------------------// DecisionTree dtree = new DecisionTree(); dtree.buildTree(parsedTrainingAttributes, new Node()); // ------------------------------------------------------------PRINTING-------------------------------------------------------------------------------------------------------// System.out.println( "----------------------------------------------------------------------------------"); AccuracyCalculator ac1 = new AccuracyCalculator(); // Printing the decision tree and accuracy before pruning if (toPrint) { System.out.println("Decision tree before pruning: "); System.out.println(""); System.out.println(dtree); System.out.println( "Accuracy of decision tree before pruning : " + 
ac1.getAccuracy(parsedTestAttributes, dtree.treeRootNode) + "%"); System.out.println("Total Matched classes : " + (int) ac1.matchCount); System.out.println( "----------------------------------------------------------------------------------"); System.out.println(""); // Perform Post pruning the tree try { dtree.performPostPruning(L, K, parsedValidationAttributes); } catch (CloneNotSupportedException e) { e.printStackTrace(); } System.out.println("Decision tree after pruning: "); System.out.println(""); System.out.println(dtree); System.out.println( "Accuracy of decision tree after pruning: " + ac1.getAccuracy(parsedTestAttributes, dtree.treeRootNode) + "%"); System.out.println("Total Matched classes : " + (int) ac1.matchCount); System.out.println( "----------------------------------------------------------------------------------"); } else { System.out.println( "Accuracy of decision tree before pruning : " + ac1.getAccuracy(parsedTestAttributes, dtree.treeRootNode) + "%"); System.out.println("Total Matched classes : " + (int) ac1.matchCount); System.out.println(""); // Perform Post pruning the tree try { dtree.performPostPruning(L, K, parsedValidationAttributes); } catch (CloneNotSupportedException e) { e.printStackTrace(); } System.out.println( "Accuracy of decision tree after pruning: " + ac1.getAccuracy(parsedTestAttributes, dtree.treeRootNode) + "%"); System.out.println("Total Matched classes : " + (int) ac1.matchCount); System.out.println( "----------------------------------------------------------------------------------"); } }