Esempio n. 1
0
  public static void main(String[] args) throws IOException {
    // The input file contains a single integer: the minimum threshold for the search.
    String rawInput = FileUtility.textFileToString("input/20.txt");
    int minimum = Integer.parseInt(rawInput);

    // Part one: write the result of the unrestricted search.
    String answerA = Integer.toString(firstHouse(minimum));
    FileUtility.stringToTextFile(answerA, "output/20A.txt");

    // Part two: write the result of the limited variant of the search.
    String answerB = Integer.toString(firstHouseLimited(minimum));
    FileUtility.stringToTextFile(answerB, "output/20B.txt");
  }
Esempio n. 2
0
  /**
   * Returns the bytes of the named file, or {@code null} when the name is unknown.
   *
   * <p>Lookup is case-insensitive (the key is lower-cased before the map access —
   * NOTE(review): uses the default locale; confirm keys are stored the same way).
   * An in-memory copy is preferred; otherwise the file is read from its path.
   */
  public byte[] getFile(String name) {
    FileData entry = files.get(name.toLowerCase());
    if (entry == null) {
      return null;
    }
    // Serve the cached bytes when present, else fall back to reading from disk.
    return entry.data != null ? entry.data : FileUtility.readFile(entry.path, debug);
  }
Esempio n. 3
0
  /**
   * Creates ZIP file with the files inside directory <code>contentsDir</code>.
   *
   * @param newZipFile the ZIP file to create
   * @param contentsDir the directory containing the files to compress.
   * @return the created ZIP file.
   * @throws IOException if something goes wrong with creation of the ZIP file or the reading of the
   *     files to compress.
   */
  public static File createZIPFile(File newZipFile, File contentsDir) throws IOException {

    List<File> contentAbsoluteFiles = FileUtility.listFilesRecursively(contentsDir);

    // try-with-resources guarantees the archive stream is closed (and the entry
    // streams below are closed) even if reading or writing an entry fails —
    // the previous version leaked both streams on any exception.
    try (JarOutputStream jarOutputStream =
        new JarOutputStream(new BufferedOutputStream(new FileOutputStream(newZipFile)))) {

      // Reusable buffer for copying file contents into the archive.
      byte[] buffer = new byte[BUFFER_SIZE];

      for (File absoluteFile : contentAbsoluteFiles) {
        // Entry names are relative to the content directory.
        String relativeFile = getFilePathRelativeTo(absoluteFile, contentsDir);

        logger.trace("Adding " + relativeFile);

        // Add ZIP entry to output stream, then copy the file's bytes into it.
        jarOutputStream.putNextEntry(new JarEntry(relativeFile));

        try (BufferedInputStream in =
            new BufferedInputStream(new FileInputStream(absoluteFile))) {
          int length;
          while ((length = in.read(buffer)) > 0) {
            jarOutputStream.write(buffer, 0, length);
          }
        }

        // Complete the entry
        jarOutputStream.closeEntry();
      }
    }

    return newZipFile;
  }
  /** Verifies that renaming a file to a destination path produces an existing file. */
  public void testFileRename() {
    try {
      // Make sure the source file exists before attempting the rename.
      String source = "C:\\Data\\Images\\renameToTestFile";
      FileUtility src = new FileUtility(source);
      if (!src.exists()) {
        src.createNewFile();
      }

      // Destination path lives under a directory chain that may not exist yet.
      String dest = "C:\\Data\\Videos\\notpresent\\renamed";
      assertTrue("Rename failed", src.renameTo(dest));

      FileUtility renamed = new FileUtility(dest);
      assertTrue("New File not created", renamed.exists());
    } catch (Exception e) {
      assertTrue("Test Failed. Unexpected Exception: " + e, false);
    }
  }
 /** Counts files under the test-resource directory {@code /dir1} and expects 6. */
 @Test
 public void testGetFileCount() {
   File dir = new File(this.getClass().getResource("/dir1").getFile());
   int fileCount = FileUtility.getFileCount(dir);
   assertThat(fileCount, CoreMatchers.is(6));
 }
Esempio n. 6
0
  /**
   * Generates the feature-vector ("TPWF") output for every sentence that carries more than one
   * entity annotation, and writes the collected instance vectors to {@code outputFile}.
   *
   * <p>NOTE(review): the previous Javadoc listed parameters that do not exist in this method's
   * signature (isSimplifyEntity, parsedFileName, aimedDataFileName, outputFile-only, ...); it has
   * been rewritten to match the actual parameter list.
   *
   * @param listSentence sentences to process
   * @param outputFile file the instance vectors are written to; truncated before processing
   * @param medtType MEDT kernel variant, forwarded to {@code generateVectorForSen}
   * @param entPairFileName file where the per-sentence generator logs entity-pair ids (may be
   *     empty, in which case no pair log is written)
   * @param relToBeConsidered filter option restricting which relations become instances
   * @param inClauseBoundFileName file holding per-sentence clause boundaries
   * @throws Exception if reading the auxiliary files or generating a vector fails
   */
  public void generateTPWFvectorOutput(
      ArrayList<Sentence> listSentence,
      String outputFile,
      int medtType,
      String entPairFileName,
      ClauseAnalyser.eDataFilterOption relToBeConsidered,
      String inClauseBoundFileName)
      throws Exception {

    // Hard-coded switches selecting which feature families are generated downstream.
    boolean useWalkFeatures = true,
        useRegExPatterns = false,
        useDepPatterns = true,
        useTriggers = true,
        useNegativeCues = true,
        discardDepRelUsingProbabilityInReducedGraph = false,
        triggersFromWholeRGinsteadOfLCP = true;

    // Trigger features require a configured trigger lexicon file.
    if (TextUtility.isEmptyString(TKOutputGenerator.triggerFileName)) useTriggers = false;

    // Echo the enabled feature families to stdout.
    String str = "";
    if (discardDepRelUsingProbabilityInReducedGraph)
      str += "discardDepRelUsingProbabilityInReducedGraph ";
    if (useWalkFeatures) str += "WalkFeatures ";
    if (useRegExPatterns) str += "RegExPatterns ";
    if (useDepPatterns) str += "DepPatterns ";
    if (useTriggers) str += "Triggers ";
    if (triggersFromWholeRGinsteadOfLCP) str += "TriggersFromWholeRGinsteadOfLCP ";
    if (useNegativeCues) str += "NegativeCues ";

    System.out.println(str);

    // Collect dependency-relation patterns from the training data once; the pattern
    // list is a static cache on PatternsDepRelFromGraph.
    PatternsDepRelFromGraph clsWVG = new PatternsDepRelFromGraph();
    if (PatternsDepRelFromGraph.listOfAllPatterns.size() == 0) {
      clsWVG.collectAllDepRelPatternsFromTrainData(
          listSentence, discardDepRelUsingProbabilityInReducedGraph);
    }

    int[][] arrClauseBoundOfSen = new TKOutputPST().getClauseBoundOfAllSen(inClauseBoundFileName);

    // Truncate the output file before instance vectors are appended at the end.
    FileUtility.writeInFile(outputFile, "", false);

    // read trigger word list
    Triggers.readTriggersAndNegativeWord();

    for (int s = 0; s < listSentence.size(); s++) {

      Sentence objCurSen = listSentence.get(s);
      int senIndex = TKOutputPST.listAllSenIDs.indexOf(objCurSen.senID);

      // only those sentences are taken into account which has more than one entity annotations
      if (objCurSen.listOfEntities.size() > 1) {
        // NOTE(review): `senIndex > 0` passes null clause bounds both for unknown ids (-1)
        // AND for the first sentence (index 0) — possibly should be `>= 0`; confirm intent.
        generateVectorForSen(
            objCurSen,
            medtType,
            entPairFileName,
            discardDepRelUsingProbabilityInReducedGraph,
            useWalkFeatures,
            useRegExPatterns,
            useDepPatterns,
            useTriggers,
            triggersFromWholeRGinsteadOfLCP,
            useNegativeCues,
            relToBeConsidered,
            senIndex > 0 ? arrClauseBoundOfSen[senIndex] : null);
      }
    }

    // Flush every accumulated instance vector to the output file in one write.
    FileUtility.writeInFile(outputFile, GenericFeatVect.getInstanceVectors(), false);
  }
Esempio n. 7
0
  /**
   * Builds the feature vector for a single entity pair (e1, e2) of a relation, using the
   * dependency graph of the sentence, and appends it to the global instance lists in
   * {@code GenericFeatVect}.
   *
   * <p>Returns {@code false} (and logs the pair plus sentence text to the vector output file)
   * when no minimal subtree/path connects the two entities, in which case no instance is added.
   *
   * <p>NOTE(review): the previous Javadoc listed parameters that are not part of this signature;
   * rewritten to match the actual parameter list.
   *
   * @param objRel the relation whose argument pair is being vectorized
   * @param objCurSen the sentence containing the pair
   * @param medtType MEDT kernel variant used when extracting the minimal subtree
   * @param discardDepRelUsingProbabilityInReducedGraph forwarded to the reduced-graph extraction
   * @param useWalkFeatures enable e-walk/v-walk features
   * @param useRegExPatterns enable regex PPI-pattern features
   * @param useDepPatterns enable dependency-relation pattern features
   * @param useTriggers enable trigger-word features
   * @param triggersFromWholeRGinsteadOfLCP take trigger words from the whole reduced graph
   *     instead of only the least common parents
   * @param useNegativeCues enable negative-cue and negation-scope features
   * @param e1 first target entity
   * @param e2 second target entity
   * @return {@code true} when an instance was generated and recorded, {@code false} otherwise
   * @throws IOException if writing the vector output file fails
   */
  public boolean setInpVectFromDepGraphOfPairsAndTrigger(
      Relation objRel,
      Sentence objCurSen,
      int medtType,
      boolean discardDepRelUsingProbabilityInReducedGraph,
      boolean useWalkFeatures,
      boolean useRegExPatterns,
      boolean useDepPatterns,
      boolean useTriggers,
      boolean triggersFromWholeRGinsteadOfLCP,
      boolean useNegativeCues,
      Entity e1,
      Entity e2)
      throws IOException {

    // Parallel lists: feature indices and their counts for the current instance.
    ArrayList<Integer> listFeatIndsOfCurInp = new ArrayList<Integer>(),
        listFeatCountOfCurInp = new ArrayList<Integer>();

    // Head words of the two entities in the dependency tree.
    DepTreeNode
        headOfEnt1 =
            objCurSen.depTree.getHeadWordFromWordBoundaries(
                e1.getAllWordIndexes(), true, objCurSen),
        headOfEnt2 =
            objCurSen.depTree.getHeadWordFromWordBoundaries(
                e2.getAllWordIndexes(), true, objCurSen);

    DepTreeNode dn = null;

    //	System.out.println(e1.id + "  " + e2.id);

    // All nodes in the shortest path connecting the target pairs must be retained
    // All nodes satisfying the 3 rules of MEDT kernel must be retained
    dn =
        objCurSen
            .depTree
            .clone()
            .findMinimalSubTreeWithEntities(false, headOfEnt1, medtType, headOfEnt2);

    /*
     *  If there is no minimal subtree/path between target entities then we do not consider to generate
     *  instance for training/testing.
     */
    if (dn == null) {
      FileUtility.writeInFile(
          GenericFeatVect.vectOutFile,
          e1.id + " " + e1.name + "\t" + e2.id + " " + e2.name + "\n\n",
          true);

      FileUtility.writeInFile(GenericFeatVect.vectOutFile, objCurSen.text + "\n\n", true);

      return false;
    }

    // Record a human-readable header (pair ids/names + sentence text) for this instance.
    GenericFeatVect.listOfAllInstances.add(
        e1.id + " " + e1.name + "\t" + e2.id + " " + e2.name + "\n\n" + objCurSen.text + "\n\n");

    // Construct feature set using e-walks and v-walks
    // (the dn != null guard is redundant here — dn == null already returned above).
    if (dn != null && useWalkFeatures) {

      createNgramFeatures(objCurSen.depTree, 1, e1, listFeatIndsOfCurInp, listFeatCountOfCurInp);
      createNgramFeatures(objCurSen.depTree, 1, e2, listFeatIndsOfCurInp, listFeatCountOfCurInp);

      // System.out.println(objCurSen.senID + "  " + dn.wordIndex);
      objCurSen.depTree.replaceEntitiesWithDummies(
          dn, e1.boundaries, e2.boundaries, new ArrayList<Integer>());
      createFeaturesFromInputGraph(
          objCurSen.depTree,
          dn,
          "",
          listFeatIndsOfCurInp,
          listFeatCountOfCurInp,
          new ArrayList<Integer>(),
          e1.boundaries,
          e2.boundaries);
    }

    // Regex PPI patterns are tried in both argument orders.
    if (useRegExPatterns) {
      matchPPIpatternOnSentence(
          objCurSen.text, e1.name, e2.name, listFeatIndsOfCurInp, listFeatCountOfCurInp);
      matchPPIpatternOnSentence(
          objCurSen.text, e2.name, e1.name, listFeatIndsOfCurInp, listFeatCountOfCurInp);
    }

    ArrayList<String> listOfDepRelsInReducedGraph = new ArrayList<String>();

    ArrayList<ArrayList<Integer>> listNodesAndLCP =
        new PatternsDepRelFromGraph()
            .extractDepRelsInReducedGraph(
                objRel,
                objCurSen,
                listOfDepRelsInReducedGraph,
                discardDepRelUsingProbabilityInReducedGraph,
                false);

    if (useDepPatterns)
      extractDepPatternFeatures(
          listOfDepRelsInReducedGraph, listFeatIndsOfCurInp, listFeatCountOfCurInp);

    //  NOTE: listNodesAndLCP has two elements - (0) all the nodes in Reduced graph, and (1) the
    // least common parents (LCPs) in Reduced graph.

    if (listNodesAndLCP == null) listNodesAndLCP = new ArrayList<ArrayList<Integer>>();

    // Trigger-word features: taken either from every reduced-graph node or only from the LCPs.
    if (useTriggers && listNodesAndLCP.size() > 0) {
      if (triggersFromWholeRGinsteadOfLCP)
        for (int i = 0; i < listNodesAndLCP.get(0).size(); i++)
          addTriggerWordFeatures(
              objCurSen.depGraph.allNodesByWordIndex[listNodesAndLCP.get(0).get(i)].word,
              listFeatIndsOfCurInp,
              listFeatCountOfCurInp);
      else
        for (int i = 0; i < listNodesAndLCP.get(1).size(); i++)
          addTriggerWordFeatures(
              objCurSen.depGraph.allNodesByWordIndex[listNodesAndLCP.get(1).get(i)].word,
              listFeatIndsOfCurInp,
              listFeatCountOfCurInp);
    }
    // *
    // Negative-cue features over all reduced-graph nodes.
    if (useNegativeCues && listNodesAndLCP.size() > 0) {

      for (int i = 0; i < listNodesAndLCP.get(0).size(); i++) {
        // System.out.println(objCurSen.senID + "  " + dn.wordIndex);
        // addNegativeWordFeatures( graph.allNodesByWordIndex[listNodesAndLCP.get(0).get(i)].word,
        // listFeatIndsOfCurInp, listFeatCountOfCurInp);

        if (Triggers.listOfNegativeWords.contains(
            objCurSen.depGraph.allNodesByWordIndex[listNodesAndLCP.get(0).get(i)].lemma)) {

          // add a feature indicating that there is a negative word in the reduced graph
          String[] feature =
              new String[] {
                "HasNegWord@$"
                    + objCurSen.depGraph.allNodesByWordIndex[listNodesAndLCP.get(0).get(i)].lemma,
              };

          GenericFeatVect.addNewFeatureInList(
              feature, 1, listFeatIndsOfCurInp, listFeatCountOfCurInp, 1);

          // NOTE(review): the +1 / +2 word-index offsets below are not bounds-checked —
          // a negative cue at (or next to) the sentence end would overrun
          // allNodesByWordIndex; confirm upstream guarantees.
          if (objCurSen.depGraph.allNodesByWordIndex[listNodesAndLCP.get(0).get(i) + 1].lemma
                  .equalsIgnoreCase("be")
              || objCurSen.depGraph.allNodesByWordIndex[listNodesAndLCP.get(0).get(i) + 1].pos
                  .equalsIgnoreCase("IN")) {
            feature =
                new String[] {
                  "WordNextToNegCue@$"
                      + objCurSen
                          .depGraph
                          .allNodesByWordIndex[listNodesAndLCP.get(0).get(i) + 2]
                          .lemma,
                };
          } else {
            feature =
                new String[] {
                  "WordNextToNegCue@$"
                      + objCurSen
                          .depGraph
                          .allNodesByWordIndex[listNodesAndLCP.get(0).get(i) + 1]
                          .lemma,
                };
          }

          GenericFeatVect.addNewFeatureInList(
              feature, 1, listFeatIndsOfCurInp, listFeatCountOfCurInp, 1);

          // *
          // extract negation scope features
          if (objCurSen.depGraph.allNodesByWordIndex[listNodesAndLCP.get(0).get(i)].lemma.matches(
              "(no|not)"))
            extractNegationScopeFeatures(
                objCurSen,
                listFeatIndsOfCurInp,
                listFeatCountOfCurInp,
                e1,
                e2,
                listNodesAndLCP.get(0).get(i),
                listNodesAndLCP.get(0));
          // */

        }
      }
    }
    // */

    // add Zhou et al. 2005 features
    new ExtAceFeatVect()
        .getZhouEtAl2005FeatVal(objCurSen, e1, e2, listFeatIndsOfCurInp, listFeatCountOfCurInp);

    extractNonTargetEntityFeatures(objCurSen, listFeatIndsOfCurInp, listFeatCountOfCurInp, e1, e2);

    // Keep feature indices sorted before the instance is recorded.
    GenericFeatVect.sortFeatValByIndx(listFeatIndsOfCurInp, listFeatCountOfCurInp);

    GenericFeatVect.listOfAllInstancesWithFeat.add(listFeatIndsOfCurInp);
    GenericFeatVect.listOfAllInstancesWithFeatCount.add(listFeatCountOfCurInp);

    return true;
  }
Esempio n. 8
0
  /**
   * Generates one feature-vector instance for every relation (entity pair) of the given sentence,
   * delegating the per-pair work to {@code setInpVectFromDepGraphOfPairsAndTrigger}.
   *
   * <p>NOTE(review): the previous Javadoc listed parameters that are not part of this signature;
   * rewritten to match the actual parameter list.
   *
   * @param objCurSen sentence whose relations are processed
   * @param medtType MEDT kernel variant, forwarded to the pair-level generator
   * @param entPairFileName file where ids of successfully vectorized pairs are appended
   *     (skipped when empty)
   * @param discardDepRelUsingProbabilityInReducedGraph forwarded to the pair-level generator
   * @param useWalkFeatures forwarded feature switch
   * @param useRegExPatterns forwarded feature switch
   * @param useDepPatterns forwarded feature switch
   * @param useTriggers forwarded feature switch
   * @param triggersFromWholeRGinsteadOfLCP forwarded feature switch
   * @param useNegativeCues forwarded feature switch
   * @param relToBeConsidered filter used by {@code TKOutputPST.skipInstance}
   * @param arrClauseBoundOfSen clause boundaries of this sentence (may be null)
   * @throws IOException if writing the entity-pair file fails
   */
  private void generateVectorForSen(
      Sentence objCurSen,
      int medtType,
      String entPairFileName,
      boolean discardDepRelUsingProbabilityInReducedGraph,
      boolean useWalkFeatures,
      boolean useRegExPatterns,
      boolean useDepPatterns,
      boolean useTriggers,
      boolean triggersFromWholeRGinsteadOfLCP,
      boolean useNegativeCues,
      ClauseAnalyser.eDataFilterOption relToBeConsidered,
      int[] arrClauseBoundOfSen)
      throws IOException {

    // for each pair of entities, find minimal subtrees and output it with 1 or 0
    // 1 represents there exists a relation between those entities
    for (int r = 0; r < objCurSen.listRels.size(); r++) {

      Entity e1 = objCurSen.getEntityById(objCurSen.listRels.get(r).arg1);
      Entity e2 = objCurSen.getEntityById(objCurSen.listRels.get(r).arg2);

      // checking relation type
      if (TKOutputPST.skipInstance(arrClauseBoundOfSen, relToBeConsidered, e1, e2, objCurSen, r))
        continue;

      // Global positive/negative relation counters (kept even for pairs that
      // later fail to produce an instance).
      if (!objCurSen.listRels.get(r).isPositive) TKOutputPST.totalRelNeg++;
      else TKOutputPST.totalRelPos++;

      boolean isSet =
          setInpVectFromDepGraphOfPairsAndTrigger(
              objCurSen.listRels.get(r),
              objCurSen,
              medtType,
              discardDepRelUsingProbabilityInReducedGraph,
              useWalkFeatures,
              useRegExPatterns,
              useDepPatterns,
              useTriggers,
              triggersFromWholeRGinsteadOfLCP,
              useNegativeCues,
              e1,
              e2);

      // Record the instance polarity only when an instance was actually generated.
      if (isSet)
        GenericFeatVect.listOfAllInstancePolarity.add(
            objCurSen.listRels.get(r).isPositive ? 1 : -1);
      // */

      if (!TextUtility.isEmptyString(entPairFileName)) {
        if (isSet) FileUtility.writeInFile(entPairFileName, e1.id + "\t" + e2.id + "\n", true);
        /*
        //if ( !str.isEmpty() )
        if ( !isSet )
        	FileUtility.writeInFile(entPairFileName, objCurSen.listRels.get(r).printString() + "\tFOUND\n", true);
        else {
        	FileUtility.writeInFile(entPairFileName, objCurSen.listRels.get(r).printString() + "\tNOT_FOUND\n", true);
        }
        */
      }
    }
  }
Esempio n. 9
0
  /**
   * Remove the words that are in the same NP of an entity. Note: This particular pre-processing
   * didn't improve results.
   *
   * <p>Reads the multi-line input file, and for every sentence uses the corresponding CFG parse
   * to compute the least phrasal category of each token; tokens preceding an entity token inside
   * the same least NP are dropped. The input file is then rewritten in place.
   *
   * @param bioRelExInpFile input file in multi-line (token-per-line) format; rewritten in place
   * @param psgParsedFileName file holding the CFG (phrase-structure) parse of every sentence
   * @throws Exception if reading the inputs or writing the rewritten file fails
   */
  public void removeOtherWordsInLeastNPofEnt(String bioRelExInpFile, String psgParsedFileName)
      throws Exception {

    ArrayList<ArrayList<String>> allSen = FileUtility.readAllMultiLineInputs(bioRelExInpFile);

    ArrayList<CFGParseOfSen> listCFGParseOfAllSen =
        CFGParseOfSen.readCFGParseForAllSen(psgParsedFileName);
    // Truncate the input file; the filtered sentences are appended back below.
    FileUtility.writeInFile(bioRelExInpFile, "", false);
    String psg = "", prevId = "";

    // Sentences come in pairs of blocks (s = token block, s+1 = the following block).
    for (int s = 0; s < allSen.size(); s = s + 2) {
      StringBuilder sb = new StringBuilder();

      // Re-fetch the parse only when the sentence id changes.
      if (!prevId.equals(allSen.get(s).get(0)))
        psg = CFGParseOfSen.getBySenId(listCFGParseOfAllSen, allSen.get(s).get(0)).psgParse;

      prevId = allSen.get(s).get(0);

      // find the least phrasal category for each token
      // ")" runs are expanded so every closing bracket becomes its own "()"-style token.
      String[] tmp = psg.replaceAll("\\)\\)", ") ()").replaceAll("\\s+", " ").trim().split("\\(");
      ArrayList<String[]> listTokPhraseCat = new ArrayList<String[]>();
      int phraseCatIndex = 0, rBrack = 0;

      // 1st element in tmp is empty
      for (int i = 1; i < tmp.length; i++) {

        String[] str = tmp[i].trim().split("\\s+");
        rBrack = 0;

        // A 2-element entry is "category word"; walk backwards to find its enclosing
        // (still-open) phrasal category, counting closing brackets along the way.
        if (str.length > 1) {
          for (int k = listTokPhraseCat.size() - 1; k >= 0; k--) {
            if (listTokPhraseCat.get(k).length == 1
                && !listTokPhraseCat.get(k)[0].equals(")")
                && rBrack == 0) {
              // str[0] = str[1];
              str[1] = listTokPhraseCat.get(k)[0];
              listTokPhraseCat.add(str);
              break;
            } else if (listTokPhraseCat.get(k).length == 1
                && listTokPhraseCat.get(k)[0].equals(")")) rBrack++;
            else if (listTokPhraseCat.get(k).length == 1
                && !listTokPhraseCat.get(k)[0].equals(")")
                && rBrack != 0) rBrack--;
          }
        } else if (str.length == 1 && !str[0].equals(")")) {
          // A bare category opens a new phrase; tag it with a unique index.
          str[0] = str[0] + "-" + phraseCatIndex;
          listTokPhraseCat.add(str);
          phraseCatIndex++;
        } else if (str.length == 1 && str[0].equals(")")) listTokPhraseCat.add(str);
      }

      // remove all elements other than words
      for (int i = listTokPhraseCat.size() - 1; i >= 0; i--) {
        if (listTokPhraseCat.get(i).length == 1) {
          listTokPhraseCat.remove(i);
        }
      }

      // remove the words which are in the least NP of a protein
      // the 1st item in the list is sentence id
      // search from the 2nd token = 3rd item in the list
      for (int i = 2; i < allSen.get(s).size(); i++) {
        tmp = allSen.get(s).get(i).split("\\s+");

        if (!tmp[3].equals("O") && listTokPhraseCat.get(i - 1)[1].contains("NP-")) {
          // checking previous token
          if (i > 1
              && !(allSen.get(s).get(i - 1).contains("\tB-e")
                  || allSen.get(s).get(i - 1).contains("\tI-e"))
              && listTokPhraseCat.get(i - 1)[1].equals(listTokPhraseCat.get(i - 1 - 1)[1])
          // && !listTokPhraseCat.get(i-1-1)[0].equals("DT")
          ) {
            listTokPhraseCat.remove(i - 1 - 1);
            allSen.get(s).remove(i - 1);
            i--;
          }
          /*/ checking next token
          else if ( i < allSen.get(s).size() && listTokPhraseCat.get(i-1)[1].equals(listTokPhraseCat.get(i)[1]) ) {
          	listTokPhraseCat.remove(i);
          	allSen.remove(s).get(i);
          	i--;
          }*/
        }
      }

      // re-writing the input
      for (int i = 0; i < allSen.get(s).size(); i++) {
        sb.append(allSen.get(s).get(i) + "\n");
      }

      sb.append("\n");
      sb.append(allSen.get(s + 1).get(0) + "\n");
      sb.append("\n");

      FileUtility.writeInFile(bioRelExInpFile, sb.toString(), true);
    }
  }
Esempio n. 10
0
  /** Horizontal rule used to delimit report sections on stdout. */
  private static final String SEPARATOR =
      "----------------------------------------------------------------------------------";

  /**
   * Entry point: builds an ID3 decision tree from a training CSV, reports its accuracy on the
   * test set, post-prunes it using the validation set, and reports accuracy again.
   *
   * <p>Expected arguments: {@code L K trainingset validationset testset toPrint(yes/no)}.
   */
  public static void main(String args[]) {

    System.out.println("Solution to HomeWork2 -- Machine learning");
    System.out.println("Implementation of ID3 Algorithm : ");
    System.out.println("==========================================");

    // Fail fast with a usage message instead of ArrayIndexOutOfBoundsException.
    if (args.length < 6) {
      System.out.println("Usage: <L> <K> <trainingset> <validationset> <testset> <toPrint yes|no>");
      return;
    }

    // Parse arguments
    int L = Integer.parseInt(args[0]);
    int K = Integer.parseInt(args[1]);
    String trainingset = args[2];
    String validationset = args[3];
    String testset = args[4];
    boolean toPrint = args[5].equalsIgnoreCase("yes");

    // Read the three CSV files (training / validation / test).
    FileUtility fu1 = new FileUtility();
    parsedTrainingAttributes = fu1.parseCSVFile(trainingset);
    parsedValidationAttributes = fu1.parseCSVFile(validationset);
    parsedTestAttributes = fu1.parseCSVFile(testset);

    System.out.println("Read Successfull..........");
    System.out.println();

    // Test print
    fu1.printSets(parsedTrainingAttributes);

    // Build tree using information gain.
    DecisionTree dtree = new DecisionTree();
    dtree.buildTree(parsedTrainingAttributes, new Node());

    System.out.println(SEPARATOR);
    AccuracyCalculator ac1 = new AccuracyCalculator();

    // Before pruning: optionally dump the tree, always report accuracy.
    if (toPrint) {
      printTree("before pruning: ", dtree);
    }
    printAccuracyReport("before pruning : ", ac1, dtree);
    if (toPrint) {
      System.out.println(SEPARATOR);
    }
    System.out.println("");

    // Perform Post pruning the tree
    pruneTree(dtree, L, K);

    // After pruning: same report.
    if (toPrint) {
      printTree("after pruning: ", dtree);
    }
    printAccuracyReport("after pruning: ", ac1, dtree);
    System.out.println(SEPARATOR);
  }

  /** Prints the tree preceded by a "Decision tree &lt;label&gt;" heading. */
  private static void printTree(String label, DecisionTree dtree) {
    System.out.println("Decision tree " + label);
    System.out.println("");
    System.out.println(dtree);
  }

  /** Prints accuracy over the test set plus the matched-class count. */
  private static void printAccuracyReport(
      String label, AccuracyCalculator ac, DecisionTree dtree) {
    System.out.println(
        "Accuracy of decision tree "
            + label
            + ac.getAccuracy(parsedTestAttributes, dtree.treeRootNode)
            + "%");
    System.out.println("Total Matched classes : " + (int) ac.matchCount);
  }

  /** Post-prunes the tree with the validation set; prune failures are logged, not fatal. */
  private static void pruneTree(DecisionTree dtree, int L, int K) {
    try {
      dtree.performPostPruning(L, K, parsedValidationAttributes);
    } catch (CloneNotSupportedException e) {
      e.printStackTrace();
    }
  }