/**
 * Builds a {@code CText} for the given title and pushes it through the N-gram and taxonomy
 * extraction stages.
 *
 * <p>The document state is set to {@code NGRAMS} once the N-gram extraction reports success, and
 * to {@code TAXONOMY} once the taxonomy extraction reports success as well. The taxonomy stage is
 * attempted only if the N-gram stage succeeded. Two progress messages are written to standard
 * output along the way.
 *
 * @param inputText text handed to the N-gram extractor
 * @param title value used to construct the document
 * @param breakdown flag forwarded unchanged to the {@code CTaxonomyExtractor} constructor
 * @return the document, regardless of how far the pipeline got; callers can tell the stage
 *     reached only through the state set on it
 */
private CText getTaxonomyList(final String inputText, final String title, boolean breakdown) {
  // Stage 1: the document model every later stage works on.
  final CText doc = new CText(title);

  // Stage 2: N-gram extraction.
  final CNGramsExtractor nGramStage = new CNGramsExtractor();

  // INFO
  System.out.println("Last execution");

  final boolean nGramsExtracted = nGramStage.extract(doc, inputText);
  if (!nGramsExtracted) {
    // Without N-grams there is nothing for the taxonomy stage to work on.
    return doc;
  }
  doc.setState(CText.E_STATES.NGRAMS);
  System.out.println("NGram extracted");

  // Stage 3: composite and semantic (taxonomy) extraction.
  final CTaxonomyExtractor taxonomyStage =
      new CTaxonomyExtractor(_taxonomyConnection, breakdown);
  final boolean taxonomyExtracted = taxonomyStage.extract(doc);
  if (taxonomyExtracted) {
    doc.setState(CText.E_STATES.TAXONOMY);
  }

  return doc;
}
Exemplo n.º 2
0
  /**
   * Runs N-gram generation and taxonomy extraction over a content fragment on behalf of a label.
   *
   * <p>NOTE(review): the loop that was meant to find {@code label} among the document's taxonomy
   * instances is disabled (preserved as a comment in the body). As a result {@code nGramRank} is
   * never assigned and this method currently returns {@code null} for every input, even when
   * both extraction steps succeed. Restore the lookup, or simplify the method, once the intended
   * behaviour is confirmed.
   *
   * <p>NOTE(review): {@code content} is used both to construct the {@code CText} and as the text
   * passed to the generator; the sibling {@code getTaxonomyList} passes a title to that
   * constructor instead. Confirm this is intended.
   *
   * @param content text of the fragment to analyse
   * @param label label used to create the initial {@code CNGramsStats}
   * @return the N-gram frequency statistics if a non-negative rank was recorded for the label,
   *     otherwise {@code null}; with the lookup disabled this is always {@code null}
   * @throws SemanticAnalysisException declared on the signature; which of the calls below raises
   *     it is not visible from this method
   */
  private CNGramsStats extractFragment(final String content, String label)
      throws SemanticAnalysisException {
    // Stays negative until the label is located; see the disabled lookup below.
    int nGramRank = -1;

    CText document = new CText(content);
    CNGramsGenerator nGramsExtractor = new CNGramsGenerator(new CNGramsStats(label));
    if (!nGramsExtractor.extract(document, content)) {
      return null;
    }

    // The generator's own statistics object is the one handed back to the caller.
    CNGramsStats nGramsFrequencyStats = nGramsExtractor.getNGramsFrequencyStats();
    if (nGramsFrequencyStats == null) {
      return null;
    }

    NTaxonomiesConn taxonomyConnection =
        CTaxonomyConnectionsPool.getInstance().getLabelsAndCatsConnection();
    CTaxonomyExtractor taxonomyExtractor = new CTaxonomyExtractor(taxonomyConnection);
    if (taxonomyExtractor.extract(document)) {
      // Disabled lookup: walk the taxonomy instances of this document and record the
      // position at which the label appears.
      //
      // int rank = 0;
      // for (CTaxonomyInstance taxonomyInstance : document.getObjectsMap().values()) {
      //   if (label.compareTo(taxonomyInstance.getLabel()) == 0) {
      //     nGramRank = rank;
      //     break;
      //   }
      //   rank++;
      // }
    }

    return (nGramRank >= 0) ? nGramsFrequencyStats : null;
  }