private CText getTaxonomyList(final String inputText, final String title, boolean breakdown) { /* * Step 1: Create a document model */ CText document = new CText(title); /* * Step 2:Extract the N-Grams from the document */ CNGramsExtractor nGramsExtract = new CNGramsExtractor(); // INFO System.out.println("Last execution"); if (nGramsExtract.extract(document, inputText)) { document.setState(CText.E_STATES.NGRAMS); System.out.println("NGram extracted"); /* * Step 3: Extract Composite and semantics */ CTaxonomyExtractor taxonomyExtractor = new CTaxonomyExtractor(_taxonomyConnection, breakdown); if (taxonomyExtractor.extract(document)) { document.setState(CText.E_STATES.TAXONOMY); } } return document; }
/**
 * Runs N-Grams generation and taxonomy extraction on a text fragment in
 * order to collect frequency statistics for a label.
 * <p>
 * NOTE(review): the loop that looked up the rank of {@code label} among the
 * taxonomy instances of the document is disabled (see the comment in the
 * body). As a consequence the rank is never set and this method currently
 * returns {@code null} on every path, even when both extractions succeed.
 *
 * @param content text of the fragment; also passed to the {@code CText}
 *                constructor
 * @param label   label used to construct the statistics object
 * @return the N-Grams frequency statistics if the label rank is
 *         non-negative, {@code null} otherwise (always {@code null} while
 *         the rank lookup is disabled)
 * @throws SemanticAnalysisException declared by this method; presumably
 *         raised by one of the extraction calls - TODO confirm
 */
private CNGramsStats extractFragment(final String content, String label) throws SemanticAnalysisException {
    // Stays at -1 for as long as the rank lookup below is disabled.
    int labelRank = -1;

    CText fragment = new CText(content);
    CNGramsStats stats = new CNGramsStats(label);
    CNGramsGenerator generator = new CNGramsGenerator(stats);

    if (!generator.extract(fragment, content)) {
        return null;
    }

    // From here on use the statistics object held by the generator.
    stats = generator.getNGramsFrequencyStats();
    if (stats == null) {
        return null;
    }

    NTaxonomiesConn connection = CTaxonomyConnectionsPool.getInstance().getLabelsAndCatsConnection();
    CTaxonomyExtractor taxonomy = new CTaxonomyExtractor(connection);
    if (taxonomy.extract(fragment)) {
        // Walk through the list of taxonomyInstance nouns from this
        // document to extract statistics for the label.
        //
        // Disabled lookup, kept for reference:
        //
        // int rank = 0;
        // for (CTaxonomyInstance taxonomyInstance : document.getObjectsMap().values()) {
        //     if (label.compareTo(taxonomyInstance.getLabel()) == 0) {
        //         nGramRank = rank;
        //         break;
        //     }
        //     rank++;
        // }
    }

    if (labelRank >= 0) {
        return stats;
    }
    return null;
}